Skip to content

Commit 518292d

Browse files
author
QingShan Zhang
committed
[PowerPC] Add the MacroFusion support for Power8
This patch is intended to implement the missing P8 MacroFusion for LLVM according to the Power8 User's Manual, Section 10.1.12 "Instruction Fusion". Differential Revision: https://reviews.llvm.org/D70651
1 parent f481256 commit 518292d

File tree

9 files changed

+319
-1
lines changed

9 files changed

+319
-1
lines changed

llvm/lib/Target/PowerPC/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ add_llvm_target(PowerPCCodeGen
3333
PPCMCInstLower.cpp
3434
PPCMachineFunctionInfo.cpp
3535
PPCMachineScheduler.cpp
36+
PPCMacroFusion.cpp
3637
PPCMIPeephole.cpp
3738
PPCRegisterInfo.cpp
3839
PPCQPXLoadSplat.cpp

llvm/lib/Target/PowerPC/PPC.td

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,16 @@ def FeatureHTM : SubtargetFeature<"htm", "HasHTM", "true",
166166
"Enable Hardware Transactional Memory instructions">;
167167
def FeatureMFTB : SubtargetFeature<"", "FeatureMFTB", "true",
168168
"Implement mftb using the mfspr instruction">;
169+
def FeatureFusion : SubtargetFeature<"fusion", "HasFusion", "true",
170+
"Target supports instruction fusion">;
171+
def FeatureAddiLoadFusion : SubtargetFeature<"fuse-addi-load",
172+
"HasAddiLoadFusion", "true",
173+
"Power8 Addi-Load fusion",
174+
[FeatureFusion]>;
175+
def FeatureAddisLoadFusion : SubtargetFeature<"fuse-addis-load",
176+
"HasAddisLoadFusion", "true",
177+
"Power8 Addis-Load fusion",
178+
[FeatureFusion]>;
169179
def FeatureUnalignedFloats :
170180
SubtargetFeature<"allow-unaligned-fp-access", "AllowsUnalignedFPAccess",
171181
"true", "CPU does not trap on unaligned FP access">;
@@ -279,7 +289,8 @@ def ProcessorFeatures {
279289
FeatureDirectMove,
280290
FeatureICBT,
281291
FeaturePartwordAtomic];
282-
list<SubtargetFeature> P8SpecificFeatures = [];
292+
list<SubtargetFeature> P8SpecificFeatures = [FeatureAddiLoadFusion,
293+
FeatureAddisLoadFusion];
283294
list<SubtargetFeature> P8InheritableFeatures =
284295
!listconcat(P7InheritableFeatures, P8AdditionalFeatures);
285296
list<SubtargetFeature> P8Features =
Lines changed: 203 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,203 @@
1+
//===- PPCMacroFusion.cpp - PowerPC Macro Fusion --------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
/// \file This file contains the PowerPC implementation of the DAG scheduling
10+
/// mutation to pair instructions back to back.
11+
//
12+
//===----------------------------------------------------------------------===//
13+
14+
#include "PPC.h"
15+
#include "PPCSubtarget.h"
16+
#include "llvm/ADT/DenseSet.h"
17+
#include "llvm/CodeGen/MacroFusion.h"
18+
19+
using namespace llvm;
20+
namespace {
21+
22+
/// Describes one macro-fusion rule: a set of opcodes for the first
/// instruction, a set of opcodes for the second instruction, whether the rule
/// is enabled, and which operand of the second instruction must be fed by the
/// result of the first one.
class FusionFeature {
public:
  /// Set of machine opcodes taking part in a fusion rule.
  using FusionOpSet = SmallDenseSet<unsigned>;

  /// One enumerator (FK_<KIND>) per FUSION_FEATURE entry of
  /// PPCMacroFusion.def, terminated by FK_END.
  /// FUSION_KIND is intentionally left defined; it is used again further down
  /// in this file.
  enum FusionKind {
#define FUSION_KIND(KIND) FK_##KIND
#define FUSION_FEATURE(KIND, HAS_FEATURE, DEP_OP_IDX, OPSET1, OPSET2)          \
  FUSION_KIND(KIND),
#include "PPCMacroFusion.def"
    FUSION_KIND(END)
  };

  FusionFeature(FusionKind Kind, bool HasFeature, int Index,
                const FusionOpSet &First, const FusionOpSet &Second)
      : RuleKind(Kind), Enabled(HasFeature), DependentOpIdx(Index),
        FirstOps(First), SecondOps(Second) {}

  /// The fusion kind this rule was created for.
  FusionKind getKind() const { return RuleKind; }

  /// True if the rule was constructed as enabled.
  bool isSupported() const { return Enabled; }

  /// True if \p Opc may be the first instruction of the fused pair.
  bool hasOp1(unsigned Opc) const { return FirstOps.count(Opc) > 0; }

  /// True if \p Opc may be the second instruction of the fused pair.
  bool hasOp2(unsigned Opc) const { return SecondOps.count(Opc) > 0; }

  /// Operand index in the second instruction that must match the result of
  /// the first instruction, or None when the stored index is negative (any
  /// operand is acceptable).
  Optional<unsigned> depOpIdx() const {
    if (DependentOpIdx >= 0)
      return static_cast<unsigned>(DependentOpIdx);
    return None;
  }

private:
  // Every rule carries exactly one kind; all instruction pairs of the same
  // kind share the same fusion characteristics.
  FusionKind RuleKind;
  // Whether this rule is enabled.
  bool Enabled;
  // Example:
  //   li   rx, si
  //   load rt, ra, rx
  // Index of the dependent operand in the second op (the load). A negative
  // value means the dependency may be on any operand.
  int DependentOpIdx;
  // Opcodes accepted for the first instruction.
  FusionOpSet FirstOps;
  // Opcodes accepted for the second instruction.
  FusionOpSet SecondOps;
};
65+
66+
static bool matchingRegOps(const MachineInstr &FirstMI,
67+
int FirstMIOpIndex,
68+
const MachineInstr &SecondMI,
69+
int SecondMIOpIndex) {
70+
const MachineOperand &Op1 = FirstMI.getOperand(FirstMIOpIndex);
71+
const MachineOperand &Op2 = SecondMI.getOperand(SecondMIOpIndex);
72+
if (!Op1.isReg() || !Op2.isReg())
73+
return false;
74+
75+
return Op1.getReg() == Op2.getReg();
76+
}
77+
78+
// Return true if the FirstMI meets the constraints of SecondMI according to
79+
// fusion specification.
80+
static bool checkOpConstraints(FusionFeature::FusionKind Kd,
81+
const MachineInstr &FirstMI,
82+
const MachineInstr &SecondMI) {
83+
switch (Kd) {
84+
// The hardware didn't require any specific check for the fused instructions'
85+
// operands. Therefore, return true to indicate that, it is fusable.
86+
default: return true;
87+
// [addi rt,ra,si - lxvd2x xt,ra,rb] etc.
88+
case FusionFeature::FK_AddiLoad: {
89+
// lxvd2x(ra) cannot be zero
90+
const MachineOperand &RA = SecondMI.getOperand(1);
91+
if (!RA.isReg())
92+
return true;
93+
94+
return Register::isVirtualRegister(RA.getReg()) ||
95+
(RA.getReg() != PPC::ZERO && RA.getReg() != PPC::ZERO8);
96+
}
97+
// [addis rt,ra,si - ld rt,ds(ra)] etc.
98+
case FusionFeature::FK_AddisLoad: {
99+
const MachineOperand &RT = SecondMI.getOperand(0);
100+
if (!RT.isReg())
101+
return true;
102+
103+
// Only check it for non-virtual register.
104+
if (!Register::isVirtualRegister(RT.getReg()))
105+
// addis(rt) = ld(ra) = ld(rt)
106+
// ld(rt) cannot be zero
107+
if (!matchingRegOps(SecondMI, 0, SecondMI, 2) ||
108+
(RT.getReg() == PPC::ZERO || RT.getReg() == PPC::ZERO8))
109+
return false;
110+
111+
// addis(si) first 12 bits must be all 1s or all 0s
112+
const MachineOperand &SI = FirstMI.getOperand(2);
113+
if (!SI.isImm())
114+
return true;
115+
int64_t Imm = SI.getImm();
116+
if (((Imm & 0xFFF0) != 0) || ((Imm & 0xFFF0) != 0xFFF0))
117+
return false;
118+
119+
// If si = 1111111111110000 and the msb of the d/ds field of the load equals
120+
// 1, then fusion does not occur.
121+
if ((Imm & 0xFFF0) == 0xFFF0) {
122+
const MachineOperand &D = SecondMI.getOperand(1);
123+
if (!D.isImm())
124+
return true;
125+
126+
// 14 bit for DS field, while 16 bit for D field.
127+
int MSB = 15;
128+
if (SecondMI.getOpcode() == PPC::LD)
129+
MSB = 13;
130+
131+
return (D.getImm() & (1ULL << MSB)) == 0;
132+
}
133+
return true;
134+
}
135+
}
136+
137+
llvm_unreachable("All the cases should have been handled");
138+
return true;
139+
}
140+
141+
/// Check if the instr pair, FirstMI and SecondMI, should be fused together.
/// Given SecondMI, when FirstMI is unspecified, then check if SecondMI may be
/// part of a fused pair at all.
///
/// \param TII      Unused; part of the callback signature.
/// \param TSI      Subtarget of the function being scheduled; must be a
///                 PPCSubtarget. Its fusion feature bits are queried on every
///                 call.
/// \param FirstMI  Candidate first instruction, or nullptr to only ask
///                 whether \p SecondMI can be the second half of any pair.
/// \param SecondMI Candidate second instruction.
/// \returns true if the pair (or \p SecondMI alone, when \p FirstMI is null)
/// matches an enabled fusion rule.
static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
                                   const TargetSubtargetInfo &TSI,
                                   const MachineInstr *FirstMI,
                                   const MachineInstr &SecondMI) {
  // We use the PPC namespace to avoid the need to prefix opcodes with PPC:: in
  // the def file.
  using namespace PPC;

  const PPCSubtarget &ST = static_cast<const PPCSubtarget &>(TSI);

  // The opcode sets and dependent operand indices do not depend on the
  // subtarget, so this table is built only once. The enable flag stored in
  // each entry is deliberately not used here: a function-local static is
  // initialized on the first call only, so a flag captured from that call's
  // subtarget would go stale when a later call passes a different one.
  static const FusionFeature FusionFeatures[] = {
#define FUSION_FEATURE(KIND, HAS_FEATURE, DEP_OP_IDX, OPSET1, OPSET2)          \
  {FusionFeature::FUSION_KIND(KIND), /*HasFeature=*/true, DEP_OP_IDX,          \
   {OPSET1}, {OPSET2}},
#include "PPCMacroFusion.def"
  };

  // Per-call enable flags, in the same order as the table above (both are
  // generated from the same .def file).
  const bool FeatureEnabled[] = {
#define FUSION_FEATURE(KIND, HAS_FEATURE, DEP_OP_IDX, OPSET1, OPSET2)          \
  ST.HAS_FEATURE(),
#include "PPCMacroFusion.def"
  };
#undef FUSION_KIND

  // FK_END equals the number of FUSION_FEATURE entries.
  for (unsigned I = 0; I != FusionFeature::FK_END; ++I) {
    // Skip if the feature is not supported by this subtarget.
    if (!FeatureEnabled[I])
      continue;

    const FusionFeature &Feature = FusionFeatures[I];

    // Only when the SecondMI is fusable, we are starting to look for the
    // fusable FirstMI.
    if (!Feature.hasOp2(SecondMI.getOpcode()))
      continue;

    // If FirstMI == nullptr, that means, we're only checking whether SecondMI
    // can be fused at all.
    if (!FirstMI)
      return true;

    // Checking if the FirstMI is fusable with the SecondMI.
    if (!Feature.hasOp1(FirstMI->getOpcode()))
      continue;

    // Checking if the result of the FirstMI is the desired operand of the
    // SecondMI if the DepOpIdx is set. Otherwise, ignore it.
    auto DepOpIdx = Feature.depOpIdx();
    if (DepOpIdx.hasValue() &&
        !matchingRegOps(*FirstMI, 0, SecondMI, *DepOpIdx))
      return false;

    // Checking more on the instruction operands.
    if (checkOpConstraints(Feature.getKind(), *FirstMI, SecondMI))
      return true;
  }

  return false;
}
194+
195+
} // end anonymous namespace
196+
197+
namespace llvm {
198+
199+
std::unique_ptr<ScheduleDAGMutation> createPowerPCMacroFusionDAGMutation () {
200+
return createMacroFusionDAGMutation(shouldScheduleAdjacent);
201+
}
202+
203+
} // end namespace llvm
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
//===- PPCMacroFusion.def - PowerPC MacroFusion Candidates ------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// This file contains descriptions of the macro-fusion pair for PowerPC.
10+
//
11+
//===----------------------------------------------------------------------===//
12+
13+
// NOTE: NO INCLUDE GUARD DESIRED!
14+
15+
#ifndef FUSION_FEATURE
16+
17+
// Each FUSION_FEATURE is assigned with one TYPE, and can be enabled/disabled
18+
// by HAS_FEATURE. The instructions pair is fusable only when the opcode
19+
// of the first instruction is in OPSET1, and the second instruction opcode is
20+
// in OPSET2. And if DEP_OP_IDX >=0, we will check the result of first OP is
21+
// the operand of the second op with DEP_OP_IDX as its operand index. We assume
22+
// that the result of the first op is its operand zero.
23+
#define FUSION_FEATURE(TYPE, HAS_FEATURE, DEP_OP_IDX, OPSET1, OPSET2)
24+
25+
#endif
26+
27+
#ifndef FUSION_OP_SET
28+
#define FUSION_OP_SET(...) __VA_ARGS__
29+
#endif
30+
31+
// Power8 User Manual Section 10.1.12, Instruction Fusion
32+
// {addi} followed by one of these {lxvd2x, lxvw4x, lxvdsx, lvebx, lvehx,
33+
// lvewx, lvx, lxsdx}
34+
FUSION_FEATURE(AddiLoad, hasAddiLoadFusion, 2, \
35+
FUSION_OP_SET(ADDI, ADDI8, ADDItocL), \
36+
FUSION_OP_SET(LXVD2X, LXVW4X, LXVDSX, LVEBX, LVEHX, LVEWX, \
37+
LVX, LXSDX))
38+
39+
// {addis} followed by one of these {ld, lbz, lhz, lwz}
40+
FUSION_FEATURE(AddisLoad, hasAddisLoadFusion, 2, \
41+
FUSION_OP_SET(ADDIS, ADDIS8, ADDIStocHA8), \
42+
FUSION_OP_SET(LD, LBZ, LBZ8, LHZ, LHZ8, LWZ, LWZ8))
43+
44+
#undef FUSION_FEATURE
45+
#undef FUSION_OP_SET
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
//===- PPCMacroFusion.h - PowerPC Macro Fusion ----------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
/// \file This file contains the PowerPC definition of the DAG scheduling
10+
/// mutation to pair instructions back to back.
11+
//
12+
//===----------------------------------------------------------------------===//
13+
14+
#ifndef LLVM_LIB_TARGET_POWERPC_PPCMACROFUSION_H
#define LLVM_LIB_TARGET_POWERPC_PPCMACROFUSION_H

#include "llvm/CodeGen/MachineScheduler.h"

namespace llvm {

/// Creates the PowerPC macro-fusion scheduling DAG mutation.
///
/// Note that you have to add:
///   DAG.addMutation(createPowerPCMacroFusionDAGMutation());
/// to PPCPassConfig::createMachineScheduler() to have an effect.
std::unique_ptr<ScheduleDAGMutation> createPowerPCMacroFusionDAGMutation();

} // end namespace llvm

#endif // LLVM_LIB_TARGET_POWERPC_PPCMACROFUSION_H

llvm/lib/Target/PowerPC/PPCSubtarget.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,9 @@ void PPCSubtarget::initializeEnvironment() {
111111
IsQPXStackUnaligned = false;
112112
HasHTM = false;
113113
HasFloat128 = false;
114+
HasFusion = false;
115+
HasAddiLoadFusion = false;
116+
HasAddisLoadFusion = false;
114117
IsISA3_0 = false;
115118
UseLongCalls = false;
116119
SecurePlt = false;

llvm/lib/Target/PowerPC/PPCSubtarget.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,9 @@ class PPCSubtarget : public PPCGenSubtargetInfo {
135135
bool HasDirectMove;
136136
bool HasHTM;
137137
bool HasFloat128;
138+
bool HasFusion;
139+
bool HasAddiLoadFusion;
140+
bool HasAddisLoadFusion;
138141
bool IsISA3_0;
139142
bool UseLongCalls;
140143
bool SecurePlt;
@@ -306,6 +309,9 @@ class PPCSubtarget : public PPCGenSubtargetInfo {
306309
bool hasFloat128() const { return HasFloat128; }
307310
bool isISA3_0() const { return IsISA3_0; }
308311
bool useLongCalls() const { return UseLongCalls; }
312+
bool hasFusion() const { return HasFusion; }
313+
bool hasAddiLoadFusion() const { return HasAddiLoadFusion; }
314+
bool hasAddisLoadFusion() const { return HasAddisLoadFusion; }
309315
bool needsSwapsForVSXMemOps() const {
310316
return hasVSX() && isLittleEndian() && !hasP9Vector();
311317
}

llvm/lib/Target/PowerPC/PPCTargetMachine.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include "MCTargetDesc/PPCMCTargetDesc.h"
1515
#include "PPC.h"
1616
#include "PPCMachineScheduler.h"
17+
#include "PPCMacroFusion.h"
1718
#include "PPCSubtarget.h"
1819
#include "PPCTargetObjectFile.h"
1920
#include "PPCTargetTransformInfo.h"
@@ -275,6 +276,9 @@ static ScheduleDAGInstrs *createPPCMachineScheduler(MachineSchedContext *C) {
275276
std::make_unique<GenericScheduler>(C));
276277
// add DAG Mutations here.
277278
DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI));
279+
if (ST.hasFusion())
280+
DAG->addMutation(createPowerPCMacroFusionDAGMutation());
281+
278282
return DAG;
279283
}
280284

@@ -286,6 +290,8 @@ static ScheduleDAGInstrs *createPPCPostMachineScheduler(
286290
std::make_unique<PPCPostRASchedStrategy>(C) :
287291
std::make_unique<PostGenericScheduler>(C), true);
288292
// add DAG Mutations here.
293+
if (ST.hasFusion())
294+
DAG->addMutation(createPowerPCMacroFusionDAGMutation());
289295
return DAG;
290296
}
291297

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
; REQUIRES: asserts
2+
; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -verify-misched -debug-only=machine-scheduler \
3+
; RUN: -o - 2>&1 > /dev/null | FileCheck %s --check-prefix=CHECK-P8
4+
5+
@m = local_unnamed_addr global i64 0, align 8
6+
7+
define i64 @fuse_addis_ld() {
8+
entry:
9+
; CHECK-P8: ********** MI Scheduling **********
10+
; CHECK-P8-LABEL: fuse_addis_ld:%bb.0 entry
11+
; CHECK-P8: Macro fuse: SU([[SU0:[0-9]+]]) - SU([[SU1:[0-9]+]]) / ADDIStocHA8 - LD
12+
; CHECK-P8: SU([[SU0]]): %[[REG3:[0-9]+]]:g8rc_and_g8rc_nox0 = ADDIStocHA8 $x2, @m
13+
; CHECK-P8: SU([[SU1]]): %{{[0-9]+}}:g8rc = LD target-flags(ppc-toc-lo) @m, %[[REG3]]
14+
; CHECK-P8: ********** MI Scheduling **********
15+
; CHECK-P8-LABEL: fuse_addis_ld:%bb.0 entry
16+
; CHECK-P8: Macro fuse: SU([[SU0:[0-9]+]]) - SU([[SU1:[0-9]+]]) / ADDIStocHA8 - LD
17+
; CHECK-P8: SU([[SU0]]): renamable $x[[REG3:[0-9]+]] = ADDIStocHA8 $x2, @m
18+
; CHECK-P8: SU([[SU1]]): renamable $x[[REG3]] = LD target-flags(ppc-toc-lo) @m, renamable $x[[REG3]]
19+
%0 = load i64, i64* @m, align 8
20+
ret i64 %0
21+
}

0 commit comments

Comments
 (0)