Skip to content

Commit 1c2617c

Browse files
committed
Address review comments:
- Instead of erasing the old mask, move it after MI. Even though in practice there shouldn't be any other users of V0 due to SelectionDAG emitting a copy for every user, this should preserve the existing value of V0 anyway. This is safe to do because we know there's no other defs of V0 between MaskDef and MI. - Remove isOpSameAs. Turns out we don't need to check if False is NoRegister, because that implicit_def->noreg peephole only operates on passthru operands. - Check that TrueMI and MI are in the same block
1 parent aa4c5db commit 1c2617c

File tree

2 files changed

+154
-36
lines changed

2 files changed

+154
-36
lines changed

llvm/lib/Target/RISCV/RISCVFoldMasks.cpp

Lines changed: 42 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -55,12 +55,11 @@ class RISCVFoldMasks : public MachineFunctionPass {
5555
StringRef getPassName() const override { return "RISC-V Fold Masks"; }
5656

5757
private:
58+
bool convertToUnmasked(MachineInstr &MI, MachineInstr *MaskDef);
5859
bool foldVMergeIntoOps(MachineInstr &MI, MachineInstr *MaskDef);
5960
bool convertVMergeToVMv(MachineInstr &MI, MachineInstr *MaskDef);
60-
bool convertToUnmasked(MachineInstr &MI, MachineInstr *MaskDef);
6161

6262
bool isAllOnesMask(MachineInstr *MaskDef);
63-
bool isOpSameAs(const MachineOperand &LHS, const MachineOperand &RHS);
6463
};
6564

6665
} // namespace
@@ -119,17 +118,6 @@ static unsigned getVMSetForLMul(RISCVII::VLMUL LMUL) {
119118
llvm_unreachable("Unknown VLMUL enum");
120119
}
121120

122-
// Returns true if LHS is the same register as RHS, or if LHS is undefined.
123-
bool RISCVFoldMasks::isOpSameAs(const MachineOperand &LHS,
124-
const MachineOperand &RHS) {
125-
if (LHS.getReg() == RISCV::NoRegister)
126-
return true;
127-
if (RHS.getReg() == RISCV::NoRegister)
128-
return false;
129-
return TRI->lookThruCopyLike(LHS.getReg(), MRI) ==
130-
TRI->lookThruCopyLike(RHS.getReg(), MRI);
131-
}
132-
133121
// Try to fold away VMERGE_VVM instructions. We handle these cases:
134122
// -Masked TU VMERGE_VVM combined with an unmasked TA instruction instruction
135123
// folds to a masked TU instruction. VMERGE_VVM must have have merge operand
@@ -163,10 +151,14 @@ bool RISCVFoldMasks::foldVMergeIntoOps(MachineInstr &MI,
163151
return false;
164152

165153
MachineInstr &TrueMI = *MRI->getVRegDef(True->getReg());
154+
if (TrueMI.getParent() != MI.getParent())
155+
return false;
166156

167157
// We require that either merge and false are the same, or that merge
168158
// is undefined.
169-
if (!isOpSameAs(*Merge, *False))
159+
if (Merge->getReg() != RISCV::NoRegister &&
160+
TRI->lookThruCopyLike(Merge->getReg(), MRI) !=
161+
TRI->lookThruCopyLike(False->getReg(), MRI))
170162
return false;
171163

172164
// N must be the only user of True.
@@ -177,21 +169,22 @@ bool RISCVFoldMasks::foldVMergeIntoOps(MachineInstr &MI,
177169
const MCInstrDesc &TrueMCID = TrueMI.getDesc();
178170
bool HasTiedDest = RISCVII::isFirstDefTiedToFirstUse(TrueMCID);
179171

180-
bool IsMasked = false;
172+
const bool MIIsMasked =
173+
BaseOpc == RISCV::VMERGE_VVM && !isAllOnesMask(MaskDef);
174+
bool TrueIsMasked = false;
181175
const RISCV::RISCVMaskedPseudoInfo *Info =
182176
RISCV::lookupMaskedIntrinsicByUnmasked(TrueOpc);
183177
if (!Info && HasTiedDest) {
184178
Info = RISCV::getMaskedPseudoInfo(TrueOpc);
185-
IsMasked = true;
179+
TrueIsMasked = true;
186180
}
187181

188182
if (!Info)
189183
return false;
190184

191185
// When Mask is not a true mask, this transformation is illegal for some
192186
// operations whose results are affected by mask, like viota.m.
193-
if (Info->MaskAffectsResult && BaseOpc == RISCV::VMERGE_VVM &&
194-
!isAllOnesMask(MaskDef))
187+
if (Info->MaskAffectsResult && MIIsMasked)
195188
return false;
196189

197190
MachineOperand &TrueMergeOp = TrueMI.getOperand(1);
@@ -203,20 +196,21 @@ bool RISCVFoldMasks::foldVMergeIntoOps(MachineInstr &MI,
203196
return false;
204197
// Both the vmerge instruction and the True instruction must have the same
205198
// merge operand.
206-
if (!isOpSameAs(TrueMergeOp, *False))
199+
if (TrueMergeOp.getReg() != RISCV::NoRegister &&
200+
TrueMergeOp.getReg() != False->getReg())
207201
return false;
208202
}
209203

210-
if (IsMasked) {
204+
if (TrueIsMasked) {
211205
assert(HasTiedDest && "Expected tied dest");
212206
// The vmerge instruction must be TU.
213207
if (Merge->getReg() == RISCV::NoRegister)
214208
return false;
215-
// The vmerge instruction must have an all 1s mask since we're going to keep
216-
// the mask from the True instruction.
217-
// FIXME: Support mask agnostic True instruction which would have an
218-
// undef merge operand.
219-
if (BaseOpc == RISCV::VMERGE_VVM && !isAllOnesMask(MaskDef))
209+
// MI must have an all 1s mask since we're going to keep the mask from the
210+
// True instruction.
211+
// FIXME: Support mask agnostic True instruction which would have an undef
212+
// merge operand.
213+
if (MIIsMasked)
220214
return false;
221215
}
222216

@@ -225,10 +219,6 @@ bool RISCVFoldMasks::foldVMergeIntoOps(MachineInstr &MI,
225219
if (TII->get(TrueOpc).hasUnmodeledSideEffects())
226220
return false;
227221

228-
// The vector policy operand may be present for masked intrinsics
229-
const MachineOperand &TrueVL =
230-
TrueMI.getOperand(RISCVII::getVLOpNum(TrueMCID));
231-
232222
auto GetMinVL =
233223
[](const MachineOperand &LHS,
234224
const MachineOperand &RHS) -> std::optional<MachineOperand> {
@@ -246,7 +236,9 @@ bool RISCVFoldMasks::foldVMergeIntoOps(MachineInstr &MI,
246236

247237
// Because MI and True must have the same merge operand (or True's operand is
248238
// implicit_def), the "effective" body is the minimum of their VLs.
249-
const MachineOperand VL = MI.getOperand(RISCVII::getVLOpNum(MI.getDesc()));
239+
const MachineOperand &TrueVL =
240+
TrueMI.getOperand(RISCVII::getVLOpNum(TrueMCID));
241+
const MachineOperand &VL = MI.getOperand(RISCVII::getVLOpNum(MI.getDesc()));
250242
auto MinVL = GetMinVL(TrueVL, VL);
251243
if (!MinVL)
252244
return false;
@@ -255,7 +247,7 @@ bool RISCVFoldMasks::foldVMergeIntoOps(MachineInstr &MI,
255247
// If we end up changing the VL or mask of True, then we need to make sure it
256248
// doesn't raise any observable fp exceptions, since changing the active
257249
// elements will affect how fflags is set.
258-
if (VLChanged || !IsMasked)
250+
if (VLChanged || !TrueIsMasked)
259251
if (TrueMCID.mayRaiseFPException() &&
260252
!TrueMI.getFlag(MachineInstr::MIFlag::NoFPExcept))
261253
return false;
@@ -287,8 +279,9 @@ bool RISCVFoldMasks::foldVMergeIntoOps(MachineInstr &MI,
287279
// Set the merge to the false operand of the merge.
288280
TrueMI.getOperand(1).setReg(False->getReg());
289281

282+
bool NeedToMoveOldMask = TrueIsMasked;
290283
// If we're converting it to a masked pseudo, reuse MI's mask.
291-
if (!IsMasked) {
284+
if (!TrueIsMasked) {
292285
if (BaseOpc == RISCV::VMV_V_V) {
293286
// If MI is a vmv.v.v, it won't have a mask operand. So insert an all-ones
294287
// mask just before True.
@@ -302,6 +295,7 @@ bool RISCVFoldMasks::foldVMergeIntoOps(MachineInstr &MI,
302295
BuildMI(*MI.getParent(), TrueMI, MI.getDebugLoc(), TII->get(RISCV::COPY),
303296
RISCV::V0)
304297
.addReg(Dest);
298+
NeedToMoveOldMask = true;
305299
}
306300

307301
TrueMI.setDesc(MaskedMCID);
@@ -342,9 +336,17 @@ bool RISCVFoldMasks::foldVMergeIntoOps(MachineInstr &MI,
342336
MRI->constrainRegClass(PassthruReg, V0RC);
343337

344338
MRI->replaceRegWith(MI.getOperand(0).getReg(), TrueMI.getOperand(0).getReg());
339+
340+
// We need to move the old mask copy to after MI if:
341+
// - TrueMI is masked and we are using its mask instead
342+
// - We created a new all ones mask that clobbers V0
343+
if (NeedToMoveOldMask && MaskDef) {
344+
assert(MaskDef->getParent() == MI.getParent());
345+
MaskDef->removeFromParent();
346+
MI.getParent()->insertAfter(MI.getIterator(), MaskDef);
347+
}
348+
345349
MI.eraseFromParent();
346-
if (IsMasked)
347-
MaskDef->eraseFromParent();
348350

349351
return true;
350352
}
@@ -369,8 +371,11 @@ bool RISCVFoldMasks::convertVMergeToVMv(MachineInstr &MI, MachineInstr *V0Def) {
369371
CASE_VMERGE_TO_VMV(M8)
370372
}
371373

374+
Register MergeReg = MI.getOperand(1).getReg();
375+
Register FalseReg = MI.getOperand(2).getReg();
372376
// Check merge == false (or merge == undef)
373-
if (!isOpSameAs(MI.getOperand(1), MI.getOperand(2)))
377+
if (MergeReg != RISCV::NoRegister && TRI->lookThruCopyLike(MergeReg, MRI) !=
378+
TRI->lookThruCopyLike(FalseReg, MRI))
374379
return false;
375380

376381
assert(MI.getOperand(4).isReg() && MI.getOperand(4).getReg() == RISCV::V0);
@@ -468,8 +473,9 @@ bool RISCVFoldMasks::runOnMachineFunction(MachineFunction &MF) {
468473

469474
CurrentV0Def = nullptr;
470475
for (MachineInstr &MI : MBB) {
471-
Changed |= convertVMergeToVMv(MI, CurrentV0Def);
472476
Changed |= convertToUnmasked(MI, CurrentV0Def);
477+
Changed |= convertVMergeToVMv(MI, CurrentV0Def);
478+
473479
if (MI.definesRegister(RISCV::V0, TRI))
474480
CurrentV0Def = &MI;
475481
}
Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
2+
# RUN: llc %s -o - -mtriple=riscv64 -mattr=+v -run-pass=riscv-fold-masks \
3+
# RUN: -verify-machineinstrs | FileCheck %s
4+
5+
---
6+
name: vmerge_unmasked
7+
body: |
8+
bb.0:
9+
liveins: $x1, $v0, $v8, $v9, $v10
10+
; CHECK-LABEL: name: vmerge_unmasked
11+
; CHECK: liveins: $x1, $v0, $v8, $v9, $v10
12+
; CHECK-NEXT: {{ $}}
13+
; CHECK-NEXT: %avl:gprnox0 = COPY $x1
14+
; CHECK-NEXT: %false:vrnov0 = COPY $v8
15+
; CHECK-NEXT: %mask:vmv0 = COPY $v0
16+
; CHECK-NEXT: $v0 = COPY %mask
17+
; CHECK-NEXT: %true:vrnov0 = PseudoVADD_VV_M1_MASK %false, $v9, $v10, $v0, %avl, 5 /* e32 */, 1 /* ta, mu */
18+
%avl:gprnox0 = COPY $x1
19+
%false:vr = COPY $v8
20+
%true:vr = PseudoVADD_VV_M1 $noreg, $v9, $v10, %avl, 5, 3
21+
%mask:vmv0 = COPY $v0
22+
$v0 = COPY %mask
23+
%x:vrnov0 = PseudoVMERGE_VVM_M1 $noreg, %false, %true, $v0, %avl, 5
24+
...
25+
---
26+
name: vmerge_masked
27+
body: |
28+
bb.0:
29+
liveins: $x1, $v0, $v8, $v9, $v10
30+
; CHECK-LABEL: name: vmerge_masked
31+
; CHECK: liveins: $x1, $v0, $v8, $v9, $v10
32+
; CHECK-NEXT: {{ $}}
33+
; CHECK-NEXT: %avl:gprnox0 = COPY $x1
34+
; CHECK-NEXT: %false:vrnov0 = COPY $v8
35+
; CHECK-NEXT: %addmask:vmv0 = COPY $v0
36+
; CHECK-NEXT: $v0 = COPY %addmask
37+
; CHECK-NEXT: %mergemask:vmv0 = PseudoVMSET_M_B8 %avl, 5 /* e32 */
38+
; CHECK-NEXT: %true:vrnov0 = PseudoVADD_VV_M1_MASK %false, $v9, $v10, $v0, %avl, 5 /* e32 */, 0 /* tu, mu */
39+
; CHECK-NEXT: $v0 = COPY %mergemask
40+
%avl:gprnox0 = COPY $x1
41+
%false:vrnov0 = COPY $v8
42+
%addmask:vmv0 = COPY $v0
43+
$v0 = COPY %addmask
44+
%true:vrnov0 = PseudoVADD_VV_M1_MASK %false, $v9, $v10, $v0, %avl, 5, 0
45+
%mergemask:vmv0 = PseudoVMSET_M_B8 %avl, 5
46+
$v0 = COPY %mergemask
47+
%x:vrnov0 = PseudoVMERGE_VVM_M1 %false, %false, %true, $v0, %avl, 5
48+
...
49+
---
50+
name: vmv_unmasked
51+
body: |
52+
bb.0:
53+
liveins: $x1, $v8, $v9, $v10
54+
; CHECK-LABEL: name: vmv_unmasked
55+
; CHECK: liveins: $x1, $v8, $v9, $v10
56+
; CHECK-NEXT: {{ $}}
57+
; CHECK-NEXT: %avl:gprnox0 = COPY $x1
58+
; CHECK-NEXT: %false:vr = COPY $v8
59+
; CHECK-NEXT: [[PseudoVMSET_M_B8_:%[0-9]+]]:vr = PseudoVMSET_M_B8 %avl, 5 /* e32 */
60+
; CHECK-NEXT: $v0 = COPY [[PseudoVMSET_M_B8_]]
61+
; CHECK-NEXT: %true:vr = PseudoVADD_VV_M1 %false, $v9, $v10, %avl, 5 /* e32 */, 0 /* tu, mu */
62+
%avl:gprnox0 = COPY $x1
63+
%false:vr = COPY $v8
64+
%true:vr = PseudoVADD_VV_M1 $noreg, $v9, $v10, %avl, 5, 3
65+
%x:vr = PseudoVMV_V_V_M1 %false, %true, %avl, 5, 0
66+
...
67+
---
68+
name: vmv_masked
69+
body: |
70+
bb.0:
71+
liveins: $x1, $v0, $v8, $v9, $v10
72+
; CHECK-LABEL: name: vmv_masked
73+
; CHECK: liveins: $x1, $v0, $v8, $v9, $v10
74+
; CHECK-NEXT: {{ $}}
75+
; CHECK-NEXT: %avl:gprnox0 = COPY $x1
76+
; CHECK-NEXT: %false:vrnov0 = COPY $v8
77+
; CHECK-NEXT: %addmask:vmv0 = COPY $v0
78+
; CHECK-NEXT: %true:vrnov0 = PseudoVADD_VV_M1_MASK %false, $v9, $v10, $v0, %avl, 5 /* e32 */, 0 /* tu, mu */
79+
; CHECK-NEXT: $v0 = COPY %addmask
80+
%avl:gprnox0 = COPY $x1
81+
%false:vrnov0 = COPY $v8
82+
%addmask:vmv0 = COPY $v0
83+
$v0 = COPY %addmask
84+
%true:vrnov0 = PseudoVADD_VV_M1_MASK $noreg, $v9, $v10, $v0, %avl, 5, 3
85+
%x:vrnov0 = PseudoVMV_V_V_M1 %false, %true, %avl, 5, 0
86+
...
87+
---
88+
# Shouldn't fold this because vadd is in another BB
89+
name: different_bb
90+
body: |
91+
; CHECK-LABEL: name: different_bb
92+
; CHECK: bb.0:
93+
; CHECK-NEXT: successors: %bb.1(0x80000000)
94+
; CHECK-NEXT: liveins: $x1, $v0, $v8, $v9, $v10
95+
; CHECK-NEXT: {{ $}}
96+
; CHECK-NEXT: %avl:gprnox0 = COPY $x1
97+
; CHECK-NEXT: %false:vr = COPY $v8
98+
; CHECK-NEXT: %true:vr = PseudoVADD_VV_M1 $noreg, $v9, $v10, %avl, 5 /* e32 */, 3 /* ta, ma */
99+
; CHECK-NEXT: {{ $}}
100+
; CHECK-NEXT: bb.1:
101+
; CHECK-NEXT: %mask:vmv0 = COPY $v0
102+
; CHECK-NEXT: $v0 = COPY %mask
103+
; CHECK-NEXT: %x:vrnov0 = PseudoVMERGE_VVM_M1 $noreg, %false, %true, $v0, %avl, 5 /* e32 */
104+
bb.0:
105+
liveins: $x1, $v0, $v8, $v9, $v10
106+
%avl:gprnox0 = COPY $x1
107+
%false:vr = COPY $v8
108+
%true:vr = PseudoVADD_VV_M1 $noreg, $v9, $v10, %avl, 5, 3
109+
bb.1:
110+
%mask:vmv0 = COPY $v0
111+
$v0 = COPY %mask
112+
%x:vrnov0 = PseudoVMERGE_VVM_M1 $noreg, %false, %true, $v0, %avl, 5

0 commit comments

Comments
 (0)