Skip to content

Commit 72e6c1c

Browse files
authored
[RISCV] Begin moving post-isel vector peepholes to a MF pass (#70342)
We currently have three postprocess peephole optimisations for vector pseudos:

1) Masked pseudo with all ones mask -> unmasked pseudo
2) Merge vmerge pseudo into operand pseudo's mask
3) vmerge pseudo with all ones mask -> vmv.v.v pseudo

This patch aims to move these peepholes out of SelectionDAG and into a separate RISCVFoldMasks MachineFunction pass.

There are a few motivations for doing this:

* The current SelectionDAG implementation operates on MachineSDNodes, which are essentially MachineInstrs but require a bunch of logic to reason about chain and glue operands. The RISCVII::has*Op helper functions also don't exactly line up with the SDNode operands. Mutating these pseudos and their operands in place becomes a good bit easier at the MachineInstr level. For example, we would no longer need to check for cycles in the DAG during performCombineVMergeAndVOps.
* Although it's further down the line, moving this code out of SelectionDAG allows it to be reused by GlobalISel later on.
* In performCombineVMergeAndVOps, it may be possible to commute the operands to enable folding in more cases (see test/CodeGen/RISCV/rvv/vmadd-vp.ll). There is existing machinery to commute operands in TII::commuteInstruction, but it's implemented on MachineInstrs.

The pass runs straight after ISel, before any of the other machine SSA optimization passes run. This is so that dead-mi-elimination can mop up any vmsets that are no longer used (but if preferred we could try and erase them from inside RISCVFoldMasks itself). This also means that these peepholes are no longer run at codegen -O0, so this patch isn't strictly NFC.

Only the performVMergeToVMv peephole is refactored in this patch, the remaining two would be implemented later. And as noted by @preames, it should be possible to move doPeepholeSExtW out of SelectionDAG as well.
1 parent fe8335b commit 72e6c1c

File tree

7 files changed

+254
-36
lines changed

7 files changed

+254
-36
lines changed

llvm/lib/Target/RISCV/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ add_llvm_target(RISCVCodeGen
3333
RISCVMakeCompressible.cpp
3434
RISCVExpandAtomicPseudoInsts.cpp
3535
RISCVExpandPseudoInsts.cpp
36+
RISCVFoldMasks.cpp
3637
RISCVFrameLowering.cpp
3738
RISCVGatherScatterLowering.cpp
3839
RISCVInsertVSETVLI.cpp

llvm/lib/Target/RISCV/RISCV.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,9 @@ void initializeRISCVMakeCompressibleOptPass(PassRegistry &);
4545
FunctionPass *createRISCVGatherScatterLoweringPass();
4646
void initializeRISCVGatherScatterLoweringPass(PassRegistry &);
4747

48+
FunctionPass *createRISCVFoldMasksPass();
49+
void initializeRISCVFoldMasksPass(PassRegistry &);
50+
4851
FunctionPass *createRISCVOptWInstrsPass();
4952
void initializeRISCVOptWInstrsPass(PassRegistry &);
5053

Lines changed: 174 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,174 @@
1+
//===- RISCVFoldMasks.cpp - MI Vector Pseudo Mask Peepholes ---------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===---------------------------------------------------------------------===//
8+
//
9+
// This pass performs various peephole optimisations that fold masks into vector
10+
// pseudo instructions after instruction selection.
11+
//
12+
// Currently it converts
13+
// PseudoVMERGE_VVM %false, %false, %true, %allonesmask, %vl, %sew
14+
// ->
15+
// PseudoVMV_V_V %false, %true, %vl, %sew
16+
//
17+
//===---------------------------------------------------------------------===//
18+
19+
#include "RISCV.h"
20+
#include "RISCVSubtarget.h"
21+
#include "llvm/CodeGen/MachineFunctionPass.h"
22+
#include "llvm/CodeGen/MachineRegisterInfo.h"
23+
#include "llvm/CodeGen/TargetInstrInfo.h"
24+
#include "llvm/CodeGen/TargetRegisterInfo.h"
25+
26+
using namespace llvm;
27+
28+
#define DEBUG_TYPE "riscv-fold-masks"
29+
30+
namespace {
31+
32+
class RISCVFoldMasks : public MachineFunctionPass {
33+
public:
34+
static char ID;
35+
const TargetInstrInfo *TII;
36+
MachineRegisterInfo *MRI;
37+
const TargetRegisterInfo *TRI;
38+
RISCVFoldMasks() : MachineFunctionPass(ID) {
39+
initializeRISCVFoldMasksPass(*PassRegistry::getPassRegistry());
40+
}
41+
42+
bool runOnMachineFunction(MachineFunction &MF) override;
43+
MachineFunctionProperties getRequiredProperties() const override {
44+
return MachineFunctionProperties().set(
45+
MachineFunctionProperties::Property::IsSSA);
46+
}
47+
48+
StringRef getPassName() const override { return "RISC-V Fold Masks"; }
49+
50+
private:
51+
bool convertVMergeToVMv(MachineInstr &MI, MachineInstr *MaskDef);
52+
53+
bool isAllOnesMask(MachineInstr *MaskCopy);
54+
};
55+
56+
} // namespace
57+
58+
char RISCVFoldMasks::ID = 0;
59+
60+
INITIALIZE_PASS(RISCVFoldMasks, DEBUG_TYPE, "RISC-V Fold Masks", false, false)
61+
62+
/// Returns true if \p MaskCopy copies into $v0 a value that, after looking
/// through copy-like instructions, is defined by one of the PseudoVMSET_M_B*
/// pseudos. A null \p MaskCopy, a non-virtual source register, or a source
/// without a defining instruction all yield false.
bool RISCVFoldMasks::isAllOnesMask(MachineInstr *MaskCopy) {
  if (MaskCopy == nullptr)
    return false;
  assert(MaskCopy->isCopy() && MaskCopy->getOperand(0).getReg() == RISCV::V0);

  // Chase the copied value back to the register that originally produced it.
  const Register MaskSrc =
      TRI->lookThruCopyLike(MaskCopy->getOperand(1).getReg(), MRI);
  MachineInstr *MaskSrcDef =
      MaskSrc.isVirtual() ? MRI->getVRegDef(MaskSrc) : nullptr;
  if (MaskSrcDef == nullptr)
    return false;

  // TODO: Should the VMSET be required to have the expected bitwidth? A pseudo
  // with the wrong bitwidth has undefined behaviour, so treating it as
  // all-ones may be acceptable. The same question applies to its VL.
  const unsigned Opc = MaskSrcDef->getOpcode();
  return Opc == RISCV::PseudoVMSET_M_B1 || Opc == RISCV::PseudoVMSET_M_B2 ||
         Opc == RISCV::PseudoVMSET_M_B4 || Opc == RISCV::PseudoVMSET_M_B8 ||
         Opc == RISCV::PseudoVMSET_M_B16 || Opc == RISCV::PseudoVMSET_M_B32 ||
         Opc == RISCV::PseudoVMSET_M_B64;
}
90+
91+
// Transform (VMERGE_VVM_<LMUL> false, false, true, allones, vl, sew) to
92+
// (VMV_V_V_<LMUL> false, true, vl, sew). It may decrease uses of VMSET.
93+
bool RISCVFoldMasks::convertVMergeToVMv(MachineInstr &MI, MachineInstr *V0Def) {
94+
#define CASE_VMERGE_TO_VMV(lmul) \
95+
case RISCV::PseudoVMERGE_VVM_##lmul: \
96+
NewOpc = RISCV::PseudoVMV_V_V_##lmul; \
97+
break;
98+
unsigned NewOpc;
99+
switch (MI.getOpcode()) {
100+
default:
101+
llvm_unreachable("Expected VMERGE_VVM_<LMUL> instruction.");
102+
CASE_VMERGE_TO_VMV(MF8)
103+
CASE_VMERGE_TO_VMV(MF4)
104+
CASE_VMERGE_TO_VMV(MF2)
105+
CASE_VMERGE_TO_VMV(M1)
106+
CASE_VMERGE_TO_VMV(M2)
107+
CASE_VMERGE_TO_VMV(M4)
108+
CASE_VMERGE_TO_VMV(M8)
109+
}
110+
111+
Register MergeReg = MI.getOperand(1).getReg();
112+
Register FalseReg = MI.getOperand(2).getReg();
113+
// Check merge == false (or merge == undef)
114+
if (MergeReg != RISCV::NoRegister && TRI->lookThruCopyLike(MergeReg, MRI) !=
115+
TRI->lookThruCopyLike(FalseReg, MRI))
116+
return false;
117+
118+
assert(MI.getOperand(4).isReg() && MI.getOperand(4).getReg() == RISCV::V0);
119+
if (!isAllOnesMask(V0Def))
120+
return false;
121+
122+
MI.setDesc(TII->get(NewOpc));
123+
MI.removeOperand(1); // Merge operand
124+
MI.tieOperands(0, 1); // Tie false to dest
125+
MI.removeOperand(3); // Mask operand
126+
MI.addOperand(
127+
MachineOperand::CreateImm(RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED));
128+
129+
// vmv.v.v doesn't have a mask operand, so we may be able to inflate the
130+
// register class for the destination and merge operands e.g. VRNoV0 -> VR
131+
MRI->recomputeRegClass(MI.getOperand(0).getReg());
132+
MRI->recomputeRegClass(MI.getOperand(1).getReg());
133+
return true;
134+
}
135+
136+
/// Runs the mask-folding peepholes over every instruction of \p MF.
/// Returns true if any instruction was rewritten. Nothing is done when the
/// function is skipped or the subtarget has no vector instructions.
bool RISCVFoldMasks::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  // Skip if the vector extension is not enabled.
  const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
  if (!ST.hasVInstructions())
    return false;

  TII = ST.getInstrInfo();
  MRI = &MF.getRegInfo();
  TRI = MRI->getTargetRegisterInfo();

  bool Changed = false;

  // Masked pseudos coming out of isel will have their mask operand in the form:
  //
  // $v0:vr = COPY %mask:vr
  // %x:vr = Pseudo_MASK %a:vr, %b:vr, $v0:vr
  //
  // Because $v0 isn't in SSA, keep track of it so we can check the mask operand
  // on each pseudo.
  for (MachineBasicBlock &MBB : MF) {
    // The tracked definition never carries over from one block to the next.
    MachineInstr *CurrentV0Def = nullptr;
    for (MachineInstr &MI : MBB) {
      const bool IsVMerge =
          RISCV::getRVVMCOpcode(MI.getOpcode()) == RISCV::VMERGE_VVM;
      if (IsVMerge && convertVMergeToVMv(MI, CurrentV0Def))
        Changed = true;

      // Update the tracker only after MI itself has been examined, so MI is
      // checked against the definition that precedes it.
      if (MI.definesRegister(RISCV::V0, TRI))
        CurrentV0Def = &MI;
    }
  }

  return Changed;
}
173+
174+
/// Factory for the RISC-V mask folding pass: returns a newly allocated
/// RISCVFoldMasks instance.
FunctionPass *llvm::createRISCVFoldMasksPass() {
  return new RISCVFoldMasks();
}

llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp

Lines changed: 0 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -3685,40 +3685,6 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
36853685
return true;
36863686
}
36873687

3688-
// Transform (VMERGE_VVM_<LMUL> false, false, true, allones, vl, sew) to
3689-
// (VMV_V_V_<LMUL> false, true, vl, sew). It may decrease uses of VMSET.
3690-
bool RISCVDAGToDAGISel::performVMergeToVMv(SDNode *N) {
3691-
#define CASE_VMERGE_TO_VMV(lmul) \
3692-
case RISCV::PseudoVMERGE_VVM_##lmul: \
3693-
NewOpc = RISCV::PseudoVMV_V_V_##lmul; \
3694-
break;
3695-
unsigned NewOpc;
3696-
switch (N->getMachineOpcode()) {
3697-
default:
3698-
llvm_unreachable("Expected VMERGE_VVM_<LMUL> instruction.");
3699-
CASE_VMERGE_TO_VMV(MF8)
3700-
CASE_VMERGE_TO_VMV(MF4)
3701-
CASE_VMERGE_TO_VMV(MF2)
3702-
CASE_VMERGE_TO_VMV(M1)
3703-
CASE_VMERGE_TO_VMV(M2)
3704-
CASE_VMERGE_TO_VMV(M4)
3705-
CASE_VMERGE_TO_VMV(M8)
3706-
}
3707-
3708-
if (!usesAllOnesMask(N, /* MaskOpIdx */ 3))
3709-
return false;
3710-
3711-
SDLoc DL(N);
3712-
SDValue PolicyOp =
3713-
CurDAG->getTargetConstant(/*TUMU*/ 0, DL, Subtarget->getXLenVT());
3714-
SDNode *Result = CurDAG->getMachineNode(
3715-
NewOpc, DL, N->getValueType(0),
3716-
{N->getOperand(1), N->getOperand(2), N->getOperand(4), N->getOperand(5),
3717-
PolicyOp});
3718-
ReplaceUses(N, Result);
3719-
return true;
3720-
}
3721-
37223688
bool RISCVDAGToDAGISel::doPeepholeMergeVVMFold() {
37233689
bool MadeChange = false;
37243690
SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
@@ -3730,8 +3696,6 @@ bool RISCVDAGToDAGISel::doPeepholeMergeVVMFold() {
37303696

37313697
if (IsVMerge(N) || IsVMv(N))
37323698
MadeChange |= performCombineVMergeAndVOps(N);
3733-
if (IsVMerge(N) && N->getOperand(0) == N->getOperand(1))
3734-
MadeChange |= performVMergeToVMv(N);
37353699
}
37363700
return MadeChange;
37373701
}

llvm/lib/Target/RISCV/RISCVTargetMachine.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
101101
initializeRISCVOptWInstrsPass(*PR);
102102
initializeRISCVPreRAExpandPseudoPass(*PR);
103103
initializeRISCVExpandPseudoPass(*PR);
104+
initializeRISCVFoldMasksPass(*PR);
104105
initializeRISCVInsertVSETVLIPass(*PR);
105106
initializeRISCVInsertReadWriteCSRPass(*PR);
106107
initializeRISCVDAGToDAGISelPass(*PR);
@@ -414,7 +415,10 @@ void RISCVPassConfig::addPreEmitPass2() {
414415
}
415416

416417
void RISCVPassConfig::addMachineSSAOptimization() {
418+
addPass(createRISCVFoldMasksPass());
419+
417420
TargetPassConfig::addMachineSSAOptimization();
421+
418422
if (EnableMachineCombiner)
419423
addPass(&MachineCombinerID);
420424

llvm/test/CodeGen/RISCV/O3-pipeline.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@
8282
; CHECK-NEXT: Lazy Block Frequency Analysis
8383
; CHECK-NEXT: RISC-V DAG->DAG Pattern Instruction Selection
8484
; CHECK-NEXT: Finalize ISel and expand pseudo-instructions
85+
; CHECK-NEXT: RISC-V Fold Masks
8586
; CHECK-NEXT: Lazy Machine Block Frequency Analysis
8687
; CHECK-NEXT: Early Tail Duplication
8788
; CHECK-NEXT: Optimize machine instruction PHIs
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
2+
# RUN: llc %s -o - -mtriple=riscv64 -mattr=+v -run-pass=riscv-fold-masks \
3+
# RUN: -verify-machineinstrs | FileCheck %s
4+
5+
---
6+
name: undef_passthru
7+
body: |
8+
bb.0:
9+
liveins: $x1, $v8, $v9
10+
; CHECK-LABEL: name: undef_passthru
11+
; CHECK: liveins: $x1, $v8, $v9
12+
; CHECK-NEXT: {{ $}}
13+
; CHECK-NEXT: %false:vr = COPY $v8
14+
; CHECK-NEXT: %true:vr = COPY $v9
15+
; CHECK-NEXT: %avl:gprnox0 = COPY $x1
16+
; CHECK-NEXT: %mask:vmv0 = PseudoVMSET_M_B8 %avl, 5 /* e32 */
17+
; CHECK-NEXT: $v0 = COPY %mask
18+
; CHECK-NEXT: %x:vr = PseudoVMV_V_V_M1 %false, %true, %avl, 5 /* e32 */, 0 /* tu, mu */
19+
%false:vr = COPY $v8
20+
%true:vr = COPY $v9
21+
%avl:gprnox0 = COPY $x1
22+
%mask:vmv0 = PseudoVMSET_M_B8 %avl, 5
23+
$v0 = COPY %mask
24+
%x:vrnov0 = PseudoVMERGE_VVM_M1 $noreg, %false, %true, $v0, %avl, 5
25+
...
26+
---
27+
name: undef_false
28+
body: |
29+
bb.0:
30+
liveins: $x1, $v8, $v9
31+
; CHECK-LABEL: name: undef_false
32+
; CHECK: liveins: $x1, $v8, $v9
33+
; CHECK-NEXT: {{ $}}
34+
; CHECK-NEXT: %pt:vrnov0 = COPY $v8
35+
; CHECK-NEXT: %false:vr = COPY $noreg
36+
; CHECK-NEXT: %true:vr = COPY $v9
37+
; CHECK-NEXT: %avl:gprnox0 = COPY $x1
38+
; CHECK-NEXT: %mask:vmv0 = PseudoVMSET_M_B8 %avl, 5 /* e32 */
39+
; CHECK-NEXT: $v0 = COPY %mask
40+
; CHECK-NEXT: %x:vrnov0 = PseudoVMERGE_VVM_M1 %pt, %false, %true, $v0, %avl, 5 /* e32 */
41+
%pt:vrnov0 = COPY $v8
42+
%false:vr = COPY $noreg
43+
%true:vr = COPY $v9
44+
%avl:gprnox0 = COPY $x1
45+
%mask:vmv0 = PseudoVMSET_M_B8 %avl, 5
46+
$v0 = COPY %mask
47+
%x:vrnov0 = PseudoVMERGE_VVM_M1 %pt, %false, %true, $v0, %avl, 5
48+
...
49+
---
50+
name: equal_passthru_false
51+
body: |
52+
bb.0:
53+
liveins: $x1, $v8, $v9
54+
; CHECK-LABEL: name: equal_passthru_false
55+
; CHECK: liveins: $x1, $v8, $v9
56+
; CHECK-NEXT: {{ $}}
57+
; CHECK-NEXT: %false:vr = COPY $v8
58+
; CHECK-NEXT: %pt:vrnov0 = COPY $v8
59+
; CHECK-NEXT: %true:vr = COPY $v9
60+
; CHECK-NEXT: %avl:gprnox0 = COPY $x1
61+
; CHECK-NEXT: %mask:vmv0 = PseudoVMSET_M_B8 %avl, 5 /* e32 */
62+
; CHECK-NEXT: $v0 = COPY %mask
63+
; CHECK-NEXT: %x:vr = PseudoVMV_V_V_M1 %false, %true, %avl, 5 /* e32 */, 0 /* tu, mu */
64+
%false:vr = COPY $v8
65+
%pt:vrnov0 = COPY $v8
66+
%true:vr = COPY $v9
67+
%avl:gprnox0 = COPY $x1
68+
%mask:vmv0 = PseudoVMSET_M_B8 %avl, 5
69+
$v0 = COPY %mask
70+
%x:vrnov0 = PseudoVMERGE_VVM_M1 %pt, %false, %true, $v0, %avl, 5
71+
...

0 commit comments

Comments
 (0)