Skip to content

Commit cc7e836

Browse files
authored
[RISCV] Select mask operands as virtual registers and eliminate uses of vmv0 (llvm#125026)
This is another attempt at llvm#88496 to keep mask operands in SSA after instruction selection.

Previously we selected the mask operands into vmv0, a singleton register class with exactly one register, V0. But the register allocator doesn't really support singleton register classes, and we ran into errors like "ran out of registers during register allocation in function".

This patch avoids that problem by introducing a pass just before register allocation that converts any use of vmv0 to a copy to $v0, i.e. what isel currently does today. That way the register allocator doesn't need to deal with the singleton register class, but we get the benefits of having the mask registers in SSA throughout the backend:

- This allows RISCVVLOptimizer to reduce the VLs of instructions that define mask registers
- It enables CSE and code sinking in more places
- It removes the need to peek through mask copies in RISCVISelDAGToDAG and keep track of V0 defs in RISCVVectorPeephole

This patch initially eliminates uses of vmv0 after RISCVVectorPeephole to keep the diff to a minimum, and a follow-up patch will move the elimination past the other MachineInstr SSA passes.

Note that it doesn't try to remove any defs of vmv0, as we shouldn't have any instructions that have vmv0 outputs.

As a further follow-up, we can move the elimination pass to after phi elimination and outside of SSA, which would unblock the pre-RA scheduler around masked pseudos. This might also help the issue that RISCVVectorMaskDAGMutation tries to solve.
1 parent acd34d9 commit cc7e836

25 files changed

+738
-661
lines changed

llvm/lib/Target/RISCV/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ add_llvm_target(RISCVCodeGen
6363
RISCVVectorMaskDAGMutation.cpp
6464
RISCVVectorPeephole.cpp
6565
RISCVVLOptimizer.cpp
66+
RISCVVMV0Elimination.cpp
6667
RISCVZacasABIFix.cpp
6768
GISel/RISCVCallLowering.cpp
6869
GISel/RISCVInstructionSelector.cpp

llvm/lib/Target/RISCV/RISCV.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,9 @@ void initializeRISCVPreLegalizerCombinerPass(PassRegistry &);
107107

108108
FunctionPass *createRISCVVLOptimizerPass();
109109
void initializeRISCVVLOptimizerPass(PassRegistry &);
110+
111+
FunctionPass *createRISCVVMV0EliminationPass();
112+
void initializeRISCVVMV0EliminationPass(PassRegistry &);
110113
} // namespace llvm
111114

112115
#endif

llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp

Lines changed: 15 additions & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -254,7 +254,6 @@ void RISCVDAGToDAGISel::addVectorLoadStoreOperands(
254254
bool IsMasked, bool IsStridedOrIndexed, SmallVectorImpl<SDValue> &Operands,
255255
bool IsLoad, MVT *IndexVT) {
256256
SDValue Chain = Node->getOperand(0);
257-
SDValue Glue;
258257

259258
Operands.push_back(Node->getOperand(CurOp++)); // Base pointer.
260259

@@ -265,11 +264,8 @@ void RISCVDAGToDAGISel::addVectorLoadStoreOperands(
265264
}
266265

267266
if (IsMasked) {
268-
// Mask needs to be copied to V0.
269267
SDValue Mask = Node->getOperand(CurOp++);
270-
Chain = CurDAG->getCopyToReg(Chain, DL, RISCV::V0, Mask, SDValue());
271-
Glue = Chain.getValue(1);
272-
Operands.push_back(CurDAG->getRegister(RISCV::V0, Mask.getValueType()));
268+
Operands.push_back(Mask);
273269
}
274270
SDValue VL;
275271
selectVLOp(Node->getOperand(CurOp++), VL);
@@ -291,8 +287,6 @@ void RISCVDAGToDAGISel::addVectorLoadStoreOperands(
291287
}
292288

293289
Operands.push_back(Chain); // Chain.
294-
if (Glue)
295-
Operands.push_back(Glue);
296290
}
297291

298292
void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, unsigned NF, bool IsMasked,
@@ -1844,19 +1838,13 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
18441838
return;
18451839
}
18461840

1847-
// Mask needs to be copied to V0.
1848-
SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
1849-
RISCV::V0, Mask, SDValue());
1850-
SDValue Glue = Chain.getValue(1);
1851-
SDValue V0 = CurDAG->getRegister(RISCV::V0, VT);
1852-
18531841
if (IsCmpConstant) {
18541842
SDValue Imm =
18551843
selectImm(CurDAG, SDLoc(Src2), XLenVT, CVal - 1, *Subtarget);
18561844

18571845
ReplaceNode(Node, CurDAG->getMachineNode(
18581846
VMSGTMaskOpcode, DL, VT,
1859-
{MaskedOff, Src1, Imm, V0, VL, SEW, Glue}));
1847+
{MaskedOff, Src1, Imm, Mask, VL, SEW}));
18601848
return;
18611849
}
18621850

@@ -1867,7 +1855,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
18671855
// the agnostic result can be either undisturbed or all 1.
18681856
SDValue Cmp = SDValue(
18691857
CurDAG->getMachineNode(VMSLTMaskOpcode, DL, VT,
1870-
{MaskedOff, Src1, Src2, V0, VL, SEW, Glue}),
1858+
{MaskedOff, Src1, Src2, Mask, VL, SEW}),
18711859
0);
18721860
// vmxor.mm vd, vd, v0 is used to update active value.
18731861
ReplaceNode(Node, CurDAG->getMachineNode(VMXOROpcode, DL, VT,
@@ -3287,12 +3275,10 @@ static bool vectorPseudoHasAllNBitUsers(SDNode *User, unsigned UserOpNo,
32873275
return false;
32883276
assert(RISCVII::hasVLOp(TSFlags));
32893277

3290-
bool HasGlueOp = User->getGluedNode() != nullptr;
3291-
unsigned ChainOpIdx = User->getNumOperands() - HasGlueOp - 1;
3278+
unsigned ChainOpIdx = User->getNumOperands() - 1;
32923279
bool HasChainOp = User->getOperand(ChainOpIdx).getValueType() == MVT::Other;
32933280
bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TSFlags);
3294-
unsigned VLIdx =
3295-
User->getNumOperands() - HasVecPolicyOp - HasChainOp - HasGlueOp - 2;
3281+
unsigned VLIdx = User->getNumOperands() - HasVecPolicyOp - HasChainOp - 2;
32963282
const unsigned Log2SEW = User->getConstantOperandVal(VLIdx + 1);
32973283

32983284
if (UserOpNo == VLIdx)
@@ -3759,43 +3745,7 @@ bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) {
37593745
return false;
37603746
}
37613747

3762-
// After ISel, a vector pseudo's mask will be copied to V0 via a CopyToReg
3763-
// that's glued to the pseudo. This tries to look up the value that was copied
3764-
// to V0.
3765-
static SDValue getMaskSetter(SDValue MaskOp, SDValue GlueOp) {
3766-
// Check that we're using V0 as a mask register.
3767-
if (!isa<RegisterSDNode>(MaskOp) ||
3768-
cast<RegisterSDNode>(MaskOp)->getReg() != RISCV::V0)
3769-
return SDValue();
3770-
3771-
// The glued user defines V0.
3772-
const auto *Glued = GlueOp.getNode();
3773-
3774-
if (!Glued || Glued->getOpcode() != ISD::CopyToReg)
3775-
return SDValue();
3776-
3777-
// Check that we're defining V0 as a mask register.
3778-
if (!isa<RegisterSDNode>(Glued->getOperand(1)) ||
3779-
cast<RegisterSDNode>(Glued->getOperand(1))->getReg() != RISCV::V0)
3780-
return SDValue();
3781-
3782-
SDValue MaskSetter = Glued->getOperand(2);
3783-
3784-
// Sometimes the VMSET is wrapped in a COPY_TO_REGCLASS, e.g. if the mask came
3785-
// from an extract_subvector or insert_subvector.
3786-
if (MaskSetter->isMachineOpcode() &&
3787-
MaskSetter->getMachineOpcode() == RISCV::COPY_TO_REGCLASS)
3788-
MaskSetter = MaskSetter->getOperand(0);
3789-
3790-
return MaskSetter;
3791-
}
3792-
3793-
static bool usesAllOnesMask(SDValue MaskOp, SDValue GlueOp) {
3794-
// Check the instruction defining V0; it needs to be a VMSET pseudo.
3795-
SDValue MaskSetter = getMaskSetter(MaskOp, GlueOp);
3796-
if (!MaskSetter)
3797-
return false;
3798-
3748+
static bool usesAllOnesMask(SDValue MaskOp) {
37993749
const auto IsVMSet = [](unsigned Opc) {
38003750
return Opc == RISCV::PseudoVMSET_M_B1 || Opc == RISCV::PseudoVMSET_M_B16 ||
38013751
Opc == RISCV::PseudoVMSET_M_B2 || Opc == RISCV::PseudoVMSET_M_B32 ||
@@ -3806,14 +3756,7 @@ static bool usesAllOnesMask(SDValue MaskOp, SDValue GlueOp) {
38063756
// TODO: Check that the VMSET is the expected bitwidth? The pseudo has
38073757
// undefined behaviour if it's the wrong bitwidth, so we could choose to
38083758
// assume that it's all-ones? Same applies to its VL.
3809-
return MaskSetter->isMachineOpcode() &&
3810-
IsVMSet(MaskSetter.getMachineOpcode());
3811-
}
3812-
3813-
// Return true if we can make sure mask of N is all-ones mask.
3814-
static bool usesAllOnesMask(SDNode *N, unsigned MaskOpIdx) {
3815-
return usesAllOnesMask(N->getOperand(MaskOpIdx),
3816-
N->getOperand(N->getNumOperands() - 1));
3759+
return MaskOp->isMachineOpcode() && IsVMSet(MaskOp.getMachineOpcode());
38173760
}
38183761

38193762
static bool isImplicitDef(SDValue V) {
@@ -3829,17 +3772,15 @@ static bool isImplicitDef(SDValue V) {
38293772
}
38303773

38313774
// Optimize masked RVV pseudo instructions with a known all-ones mask to their
3832-
// corresponding "unmasked" pseudo versions. The mask we're interested in will
3833-
// take the form of a V0 physical register operand, with a glued
3834-
// register-setting instruction.
3775+
// corresponding "unmasked" pseudo versions.
38353776
bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(MachineSDNode *N) {
38363777
const RISCV::RISCVMaskedPseudoInfo *I =
38373778
RISCV::getMaskedPseudoInfo(N->getMachineOpcode());
38383779
if (!I)
38393780
return false;
38403781

38413782
unsigned MaskOpIdx = I->MaskOpIdx;
3842-
if (!usesAllOnesMask(N, MaskOpIdx))
3783+
if (!usesAllOnesMask(N->getOperand(MaskOpIdx)))
38433784
return false;
38443785

38453786
// There are two classes of pseudos in the table - compares and
@@ -3863,18 +3804,13 @@ bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(MachineSDNode *N) {
38633804
// Skip the passthru operand at index 0 if the unmasked don't have one.
38643805
bool ShouldSkip = !HasPassthru && MaskedHasPassthru;
38653806
for (unsigned I = ShouldSkip, E = N->getNumOperands(); I != E; I++) {
3866-
// Skip the mask, and the Glue.
3807+
// Skip the mask
38673808
SDValue Op = N->getOperand(I);
3868-
if (I == MaskOpIdx || Op.getValueType() == MVT::Glue)
3809+
if (I == MaskOpIdx)
38693810
continue;
38703811
Ops.push_back(Op);
38713812
}
38723813

3873-
// Transitively apply any node glued to our new node.
3874-
const auto *Glued = N->getGluedNode();
3875-
if (auto *TGlued = Glued->getGluedNode())
3876-
Ops.push_back(SDValue(TGlued, TGlued->getNumValues() - 1));
3877-
38783814
MachineSDNode *Result =
38793815
CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
38803816

@@ -3910,17 +3846,13 @@ static bool IsVMerge(SDNode *N) {
39103846
// The resulting policy is the effective policy the vmerge would have had,
39113847
// i.e. whether or not it's passthru operand was implicit-def.
39123848
bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
3913-
SDValue Passthru, False, True, VL, Mask, Glue;
3849+
SDValue Passthru, False, True, VL, Mask;
39143850
assert(IsVMerge(N));
39153851
Passthru = N->getOperand(0);
39163852
False = N->getOperand(1);
39173853
True = N->getOperand(2);
39183854
Mask = N->getOperand(3);
39193855
VL = N->getOperand(4);
3920-
// We always have a glue node for the mask at v0.
3921-
Glue = N->getOperand(N->getNumOperands() - 1);
3922-
assert(cast<RegisterSDNode>(Mask)->getReg() == RISCV::V0);
3923-
assert(Glue.getValueType() == MVT::Glue);
39243856

39253857
// If the EEW of True is different from vmerge's SEW, then we can't fold.
39263858
if (True.getSimpleValueType() != N->getSimpleValueType(0))
@@ -3963,12 +3895,7 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
39633895
if (TII->get(TrueOpc).hasUnmodeledSideEffects())
39643896
return false;
39653897

3966-
// The last operand of a masked instruction may be glued.
3967-
bool HasGlueOp = True->getGluedNode() != nullptr;
3968-
3969-
// The chain operand may exist either before the glued operands or in the last
3970-
// position.
3971-
unsigned TrueChainOpIdx = True.getNumOperands() - HasGlueOp - 1;
3898+
unsigned TrueChainOpIdx = True.getNumOperands() - 1;
39723899
bool HasChainOp =
39733900
True.getOperand(TrueChainOpIdx).getValueType() == MVT::Other;
39743901

@@ -3980,15 +3907,14 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
39803907
LoopWorklist.push_back(False.getNode());
39813908
LoopWorklist.push_back(Mask.getNode());
39823909
LoopWorklist.push_back(VL.getNode());
3983-
LoopWorklist.push_back(Glue.getNode());
39843910
if (SDNode::hasPredecessorHelper(True.getNode(), Visited, LoopWorklist))
39853911
return false;
39863912
}
39873913

39883914
// The vector policy operand may be present for masked intrinsics
39893915
bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TrueTSFlags);
39903916
unsigned TrueVLIndex =
3991-
True.getNumOperands() - HasVecPolicyOp - HasChainOp - HasGlueOp - 2;
3917+
True.getNumOperands() - HasVecPolicyOp - HasChainOp - 2;
39923918
SDValue TrueVL = True.getOperand(TrueVLIndex);
39933919
SDValue SEW = True.getOperand(TrueVLIndex + 1);
39943920

@@ -4020,7 +3946,7 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
40203946
if (RISCVII::elementsDependOnVL(TrueBaseMCID.TSFlags) && (TrueVL != VL))
40213947
return false;
40223948
if (RISCVII::elementsDependOnMask(TrueBaseMCID.TSFlags) &&
4023-
(Mask && !usesAllOnesMask(Mask, Glue)))
3949+
(Mask && !usesAllOnesMask(Mask)))
40243950
return false;
40253951

40263952
// Make sure it doesn't raise any observable fp exceptions, since changing the
@@ -4077,9 +4003,6 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
40774003
if (HasChainOp)
40784004
Ops.push_back(True.getOperand(TrueChainOpIdx));
40794005

4080-
// Add the glue for the CopyToReg of mask->v0.
4081-
Ops.push_back(Glue);
4082-
40834006
MachineSDNode *Result =
40844007
CurDAG->getMachineNode(MaskedOpc, DL, True->getVTList(), Ops);
40854008
Result->setFlags(True->getFlags());

0 commit comments

Comments
 (0)