Skip to content

Commit 2949720

Browse files
authored
[RISCV] Move vmerge same mask peephole to RISCVVectorPeephole (#106108)
We currently fold a vmerge.vvm into its true operand if the true operand is a masked pseudo with the same mask. We can move this over to RISCVVectorPeephole by instead splitting it up into a smaller peephole which converts it to a vmv.v.v first. The existing foldVMV_V_V peephole will then take care of folding it if needed. This is very similar to the existing all-ones mask peephole and we could potentially do it inside of it. I opted to put it in a separate peephole to make it easier to reason about, given that the duplication is small, but I could be persuaded either way.
1 parent a84baef commit 2949720

File tree

4 files changed

+148
-60
lines changed

4 files changed

+148
-60
lines changed

llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp

Lines changed: 5 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -3833,15 +3833,8 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
38333833
uint64_t TrueTSFlags = TrueMCID.TSFlags;
38343834
bool HasTiedDest = RISCVII::isFirstDefTiedToFirstUse(TrueMCID);
38353835

3836-
bool IsMasked = false;
38373836
const RISCV::RISCVMaskedPseudoInfo *Info =
38383837
RISCV::lookupMaskedIntrinsicByUnmasked(TrueOpc);
3839-
if (!Info && HasTiedDest) {
3840-
Info = RISCV::getMaskedPseudoInfo(TrueOpc);
3841-
IsMasked = true;
3842-
}
3843-
assert(!(IsMasked && !HasTiedDest) && "Expected tied dest");
3844-
38453838
if (!Info)
38463839
return false;
38473840

@@ -3853,19 +3846,6 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
38533846
return false;
38543847
}
38553848

3856-
// If True is masked then the vmerge must have either the same mask or an all
3857-
// 1s mask, since we're going to keep the mask from True.
3858-
if (IsMasked) {
3859-
// FIXME: Support mask agnostic True instruction which would have an
3860-
// undef passthru operand.
3861-
SDValue TrueMask =
3862-
getMaskSetter(True->getOperand(Info->MaskOpIdx),
3863-
True->getOperand(True->getNumOperands() - 1));
3864-
assert(TrueMask);
3865-
if (!usesAllOnesMask(Mask, Glue) && getMaskSetter(Mask, Glue) != TrueMask)
3866-
return false;
3867-
}
3868-
38693849
// Skip if True has side effect.
38703850
if (TII->get(TrueOpc).hasUnmodeledSideEffects())
38713851
return false;
@@ -3930,24 +3910,13 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
39303910
(Mask && !usesAllOnesMask(Mask, Glue)))
39313911
return false;
39323912

3933-
// If we end up changing the VL or mask of True, then we need to make sure it
3934-
// doesn't raise any observable fp exceptions, since changing the active
3935-
// elements will affect how fflags is set.
3936-
if (TrueVL != VL || !IsMasked)
3937-
if (mayRaiseFPException(True.getNode()) &&
3938-
!True->getFlags().hasNoFPExcept())
3939-
return false;
3913+
// Make sure it doesn't raise any observable fp exceptions, since changing the
3914+
// active elements will affect how fflags is set.
3915+
if (mayRaiseFPException(True.getNode()) && !True->getFlags().hasNoFPExcept())
3916+
return false;
39403917

39413918
SDLoc DL(N);
39423919

3943-
// From the preconditions we checked above, we know the mask and thus glue
3944-
// for the result node will be taken from True.
3945-
if (IsMasked) {
3946-
Mask = True->getOperand(Info->MaskOpIdx);
3947-
Glue = True->getOperand(True->getNumOperands() - 1);
3948-
assert(Glue.getValueType() == MVT::Glue);
3949-
}
3950-
39513920
unsigned MaskedOpc = Info->MaskedPseudo;
39523921
#ifndef NDEBUG
39533922
const MCInstrDesc &MaskedMCID = TII->get(MaskedOpc);
@@ -3977,8 +3946,7 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
39773946
Ops.push_back(False);
39783947

39793948
const bool HasRoundingMode = RISCVII::hasRoundModeOp(TrueTSFlags);
3980-
const unsigned NormalOpsEnd = TrueVLIndex - IsMasked - HasRoundingMode;
3981-
assert(!IsMasked || NormalOpsEnd == Info->MaskOpIdx);
3949+
const unsigned NormalOpsEnd = TrueVLIndex - HasRoundingMode;
39823950
Ops.append(True->op_begin() + HasTiedDest, True->op_begin() + NormalOpsEnd);
39833951

39843952
Ops.push_back(Mask);

llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp

Lines changed: 63 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,8 @@ class RISCVVectorPeephole : public MachineFunctionPass {
6565
bool convertToVLMAX(MachineInstr &MI) const;
6666
bool convertToWholeRegister(MachineInstr &MI) const;
6767
bool convertToUnmasked(MachineInstr &MI) const;
68-
bool convertVMergeToVMv(MachineInstr &MI) const;
68+
bool convertAllOnesVMergeToVMv(MachineInstr &MI) const;
69+
bool convertSameMaskVMergeToVMv(MachineInstr &MI) const;
6970
bool foldUndefPassthruVMV_V_V(MachineInstr &MI);
7071
bool foldVMV_V_V(MachineInstr &MI);
7172

@@ -342,17 +343,13 @@ bool RISCVVectorPeephole::convertToWholeRegister(MachineInstr &MI) const {
342343
return true;
343344
}
344345

345-
// Transform (VMERGE_VVM_<LMUL> pt, false, true, allones, vl, sew) to
346-
// (VMV_V_V_<LMUL> pt, true, vl, sew). It may decrease uses of VMSET.
347-
bool RISCVVectorPeephole::convertVMergeToVMv(MachineInstr &MI) const {
346+
static unsigned getVMV_V_VOpcodeForVMERGE_VVM(const MachineInstr &MI) {
348347
#define CASE_VMERGE_TO_VMV(lmul) \
349348
case RISCV::PseudoVMERGE_VVM_##lmul: \
350-
NewOpc = RISCV::PseudoVMV_V_V_##lmul; \
351-
break;
352-
unsigned NewOpc;
349+
return RISCV::PseudoVMV_V_V_##lmul;
353350
switch (MI.getOpcode()) {
354351
default:
355-
return false;
352+
return 0;
356353
CASE_VMERGE_TO_VMV(MF8)
357354
CASE_VMERGE_TO_VMV(MF4)
358355
CASE_VMERGE_TO_VMV(MF2)
@@ -361,14 +358,68 @@ bool RISCVVectorPeephole::convertVMergeToVMv(MachineInstr &MI) const {
361358
CASE_VMERGE_TO_VMV(M4)
362359
CASE_VMERGE_TO_VMV(M8)
363360
}
361+
}
364362

363+
/// Convert a PseudoVMERGE_VVM with an all ones mask to a PseudoVMV_V_V.
364+
///
365+
/// %x = PseudoVMERGE_VVM %passthru, %false, %true, %allones, vl, sew
366+
/// ->
367+
/// %x = PseudoVMV_V_V %passthru, %true, vl, sew, tu_mu
368+
bool RISCVVectorPeephole::convertAllOnesVMergeToVMv(MachineInstr &MI) const {
369+
unsigned NewOpc = getVMV_V_VOpcodeForVMERGE_VVM(MI);
370+
if (!NewOpc)
371+
return false;
365372
assert(MI.getOperand(4).isReg() && MI.getOperand(4).getReg() == RISCV::V0);
366373
if (!isAllOnesMask(V0Defs.lookup(&MI)))
367374
return false;
368375

369376
MI.setDesc(TII->get(NewOpc));
370-
MI.removeOperand(2); // False operand
371-
MI.removeOperand(3); // Mask operand
377+
MI.removeOperand(2); // False operand
378+
MI.removeOperand(3); // Mask operand
379+
MI.addOperand(
380+
MachineOperand::CreateImm(RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED));
381+
382+
// vmv.v.v doesn't have a mask operand, so we may be able to inflate the
383+
// register class for the destination and passthru operands e.g. VRNoV0 -> VR
384+
MRI->recomputeRegClass(MI.getOperand(0).getReg());
385+
if (MI.getOperand(1).getReg() != RISCV::NoRegister)
386+
MRI->recomputeRegClass(MI.getOperand(1).getReg());
387+
return true;
388+
}
389+
390+
/// If a PseudoVMERGE_VVM's true operand is a masked pseudo and both have the
391+
/// same mask, and the masked pseudo's passthru is the same as the false
392+
/// operand, we can convert the PseudoVMERGE_VVM to a PseudoVMV_V_V.
393+
///
394+
/// %true = PseudoVADD_VV_M1_MASK %false, %a, %b, %mask, vl1, sew, policy
395+
/// %x = PseudoVMERGE_VVM %passthru, %false, %true, %mask, vl2, sew
396+
/// ->
397+
/// %true = PseudoVADD_VV_M1_MASK %false, %a, %b, %mask, vl1, sew, policy
398+
/// %x = PseudoVMV_V_V %passthru, %true, vl2, sew, tu_mu
399+
bool RISCVVectorPeephole::convertSameMaskVMergeToVMv(MachineInstr &MI) const {
400+
unsigned NewOpc = getVMV_V_VOpcodeForVMERGE_VVM(MI);
401+
if (!NewOpc)
402+
return false;
403+
MachineInstr *True = MRI->getVRegDef(MI.getOperand(3).getReg());
404+
if (!True || !RISCV::getMaskedPseudoInfo(True->getOpcode()) ||
405+
!hasSameEEW(MI, *True))
406+
return false;
407+
408+
// True's passthru needs to be equivalent to False
409+
Register TruePassthruReg = True->getOperand(1).getReg();
410+
Register FalseReg = MI.getOperand(2).getReg();
411+
if (TruePassthruReg != RISCV::NoRegister && TruePassthruReg != FalseReg)
412+
return false;
413+
414+
const MachineInstr *TrueV0Def = V0Defs.lookup(True);
415+
const MachineInstr *MIV0Def = V0Defs.lookup(&MI);
416+
assert(TrueV0Def && TrueV0Def->isCopy() && MIV0Def && MIV0Def->isCopy());
417+
if (TrueV0Def->getOperand(1).getReg() != MIV0Def->getOperand(1).getReg())
418+
return false;
419+
420+
MI.setDesc(TII->get(NewOpc));
421+
MI.removeOperand(2); // False operand
422+
MI.removeOperand(3); // Mask operand
372423
MI.addOperand(
373424
MachineOperand::CreateImm(RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED));
374425

@@ -623,7 +674,8 @@ bool RISCVVectorPeephole::runOnMachineFunction(MachineFunction &MF) {
623674
Changed |= tryToReduceVL(MI);
624675
Changed |= convertToUnmasked(MI);
625676
Changed |= convertToWholeRegister(MI);
626-
Changed |= convertVMergeToVMv(MI);
677+
Changed |= convertAllOnesVMergeToVMv(MI);
678+
Changed |= convertSameMaskVMergeToVMv(MI);
627679
if (foldUndefPassthruVMV_V_V(MI)) {
628680
Changed |= true;
629681
continue; // MI is erased

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -62,12 +62,11 @@ define void @gather_masked(ptr noalias nocapture %A, ptr noalias nocapture reado
6262
; CHECK-NEXT: li a4, 5
6363
; CHECK-NEXT: .LBB1_1: # %vector.body
6464
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
65-
; CHECK-NEXT: vmv1r.v v9, v8
66-
; CHECK-NEXT: vsetvli zero, a3, e8, m1, ta, mu
67-
; CHECK-NEXT: vlse8.v v9, (a1), a4, v0.t
68-
; CHECK-NEXT: vle8.v v10, (a0)
69-
; CHECK-NEXT: vadd.vv v9, v10, v9
70-
; CHECK-NEXT: vse8.v v9, (a0)
65+
; CHECK-NEXT: vsetvli zero, a3, e8, m1, ta, ma
66+
; CHECK-NEXT: vlse8.v v8, (a1), a4, v0.t
67+
; CHECK-NEXT: vle8.v v9, (a0)
68+
; CHECK-NEXT: vadd.vv v8, v9, v8
69+
; CHECK-NEXT: vse8.v v8, (a0)
7170
; CHECK-NEXT: addi a0, a0, 32
7271
; CHECK-NEXT: addi a1, a1, 160
7372
; CHECK-NEXT: bne a0, a2, .LBB1_1
@@ -344,12 +343,11 @@ define void @scatter_masked(ptr noalias nocapture %A, ptr noalias nocapture read
344343
; CHECK-NEXT: li a4, 5
345344
; CHECK-NEXT: .LBB7_1: # %vector.body
346345
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
347-
; CHECK-NEXT: vsetvli zero, a3, e8, m1, ta, mu
348-
; CHECK-NEXT: vle8.v v9, (a1)
349-
; CHECK-NEXT: vmv1r.v v10, v8
350-
; CHECK-NEXT: vlse8.v v10, (a0), a4, v0.t
351-
; CHECK-NEXT: vadd.vv v9, v10, v9
352-
; CHECK-NEXT: vsse8.v v9, (a0), a4, v0.t
346+
; CHECK-NEXT: vsetvli zero, a3, e8, m1, ta, ma
347+
; CHECK-NEXT: vle8.v v8, (a1)
348+
; CHECK-NEXT: vlse8.v v9, (a0), a4, v0.t
349+
; CHECK-NEXT: vadd.vv v8, v9, v8
350+
; CHECK-NEXT: vsse8.v v8, (a0), a4, v0.t
353351
; CHECK-NEXT: addi a1, a1, 32
354352
; CHECK-NEXT: addi a0, a0, 160
355353
; CHECK-NEXT: bne a1, a2, .LBB7_1

llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-to-vmv.mir

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,3 +68,73 @@ body: |
6868
$v0 = COPY %mask
6969
%x:vrnov0 = PseudoVMERGE_VVM_M1 %pt, %false, %true, $v0, %avl, 5
7070
...
71+
---
72+
name: same_mask
73+
body: |
74+
bb.0:
75+
liveins: $v8, $v9, $v0
76+
; CHECK-LABEL: name: same_mask
77+
; CHECK: liveins: $v8, $v9, $v0
78+
; CHECK-NEXT: {{ $}}
79+
; CHECK-NEXT: %pt:vr = COPY $v8
80+
; CHECK-NEXT: %false:vrnov0 = COPY $v9
81+
; CHECK-NEXT: %mask:vr = COPY $v0
82+
; CHECK-NEXT: $v0 = COPY %mask
83+
; CHECK-NEXT: %true:vrnov0 = PseudoVADD_VV_M1_MASK %false, $noreg, $noreg, $v0, 4, 5 /* e32 */, 0 /* tu, mu */
84+
; CHECK-NEXT: $v0 = COPY %mask
85+
; CHECK-NEXT: %x:vr = PseudoVMV_V_V_M1 %pt, %true, 8, 5 /* e32 */, 0 /* tu, mu */
86+
%pt:vrnov0 = COPY $v8
87+
%false:vrnov0 = COPY $v9
88+
%mask:vr = COPY $v0
89+
$v0 = COPY %mask
90+
%true:vrnov0 = PseudoVADD_VV_M1_MASK %false, $noreg, $noreg, $v0, 4, 5 /* e32 */, 0 /* tu, mu */
91+
$v0 = COPY %mask
92+
%x:vrnov0 = PseudoVMERGE_VVM_M1 %pt, %false, %true, $v0, 8, 5 /* e32 */
93+
...
94+
---
95+
# Shouldn't be converted because false operands are different
96+
name: same_mask_different_false
97+
body: |
98+
bb.0:
99+
liveins: $v8, $v9, $v0
100+
; CHECK-LABEL: name: same_mask_different_false
101+
; CHECK: liveins: $v8, $v9, $v0
102+
; CHECK-NEXT: {{ $}}
103+
; CHECK-NEXT: %pt:vrnov0 = COPY $v8
104+
; CHECK-NEXT: %false:vrnov0 = COPY $v9
105+
; CHECK-NEXT: %mask:vr = COPY $v0
106+
; CHECK-NEXT: $v0 = COPY %mask
107+
; CHECK-NEXT: %true:vrnov0 = PseudoVADD_VV_M1_MASK %pt, $noreg, $noreg, $v0, 4, 5 /* e32 */, 0 /* tu, mu */
108+
; CHECK-NEXT: $v0 = COPY %mask
109+
; CHECK-NEXT: %x:vrnov0 = PseudoVMERGE_VVM_M1 %pt, %false, %true, $v0, 8, 5 /* e32 */
110+
%pt:vrnov0 = COPY $v8
111+
%false:vrnov0 = COPY $v9
112+
%mask:vr = COPY $v0
113+
$v0 = COPY %mask
114+
%true:vrnov0 = PseudoVADD_VV_M1_MASK %pt, $noreg, $noreg, $v0, 4, 5 /* e32 */, 0 /* tu, mu */
115+
$v0 = COPY %mask
116+
%x:vrnov0 = PseudoVMERGE_VVM_M1 %pt, %false, %true, $v0, 8, 5 /* e32 */
117+
...
118+
---
119+
# Shouldn't be converted because EEWs are different
120+
name: same_mask_different_eew
121+
body: |
122+
bb.0:
123+
liveins: $v8, $v9, $v0
124+
; CHECK-LABEL: name: same_mask_different_eew
125+
; CHECK: liveins: $v8, $v9, $v0
126+
; CHECK-NEXT: {{ $}}
127+
; CHECK-NEXT: %pt:vrnov0 = COPY $v8
128+
; CHECK-NEXT: %false:vrnov0 = COPY $v9
129+
; CHECK-NEXT: %mask:vr = COPY $v0
130+
; CHECK-NEXT: $v0 = COPY %mask
131+
; CHECK-NEXT: %true:vrnov0 = PseudoVADD_VV_M1_MASK %false, $noreg, $noreg, $v0, 4, 4 /* e16 */, 0 /* tu, mu */
132+
; CHECK-NEXT: $v0 = COPY %mask
133+
; CHECK-NEXT: %x:vrnov0 = PseudoVMERGE_VVM_M1 %pt, %false, %true, $v0, 8, 5 /* e32 */
134+
%pt:vrnov0 = COPY $v8
135+
%false:vrnov0 = COPY $v9
136+
%mask:vr = COPY $v0
137+
$v0 = COPY %mask
138+
%true:vrnov0 = PseudoVADD_VV_M1_MASK %false, $noreg, $noreg, $v0, 4, 4 /* e16 */, 0 /* tu, mu */
139+
$v0 = COPY %mask
140+
%x:vrnov0 = PseudoVMERGE_VVM_M1 %pt, %false, %true, $v0, 8, 5 /* e32 */

0 commit comments

Comments
 (0)