Skip to content

[RISCV] Move vmerge same mask peephole to RISCVVectorPeephole #106108

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 5 additions & 37 deletions llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3833,15 +3833,8 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
uint64_t TrueTSFlags = TrueMCID.TSFlags;
bool HasTiedDest = RISCVII::isFirstDefTiedToFirstUse(TrueMCID);

bool IsMasked = false;
const RISCV::RISCVMaskedPseudoInfo *Info =
RISCV::lookupMaskedIntrinsicByUnmasked(TrueOpc);
if (!Info && HasTiedDest) {
Info = RISCV::getMaskedPseudoInfo(TrueOpc);
IsMasked = true;
}
assert(!(IsMasked && !HasTiedDest) && "Expected tied dest");

if (!Info)
return false;

Expand All @@ -3853,19 +3846,6 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
return false;
}

// If True is masked then the vmerge must have either the same mask or an all
// 1s mask, since we're going to keep the mask from True.
if (IsMasked) {
// FIXME: Support mask agnostic True instruction which would have an
// undef passthru operand.
SDValue TrueMask =
getMaskSetter(True->getOperand(Info->MaskOpIdx),
True->getOperand(True->getNumOperands() - 1));
assert(TrueMask);
if (!usesAllOnesMask(Mask, Glue) && getMaskSetter(Mask, Glue) != TrueMask)
return false;
}

// Skip if True has side effect.
if (TII->get(TrueOpc).hasUnmodeledSideEffects())
return false;
Expand Down Expand Up @@ -3930,24 +3910,13 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
(Mask && !usesAllOnesMask(Mask, Glue)))
return false;

// If we end up changing the VL or mask of True, then we need to make sure it
// doesn't raise any observable fp exceptions, since changing the active
// elements will affect how fflags is set.
if (TrueVL != VL || !IsMasked)
if (mayRaiseFPException(True.getNode()) &&
!True->getFlags().hasNoFPExcept())
return false;
// Make sure it doesn't raise any observable fp exceptions, since changing the
// active elements will affect how fflags is set.
if (mayRaiseFPException(True.getNode()) && !True->getFlags().hasNoFPExcept())
return false;

SDLoc DL(N);

// From the preconditions we checked above, we know the mask and thus glue
// for the result node will be taken from True.
if (IsMasked) {
Mask = True->getOperand(Info->MaskOpIdx);
Glue = True->getOperand(True->getNumOperands() - 1);
assert(Glue.getValueType() == MVT::Glue);
}

unsigned MaskedOpc = Info->MaskedPseudo;
#ifndef NDEBUG
const MCInstrDesc &MaskedMCID = TII->get(MaskedOpc);
Expand Down Expand Up @@ -3977,8 +3946,7 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
Ops.push_back(False);

const bool HasRoundingMode = RISCVII::hasRoundModeOp(TrueTSFlags);
const unsigned NormalOpsEnd = TrueVLIndex - IsMasked - HasRoundingMode;
assert(!IsMasked || NormalOpsEnd == Info->MaskOpIdx);
const unsigned NormalOpsEnd = TrueVLIndex - HasRoundingMode;
Ops.append(True->op_begin() + HasTiedDest, True->op_begin() + NormalOpsEnd);

Ops.push_back(Mask);
Expand Down
74 changes: 63 additions & 11 deletions llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,8 @@ class RISCVVectorPeephole : public MachineFunctionPass {
bool convertToVLMAX(MachineInstr &MI) const;
bool convertToWholeRegister(MachineInstr &MI) const;
bool convertToUnmasked(MachineInstr &MI) const;
bool convertVMergeToVMv(MachineInstr &MI) const;
bool convertAllOnesVMergeToVMv(MachineInstr &MI) const;
bool convertSameMaskVMergeToVMv(MachineInstr &MI) const;
bool foldUndefPassthruVMV_V_V(MachineInstr &MI);
bool foldVMV_V_V(MachineInstr &MI);

Expand Down Expand Up @@ -342,17 +343,13 @@ bool RISCVVectorPeephole::convertToWholeRegister(MachineInstr &MI) const {
return true;
}

// Transform (VMERGE_VVM_<LMUL> pt, false, true, allones, vl, sew) to
// (VMV_V_V_<LMUL> pt, true, vl, sew). It may decrease uses of VMSET.
bool RISCVVectorPeephole::convertVMergeToVMv(MachineInstr &MI) const {
static unsigned getVMV_V_VOpcodeForVMERGE_VVM(const MachineInstr &MI) {
#define CASE_VMERGE_TO_VMV(lmul) \
case RISCV::PseudoVMERGE_VVM_##lmul: \
NewOpc = RISCV::PseudoVMV_V_V_##lmul; \
break;
unsigned NewOpc;
return RISCV::PseudoVMV_V_V_##lmul;
switch (MI.getOpcode()) {
default:
return false;
return 0;
CASE_VMERGE_TO_VMV(MF8)
CASE_VMERGE_TO_VMV(MF4)
CASE_VMERGE_TO_VMV(MF2)
Expand All @@ -361,14 +358,68 @@ bool RISCVVectorPeephole::convertVMergeToVMv(MachineInstr &MI) const {
CASE_VMERGE_TO_VMV(M4)
CASE_VMERGE_TO_VMV(M8)
}
}

/// Convert a PseudoVMERGE_VVM with an all ones mask to a PseudoVMV_V_V.
///
/// %x = PseudoVMERGE_VVM %passthru, %false, %true, %allones, sew, vl
/// ->
/// %x = PseudoVMV_V_V %passthru, %true, vl, sew, tu_mu
bool RISCVVectorPeephole::convertAllOnesVMergeToVMv(MachineInstr &MI) const {
unsigned NewOpc = getVMV_V_VOpcodeForVMERGE_VVM(MI);
if (!NewOpc)
return false;
assert(MI.getOperand(4).isReg() && MI.getOperand(4).getReg() == RISCV::V0);
if (!isAllOnesMask(V0Defs.lookup(&MI)))
return false;

MI.setDesc(TII->get(NewOpc));
MI.removeOperand(2); // False operand
MI.removeOperand(3); // Mask operand
MI.removeOperand(2); // False operand
MI.removeOperand(3); // Mask operand
MI.addOperand(
MachineOperand::CreateImm(RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED));

// vmv.v.v doesn't have a mask operand, so we may be able to inflate the
// register class for the destination and passthru operands e.g. VRNoV0 -> VR
MRI->recomputeRegClass(MI.getOperand(0).getReg());
if (MI.getOperand(1).getReg() != RISCV::NoRegister)
MRI->recomputeRegClass(MI.getOperand(1).getReg());
return true;
}

/// If a PseudoVMERGE_VVM's true operand is a masked pseudo and both have the
/// same mask, and the masked pseudo's passthru is the same as the false
/// operand, we can convert the PseudoVMERGE_VVM to a PseudoVMV_V_V.
///
/// %true = PseudoVADD_VV_M1_MASK %false, %x, %y, %mask, vl1, sew, policy
/// %x = PseudoVMERGE_VVM %passthru, %false, %true, %mask, vl2, sew
/// ->
/// %true = PseudoVADD_VV_M1_MASK %false, %x, %y, %mask, vl1, sew, policy
/// %x = PseudoVMV_V_V %passthru, %true, vl2, sew, tu_mu
bool RISCVVectorPeephole::convertSameMaskVMergeToVMv(MachineInstr &MI) const {
unsigned NewOpc = getVMV_V_VOpcodeForVMERGE_VVM(MI);
if (!NewOpc)
return false;
MachineInstr *True = MRI->getVRegDef(MI.getOperand(3).getReg());
if (!True || !RISCV::getMaskedPseudoInfo(True->getOpcode()) ||
!hasSameEEW(MI, *True))
return false;

// True's passthru needs to be equivalent to False
Register TruePassthruReg = True->getOperand(1).getReg();
Register FalseReg = MI.getOperand(2).getReg();
if (TruePassthruReg != RISCV::NoRegister && TruePassthruReg != FalseReg)
return false;

const MachineInstr *TrueV0Def = V0Defs.lookup(True);
const MachineInstr *MIV0Def = V0Defs.lookup(&MI);
assert(TrueV0Def && TrueV0Def->isCopy() && MIV0Def && MIV0Def->isCopy());
if (TrueV0Def->getOperand(1).getReg() != MIV0Def->getOperand(1).getReg())
return false;

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi @lukel97, shouldn't we check around here that the mask for the false operand also matches?

I'm seeing this case

$v0 = COPY %15:vr
%20:vrnov0 = PseudoVRSUB_VI_MF8_MASK $noreg(tied-def 0), killed %19:vrnov0, -2, $v0, %12:gprnox0, 3, 1
…
$v0 = COPY %14:vr ; <---- Different mask!
%26:vrnov0 = PseudoVOR_VI_MF8_MASK $noreg(tied-def 0), killed %25:vrnov0, 1, $v0, %12:gprnox0, 3, 1
$v0 = COPY %15:vr
%27:vrnov0 = PseudoVMERGE_VVM_MF8 $noreg(tied-def 0), killed %26:vrnov0, killed %20:vrnov0, $v0, %12:gprnox0, 3

being turned into

$v0 = COPY %15:vr
%27:vr = PseudoVMV_V_V_MF8 $noreg(tied-def 0), killed %20:vrnov0, %12:gprnox0, 3, 0

which I don't think is equivalent.

I can open an issue with a reproducer if that helps.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for catching this, I think the False == TruePassthru check needs to be the other way round, i.e we check that False is NoRegister, not TruePassthru. The MIR should be enough to recreate a test case, I'll take a look into this now

Copy link
Contributor Author

@lukel97 lukel97 Sep 9, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've opened #107827 to fix it, I believe the underlying issue was that we were discarding the false operand when the true operand's passthru was undef.

In your example above, in theory I think we can still replace the VMERGE_VVM with VMV_V_V as long as the VRSUB_VI's passthru becomes the false operand. In practice though we would need to move the VRSUB_VI down to access %26, but we can't move past a copy to $v0, so the peephole should just bail instead after the patch.

MI.setDesc(TII->get(NewOpc));
MI.removeOperand(2); // False operand
MI.removeOperand(3); // Mask operand
MI.addOperand(
MachineOperand::CreateImm(RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED));

Expand Down Expand Up @@ -622,7 +673,8 @@ bool RISCVVectorPeephole::runOnMachineFunction(MachineFunction &MF) {
Changed |= tryToReduceVL(MI);
Changed |= convertToUnmasked(MI);
Changed |= convertToWholeRegister(MI);
Changed |= convertVMergeToVMv(MI);
Changed |= convertAllOnesVMergeToVMv(MI);
Changed |= convertSameMaskVMergeToVMv(MI);
if (foldUndefPassthruVMV_V_V(MI)) {
Changed |= true;
continue; // MI is erased
Expand Down
22 changes: 10 additions & 12 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll
Original file line number Diff line number Diff line change
Expand Up @@ -62,12 +62,11 @@ define void @gather_masked(ptr noalias nocapture %A, ptr noalias nocapture reado
; CHECK-NEXT: li a4, 5
; CHECK-NEXT: .LBB1_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vsetvli zero, a3, e8, m1, ta, mu
; CHECK-NEXT: vlse8.v v9, (a1), a4, v0.t
; CHECK-NEXT: vle8.v v10, (a0)
; CHECK-NEXT: vadd.vv v9, v10, v9
; CHECK-NEXT: vse8.v v9, (a0)
; CHECK-NEXT: vsetvli zero, a3, e8, m1, ta, ma
; CHECK-NEXT: vlse8.v v8, (a1), a4, v0.t
; CHECK-NEXT: vle8.v v9, (a0)
; CHECK-NEXT: vadd.vv v8, v9, v8
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: addi a0, a0, 32
; CHECK-NEXT: addi a1, a1, 160
; CHECK-NEXT: bne a0, a2, .LBB1_1
Expand Down Expand Up @@ -344,12 +343,11 @@ define void @scatter_masked(ptr noalias nocapture %A, ptr noalias nocapture read
; CHECK-NEXT: li a4, 5
; CHECK-NEXT: .LBB7_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vsetvli zero, a3, e8, m1, ta, mu
; CHECK-NEXT: vle8.v v9, (a1)
; CHECK-NEXT: vmv1r.v v10, v8
; CHECK-NEXT: vlse8.v v10, (a0), a4, v0.t
; CHECK-NEXT: vadd.vv v9, v10, v9
; CHECK-NEXT: vsse8.v v9, (a0), a4, v0.t
; CHECK-NEXT: vsetvli zero, a3, e8, m1, ta, ma
; CHECK-NEXT: vle8.v v8, (a1)
; CHECK-NEXT: vlse8.v v9, (a0), a4, v0.t
; CHECK-NEXT: vadd.vv v8, v9, v8
; CHECK-NEXT: vsse8.v v8, (a0), a4, v0.t
; CHECK-NEXT: addi a1, a1, 32
; CHECK-NEXT: addi a0, a0, 160
; CHECK-NEXT: bne a1, a2, .LBB7_1
Expand Down
70 changes: 70 additions & 0 deletions llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-to-vmv.mir
Original file line number Diff line number Diff line change
Expand Up @@ -68,3 +68,73 @@ body: |
$v0 = COPY %mask
%x:vrnov0 = PseudoVMERGE_VVM_M1 %pt, %false, %true, $v0, %avl, 5
...
---
name: same_mask
body: |
bb.0:
liveins: $v8, $v9, $v0
; CHECK-LABEL: name: same_mask
; CHECK: liveins: $v8, $v9, $v0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %pt:vr = COPY $v8
; CHECK-NEXT: %false:vrnov0 = COPY $v9
; CHECK-NEXT: %mask:vr = COPY $v0
; CHECK-NEXT: $v0 = COPY %mask
; CHECK-NEXT: %true:vrnov0 = PseudoVADD_VV_M1_MASK %false, $noreg, $noreg, $v0, 4, 5 /* e32 */, 0 /* tu, mu */
; CHECK-NEXT: $v0 = COPY %mask
; CHECK-NEXT: %x:vr = PseudoVMV_V_V_M1 %pt, %true, 8, 5 /* e32 */, 0 /* tu, mu */
%pt:vrnov0 = COPY $v8
%false:vrnov0 = COPY $v9
%mask:vr = COPY $v0
$v0 = COPY %mask
%true:vrnov0 = PseudoVADD_VV_M1_MASK %false, $noreg, $noreg, $v0, 4, 5 /* e32 */, 0 /* tu, mu */
$v0 = COPY %mask
%x:vrnov0 = PseudoVMERGE_VVM_M1 %pt, %false, %true, $v0, 8, 5 /* e32 */
...
---
# Shouldn't be converted because false operands are different
name: same_mask_different_false
body: |
bb.0:
liveins: $v8, $v9, $v0
; CHECK-LABEL: name: same_mask_different_false
; CHECK: liveins: $v8, $v9, $v0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %pt:vrnov0 = COPY $v8
; CHECK-NEXT: %false:vrnov0 = COPY $v9
; CHECK-NEXT: %mask:vr = COPY $v0
; CHECK-NEXT: $v0 = COPY %mask
; CHECK-NEXT: %true:vrnov0 = PseudoVADD_VV_M1_MASK %pt, $noreg, $noreg, $v0, 4, 5 /* e32 */, 0 /* tu, mu */
; CHECK-NEXT: $v0 = COPY %mask
; CHECK-NEXT: %x:vrnov0 = PseudoVMERGE_VVM_M1 %pt, %false, %true, $v0, 8, 5 /* e32 */
%pt:vrnov0 = COPY $v8
%false:vrnov0 = COPY $v9
%mask:vr = COPY $v0
$v0 = COPY %mask
%true:vrnov0 = PseudoVADD_VV_M1_MASK %pt, $noreg, $noreg, $v0, 4, 5 /* e32 */, 0 /* tu, mu */
$v0 = COPY %mask
%x:vrnov0 = PseudoVMERGE_VVM_M1 %pt, %false, %true, $v0, 8, 5 /* e32 */
...
---
# Shouldn't be converted because EEWs are different
name: same_mask_different_eew
body: |
bb.0:
liveins: $v8, $v9, $v0
; CHECK-LABEL: name: same_mask_different_eew
; CHECK: liveins: $v8, $v9, $v0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %pt:vrnov0 = COPY $v8
; CHECK-NEXT: %false:vrnov0 = COPY $v9
; CHECK-NEXT: %mask:vr = COPY $v0
; CHECK-NEXT: $v0 = COPY %mask
; CHECK-NEXT: %true:vrnov0 = PseudoVADD_VV_M1_MASK %false, $noreg, $noreg, $v0, 4, 4 /* e16 */, 0 /* tu, mu */
; CHECK-NEXT: $v0 = COPY %mask
; CHECK-NEXT: %x:vrnov0 = PseudoVMERGE_VVM_M1 %pt, %false, %true, $v0, 8, 5 /* e32 */
%pt:vrnov0 = COPY $v8
%false:vrnov0 = COPY $v9
%mask:vr = COPY $v0
$v0 = COPY %mask
%true:vrnov0 = PseudoVADD_VV_M1_MASK %false, $noreg, $noreg, $v0, 4, 4 /* e16 */, 0 /* tu, mu */
$v0 = COPY %mask
%x:vrnov0 = PseudoVMERGE_VVM_M1 %pt, %false, %true, $v0, 8, 5 /* e32 */
Loading