Skip to content

[RISCV] Begin moving post-isel vector peepholes to a MF pass #70342

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Oct 30, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions llvm/lib/Target/RISCV/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ add_llvm_target(RISCVCodeGen
RISCVMakeCompressible.cpp
RISCVExpandAtomicPseudoInsts.cpp
RISCVExpandPseudoInsts.cpp
RISCVFoldMasks.cpp
RISCVFrameLowering.cpp
RISCVGatherScatterLowering.cpp
RISCVInsertVSETVLI.cpp
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/RISCV/RISCV.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,9 @@ void initializeRISCVMakeCompressibleOptPass(PassRegistry &);
FunctionPass *createRISCVGatherScatterLoweringPass();
void initializeRISCVGatherScatterLoweringPass(PassRegistry &);

FunctionPass *createRISCVFoldMasksPass();
void initializeRISCVFoldMasksPass(PassRegistry &);

FunctionPass *createRISCVOptWInstrsPass();
void initializeRISCVOptWInstrsPass(PassRegistry &);

Expand Down
174 changes: 174 additions & 0 deletions llvm/lib/Target/RISCV/RISCVFoldMasks.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
//===- RISCVFoldMasks.cpp - MI Vector Pseudo Mask Peepholes ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===---------------------------------------------------------------------===//
//
// This pass performs various peephole optimisations that fold masks into vector
// pseudo instructions after instruction selection.
//
// Currently it converts
// PseudoVMERGE_VVM %false, %false, %true, %allonesmask, %vl, %sew
// ->
// PseudoVMV_V_V %false, %true, %vl, %sew
//
//===---------------------------------------------------------------------===//

#include "RISCV.h"
#include "RISCVSubtarget.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"

using namespace llvm;

#define DEBUG_TYPE "riscv-fold-masks"

namespace {

class RISCVFoldMasks : public MachineFunctionPass {
public:
static char ID;
const TargetInstrInfo *TII;
MachineRegisterInfo *MRI;
const TargetRegisterInfo *TRI;
RISCVFoldMasks() : MachineFunctionPass(ID) {
initializeRISCVFoldMasksPass(*PassRegistry::getPassRegistry());
}

bool runOnMachineFunction(MachineFunction &MF) override;
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
MachineFunctionProperties::Property::IsSSA);
}

StringRef getPassName() const override { return "RISC-V Fold Masks"; }

private:
bool convertVMergeToVMv(MachineInstr &MI, MachineInstr *MaskDef);

bool isAllOnesMask(MachineInstr *MaskCopy);
};

} // namespace

char RISCVFoldMasks::ID = 0;

INITIALIZE_PASS(RISCVFoldMasks, DEBUG_TYPE, "RISC-V Fold Masks", false, false)

bool RISCVFoldMasks::isAllOnesMask(MachineInstr *MaskCopy) {
if (!MaskCopy)
return false;
assert(MaskCopy->isCopy() && MaskCopy->getOperand(0).getReg() == RISCV::V0);
Register SrcReg =
TRI->lookThruCopyLike(MaskCopy->getOperand(1).getReg(), MRI);
if (!SrcReg.isVirtual())
return false;
MachineInstr *SrcDef = MRI->getVRegDef(SrcReg);
if (!SrcDef)
return false;

// TODO: Check that the VMSET is the expected bitwidth? The pseudo has
// undefined behaviour if it's the wrong bitwidth, so we could choose to
// assume that it's all-ones? Same applies to its VL.
switch (SrcDef->getOpcode()) {
case RISCV::PseudoVMSET_M_B1:
case RISCV::PseudoVMSET_M_B2:
case RISCV::PseudoVMSET_M_B4:
case RISCV::PseudoVMSET_M_B8:
case RISCV::PseudoVMSET_M_B16:
case RISCV::PseudoVMSET_M_B32:
case RISCV::PseudoVMSET_M_B64:
return true;
default:
return false;
}
}

// Transform (VMERGE_VVM_<LMUL> false, false, true, allones, vl, sew) to
// (VMV_V_V_<LMUL> false, true, vl, sew). It may decrease uses of VMSET.
bool RISCVFoldMasks::convertVMergeToVMv(MachineInstr &MI, MachineInstr *V0Def) {
#define CASE_VMERGE_TO_VMV(lmul) \
case RISCV::PseudoVMERGE_VVM_##lmul: \
NewOpc = RISCV::PseudoVMV_V_V_##lmul; \
break;
unsigned NewOpc;
switch (MI.getOpcode()) {
default:
llvm_unreachable("Expected VMERGE_VVM_<LMUL> instruction.");
CASE_VMERGE_TO_VMV(MF8)
CASE_VMERGE_TO_VMV(MF4)
CASE_VMERGE_TO_VMV(MF2)
CASE_VMERGE_TO_VMV(M1)
CASE_VMERGE_TO_VMV(M2)
CASE_VMERGE_TO_VMV(M4)
CASE_VMERGE_TO_VMV(M8)
}

Register MergeReg = MI.getOperand(1).getReg();
Register FalseReg = MI.getOperand(2).getReg();
// Check merge == false (or merge == undef)
if (MergeReg != RISCV::NoRegister && TRI->lookThruCopyLike(MergeReg, MRI) !=
TRI->lookThruCopyLike(FalseReg, MRI))
return false;

assert(MI.getOperand(4).isReg() && MI.getOperand(4).getReg() == RISCV::V0);
if (!isAllOnesMask(V0Def))
return false;

MI.setDesc(TII->get(NewOpc));
MI.removeOperand(1); // Merge operand
MI.tieOperands(0, 1); // Tie false to dest
MI.removeOperand(3); // Mask operand
MI.addOperand(
MachineOperand::CreateImm(RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED));

// vmv.v.v doesn't have a mask operand, so we may be able to inflate the
// register class for the destination and merge operands e.g. VRNoV0 -> VR
MRI->recomputeRegClass(MI.getOperand(0).getReg());
MRI->recomputeRegClass(MI.getOperand(1).getReg());
return true;
}

bool RISCVFoldMasks::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;

// Skip if the vector extension is not enabled.
const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
if (!ST.hasVInstructions())
return false;

TII = ST.getInstrInfo();
MRI = &MF.getRegInfo();
TRI = MRI->getTargetRegisterInfo();

bool Changed = false;

// Masked pseudos coming out of isel will have their mask operand in the form:
//
// $v0:vr = COPY %mask:vr
// %x:vr = Pseudo_MASK %a:vr, %b:br, $v0:vr
//
// Because $v0 isn't in SSA, keep track of it so we can check the mask operand
// on each pseudo.
MachineInstr *CurrentV0Def;
for (MachineBasicBlock &MBB : MF) {
CurrentV0Def = nullptr;
for (MachineInstr &MI : MBB) {
unsigned BaseOpc = RISCV::getRVVMCOpcode(MI.getOpcode());
if (BaseOpc == RISCV::VMERGE_VVM)
Changed |= convertVMergeToVMv(MI, CurrentV0Def);

if (MI.definesRegister(RISCV::V0, TRI))
CurrentV0Def = &MI;
}
}

return Changed;
}

FunctionPass *llvm::createRISCVFoldMasksPass() { return new RISCVFoldMasks(); }
36 changes: 0 additions & 36 deletions llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3685,40 +3685,6 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
return true;
}

// Transform (VMERGE_VVM_<LMUL> false, false, true, allones, vl, sew) to
// (VMV_V_V_<LMUL> false, true, vl, sew). It may decrease uses of VMSET.
bool RISCVDAGToDAGISel::performVMergeToVMv(SDNode *N) {
#define CASE_VMERGE_TO_VMV(lmul) \
case RISCV::PseudoVMERGE_VVM_##lmul: \
NewOpc = RISCV::PseudoVMV_V_V_##lmul; \
break;
unsigned NewOpc;
switch (N->getMachineOpcode()) {
default:
llvm_unreachable("Expected VMERGE_VVM_<LMUL> instruction.");
CASE_VMERGE_TO_VMV(MF8)
CASE_VMERGE_TO_VMV(MF4)
CASE_VMERGE_TO_VMV(MF2)
CASE_VMERGE_TO_VMV(M1)
CASE_VMERGE_TO_VMV(M2)
CASE_VMERGE_TO_VMV(M4)
CASE_VMERGE_TO_VMV(M8)
}

if (!usesAllOnesMask(N, /* MaskOpIdx */ 3))
return false;

SDLoc DL(N);
SDValue PolicyOp =
CurDAG->getTargetConstant(/*TUMU*/ 0, DL, Subtarget->getXLenVT());
SDNode *Result = CurDAG->getMachineNode(
NewOpc, DL, N->getValueType(0),
{N->getOperand(1), N->getOperand(2), N->getOperand(4), N->getOperand(5),
PolicyOp});
ReplaceUses(N, Result);
return true;
}

bool RISCVDAGToDAGISel::doPeepholeMergeVVMFold() {
bool MadeChange = false;
SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
Expand All @@ -3730,8 +3696,6 @@ bool RISCVDAGToDAGISel::doPeepholeMergeVVMFold() {

if (IsVMerge(N) || IsVMv(N))
MadeChange |= performCombineVMergeAndVOps(N);
if (IsVMerge(N) && N->getOperand(0) == N->getOperand(1))
MadeChange |= performVMergeToVMv(N);
}
return MadeChange;
}
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
initializeRISCVOptWInstrsPass(*PR);
initializeRISCVPreRAExpandPseudoPass(*PR);
initializeRISCVExpandPseudoPass(*PR);
initializeRISCVFoldMasksPass(*PR);
initializeRISCVInsertVSETVLIPass(*PR);
initializeRISCVInsertReadWriteCSRPass(*PR);
initializeRISCVDAGToDAGISelPass(*PR);
Expand Down Expand Up @@ -414,7 +415,10 @@ void RISCVPassConfig::addPreEmitPass2() {
}

void RISCVPassConfig::addMachineSSAOptimization() {
addPass(createRISCVFoldMasksPass());

TargetPassConfig::addMachineSSAOptimization();

if (EnableMachineCombiner)
addPass(&MachineCombinerID);

Expand Down
1 change: 1 addition & 0 deletions llvm/test/CodeGen/RISCV/O3-pipeline.ll
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@
; CHECK-NEXT: Lazy Block Frequency Analysis
; CHECK-NEXT: RISC-V DAG->DAG Pattern Instruction Selection
; CHECK-NEXT: Finalize ISel and expand pseudo-instructions
; CHECK-NEXT: RISC-V Fold Masks
; CHECK-NEXT: Lazy Machine Block Frequency Analysis
; CHECK-NEXT: Early Tail Duplication
; CHECK-NEXT: Optimize machine instruction PHIs
Expand Down
71 changes: 71 additions & 0 deletions llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-to-vmv.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
# RUN: llc %s -o - -mtriple=riscv64 -mattr=+v -run-pass=riscv-fold-masks \
# RUN: -verify-machineinstrs | FileCheck %s

---
name: undef_passthru
body: |
bb.0:
liveins: $x1, $v8, $v9
; CHECK-LABEL: name: undef_passthru
; CHECK: liveins: $x1, $v8, $v9
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %false:vr = COPY $v8
; CHECK-NEXT: %true:vr = COPY $v9
; CHECK-NEXT: %avl:gprnox0 = COPY $x1
; CHECK-NEXT: %mask:vmv0 = PseudoVMSET_M_B8 %avl, 5 /* e32 */
; CHECK-NEXT: $v0 = COPY %mask
; CHECK-NEXT: %x:vr = PseudoVMV_V_V_M1 %false, %true, %avl, 5 /* e32 */, 0 /* tu, mu */
%false:vr = COPY $v8
%true:vr = COPY $v9
%avl:gprnox0 = COPY $x1
%mask:vmv0 = PseudoVMSET_M_B8 %avl, 5
$v0 = COPY %mask
%x:vrnov0 = PseudoVMERGE_VVM_M1 $noreg, %false, %true, $v0, %avl, 5
...
---
name: undef_false
body: |
bb.0:
liveins: $x1, $v8, $v9
; CHECK-LABEL: name: undef_false
; CHECK: liveins: $x1, $v8, $v9
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %pt:vrnov0 = COPY $v8
; CHECK-NEXT: %false:vr = COPY $noreg
; CHECK-NEXT: %true:vr = COPY $v9
; CHECK-NEXT: %avl:gprnox0 = COPY $x1
; CHECK-NEXT: %mask:vmv0 = PseudoVMSET_M_B8 %avl, 5 /* e32 */
; CHECK-NEXT: $v0 = COPY %mask
; CHECK-NEXT: %x:vrnov0 = PseudoVMERGE_VVM_M1 %pt, %false, %true, $v0, %avl, 5 /* e32 */
%pt:vrnov0 = COPY $v8
%false:vr = COPY $noreg
%true:vr = COPY $v9
%avl:gprnox0 = COPY $x1
%mask:vmv0 = PseudoVMSET_M_B8 %avl, 5
$v0 = COPY %mask
%x:vrnov0 = PseudoVMERGE_VVM_M1 %pt, %false, %true, $v0, %avl, 5
...
---
name: equal_passthru_false
body: |
bb.0:
liveins: $x1, $v8, $v9
; CHECK-LABEL: name: equal_passthru_false
; CHECK: liveins: $x1, $v8, $v9
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %false:vr = COPY $v8
; CHECK-NEXT: %pt:vrnov0 = COPY $v8
; CHECK-NEXT: %true:vr = COPY $v9
; CHECK-NEXT: %avl:gprnox0 = COPY $x1
; CHECK-NEXT: %mask:vmv0 = PseudoVMSET_M_B8 %avl, 5 /* e32 */
; CHECK-NEXT: $v0 = COPY %mask
; CHECK-NEXT: %x:vr = PseudoVMV_V_V_M1 %false, %true, %avl, 5 /* e32 */, 0 /* tu, mu */
%false:vr = COPY $v8
%pt:vrnov0 = COPY $v8
%true:vr = COPY $v9
%avl:gprnox0 = COPY $x1
%mask:vmv0 = PseudoVMSET_M_B8 %avl, 5
$v0 = COPY %mask
%x:vrnov0 = PseudoVMERGE_VVM_M1 %pt, %false, %true, $v0, %avl, 5
...