[RISCV] Combine vslideup_vl with known VL to a smaller LMUL #66671

Closed · wants to merge 2 commits

56 changes: 47 additions & 9 deletions llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -8805,15 +8805,6 @@ SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op,
    Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
  }

  // Shrink down Vec so we're performing the slidedown on a smaller LMUL.
  unsigned LastIdx = OrigIdx + SubVecVT.getVectorNumElements() - 1;
  if (auto ShrunkVT =
          getSmallestVTForIndex(ContainerVT, LastIdx, DL, DAG, Subtarget)) {
    ContainerVT = *ShrunkVT;
    Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
                      DAG.getVectorIdxConstant(0, DL));
  }

  SDValue Mask =
      getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
  // Set the vector length to only the number of elements we care about. This
@@ -14260,6 +14251,53 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
    if (SDValue V = performCONCAT_VECTORSCombine(N, DAG, Subtarget, *this))
      return V;
    break;
  case RISCVISD::VSLIDEUP_VL:
  case RISCVISD::VSLIDEDOWN_VL: {
    MVT OrigVT = N->getSimpleValueType(0);
    auto *CVL = dyn_cast<ConstantSDNode>(N->getOperand(4));
    if (!CVL)
      break;

    // The maximum index read or written is VL - 1 for vslideup, and
    // VL + offset - 1 for vslidedown.
    unsigned MaxIdx = CVL->getZExtValue() - 1;
    if (N->getOpcode() == RISCVISD::VSLIDEDOWN_VL) {
      auto *COffset = dyn_cast<ConstantSDNode>(N->getOperand(2));
      if (!COffset)
        break;
      MaxIdx += COffset->getZExtValue();
    }

    // We can try to reduce the LMUL that a vslide* uses if we know where
    // the maximum index is. For example, if the target has Zvl128b, a
    // vslidedown of e32 with an offset of 4 and VL of 2 is only going to
    // read from the first 2 registers at most. So if we were operating at
    // LMUL=4 (nxv8i32), we can reduce it to LMUL=2 (nxv4i32).
    if (auto ShrunkVT =
            getSmallestVTForIndex(OrigVT, MaxIdx, DL, DAG, Subtarget)) {
      SDValue ShrunkPassthru =
          DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, *ShrunkVT, N->getOperand(0),
                      DAG.getVectorIdxConstant(0, DL));
      SDValue ShrunkInVec =
          DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, *ShrunkVT, N->getOperand(1),
                      DAG.getVectorIdxConstant(0, DL));

      // The only mask ever used in vslide*_vl nodes is vmset_vl, and the only
      // patterns on vslide*_vl only accept vmset_vl. So create a new vmset
      // since using an extract_subvector breaks patterns.
      assert(N->getOperand(3).getOpcode() == RISCVISD::VMSET_VL);
      SDValue ShrunkMask =
          DAG.getNode(RISCVISD::VMSET_VL, SDLoc(N), getMaskTypeFor(*ShrunkVT),
                      N->getOperand(4));
      SDValue ShrunkSlidedown =
          DAG.getNode(N->getOpcode(), DL, *ShrunkVT,
                      {ShrunkPassthru, ShrunkInVec, N->getOperand(2),
                       ShrunkMask, N->getOperand(4), N->getOperand(5)});
      return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigVT, N->getOperand(0),
                         ShrunkSlidedown, DAG.getVectorIdxConstant(0, DL));
    }
    break;
  }
  case RISCVISD::VFMV_V_F_VL: {
    const MVT VT = N->getSimpleValueType(0);
    SDValue Passthru = N->getOperand(0);
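The bound the combine relies on can be sanity-checked with a small standalone sketch. This is not the in-tree `getSmallestVTForIndex`; `smallestLMULForIndex` and its constants are made up here purely to illustrate the arithmetic from the comment above: with MaxIdx = VL - 1 (plus the offset for vslidedown), the smallest sufficient LMUL is whatever covers MaxIdx + 1 elements at the guaranteed minimum VLEN.

```cpp
#include <cstdio>

// Hypothetical helper (not the actual getSmallestVTForIndex): given the
// guaranteed minimum VLEN in bits (e.g. 128 for Zvl128b), the element width
// SEW, and the largest element index the vslide* can touch, return the
// smallest power-of-two LMUL whose register group still covers that index.
static unsigned smallestLMULForIndex(unsigned MinVLenBits, unsigned SEW,
                                     unsigned MaxIdx) {
  unsigned ElemsPerReg = MinVLenBits / SEW; // elements guaranteed per register
  unsigned NeededElems = MaxIdx + 1;
  unsigned LMUL = 1;
  while (LMUL * ElemsPerReg < NeededElems && LMUL < 8)
    LMUL *= 2;
  return LMUL;
}

int main() {
  // Example from the comment in the patch: Zvl128b, e32, vslidedown with
  // offset 4 and VL 2. MaxIdx = VL + offset - 1 = 5, so two registers
  // (LMUL=2, nxv4i32) suffice even if the original type was LMUL=4 (nxv8i32).
  unsigned MaxIdx = 2 /*VL*/ + 4 /*offset*/ - 1;
  printf("smallest LMUL = %u\n", smallestLMULForIndex(128, 32, MaxIdx)); // 2
  return 0;
}
```

The same arithmetic predicts the test updates below: for instance, in `insertelt_v32i32_4` the vslideup runs with VL=5 at e32, so it only touches indices 0 through 4, and m2 (8 elements at Zvl128b) is enough, which is why the `vsetivli` drops from m8 to m2.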
18 changes: 10 additions & 8 deletions llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv32.ll
@@ -679,12 +679,13 @@ define i64 @extractelt_nxv4i64_0(<vscale x 4 x i64> %v) {
define i64 @extractelt_nxv4i64_imm(<vscale x 4 x i64> %v) {
; CHECK-LABEL: extractelt_nxv4i64_imm:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e64, m4, ta, ma
; CHECK-NEXT: vsetivli zero, 1, e64, m2, ta, ma
; CHECK-NEXT: vslidedown.vi v8, v8, 2
; CHECK-NEXT: li a0, 32
; CHECK-NEXT: vsrl.vx v12, v8, a0
; CHECK-NEXT: vmv.x.s a1, v12
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetivli zero, 1, e64, m4, ta, ma
; CHECK-NEXT: vsrl.vx v8, v8, a1
; CHECK-NEXT: vmv.x.s a1, v8
; CHECK-NEXT: ret
%r = extractelement <vscale x 4 x i64> %v, i32 2
ret i64 %r
@@ -720,12 +721,13 @@ define i64 @extractelt_nxv8i64_0(<vscale x 8 x i64> %v) {
define i64 @extractelt_nxv8i64_imm(<vscale x 8 x i64> %v) {
; CHECK-LABEL: extractelt_nxv8i64_imm:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e64, m8, ta, ma
; CHECK-NEXT: vsetivli zero, 1, e64, m2, ta, ma
; CHECK-NEXT: vslidedown.vi v8, v8, 2
; CHECK-NEXT: li a0, 32
; CHECK-NEXT: vsrl.vx v16, v8, a0
; CHECK-NEXT: vmv.x.s a1, v16
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetivli zero, 1, e64, m8, ta, ma
; CHECK-NEXT: vsrl.vx v8, v8, a1
; CHECK-NEXT: vmv.x.s a1, v8
; CHECK-NEXT: ret
%r = extractelement <vscale x 8 x i64> %v, i32 2
ret i64 %r
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-i1.ll
@@ -108,7 +108,7 @@ define <64 x i1> @insertelt_v64i1(<64 x i1> %x, i1 %elt) nounwind {
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: vmv.s.x v12, a0
; CHECK-NEXT: vsetivli zero, 2, e8, m4, tu, ma
; CHECK-NEXT: vsetivli zero, 2, e8, m1, tu, ma
; CHECK-NEXT: vslideup.vi v8, v12, 1
; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
; CHECK-NEXT: vand.vi v8, v8, 1
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll
@@ -27,7 +27,7 @@ define <vscale x 8 x i32> @insert_nxv8i32_v2i32_2(<vscale x 8 x i32> %vec, ptr %
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: vle32.v v12, (a0)
; CHECK-NEXT: vsetivli zero, 4, e32, m4, tu, ma
; CHECK-NEXT: vsetivli zero, 4, e32, m1, tu, ma
; CHECK-NEXT: vslideup.vi v8, v12, 2
; CHECK-NEXT: ret
%sv = load <2 x i32>, ptr %svp
@@ -40,7 +40,7 @@ define <vscale x 8 x i32> @insert_nxv8i32_v2i32_6(<vscale x 8 x i32> %vec, ptr %
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: vle32.v v12, (a0)
; CHECK-NEXT: vsetivli zero, 8, e32, m4, tu, ma
; CHECK-NEXT: vsetivli zero, 8, e32, m2, tu, ma
; CHECK-NEXT: vslideup.vi v8, v12, 6
; CHECK-NEXT: ret
%sv = load <2 x i32>, ptr %svp
@@ -65,7 +65,7 @@ define <vscale x 8 x i32> @insert_nxv8i32_v8i32_0(<vscale x 8 x i32> %vec, ptr %
; LMULMAX1-NEXT: vle32.v v16, (a0)
; LMULMAX1-NEXT: vsetivli zero, 4, e32, m4, tu, ma
; LMULMAX1-NEXT: vmv.v.v v8, v12
; LMULMAX1-NEXT: vsetivli zero, 8, e32, m4, tu, ma
; LMULMAX1-NEXT: vsetivli zero, 8, e32, m2, tu, ma
; LMULMAX1-NEXT: vslideup.vi v8, v16, 4
; LMULMAX1-NEXT: ret
%sv = load <8 x i32>, ptr %svp
@@ -197,7 +197,7 @@ define void @insert_v8i32_v2i32_2(ptr %vp, ptr %svp) {
; LMULMAX2-NEXT: vle32.v v8, (a1)
; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-NEXT: vle32.v v10, (a0)
; LMULMAX2-NEXT: vsetivli zero, 4, e32, m2, tu, ma
; LMULMAX2-NEXT: vsetivli zero, 4, e32, m1, tu, ma
; LMULMAX2-NEXT: vslideup.vi v10, v8, 2
; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-NEXT: vse32.v v10, (a0)
@@ -509,7 +509,7 @@ define void @insert_v2i64_nxv16i64(ptr %psv0, ptr %psv1, <vscale x 16 x i64>* %o
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: vle64.v v16, (a1)
; CHECK-NEXT: vsetivli zero, 6, e64, m8, tu, ma
; CHECK-NEXT: vsetivli zero, 6, e64, m4, tu, ma
; CHECK-NEXT: vslideup.vi v8, v16, 4
; CHECK-NEXT: vs8r.v v8, (a2)
; CHECK-NEXT: ret
@@ -539,7 +539,7 @@ define void @insert_v2i64_nxv16i64_lo2(ptr %psv, <vscale x 16 x i64>* %out) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: vsetivli zero, 4, e64, m8, ta, ma
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vslideup.vi v16, v8, 2
; CHECK-NEXT: vs8r.v v16, (a1)
; CHECK-NEXT: ret
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
@@ -54,7 +54,7 @@ define <32 x i32> @insertelt_v32i32_4(<32 x i32> %a, i32 %y) {
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v16, a0
; CHECK-NEXT: vsetivli zero, 5, e32, m8, tu, ma
; CHECK-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; CHECK-NEXT: vslideup.vi v8, v16, 4
; CHECK-NEXT: ret
%b = insertelement <32 x i32> %a, i32 %y, i32 4