
Commit 0997093

[RISCV] Reduce LMUL when index is known when lowering insert_vector_elt
Continuing on from #65997, if the index of insert_vector_elt is a constant, we can work out the minimum number of registers the slideup will need and choose a smaller type to operate on. This reduces the LMUL not just for the slideup but also for the scalar insert.
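
For context, the getSmallestVTForIndex helper this patch calls was introduced in #65997 and its body is not part of this diff. A rough sketch of its shape, for illustration only and not the verbatim upstream code: given the scalable container type and a known maximum index, it steps through the LMUL=1/2/4 container types and returns the narrowest one guaranteed to hold that index for the subtarget's minimum VLEN, or std::nullopt if nothing smaller helps. (getLMUL1VT is the existing helper returning the LMUL=1 type with the same element type.)

// Approximate sketch of getSmallestVTForIndex (from #65997), not verbatim.
static std::optional<MVT> getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx,
                                                SDLoc DL, SelectionDAG &DAG,
                                                const RISCVSubtarget &Subtarget) {
  assert(VecVT.isScalableVector());
  // Worst-case element count of one LMUL=1 register group.
  const unsigned EltSize = VecVT.getScalarSizeInBits();
  const unsigned MinVLMAX = Subtarget.getRealMinVLen() / EltSize;
  MVT SmallerVT;
  if (MaxIdx < MinVLMAX)
    SmallerVT = getLMUL1VT(VecVT);
  else if (MaxIdx < MinVLMAX * 2)
    SmallerVT = getLMUL1VT(VecVT).getDoubleNumVectorElementsVT();
  else if (MaxIdx < MinVLMAX * 4)
    SmallerVT = getLMUL1VT(VecVT)
                    .getDoubleNumVectorElementsVT()
                    .getDoubleNumVectorElementsVT();
  // Only shrink if the result is actually narrower than the original type.
  if (!SmallerVT.isValid() || !VecVT.bitsGT(SmallerVT))
    return std::nullopt;
  return SmallerVT;
}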
Parent: 5ffbdd9

9 files changed: +252, -226 lines

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 25 additions & 0 deletions
@@ -7458,6 +7458,19 @@ SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
     Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
   }
 
+  MVT OrigContainerVT = ContainerVT;
+  SDValue OrigVec = Vec;
+  // If we know the index we're going to insert at, we can shrink down Vec so
+  // we're performing the vslide1down on a smaller LMUL.
+  if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx)) {
+    if (auto ShrunkVT = getSmallestVTForIndex(ContainerVT, CIdx->getZExtValue(),
+                                              DL, DAG, Subtarget)) {
+      ContainerVT = *ShrunkVT;
+      Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
+                        DAG.getVectorIdxConstant(0, DL));
+    }
+  }
+
   MVT XLenVT = Subtarget.getXLenVT();
 
   bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64;
@@ -7482,6 +7495,10 @@ SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
         VecVT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
     if (isNullConstant(Idx)) {
       Vec = DAG.getNode(Opc, DL, ContainerVT, Vec, Val, VL);
+
+      if (ContainerVT != OrigContainerVT)
+        Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
+                          Vec, DAG.getVectorIdxConstant(0, DL));
       if (!VecVT.isFixedLengthVector())
         return Vec;
       return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
@@ -7514,6 +7531,10 @@ SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
     // Bitcast back to the right container type.
     ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
 
+    if (ContainerVT != OrigContainerVT)
+      ValInVec =
+          DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
+                      ValInVec, DAG.getVectorIdxConstant(0, DL));
     if (!VecVT.isFixedLengthVector())
       return ValInVec;
     return convertFromScalableVector(VecVT, ValInVec, DAG, Subtarget);
@@ -7544,6 +7565,10 @@ SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
     Policy = RISCVII::TAIL_AGNOSTIC;
   SDValue Slideup = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, ValInVec,
                                 Idx, Mask, InsertVL, Policy);
+
+  if (ContainerVT != OrigContainerVT)
+    Slideup = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
+                          Slideup, DAG.getVectorIdxConstant(0, DL));
   if (!VecVT.isFixedLengthVector())
     return Slideup;
   return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
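
The vsetvli changes in the tests below fall straight out of this minimum-VLMAX arithmetic. A minimal standalone sketch of that arithmetic in plain C++ (not LLVM code; smallestLMULForIndex is a hypothetical name, and VLEN >= 128 is the lower bound implied by the tests' +v attribute):

#include <cstdio>
#include <optional>

// Hypothetical mirror of the LMUL arithmetic: the smallest register group
// (in whole LMUL steps) guaranteed to contain element Idx, given the
// subtarget's minimum VLEN and the element width SEW, both in bits.
static std::optional<unsigned> smallestLMULForIndex(unsigned MinVLen,
                                                    unsigned SEW,
                                                    unsigned Idx) {
  unsigned MinVLMAX = MinVLen / SEW; // worst-case elements per LMUL=1 register
  for (unsigned LMUL = 1; LMUL <= 8; LMUL *= 2)
    if (Idx < MinVLMAX * LMUL)
      return LMUL;
  return std::nullopt; // index may lie beyond LMUL=8; keep the original type
}

int main() {
  // VLEN >= 128, e32: VLMAX at LMUL=1 is at least 4.
  std::printf("m%u\n", *smallestLMULForIndex(128, 32, 0)); // m1 (insertelt_v32i32_0)
  std::printf("m%u\n", *smallestLMULForIndex(128, 32, 4)); // m2 (insertelt_v32i32_4)
  std::printf("m%u\n", *smallestLMULForIndex(128, 8, 1));  // m1 (insertelt_v64i1)
}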

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-i1.ll

Lines changed: 1 addition & 1 deletion
@@ -108,7 +108,7 @@ define <64 x i1> @insertelt_v64i1(<64 x i1> %x, i1 %elt) nounwind {
 ; CHECK-NEXT:    vmv.v.i v8, 0
 ; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
 ; CHECK-NEXT:    vmv.s.x v12, a0
-; CHECK-NEXT:    vsetivli zero, 2, e8, m4, tu, ma
+; CHECK-NEXT:    vsetivli zero, 2, e8, m1, tu, ma
 ; CHECK-NEXT:    vslideup.vi v8, v12, 1
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
 ; CHECK-NEXT:    vand.vi v8, v8, 1

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll

Lines changed: 7 additions & 7 deletions
@@ -40,7 +40,7 @@ define <32 x i32> @insertelt_v32i32_0(<32 x i32> %a, i32 %y) {
 ; CHECK-LABEL: insertelt_v32i32_0:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a1, 32
-; CHECK-NEXT:    vsetvli zero, a1, e32, m8, tu, ma
+; CHECK-NEXT:    vsetvli zero, a1, e32, m1, tu, ma
 ; CHECK-NEXT:    vmv.s.x v8, a0
 ; CHECK-NEXT:    ret
   %b = insertelement <32 x i32> %a, i32 %y, i32 0
@@ -54,7 +54,7 @@ define <32 x i32> @insertelt_v32i32_4(<32 x i32> %a, i32 %y) {
 ; CHECK-NEXT:    li a1, 32
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
 ; CHECK-NEXT:    vmv.s.x v16, a0
-; CHECK-NEXT:    vsetivli zero, 5, e32, m8, tu, ma
+; CHECK-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
 ; CHECK-NEXT:    vslideup.vi v8, v16, 4
 ; CHECK-NEXT:    ret
   %b = insertelement <32 x i32> %a, i32 %y, i32 4
@@ -92,7 +92,7 @@ define <64 x i32> @insertelt_v64i32_0(<64 x i32> %a, i32 %y) {
 ; CHECK-LABEL: insertelt_v64i32_0:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a1, 32
-; CHECK-NEXT:    vsetvli zero, a1, e32, m8, tu, ma
+; CHECK-NEXT:    vsetvli zero, a1, e32, m1, tu, ma
 ; CHECK-NEXT:    vmv.s.x v8, a0
 ; CHECK-NEXT:    ret
   %b = insertelement <64 x i32> %a, i32 %y, i32 0
@@ -390,7 +390,7 @@ define <8 x i64> @insertelt_v8i64_0(<8 x i64> %a, ptr %x) {
 ; CHECK-LABEL: insertelt_v8i64_0:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a0, -1
-; CHECK-NEXT:    vsetivli zero, 8, e64, m4, tu, ma
+; CHECK-NEXT:    vsetivli zero, 8, e64, m1, tu, ma
 ; CHECK-NEXT:    vmv.s.x v8, a0
 ; CHECK-NEXT:    ret
   %b = insertelement <8 x i64> %a, i64 -1, i32 0
@@ -468,7 +468,7 @@ define <8 x i64> @insertelt_c6_v8i64_0(<8 x i64> %a, ptr %x) {
 ; CHECK-LABEL: insertelt_c6_v8i64_0:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a0, 6
-; CHECK-NEXT:    vsetivli zero, 8, e64, m4, tu, ma
+; CHECK-NEXT:    vsetivli zero, 8, e64, m1, tu, ma
 ; CHECK-NEXT:    vmv.s.x v8, a0
 ; CHECK-NEXT:    ret
   %b = insertelement <8 x i64> %a, i64 6, i32 0
@@ -550,9 +550,9 @@ define void @insertelt_c6_v8i64_0_add(ptr %x, ptr %y) {
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; CHECK-NEXT:    vle64.v v8, (a0)
 ; CHECK-NEXT:    li a2, 6
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, tu, ma
+; CHECK-NEXT:    vsetivli zero, 8, e64, m1, tu, ma
 ; CHECK-NEXT:    vmv.s.x v8, a2
-; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
+; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; CHECK-NEXT:    vle64.v v12, (a1)
 ; CHECK-NEXT:    vadd.vv v8, v8, v12
 ; CHECK-NEXT:    vse64.v v8, (a0)
