diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index ad56bc757115f..cc45f175f492d 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -7466,6 +7466,32 @@ SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
   return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
 }
 
+// Given a scalable vector type and an index into it, returns the type for the
+// smallest subvector that the index fits in. This can be used to reduce LMUL
+// for operations like vslidedown.
+//
+// E.g. With Zvl128b, index 3 in a nxv4i32 fits within the first nxv2i32.
+static std::optional<MVT>
+getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG,
+                      const RISCVSubtarget &Subtarget) {
+  assert(VecVT.isScalableVector());
+  const unsigned EltSize = VecVT.getScalarSizeInBits();
+  const unsigned VectorBitsMin = Subtarget.getRealMinVLen();
+  const unsigned MinVLMAX = VectorBitsMin / EltSize;
+  MVT SmallerVT;
+  if (MaxIdx < MinVLMAX)
+    SmallerVT = getLMUL1VT(VecVT);
+  else if (MaxIdx < MinVLMAX * 2)
+    SmallerVT = getLMUL1VT(VecVT).getDoubleNumVectorElementsVT();
+  else if (MaxIdx < MinVLMAX * 4)
+    SmallerVT = getLMUL1VT(VecVT)
+                    .getDoubleNumVectorElementsVT()
+                    .getDoubleNumVectorElementsVT();
+  if (!SmallerVT.isValid() || !VecVT.bitsGT(SmallerVT))
+    return std::nullopt;
+  return SmallerVT;
+}
+
 // Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
 // extract the first element: (extractelt (slidedown vec, idx), 0). For integer
 // types this is done using VMV_X_S to allow us to glean information about the
@@ -7554,21 +7580,9 @@ SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
   if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx))
     MaxIdx = IdxC->getZExtValue();
   if (MaxIdx) {
-    const unsigned EltSize = ContainerVT.getScalarSizeInBits();
-    const unsigned VectorBitsMin = Subtarget.getRealMinVLen();
-    const unsigned MinVLMAX = VectorBitsMin/EltSize;
-    MVT SmallerVT;
-    if (*MaxIdx < MinVLMAX)
-      SmallerVT = getLMUL1VT(ContainerVT);
-    else if (*MaxIdx < MinVLMAX * 2)
-      SmallerVT = getLMUL1VT(ContainerVT)
-                      .getDoubleNumVectorElementsVT();
-    else if (*MaxIdx < MinVLMAX * 4)
-      SmallerVT = getLMUL1VT(ContainerVT)
-                      .getDoubleNumVectorElementsVT()
-                      .getDoubleNumVectorElementsVT();
-    if (SmallerVT.isValid() && ContainerVT.bitsGT(SmallerVT)) {
-      ContainerVT = SmallerVT;
+    if (auto SmallerVT =
+            getSmallestVTForIndex(ContainerVT, *MaxIdx, DL, DAG, Subtarget)) {
+      ContainerVT = *SmallerVT;
       Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
                         DAG.getConstant(0, DL, XLenVT));
     }
@@ -8751,6 +8765,16 @@ SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op,
     ContainerVT = getContainerForFixedLengthVector(VecVT);
     Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
   }
+
+  // Shrink down Vec so we're performing the slidedown on a smaller LMUL.
+  unsigned LastIdx = OrigIdx + SubVecVT.getVectorNumElements() - 1;
+  if (auto ShrunkVT =
+          getSmallestVTForIndex(ContainerVT, LastIdx, DL, DAG, Subtarget)) {
+    ContainerVT = *ShrunkVT;
+    Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
+                      DAG.getVectorIdxConstant(0, DL));
+  }
+
   SDValue Mask = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
   // Set the vector length to only the number of elements we care about.
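As a side note, not part of the diff itself: the narrowing rule that getSmallestVTForIndex encodes can be sketched in isolation as the index-to-LMUL arithmetic below. This is a minimal standalone model, assuming Zvl128b (minimum VLEN of 128) and an integer-LMUL source type; minVLMax and smallestLMULForIndex are illustrative names, not LLVM APIs, and the sketch reports a plain LMUL number rather than constructing an MVT.

// Minimal standalone sketch (not LLVM code) of the index-to-LMUL narrowing
// that getSmallestVTForIndex performs, under the assumptions stated above.
#include <cstdio>
#include <optional>

// VLMAX of a single LMUL=1 register group: (minimum VLEN) / SEW.
static unsigned minVLMax(unsigned MinVLenBits, unsigned SEWBits) {
  return MinVLenBits / SEWBits;
}

// Smallest LMUL (1, 2 or 4) whose register group is still guaranteed to
// contain element MaxIdx, or nullopt if no narrowing below the source LMUL
// is possible (mirrors the !SmallerVT.isValid() / !VecVT.bitsGT(SmallerVT)
// bail-out in the patch).
static std::optional<unsigned>
smallestLMULForIndex(unsigned MaxIdx, unsigned MinVLMAX, unsigned SrcLMUL) {
  unsigned Narrowed = 0;
  if (MaxIdx < MinVLMAX)
    Narrowed = 1;
  else if (MaxIdx < MinVLMAX * 2)
    Narrowed = 2;
  else if (MaxIdx < MinVLMAX * 4)
    Narrowed = 4;
  if (Narrowed == 0 || Narrowed >= SrcLMUL)
    return std::nullopt;
  return Narrowed;
}

int main() {
  // e32 with Zvl128b: MinVLMAX = 128 / 32 = 4 elements per LMUL=1 group.
  const unsigned MinVLMAX = minVLMax(128, 32);
  // Extracting <2 x i32> at offset 8 from nxv16i32 (LMUL 8) touches elements
  // 8..9, so the slidedown only needs LMUL 4 -- matching the m8 -> m4 change
  // in extract_v2i32_nxv16i32_8 in the test diff below.
  if (auto LMUL = smallestLMULForIndex(/*MaxIdx=*/9, MinVLMAX, /*SrcLMUL=*/8))
    std::printf("vslidedown can use m%u\n", *LMUL);
  return 0;
}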
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-subvector.ll
index fa23b9a1b76fd..b4260b04604cd 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-subvector.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-subvector.ll
@@ -113,7 +113,7 @@ define void @extract_v2i32_v8i32_2(ptr %x, ptr %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vsetivli zero, 2, e32, m2, ta, ma
+; CHECK-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
 ; CHECK-NEXT:    vslidedown.vi v8, v8, 2
 ; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
 ; CHECK-NEXT:    vse32.v v8, (a1)
@@ -171,7 +171,7 @@ define void @extract_v2i32_nxv16i32_0(<vscale x 16 x i32> %x, ptr %y) {
 define void @extract_v2i32_nxv16i32_2(<vscale x 16 x i32> %x, ptr %y) {
 ; CHECK-LABEL: extract_v2i32_nxv16i32_2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e32, m8, ta, ma
+; CHECK-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
 ; CHECK-NEXT:    vslidedown.vi v8, v8, 2
 ; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
 ; CHECK-NEXT:    vse32.v v8, (a0)
@@ -184,7 +184,7 @@ define void @extract_v2i32_nxv16i32_2(<vscale x 16 x i32> %x, ptr %y) {
 define void @extract_v2i32_nxv16i32_4(<vscale x 16 x i32> %x, ptr %y) {
 ; CHECK-LABEL: extract_v2i32_nxv16i32_4:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e32, m8, ta, ma
+; CHECK-NEXT:    vsetivli zero, 2, e32, m2, ta, ma
 ; CHECK-NEXT:    vslidedown.vi v8, v8, 4
 ; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
 ; CHECK-NEXT:    vse32.v v8, (a0)
@@ -197,7 +197,7 @@ define void @extract_v2i32_nxv16i32_4(<vscale x 16 x i32> %x, ptr %y) {
 define void @extract_v2i32_nxv16i32_6(<vscale x 16 x i32> %x, ptr %y) {
 ; CHECK-LABEL: extract_v2i32_nxv16i32_6:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e32, m8, ta, ma
+; CHECK-NEXT:    vsetivli zero, 2, e32, m2, ta, ma
 ; CHECK-NEXT:    vslidedown.vi v8, v8, 6
 ; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
 ; CHECK-NEXT:    vse32.v v8, (a0)
@@ -210,7 +210,7 @@ define void @extract_v2i32_nxv16i32_6(<vscale x 16 x i32> %x, ptr %y) {
 define void @extract_v2i32_nxv16i32_8(<vscale x 16 x i32> %x, ptr %y) {
 ; CHECK-LABEL: extract_v2i32_nxv16i32_8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e32, m8, ta, ma
+; CHECK-NEXT:    vsetivli zero, 2, e32, m4, ta, ma
 ; CHECK-NEXT:    vslidedown.vi v8, v8, 8
 ; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
 ; CHECK-NEXT:    vse32.v v8, (a0)
@@ -273,7 +273,7 @@ define void @extract_v2i8_nxv2i8_6(<vscale x 2 x i8> %x, ptr %y) {
 define void @extract_v8i32_nxv16i32_8(<vscale x 16 x i32> %x, ptr %y) {
 ; CHECK-LABEL: extract_v8i32_nxv16i32_8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 8, e32, m8, ta, ma
+; CHECK-NEXT:    vsetivli zero, 8, e32, m4, ta, ma
 ; CHECK-NEXT:    vslidedown.vi v8, v8, 8
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vse32.v v8, (a0)
@@ -437,7 +437,7 @@ define void @extract_v2i1_v64i1_2(ptr %x, ptr %y) {
 ; CHECK-NEXT:    vlm.v v0, (a0)
 ; CHECK-NEXT:    vmv.v.i v8, 0
 ; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT:    vsetivli zero, 2, e8, m4, ta, ma
+; CHECK-NEXT:    vsetivli zero, 2, e8, m1, ta, ma
 ; CHECK-NEXT:    vslidedown.vi v8, v8, 2
 ; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
@@ -555,7 +555,7 @@ define void @extract_v2i1_nxv64i1_2(<vscale x 64 x i1> %x, ptr %y) {
 ; CHECK-NEXT:    vsetvli a1, zero, e8, m8, ta, ma
 ; CHECK-NEXT:    vmv.v.i v8, 0
 ; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT:    vsetivli zero, 2, e8, m8, ta, ma
+; CHECK-NEXT:    vsetivli zero, 2, e8, m1, ta, ma
 ; CHECK-NEXT:    vslidedown.vi v8, v8, 2
 ; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
@@ -581,7 +581,7 @@ define void @extract_v2i1_nxv64i1_42(<vscale x 64 x i1> %x, ptr %y) {
 ; CHECK-NEXT:    vmv.v.i v8, 0
 ; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
 ; CHECK-NEXT:    li a1, 42
-; CHECK-NEXT:    vsetivli zero, 2, e8, m8, ta, ma
+; CHECK-NEXT:    vsetivli zero, 2, e8, m4, ta, ma
 ; CHECK-NEXT:    vslidedown.vx v8, v8, a1
 ; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
@@ -606,7 +606,7 @@ define void @extract_v2i1_nxv32i1_26(<vscale x 32 x i1> %x, ptr %y) {
 ; CHECK-NEXT:    vsetvli a1, zero, e8, m4, ta, ma
 ; CHECK-NEXT:    vmv.v.i v8, 0
 ; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT:    vsetivli zero, 2, e8, m4, ta, ma
+; CHECK-NEXT:    vsetivli zero, 2, e8, m2, ta, ma
 ; CHECK-NEXT:    vslidedown.vi v8, v8, 26
 ; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
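Again as a side note rather than part of the diff: the mask-vector cases above run the slidedown at SEW=8 (after the vmerge widening), so under the same Zvl128b assumption MinVLMAX is 128 / 8 = 16. The following is a small self-contained check of the two updated vslidedown LMULs; narrowedLMUL is an illustrative name, not an LLVM API.

// Sketch re-deriving the new LMULs in the last two test updates, assuming
// Zvl128b and SEW=8 for the widened mask vector.
#include <cassert>

// Smallest LMUL (1/2/4) whose register group still holds element MaxIdx for
// the given MinVLMAX (= minimum VLEN / SEW); 0 means no narrowing applies.
static unsigned narrowedLMUL(unsigned MaxIdx, unsigned MinVLMAX) {
  if (MaxIdx < MinVLMAX)
    return 1;
  if (MaxIdx < MinVLMAX * 2)
    return 2;
  if (MaxIdx < MinVLMAX * 4)
    return 4;
  return 0;
}

int main() {
  const unsigned MinVLMAX_e8 = 128 / 8; // 16 elements per LMUL=1 group
  // extract_v2i1_nxv64i1_42: the slide reads elements 42..43 -> m4 (was m8).
  assert(narrowedLMUL(43, MinVLMAX_e8) == 4);
  // extract_v2i1_nxv32i1_26: the slide reads elements 26..27 -> m2 (was m4).
  assert(narrowedLMUL(27, MinVLMAX_e8) == 2);
  return 0;
}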