diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index de58335b43565..179db28ec83e4 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -8805,15 +8805,6 @@ SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op, Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); } - // Shrink down Vec so we're performing the slidedown on a smaller LMUL. - unsigned LastIdx = OrigIdx + SubVecVT.getVectorNumElements() - 1; - if (auto ShrunkVT = - getSmallestVTForIndex(ContainerVT, LastIdx, DL, DAG, Subtarget)) { - ContainerVT = *ShrunkVT; - Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec, - DAG.getVectorIdxConstant(0, DL)); - } - SDValue Mask = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first; // Set the vector length to only the number of elements we care about. This @@ -14260,6 +14251,53 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, if (SDValue V = performCONCAT_VECTORSCombine(N, DAG, Subtarget, *this)) return V; break; + case RISCVISD::VSLIDEUP_VL: + case RISCVISD::VSLIDEDOWN_VL: { + MVT OrigVT = N->getSimpleValueType(0); + auto *CVL = dyn_cast(N->getOperand(4)); + if (!CVL) + break; + + // The maximum index read or written is VL - 1 for vslideup, and VL + offset + // - 1 for vslidedown. + unsigned MaxIdx = CVL->getZExtValue() - 1; + if (N->getOpcode() == RISCVISD::VSLIDEDOWN_VL) { + auto *COffset = dyn_cast(N->getOperand(2)); + if (!COffset) + break; + MaxIdx += COffset->getZExtValue(); + } + + // We can try and reduce the LMUL that a vslide* uses if we know where + // the maximum index is. For example, if the target has Zvl128b, a + // vslidedown of e32 with with an offset of 4 and VL of 2 is only going to + // read from the first 2 registers at most. So if we were operating at + // LMUL=4 (nxv8i32), we can reduce it to LMUL=2(nxv4i32). + if (auto ShrunkVT = + getSmallestVTForIndex(OrigVT, MaxIdx, DL, DAG, Subtarget)) { + SDValue ShrunkPassthru = + DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, *ShrunkVT, N->getOperand(0), + DAG.getVectorIdxConstant(0, DL)); + SDValue ShrunkInVec = + DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, *ShrunkVT, N->getOperand(1), + DAG.getVectorIdxConstant(0, DL)); + + // The only mask ever used in vslide*_vl nodes is vmset_vl, and the only + // patterns on vslide*_vl only accept vmset_vl. So create a new vmset + // since using an extract_subvector breaks patterns. 
+ assert(N->getOperand(3).getOpcode() == RISCVISD::VMSET_VL); + SDValue ShrunkMask = + DAG.getNode(RISCVISD::VMSET_VL, SDLoc(N), getMaskTypeFor(*ShrunkVT), + N->getOperand(4)); + SDValue ShrunkSlidedown = + DAG.getNode(N->getOpcode(), DL, *ShrunkVT, + {ShrunkPassthru, ShrunkInVec, N->getOperand(2), + ShrunkMask, N->getOperand(4), N->getOperand(5)}); + return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigVT, N->getOperand(0), + ShrunkSlidedown, DAG.getVectorIdxConstant(0, DL)); + } + break; + } case RISCVISD::VFMV_V_F_VL: { const MVT VT = N->getSimpleValueType(0); SDValue Passthru = N->getOperand(0); diff --git a/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv32.ll index fd2f89e26e598..c3181a296abe0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv32.ll @@ -679,12 +679,13 @@ define i64 @extractelt_nxv4i64_0( %v) { define i64 @extractelt_nxv4i64_imm( %v) { ; CHECK-LABEL: extractelt_nxv4i64_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e64, m4, ta, ma +; CHECK-NEXT: vsetivli zero, 1, e64, m2, ta, ma ; CHECK-NEXT: vslidedown.vi v8, v8, 2 -; CHECK-NEXT: li a0, 32 -; CHECK-NEXT: vsrl.vx v12, v8, a0 -; CHECK-NEXT: vmv.x.s a1, v12 ; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: li a1, 32 +; CHECK-NEXT: vsetivli zero, 1, e64, m4, ta, ma +; CHECK-NEXT: vsrl.vx v8, v8, a1 +; CHECK-NEXT: vmv.x.s a1, v8 ; CHECK-NEXT: ret %r = extractelement %v, i32 2 ret i64 %r @@ -720,12 +721,13 @@ define i64 @extractelt_nxv8i64_0( %v) { define i64 @extractelt_nxv8i64_imm( %v) { ; CHECK-LABEL: extractelt_nxv8i64_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e64, m8, ta, ma +; CHECK-NEXT: vsetivli zero, 1, e64, m2, ta, ma ; CHECK-NEXT: vslidedown.vi v8, v8, 2 -; CHECK-NEXT: li a0, 32 -; CHECK-NEXT: vsrl.vx v16, v8, a0 -; CHECK-NEXT: vmv.x.s a1, v16 ; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: li a1, 32 +; CHECK-NEXT: vsetivli zero, 1, e64, m8, ta, ma +; CHECK-NEXT: vsrl.vx v8, v8, a1 +; CHECK-NEXT: vmv.x.s a1, v8 ; CHECK-NEXT: ret %r = extractelement %v, i32 2 ret i64 %r diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-i1.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-i1.ll index b3cbad3d9e6b1..f7737784d4ca5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-i1.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-i1.ll @@ -108,7 +108,7 @@ define <64 x i1> @insertelt_v64i1(<64 x i1> %x, i1 %elt) nounwind { ; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 ; CHECK-NEXT: vmv.s.x v12, a0 -; CHECK-NEXT: vsetivli zero, 2, e8, m4, tu, ma +; CHECK-NEXT: vsetivli zero, 2, e8, m1, tu, ma ; CHECK-NEXT: vslideup.vi v8, v12, 1 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 1 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll index 1d6a45ed36f33..133b09428ed96 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll @@ -27,7 +27,7 @@ define @insert_nxv8i32_v2i32_2( %vec, ptr % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vle32.v v12, (a0) -; CHECK-NEXT: vsetivli zero, 4, e32, m4, tu, ma +; CHECK-NEXT: vsetivli zero, 4, e32, m1, tu, ma ; CHECK-NEXT: vslideup.vi v8, v12, 2 ; CHECK-NEXT: ret %sv = load <2 x i32>, ptr %svp @@ -40,7 +40,7 @@ define @insert_nxv8i32_v2i32_6( %vec, ptr % ; CHECK: # %bb.0: ; CHECK-NEXT: 
vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vle32.v v12, (a0) -; CHECK-NEXT: vsetivli zero, 8, e32, m4, tu, ma +; CHECK-NEXT: vsetivli zero, 8, e32, m2, tu, ma ; CHECK-NEXT: vslideup.vi v8, v12, 6 ; CHECK-NEXT: ret %sv = load <2 x i32>, ptr %svp @@ -65,7 +65,7 @@ define @insert_nxv8i32_v8i32_0( %vec, ptr % ; LMULMAX1-NEXT: vle32.v v16, (a0) ; LMULMAX1-NEXT: vsetivli zero, 4, e32, m4, tu, ma ; LMULMAX1-NEXT: vmv.v.v v8, v12 -; LMULMAX1-NEXT: vsetivli zero, 8, e32, m4, tu, ma +; LMULMAX1-NEXT: vsetivli zero, 8, e32, m2, tu, ma ; LMULMAX1-NEXT: vslideup.vi v8, v16, 4 ; LMULMAX1-NEXT: ret %sv = load <8 x i32>, ptr %svp @@ -197,7 +197,7 @@ define void @insert_v8i32_v2i32_2(ptr %vp, ptr %svp) { ; LMULMAX2-NEXT: vle32.v v8, (a1) ; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; LMULMAX2-NEXT: vle32.v v10, (a0) -; LMULMAX2-NEXT: vsetivli zero, 4, e32, m2, tu, ma +; LMULMAX2-NEXT: vsetivli zero, 4, e32, m1, tu, ma ; LMULMAX2-NEXT: vslideup.vi v10, v8, 2 ; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; LMULMAX2-NEXT: vse32.v v10, (a0) @@ -509,7 +509,7 @@ define void @insert_v2i64_nxv16i64(ptr %psv0, ptr %psv1, * %o ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: vle64.v v16, (a1) -; CHECK-NEXT: vsetivli zero, 6, e64, m8, tu, ma +; CHECK-NEXT: vsetivli zero, 6, e64, m4, tu, ma ; CHECK-NEXT: vslideup.vi v8, v16, 4 ; CHECK-NEXT: vs8r.v v8, (a2) ; CHECK-NEXT: ret @@ -539,7 +539,7 @@ define void @insert_v2i64_nxv16i64_lo2(ptr %psv, * %out) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 4, e64, m8, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-NEXT: vslideup.vi v16, v8, 2 ; CHECK-NEXT: vs8r.v v16, (a1) ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll index 373a96356a207..6f5ab60fb4ad0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll @@ -54,7 +54,7 @@ define <32 x i32> @insertelt_v32i32_4(<32 x i32> %a, i32 %y) { ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma ; CHECK-NEXT: vmv.s.x v16, a0 -; CHECK-NEXT: vsetivli zero, 5, e32, m8, tu, ma +; CHECK-NEXT: vsetivli zero, 5, e32, m2, tu, ma ; CHECK-NEXT: vslideup.vi v8, v16, 4 ; CHECK-NEXT: ret %b = insertelement <32 x i32> %a, i32 %y, i32 4 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-explodevector.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-explodevector.ll index 4e60edf058450..eb74c5e608b93 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-explodevector.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-explodevector.ll @@ -811,9 +811,11 @@ define i64 @explode_4xi64(<4 x i64> %v) { ; RV32-NEXT: vsrl.vx v10, v8, a0 ; RV32-NEXT: vmv.x.s a1, v10 ; RV32-NEXT: vmv.x.s a2, v8 +; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vslidedown.vi v10, v8, 1 -; RV32-NEXT: vsrl.vx v12, v10, a0 -; RV32-NEXT: vmv.x.s a3, v12 +; RV32-NEXT: vmv.x.s a3, v10 +; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma +; RV32-NEXT: vsrl.vx v10, v10, a0 ; RV32-NEXT: vmv.x.s a4, v10 ; RV32-NEXT: vslidedown.vi v10, v8, 2 ; RV32-NEXT: vsrl.vx v12, v10, a0 @@ -823,12 +825,12 @@ define i64 @explode_4xi64(<4 x i64> %v) { ; RV32-NEXT: vsrl.vx v10, v8, a0 ; RV32-NEXT: vmv.x.s a0, v10 ; RV32-NEXT: vmv.x.s a7, v8 -; RV32-NEXT: add a1, a1, a3 -; RV32-NEXT: add a4, a2, a4 -; RV32-NEXT: sltu a2, a4, a2 +; RV32-NEXT: add a1, 
a1, a4 +; RV32-NEXT: add a3, a2, a3 +; RV32-NEXT: sltu a2, a3, a2 ; RV32-NEXT: add a1, a1, a2 -; RV32-NEXT: add a6, a4, a6 -; RV32-NEXT: sltu a2, a6, a4 +; RV32-NEXT: add a6, a3, a6 +; RV32-NEXT: sltu a2, a6, a3 ; RV32-NEXT: add a1, a1, a5 ; RV32-NEXT: add a0, a2, a0 ; RV32-NEXT: add a1, a1, a0 @@ -875,15 +877,21 @@ define i64 @explode_8xi64(<8 x i64> %v) { ; RV32-NEXT: vsrl.vx v12, v8, a0 ; RV32-NEXT: vmv.x.s a1, v12 ; RV32-NEXT: vmv.x.s a2, v8 +; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vslidedown.vi v12, v8, 1 -; RV32-NEXT: vsrl.vx v16, v12, a0 -; RV32-NEXT: vmv.x.s a3, v16 +; RV32-NEXT: vmv.x.s a3, v12 +; RV32-NEXT: vsetivli zero, 1, e64, m4, ta, ma +; RV32-NEXT: vsrl.vx v12, v12, a0 ; RV32-NEXT: vmv.x.s a4, v12 +; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma ; RV32-NEXT: vslidedown.vi v12, v8, 2 +; RV32-NEXT: vsetivli zero, 1, e64, m4, ta, ma ; RV32-NEXT: vsrl.vx v16, v12, a0 ; RV32-NEXT: vmv.x.s a5, v16 ; RV32-NEXT: vmv.x.s a6, v12 +; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma ; RV32-NEXT: vslidedown.vi v12, v8, 3 +; RV32-NEXT: vsetivli zero, 1, e64, m4, ta, ma ; RV32-NEXT: vsrl.vx v16, v12, a0 ; RV32-NEXT: vmv.x.s a7, v16 ; RV32-NEXT: vmv.x.s t0, v12 @@ -903,19 +911,19 @@ define i64 @explode_8xi64(<8 x i64> %v) { ; RV32-NEXT: vsrl.vx v12, v8, a0 ; RV32-NEXT: vmv.x.s a0, v12 ; RV32-NEXT: vmv.x.s s0, v8 -; RV32-NEXT: add a1, a1, a3 -; RV32-NEXT: add a4, a2, a4 -; RV32-NEXT: sltu a2, a4, a2 +; RV32-NEXT: add a1, a1, a4 +; RV32-NEXT: add a3, a2, a3 +; RV32-NEXT: sltu a2, a3, a2 ; RV32-NEXT: add a1, a1, a2 -; RV32-NEXT: add a6, a4, a6 -; RV32-NEXT: sltu a2, a6, a4 +; RV32-NEXT: add a6, a3, a6 +; RV32-NEXT: sltu a2, a6, a3 ; RV32-NEXT: add a1, a1, a5 -; RV32-NEXT: add a2, a2, a7 -; RV32-NEXT: add a1, a1, a2 ; RV32-NEXT: add t0, a6, t0 -; RV32-NEXT: sltu a2, t0, a6 -; RV32-NEXT: add a2, a2, t1 +; RV32-NEXT: sltu a3, t0, a6 +; RV32-NEXT: add a2, a2, a7 ; RV32-NEXT: add a1, a1, a2 +; RV32-NEXT: add a3, a3, t1 +; RV32-NEXT: add a1, a1, a3 ; RV32-NEXT: add t2, t0, t2 ; RV32-NEXT: sltu a2, t2, t0 ; RV32-NEXT: add a2, a2, t3 @@ -1029,115 +1037,129 @@ define i64 @explode_16xi64(<16 x i64> %v) { ; RV32-NEXT: vmv.x.s a0, v16 ; RV32-NEXT: sw a0, 8(sp) # 4-byte Folded Spill ; RV32-NEXT: vmv.x.s a0, v8 +; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vslidedown.vi v16, v8, 1 +; RV32-NEXT: vmv.x.s a3, v16 +; RV32-NEXT: vsetivli zero, 1, e64, m8, ta, ma +; RV32-NEXT: vsrl.vx v16, v16, a1 +; RV32-NEXT: vmv.x.s a4, v16 +; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma +; RV32-NEXT: vslidedown.vi v16, v8, 2 +; RV32-NEXT: vsetivli zero, 1, e64, m8, ta, ma ; RV32-NEXT: vsrl.vx v24, v16, a1 ; RV32-NEXT: vmv.x.s a5, v24 ; RV32-NEXT: vmv.x.s a6, v16 -; RV32-NEXT: vslidedown.vi v16, v8, 2 -; RV32-NEXT: vsrl.vx v24, v16, a1 -; RV32-NEXT: vmv.x.s a3, v24 -; RV32-NEXT: vmv.x.s a4, v16 +; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma ; RV32-NEXT: vslidedown.vi v16, v8, 3 +; RV32-NEXT: vsetivli zero, 1, e64, m8, ta, ma ; RV32-NEXT: vsrl.vx v24, v16, a1 -; RV32-NEXT: vmv.x.s s2, v24 +; RV32-NEXT: vmv.x.s t0, v24 ; RV32-NEXT: vmv.x.s a7, v16 +; RV32-NEXT: vsetivli zero, 1, e64, m4, ta, ma ; RV32-NEXT: vslidedown.vi v16, v8, 4 -; RV32-NEXT: vsrl.vx v24, v16, a1 -; RV32-NEXT: vmv.x.s s3, v24 -; RV32-NEXT: vmv.x.s t0, v16 -; RV32-NEXT: vslidedown.vi v16, v8, 5 -; RV32-NEXT: vsrl.vx v24, v16, a1 -; RV32-NEXT: vmv.x.s s4, v24 ; RV32-NEXT: vmv.x.s t1, v16 -; RV32-NEXT: vslidedown.vi v16, v8, 6 -; RV32-NEXT: vsrl.vx v24, v16, a1 -; RV32-NEXT: vmv.x.s s5, v24 +; RV32-NEXT: vsetivli zero, 1, e64, m8, 
ta, ma +; RV32-NEXT: vsrl.vx v16, v16, a1 +; RV32-NEXT: vmv.x.s t3, v16 +; RV32-NEXT: vsetivli zero, 1, e64, m4, ta, ma +; RV32-NEXT: vslidedown.vi v16, v8, 5 ; RV32-NEXT: vmv.x.s t2, v16 +; RV32-NEXT: vsetivli zero, 1, e64, m8, ta, ma +; RV32-NEXT: vsrl.vx v16, v16, a1 +; RV32-NEXT: vmv.x.s t5, v16 +; RV32-NEXT: vsetivli zero, 1, e64, m4, ta, ma +; RV32-NEXT: vslidedown.vi v16, v8, 6 +; RV32-NEXT: vmv.x.s t4, v16 +; RV32-NEXT: vsetivli zero, 1, e64, m8, ta, ma +; RV32-NEXT: vsrl.vx v16, v16, a1 +; RV32-NEXT: vmv.x.s s0, v16 +; RV32-NEXT: vsetivli zero, 1, e64, m4, ta, ma ; RV32-NEXT: vslidedown.vi v16, v8, 7 -; RV32-NEXT: vsrl.vx v24, v16, a1 -; RV32-NEXT: vmv.x.s s6, v24 -; RV32-NEXT: vmv.x.s t3, v16 +; RV32-NEXT: vmv.x.s t6, v16 +; RV32-NEXT: vsetivli zero, 1, e64, m8, ta, ma +; RV32-NEXT: vsrl.vx v16, v16, a1 +; RV32-NEXT: vmv.x.s ra, v16 ; RV32-NEXT: vslidedown.vi v16, v8, 8 ; RV32-NEXT: vsrl.vx v24, v16, a1 -; RV32-NEXT: vmv.x.s s7, v24 -; RV32-NEXT: vmv.x.s t4, v16 +; RV32-NEXT: vmv.x.s s6, v24 +; RV32-NEXT: vmv.x.s s1, v16 ; RV32-NEXT: vslidedown.vi v16, v8, 9 ; RV32-NEXT: vsrl.vx v24, v16, a1 -; RV32-NEXT: vmv.x.s s8, v24 -; RV32-NEXT: vmv.x.s t5, v16 +; RV32-NEXT: vmv.x.s s7, v24 +; RV32-NEXT: vmv.x.s s2, v16 ; RV32-NEXT: vslidedown.vi v16, v8, 10 ; RV32-NEXT: vsrl.vx v24, v16, a1 -; RV32-NEXT: vmv.x.s s9, v24 -; RV32-NEXT: vmv.x.s t6, v16 +; RV32-NEXT: vmv.x.s s8, v24 +; RV32-NEXT: vmv.x.s s3, v16 ; RV32-NEXT: vslidedown.vi v16, v8, 11 ; RV32-NEXT: vsrl.vx v24, v16, a1 -; RV32-NEXT: vmv.x.s s10, v24 -; RV32-NEXT: vmv.x.s s0, v16 +; RV32-NEXT: vmv.x.s s9, v24 +; RV32-NEXT: vmv.x.s s4, v16 ; RV32-NEXT: vslidedown.vi v16, v8, 12 ; RV32-NEXT: vsrl.vx v24, v16, a1 -; RV32-NEXT: vmv.x.s s11, v24 -; RV32-NEXT: vmv.x.s s1, v16 +; RV32-NEXT: vmv.x.s s10, v24 +; RV32-NEXT: vmv.x.s s5, v16 ; RV32-NEXT: vslidedown.vi v0, v8, 13 ; RV32-NEXT: vsrl.vx v16, v0, a1 -; RV32-NEXT: vmv.x.s ra, v16 +; RV32-NEXT: vmv.x.s s11, v16 ; RV32-NEXT: vslidedown.vi v16, v8, 14 ; RV32-NEXT: vsrl.vx v24, v16, a1 ; RV32-NEXT: vslidedown.vi v8, v8, 15 ; RV32-NEXT: vmv.x.s a2, v0 ; RV32-NEXT: vsrl.vx v0, v8, a1 ; RV32-NEXT: lw a1, 8(sp) # 4-byte Folded Reload -; RV32-NEXT: add a5, a1, a5 -; RV32-NEXT: add a6, a0, a6 -; RV32-NEXT: sltu a0, a6, a0 -; RV32-NEXT: add a0, a5, a0 -; RV32-NEXT: add a0, a0, a3 -; RV32-NEXT: add a4, a6, a4 -; RV32-NEXT: sltu a1, a4, a6 -; RV32-NEXT: add a1, a1, s2 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add a7, a4, a7 -; RV32-NEXT: sltu a1, a7, a4 -; RV32-NEXT: add a1, a1, s3 +; RV32-NEXT: add a4, a1, a4 +; RV32-NEXT: add a3, a0, a3 +; RV32-NEXT: sltu a0, a3, a0 +; RV32-NEXT: add a0, a4, a0 +; RV32-NEXT: add a0, a0, a5 +; RV32-NEXT: add a6, a3, a6 +; RV32-NEXT: sltu a1, a6, a3 +; RV32-NEXT: add a1, a1, t0 ; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add t0, a7, t0 -; RV32-NEXT: sltu a1, t0, a7 -; RV32-NEXT: add a1, a1, s4 +; RV32-NEXT: add a7, a6, a7 +; RV32-NEXT: sltu a1, a7, a6 +; RV32-NEXT: add a1, a1, t3 ; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add t1, t0, t1 -; RV32-NEXT: sltu a1, t1, t0 -; RV32-NEXT: add a1, a1, s5 +; RV32-NEXT: add t1, a7, t1 +; RV32-NEXT: sltu a1, t1, a7 +; RV32-NEXT: add a1, a1, t5 ; RV32-NEXT: add a0, a0, a1 ; RV32-NEXT: add t2, t1, t2 ; RV32-NEXT: sltu a1, t2, t1 +; RV32-NEXT: add a1, a1, s0 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add t4, t2, t4 +; RV32-NEXT: sltu a1, t4, t2 +; RV32-NEXT: add a1, a1, ra +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add t6, t4, t6 +; RV32-NEXT: sltu a1, t6, t4 ; RV32-NEXT: add a1, a1, s6 ; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: 
add t3, t2, t3 -; RV32-NEXT: sltu a1, t3, t2 +; RV32-NEXT: add s1, t6, s1 +; RV32-NEXT: sltu a1, s1, t6 ; RV32-NEXT: add a1, a1, s7 ; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add t4, t3, t4 -; RV32-NEXT: sltu a1, t4, t3 +; RV32-NEXT: add s2, s1, s2 +; RV32-NEXT: sltu a1, s2, s1 ; RV32-NEXT: add a1, a1, s8 ; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add t5, t4, t5 -; RV32-NEXT: sltu a1, t5, t4 +; RV32-NEXT: add s3, s2, s3 +; RV32-NEXT: sltu a1, s3, s2 ; RV32-NEXT: add a1, a1, s9 ; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add t6, t5, t6 -; RV32-NEXT: sltu a1, t6, t5 +; RV32-NEXT: add s4, s3, s4 +; RV32-NEXT: sltu a1, s4, s3 ; RV32-NEXT: add a1, a1, s10 ; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add s0, t6, s0 -; RV32-NEXT: sltu a1, s0, t6 +; RV32-NEXT: add s5, s4, s5 +; RV32-NEXT: sltu a1, s5, s4 ; RV32-NEXT: add a1, a1, s11 ; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: add s1, s0, s1 -; RV32-NEXT: sltu a1, s1, s0 -; RV32-NEXT: add a1, a1, ra -; RV32-NEXT: add a0, a0, a1 ; RV32-NEXT: vmv.x.s a1, v24 -; RV32-NEXT: add a2, s1, a2 -; RV32-NEXT: sltu a3, a2, s1 +; RV32-NEXT: add a2, s5, a2 +; RV32-NEXT: sltu a3, a2, s5 ; RV32-NEXT: add a1, a3, a1 ; RV32-NEXT: vmv.x.s a3, v16 ; RV32-NEXT: add a0, a0, a1 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll index 3711f014e0647..6c0288dd9b5d4 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll @@ -2431,7 +2431,7 @@ define <8 x i32> @mgather_v8i32(<8 x ptr> %ptrs, <8 x i1> %m, <8 x i32> %passthr ; RV64ZVE32F-NEXT: .LBB34_10: # %cond.load1 ; RV64ZVE32F-NEXT: ld a2, 8(a0) ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 1 ; RV64ZVE32F-NEXT: andi a2, a1, 4 @@ -2439,7 +2439,7 @@ define <8 x i32> @mgather_v8i32(<8 x ptr> %ptrs, <8 x i1> %m, <8 x i32> %passthr ; RV64ZVE32F-NEXT: .LBB34_11: # %cond.load4 ; RV64ZVE32F-NEXT: ld a2, 16(a0) ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 @@ -2447,7 +2447,7 @@ define <8 x i32> @mgather_v8i32(<8 x ptr> %ptrs, <8 x i1> %m, <8 x i32> %passthr ; RV64ZVE32F-NEXT: .LBB34_12: # %cond.load7 ; RV64ZVE32F-NEXT: ld a2, 24(a0) ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 @@ -2531,9 +2531,9 @@ define <8 x i32> @mgather_baseidx_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1 +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1 ; RV64ZVE32F-NEXT: .LBB35_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 @@ -2579,7 +2579,7 @@ define <8 x i32> @mgather_baseidx_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: lw 
a2, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB35_6 @@ -2591,9 +2591,9 @@ define <8 x i32> @mgather_baseidx_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3 +; RV64ZVE32F-NEXT: vmv.s.x v8, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB35_7 ; RV64ZVE32F-NEXT: .LBB35_14: # %cond.load10 @@ -2681,9 +2681,9 @@ define <8 x i32> @mgather_baseidx_sext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1 +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1 ; RV64ZVE32F-NEXT: .LBB36_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 @@ -2729,7 +2729,7 @@ define <8 x i32> @mgather_baseidx_sext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: lw a2, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB36_6 @@ -2741,9 +2741,9 @@ define <8 x i32> @mgather_baseidx_sext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3 +; RV64ZVE32F-NEXT: vmv.s.x v8, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB36_7 ; RV64ZVE32F-NEXT: .LBB36_14: # %cond.load10 @@ -2835,9 +2835,9 @@ define <8 x i32> @mgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1 +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1 ; RV64ZVE32F-NEXT: .LBB37_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 @@ -2885,7 +2885,7 @@ define <8 x i32> @mgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: lw a2, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, 
tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB37_6 @@ -2899,7 +2899,7 @@ define <8 x i32> @mgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: lw a2, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB37_7 @@ -2993,9 +2993,9 @@ define <8 x i32> @mgather_baseidx_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1 +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1 ; RV64ZVE32F-NEXT: .LBB38_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 @@ -3041,7 +3041,7 @@ define <8 x i32> @mgather_baseidx_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i ; RV64ZVE32F-NEXT: lw a2, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB38_6 @@ -3053,9 +3053,9 @@ define <8 x i32> @mgather_baseidx_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3 +; RV64ZVE32F-NEXT: vmv.s.x v8, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB38_7 ; RV64ZVE32F-NEXT: .LBB38_14: # %cond.load10 @@ -3144,9 +3144,9 @@ define <8 x i32> @mgather_baseidx_sext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1 +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1 ; RV64ZVE32F-NEXT: .LBB39_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 @@ -3192,7 +3192,7 @@ define <8 x i32> @mgather_baseidx_sext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: lw a2, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB39_6 @@ -3204,9 +3204,9 @@ define <8 x i32> @mgather_baseidx_sext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; 
RV64ZVE32F-NEXT: vmv.s.x v12, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3 +; RV64ZVE32F-NEXT: vmv.s.x v8, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB39_7 ; RV64ZVE32F-NEXT: .LBB39_14: # %cond.load10 @@ -3299,9 +3299,9 @@ define <8 x i32> @mgather_baseidx_zext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: add a3, a0, a3 ; RV64ZVE32F-NEXT: lw a3, 0(a3) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a3 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1 +; RV64ZVE32F-NEXT: vmv.s.x v9, a3 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1 ; RV64ZVE32F-NEXT: .LBB40_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 @@ -3349,7 +3349,7 @@ define <8 x i32> @mgather_baseidx_zext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: lw a3, 0(a3) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a3 -; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 ; RV64ZVE32F-NEXT: andi a3, a2, 8 ; RV64ZVE32F-NEXT: beqz a3, .LBB40_6 @@ -3362,9 +3362,9 @@ define <8 x i32> @mgather_baseidx_zext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: add a3, a0, a3 ; RV64ZVE32F-NEXT: lw a3, 0(a3) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a3 -; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3 +; RV64ZVE32F-NEXT: vmv.s.x v8, a3 +; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 ; RV64ZVE32F-NEXT: andi a3, a2, 16 ; RV64ZVE32F-NEXT: beqz a3, .LBB40_7 ; RV64ZVE32F-NEXT: .LBB40_14: # %cond.load10 @@ -3455,7 +3455,7 @@ define <8 x i32> @mgather_baseidx_v8i32(ptr %base, <8 x i32> %idxs, <8 x i1> %m, ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1 ; RV64ZVE32F-NEXT: .LBB41_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma @@ -3499,9 +3499,9 @@ define <8 x i32> @mgather_baseidx_v8i32(ptr %base, <8 x i32> %idxs, <8 x i1> %m, ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vmv.s.x v14, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 2 +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB41_6 ; RV64ZVE32F-NEXT: .LBB41_13: # %cond.load7 @@ -3512,7 +3512,7 @@ define <8 x i32> @mgather_baseidx_v8i32(ptr %base, <8 x i32> %idxs, <8 x i1> %m, ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) ; RV64ZVE32F-NEXT: vmv.s.x v8, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB41_7 @@ -8234,7 +8234,7 @@ define <8 x float> 
@mgather_v8f32(<8 x ptr> %ptrs, <8 x i1> %m, <8 x float> %pas ; RV64ZVE32F-NEXT: .LBB73_10: # %cond.load1 ; RV64ZVE32F-NEXT: ld a2, 8(a0) ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 1 ; RV64ZVE32F-NEXT: andi a2, a1, 4 @@ -8242,7 +8242,7 @@ define <8 x float> @mgather_v8f32(<8 x ptr> %ptrs, <8 x i1> %m, <8 x float> %pas ; RV64ZVE32F-NEXT: .LBB73_11: # %cond.load4 ; RV64ZVE32F-NEXT: ld a2, 16(a0) ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 @@ -8250,7 +8250,7 @@ define <8 x float> @mgather_v8f32(<8 x ptr> %ptrs, <8 x i1> %m, <8 x float> %pas ; RV64ZVE32F-NEXT: .LBB73_12: # %cond.load7 ; RV64ZVE32F-NEXT: ld a2, 24(a0) ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 @@ -8334,9 +8334,9 @@ define <8 x float> @mgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1 +; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1 ; RV64ZVE32F-NEXT: .LBB74_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 @@ -8382,7 +8382,7 @@ define <8 x float> @mgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i ; RV64ZVE32F-NEXT: flw fa5, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 -; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB74_6 @@ -8394,9 +8394,9 @@ define <8 x float> @mgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 -; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3 +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 +; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB74_7 ; RV64ZVE32F-NEXT: .LBB74_14: # %cond.load10 @@ -8484,9 +8484,9 @@ define <8 x float> @mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1 +; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1 ; RV64ZVE32F-NEXT: .LBB75_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; 
RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 @@ -8532,7 +8532,7 @@ define <8 x float> @mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: flw fa5, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 -; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB75_6 @@ -8544,9 +8544,9 @@ define <8 x float> @mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 -; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3 +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 +; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB75_7 ; RV64ZVE32F-NEXT: .LBB75_14: # %cond.load10 @@ -8638,9 +8638,9 @@ define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1 +; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1 ; RV64ZVE32F-NEXT: .LBB76_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 @@ -8688,7 +8688,7 @@ define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: flw fa5, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 -; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB76_6 @@ -8702,7 +8702,7 @@ define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: flw fa5, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 -; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB76_7 @@ -8796,9 +8796,9 @@ define <8 x float> @mgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1 +; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1 ; RV64ZVE32F-NEXT: .LBB77_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 @@ -8844,7 +8844,7 @@ define <8 x float> @mgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x ; RV64ZVE32F-NEXT: flw fa5, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 -; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma +; 
RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB77_6 @@ -8856,9 +8856,9 @@ define <8 x float> @mgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 -; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3 +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 +; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB77_7 ; RV64ZVE32F-NEXT: .LBB77_14: # %cond.load10 @@ -8947,9 +8947,9 @@ define <8 x float> @mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1 +; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1 ; RV64ZVE32F-NEXT: .LBB78_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 @@ -8995,7 +8995,7 @@ define <8 x float> @mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: flw fa5, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 -; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB78_6 @@ -9007,9 +9007,9 @@ define <8 x float> @mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 -; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3 +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 +; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB78_7 ; RV64ZVE32F-NEXT: .LBB78_14: # %cond.load10 @@ -9102,9 +9102,9 @@ define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: add a3, a0, a3 ; RV64ZVE32F-NEXT: flw fa5, 0(a3) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1 +; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1 ; RV64ZVE32F-NEXT: .LBB79_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 @@ -9152,7 +9152,7 @@ define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: flw fa5, 0(a3) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 -; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 ; RV64ZVE32F-NEXT: andi a3, a2, 8 ; RV64ZVE32F-NEXT: beqz a3, .LBB79_6 
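
Note on the m2 -> m1 vsetivli changes running through these gather tests: they all fall out of the new VSLIDEUP_VL/VSLIDEDOWN_VL combine in RISCVISelLowering.cpp above. The slide's constant VL (plus the offset for vslidedown) bounds the highest lane the instruction can touch, and getSmallestVTForIndex picks the narrowest container type that still covers that lane. Below is a minimal standalone C++ sketch of that sizing arithmetic, using the Zvl128b / e32 / offset 4 / VL 2 example from the new comment in the combine; the loop is illustrative only and is not the actual getSmallestVTForIndex implementation.

  #include <cstdio>

  int main() {
    const unsigned MinVLenBits = 128;        // Zvl128b guarantees VLEN >= 128.
    const unsigned SEW = 32;                 // e32 elements.
    const unsigned Offset = 4, VL = 2;       // vslidedown with offset 4, VL 2.
    const unsigned MaxIdx = Offset + VL - 1; // Highest lane read: index 5.
    // Find the smallest power-of-two LMUL whose register group is guaranteed
    // to hold MaxIdx + 1 elements even at the minimum VLEN.
    for (unsigned LMUL = 1; LMUL <= 8; LMUL *= 2) {
      unsigned ElemsPerGroup = LMUL * MinVLenBits / SEW;
      if (MaxIdx < ElemsPerGroup) {
        std::printf("LMUL=%u is enough: %u x e%u elements per group\n", LMUL,
                    ElemsPerGroup, SEW);
        break;
      }
    }
    return 0;
  }

Checking against the minimum VLEN is conservative for scalable types: a larger runtime VLEN only grows the register group, so a slide that fits at LMUL=2 (nxv4i32) under Zvl128b fits for every legal VLEN, which is why the combine can extract the low subvector, redo the slide at the smaller LMUL, and insert the result back over the original passthru.
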
@@ -9165,9 +9165,9 @@ define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: add a3, a0, a3 ; RV64ZVE32F-NEXT: flw fa5, 0(a3) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 -; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3 +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 +; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 ; RV64ZVE32F-NEXT: andi a3, a2, 16 ; RV64ZVE32F-NEXT: beqz a3, .LBB79_7 ; RV64ZVE32F-NEXT: .LBB79_14: # %cond.load10 @@ -9258,7 +9258,7 @@ define <8 x float> @mgather_baseidx_v8f32(ptr %base, <8 x i32> %idxs, <8 x i1> % ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1 ; RV64ZVE32F-NEXT: .LBB80_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma @@ -9302,9 +9302,9 @@ define <8 x float> @mgather_baseidx_v8f32(ptr %base, <8 x i32> %idxs, <8 x i1> % ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vfmv.s.f v14, fa5 -; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 2 +; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 +; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB80_6 ; RV64ZVE32F-NEXT: .LBB80_13: # %cond.load7 @@ -9315,7 +9315,7 @@ define <8 x float> @mgather_baseidx_v8f32(ptr %base, <8 x i32> %idxs, <8 x i1> % ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 -; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB80_7 @@ -12395,7 +12395,7 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, m2, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1 ; RV64ZVE32F-NEXT: .LBB98_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma @@ -12417,9 +12417,9 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 ; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v14, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 5, e8, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 4 +; RV64ZVE32F-NEXT: vmv.s.x v12, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 5, e8, m1, tu, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4 ; RV64ZVE32F-NEXT: .LBB98_8: # %else11 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, ma ; RV64ZVE32F-NEXT: andi a2, a1, 32 @@ -12433,7 +12433,7 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 ; RV64ZVE32F-NEXT: vmv.s.x v14, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 6, e8, m2, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 6, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 5 ; RV64ZVE32F-NEXT: .LBB98_10: # %else14 ; RV64ZVE32F-NEXT: vsetivli 
zero, 2, e8, mf4, ta, ma @@ -12456,9 +12456,9 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 -; RV64ZVE32F-NEXT: vmv.s.x v14, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 10, e8, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 9 +; RV64ZVE32F-NEXT: vmv.s.x v13, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 10, e8, m1, tu, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v13, 9 ; RV64ZVE32F-NEXT: .LBB98_15: # %else26 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v13, v12, 4 @@ -12472,7 +12472,7 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 ; RV64ZVE32F-NEXT: vmv.s.x v14, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 11, e8, m2, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 11, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 10 ; RV64ZVE32F-NEXT: .LBB98_17: # %else29 ; RV64ZVE32F-NEXT: slli a2, a1, 52 @@ -12484,9 +12484,9 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 -; RV64ZVE32F-NEXT: vmv.s.x v14, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 12, e8, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 11 +; RV64ZVE32F-NEXT: vmv.s.x v12, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 12, e8, m1, tu, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 11 ; RV64ZVE32F-NEXT: .LBB98_19: # %else32 ; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m2, ta, ma ; RV64ZVE32F-NEXT: slli a2, a1, 51 @@ -12497,9 +12497,9 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 -; RV64ZVE32F-NEXT: vmv.s.x v14, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 13, e8, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 12 +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 13, e8, m1, tu, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 12 ; RV64ZVE32F-NEXT: .LBB98_21: # %else35 ; RV64ZVE32F-NEXT: slli a2, a1, 50 ; RV64ZVE32F-NEXT: bgez a2, .LBB98_23 @@ -12510,9 +12510,9 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 -; RV64ZVE32F-NEXT: vmv.s.x v14, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 14, e8, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 13 +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 14, e8, m1, tu, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 13 ; RV64ZVE32F-NEXT: .LBB98_23: # %else38 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: slli a2, a1, 49 @@ -12661,7 +12661,7 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 ; RV64ZVE32F-NEXT: vmv.s.x v14, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 3, e8, m2, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 3, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB98_6 @@ -12672,9 +12672,9 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 -; RV64ZVE32F-NEXT: vmv.s.x v14, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, m2, tu, ma -; 
RV64ZVE32F-NEXT: vslideup.vi v10, v14, 3 +; RV64ZVE32F-NEXT: vmv.s.x v12, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, m1, tu, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: bnez a2, .LBB98_7 ; RV64ZVE32F-NEXT: j .LBB98_8 @@ -12684,7 +12684,7 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 ; RV64ZVE32F-NEXT: vmv.s.x v14, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 7, e8, m2, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 7, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 6 ; RV64ZVE32F-NEXT: andi a2, a1, 128 ; RV64ZVE32F-NEXT: beqz a2, .LBB98_12 @@ -12695,9 +12695,9 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 -; RV64ZVE32F-NEXT: vmv.s.x v14, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 7 +; RV64ZVE32F-NEXT: vmv.s.x v13, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, tu, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v13, 7 ; RV64ZVE32F-NEXT: andi a2, a1, 256 ; RV64ZVE32F-NEXT: beqz a2, .LBB98_13 ; RV64ZVE32F-NEXT: .LBB98_53: # %cond.load22 @@ -12707,9 +12707,9 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 ; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v14, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 9, e8, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 8 +; RV64ZVE32F-NEXT: vmv.s.x v13, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 9, e8, m1, tu, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v13, 8 ; RV64ZVE32F-NEXT: andi a2, a1, 512 ; RV64ZVE32F-NEXT: bnez a2, .LBB98_14 ; RV64ZVE32F-NEXT: j .LBB98_15 @@ -12719,7 +12719,7 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 15, e8, m2, tu, ma +; RV64ZVE32F-NEXT: vsetivli zero, 15, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 14 ; RV64ZVE32F-NEXT: slli a2, a1, 48 ; RV64ZVE32F-NEXT: bgez a2, .LBB98_25 @@ -12730,9 +12730,9 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 15 +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m1, tu, ma +; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 15 ; RV64ZVE32F-NEXT: slli a2, a1, 47 ; RV64ZVE32F-NEXT: bgez a2, .LBB98_26 ; RV64ZVE32F-NEXT: .LBB98_56: # %cond.load46 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-combine.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-combine.ll index f52ba6f51d5c8..eb91271bcdd1b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-combine.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-combine.ll @@ -30,7 +30,7 @@ define void @widen_3xv4i16(ptr %x, ptr %z) { ; CHECK-NEXT: vle16.v v10, (a2) ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vle16.v v12, (a0) -; CHECK-NEXT: vsetivli zero, 8, e16, m2, tu, ma +; CHECK-NEXT: vsetivli zero, 8, e16, m1, tu, ma ; CHECK-NEXT: vslideup.vi v8, v10, 4 ; CHECK-NEXT: vsetivli zero, 12, e16, 
m2, tu, ma ; CHECK-NEXT: vslideup.vi v8, v12, 8 @@ -80,7 +80,7 @@ define void @widen_4xv4i16_unaligned(ptr %x, ptr %z) { ; CHECK-NO-MISALIGN-NEXT: vle8.v v12, (a2) ; CHECK-NO-MISALIGN-NEXT: addi a0, a0, 24 ; CHECK-NO-MISALIGN-NEXT: vle8.v v14, (a0) -; CHECK-NO-MISALIGN-NEXT: vsetivli zero, 8, e16, m2, tu, ma +; CHECK-NO-MISALIGN-NEXT: vsetvli zero, zero, e16, m1, tu, ma ; CHECK-NO-MISALIGN-NEXT: vslideup.vi v8, v10, 4 ; CHECK-NO-MISALIGN-NEXT: vsetivli zero, 12, e16, m2, tu, ma ; CHECK-NO-MISALIGN-NEXT: vslideup.vi v8, v12, 8 @@ -193,7 +193,7 @@ define void @strided_constant_mismatch_4xv4i16(ptr %x, ptr %z) { ; CHECK-NEXT: vle16.v v12, (a2) ; CHECK-NEXT: addi a0, a0, 8 ; CHECK-NEXT: vle16.v v14, (a0) -; CHECK-NEXT: vsetivli zero, 8, e16, m2, tu, ma +; CHECK-NEXT: vsetivli zero, 8, e16, m1, tu, ma ; CHECK-NEXT: vslideup.vi v8, v10, 4 ; CHECK-NEXT: vsetivli zero, 12, e16, m2, tu, ma ; CHECK-NEXT: vslideup.vi v8, v12, 8 @@ -263,7 +263,7 @@ define void @strided_runtime_mismatch_4xv4i16(ptr %x, ptr %z, i64 %s, i64 %t) { ; RV32-NEXT: vle16.v v12, (a0) ; RV32-NEXT: add a0, a0, a2 ; RV32-NEXT: vle16.v v14, (a0) -; RV32-NEXT: vsetivli zero, 8, e16, m2, tu, ma +; RV32-NEXT: vsetivli zero, 8, e16, m1, tu, ma ; RV32-NEXT: vslideup.vi v8, v10, 4 ; RV32-NEXT: vsetivli zero, 12, e16, m2, tu, ma ; RV32-NEXT: vslideup.vi v8, v12, 8 @@ -282,7 +282,7 @@ define void @strided_runtime_mismatch_4xv4i16(ptr %x, ptr %z, i64 %s, i64 %t) { ; RV64-NEXT: vle16.v v12, (a0) ; RV64-NEXT: add a0, a0, a2 ; RV64-NEXT: vle16.v v14, (a0) -; RV64-NEXT: vsetivli zero, 8, e16, m2, tu, ma +; RV64-NEXT: vsetivli zero, 8, e16, m1, tu, ma ; RV64-NEXT: vslideup.vi v8, v10, 4 ; RV64-NEXT: vsetivli zero, 12, e16, m2, tu, ma ; RV64-NEXT: vslideup.vi v8, v12, 8 @@ -301,7 +301,7 @@ define void @strided_runtime_mismatch_4xv4i16(ptr %x, ptr %z, i64 %s, i64 %t) { ; ZVE64F-NEXT: vle16.v v12, (a0) ; ZVE64F-NEXT: add a0, a0, a2 ; ZVE64F-NEXT: vle16.v v14, (a0) -; ZVE64F-NEXT: vsetivli zero, 8, e16, m2, tu, ma +; ZVE64F-NEXT: vsetivli zero, 8, e16, m1, tu, ma ; ZVE64F-NEXT: vslideup.vi v8, v10, 4 ; ZVE64F-NEXT: vsetivli zero, 12, e16, m2, tu, ma ; ZVE64F-NEXT: vslideup.vi v8, v12, 8 diff --git a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll index 31e7e7be76c89..4741e55ab3a05 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll @@ -460,54 +460,49 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) { ; CHECK-V-NEXT: sub sp, sp, a1 ; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 4 * vlenb ; CHECK-V-NEXT: lhu s0, 24(a0) -; CHECK-V-NEXT: lhu s1, 16(a0) -; CHECK-V-NEXT: lhu s2, 0(a0) -; CHECK-V-NEXT: lhu a0, 8(a0) +; CHECK-V-NEXT: lhu s1, 0(a0) +; CHECK-V-NEXT: lhu s2, 8(a0) +; CHECK-V-NEXT: lhu a0, 16(a0) ; CHECK-V-NEXT: fmv.w.x fa0, a0 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-V-NEXT: fmv.w.x fa0, s2 -; CHECK-V-NEXT: call __extendhfsf2@plt -; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 2, e64, m2, tu, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v8, v10, 1 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli 
a0, a0, 1 ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: fmv.w.x fa0, s2 +; CHECK-V-NEXT: call __extendhfsf2@plt +; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz +; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s1 +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz +; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, tu, ma +; CHECK-V-NEXT: vmv.s.x v10, a0 +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 1 ; CHECK-V-NEXT: vsetivli zero, 3, e64, m2, tu, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 2 -; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 -; CHECK-V-NEXT: add a0, sp, a0 -; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s0 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 -; CHECK-V-NEXT: add a0, sp, a0 -; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 3 ; CHECK-V-NEXT: lui a0, 524288 @@ -632,54 +627,49 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) { ; CHECK-V-NEXT: sub sp, sp, a1 ; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 4 * vlenb ; CHECK-V-NEXT: lhu s0, 24(a0) -; CHECK-V-NEXT: lhu s1, 16(a0) -; CHECK-V-NEXT: lhu s2, 0(a0) -; CHECK-V-NEXT: lhu a0, 8(a0) +; CHECK-V-NEXT: lhu s1, 0(a0) +; CHECK-V-NEXT: lhu s2, 8(a0) +; CHECK-V-NEXT: lhu a0, 16(a0) ; CHECK-V-NEXT: fmv.w.x fa0, a0 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-V-NEXT: fmv.w.x fa0, s2 -; CHECK-V-NEXT: call __extendhfsf2@plt -; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 2, e64, m2, tu, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v8, v10, 1 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: fmv.w.x fa0, s2 +; CHECK-V-NEXT: call __extendhfsf2@plt +; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz +; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s1 +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz +; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, tu, ma +; 
CHECK-V-NEXT: vmv.s.x v10, a0 +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 1 ; CHECK-V-NEXT: vsetivli zero, 3, e64, m2, tu, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 2 -; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 -; CHECK-V-NEXT: add a0, sp, a0 -; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s0 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 -; CHECK-V-NEXT: add a0, sp, a0 -; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 3 ; CHECK-V-NEXT: li a0, -1 @@ -813,54 +803,49 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) { ; CHECK-V-NEXT: sub sp, sp, a1 ; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 4 * vlenb ; CHECK-V-NEXT: lhu s0, 24(a0) -; CHECK-V-NEXT: lhu s1, 16(a0) -; CHECK-V-NEXT: lhu s2, 0(a0) -; CHECK-V-NEXT: lhu a0, 8(a0) +; CHECK-V-NEXT: lhu s1, 0(a0) +; CHECK-V-NEXT: lhu s2, 8(a0) +; CHECK-V-NEXT: lhu a0, 16(a0) ; CHECK-V-NEXT: fmv.w.x fa0, a0 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-V-NEXT: fmv.w.x fa0, s2 -; CHECK-V-NEXT: call __extendhfsf2@plt -; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 2, e64, m2, tu, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v8, v10, 1 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: fmv.w.x fa0, s2 +; CHECK-V-NEXT: call __extendhfsf2@plt +; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz +; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s1 +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz +; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, tu, ma +; CHECK-V-NEXT: vmv.s.x v10, a0 +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 1 ; CHECK-V-NEXT: vsetivli zero, 3, e64, m2, tu, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 2 -; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 -; 
CHECK-V-NEXT: add a0, sp, a0 -; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s0 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 -; CHECK-V-NEXT: add a0, sp, a0 -; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 3 ; CHECK-V-NEXT: li a0, -1 @@ -1445,9 +1430,9 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) { ; CHECK-V-NEXT: .cfi_offset s5, -56 ; CHECK-V-NEXT: .cfi_offset s6, -64 ; CHECK-V-NEXT: csrr a1, vlenb -; CHECK-V-NEXT: slli a1, a1, 1 +; CHECK-V-NEXT: slli a1, a1, 2 ; CHECK-V-NEXT: sub sp, sp, a1 -; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 2 * vlenb +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 4 * vlenb ; CHECK-V-NEXT: lhu s0, 56(a0) ; CHECK-V-NEXT: lhu s1, 48(a0) ; CHECK-V-NEXT: lhu s2, 40(a0) @@ -1466,63 +1451,105 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, s6 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 2, e32, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 2, e32, m1, tu, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v8, v10, 1 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s5 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 3, e32, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 3, e32, m1, tu, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 2 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s4 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 4, e32, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, tu, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 3 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s3 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 5, e32, m2, tu, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: 
csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 4 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s2 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 5 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s1 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 7, e32, m2, tu, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 6 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s0 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 7 ; CHECK-V-NEXT: lui a0, 8 @@ -1533,7 +1560,7 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) { ; CHECK-V-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-V-NEXT: vnsrl.wi v8, v10, 0 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: slli a0, a0, 2 ; CHECK-V-NEXT: add sp, sp, a0 ; CHECK-V-NEXT: ld ra, 72(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: ld s0, 64(sp) # 8-byte Folded Reload @@ -1728,9 +1755,9 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) { ; CHECK-V-NEXT: .cfi_offset s5, -56 ; CHECK-V-NEXT: .cfi_offset s6, -64 ; CHECK-V-NEXT: csrr a1, vlenb -; CHECK-V-NEXT: slli a1, a1, 1 +; CHECK-V-NEXT: slli a1, a1, 2 ; CHECK-V-NEXT: sub sp, sp, a1 -; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 2 * vlenb +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 4 * vlenb ; CHECK-V-NEXT: lhu s0, 56(a0) ; CHECK-V-NEXT: lhu s1, 48(a0) ; CHECK-V-NEXT: lhu s2, 40(a0) @@ -1749,63 +1776,105 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, s6 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 2, e32, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 2, e32, m1, tu, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, 
sp, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v8, v10, 1 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s5 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 3, e32, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 3, e32, m1, tu, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 2 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s4 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 4, e32, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, tu, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 3 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s3 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 5, e32, m2, tu, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 4 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s2 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 5 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s1 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 7, e32, m2, tu, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 6 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, 
a0, 16 ; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s0 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 7 ; CHECK-V-NEXT: lui a0, 16 @@ -1814,7 +1883,7 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) { ; CHECK-V-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-V-NEXT: vnsrl.wi v8, v10, 0 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: slli a0, a0, 2 ; CHECK-V-NEXT: add sp, sp, a0 ; CHECK-V-NEXT: ld ra, 72(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: ld s0, 64(sp) # 8-byte Folded Reload @@ -2031,9 +2100,9 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) { ; CHECK-V-NEXT: .cfi_offset s5, -56 ; CHECK-V-NEXT: .cfi_offset s6, -64 ; CHECK-V-NEXT: csrr a1, vlenb -; CHECK-V-NEXT: slli a1, a1, 1 +; CHECK-V-NEXT: slli a1, a1, 2 ; CHECK-V-NEXT: sub sp, sp, a1 -; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 2 * vlenb +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 4 * vlenb ; CHECK-V-NEXT: lhu s0, 56(a0) ; CHECK-V-NEXT: lhu s1, 48(a0) ; CHECK-V-NEXT: lhu s2, 40(a0) @@ -2052,63 +2121,105 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, s6 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 2, e32, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 2, e32, m1, tu, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v8, v10, 1 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s5 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 3, e32, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 3, e32, m1, tu, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 2 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s4 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 4, e32, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, tu, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 3 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, 
a0, 16 ; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s3 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 5, e32, m2, tu, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 4 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s2 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 5 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s1 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 7, e32, m2, tu, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 6 -; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill -; CHECK-V-NEXT: fmv.w.x fa0, s0 -; CHECK-V-NEXT: call __extendhfsf2@plt +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: fmv.w.x fa0, s0 +; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 7 ; CHECK-V-NEXT: lui a0, 16 @@ -2118,7 +2229,7 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) { ; CHECK-V-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-V-NEXT: vnsrl.wi v8, v10, 0 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: slli a0, a0, 2 ; CHECK-V-NEXT: add sp, sp, a0 ; CHECK-V-NEXT: ld ra, 72(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: ld s0, 64(sp) # 8-byte Folded Reload @@ -3796,54 +3907,49 @@ define <4 x i32> @stest_f16i32_mm(<4 x half> %x) { ; CHECK-V-NEXT: sub sp, sp, a1 ; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 4 * vlenb ; CHECK-V-NEXT: lhu s0, 24(a0) -; CHECK-V-NEXT: lhu s1, 16(a0) -; CHECK-V-NEXT: lhu s2, 0(a0) -; CHECK-V-NEXT: lhu a0, 8(a0) +; CHECK-V-NEXT: lhu s1, 0(a0) +; CHECK-V-NEXT: lhu s2, 8(a0) +; CHECK-V-NEXT: lhu a0, 16(a0) ; CHECK-V-NEXT: fmv.w.x fa0, a0 ; CHECK-V-NEXT: call __extendhfsf2@plt ; 
CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-V-NEXT: fmv.w.x fa0, s2 -; CHECK-V-NEXT: call __extendhfsf2@plt -; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 2, e64, m2, tu, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v8, v10, 1 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: fmv.w.x fa0, s2 +; CHECK-V-NEXT: call __extendhfsf2@plt +; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz +; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s1 +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz +; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, tu, ma +; CHECK-V-NEXT: vmv.s.x v10, a0 +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 1 ; CHECK-V-NEXT: vsetivli zero, 3, e64, m2, tu, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 2 -; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 -; CHECK-V-NEXT: add a0, sp, a0 -; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s0 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 -; CHECK-V-NEXT: add a0, sp, a0 -; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 3 ; CHECK-V-NEXT: lui a0, 524288 @@ -3966,54 +4072,49 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) { ; CHECK-V-NEXT: sub sp, sp, a1 ; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 4 * vlenb ; CHECK-V-NEXT: lhu s0, 24(a0) -; CHECK-V-NEXT: lhu s1, 16(a0) -; CHECK-V-NEXT: lhu s2, 0(a0) -; CHECK-V-NEXT: lhu a0, 8(a0) +; CHECK-V-NEXT: lhu s1, 0(a0) +; CHECK-V-NEXT: lhu s2, 8(a0) +; CHECK-V-NEXT: lhu a0, 16(a0) ; CHECK-V-NEXT: fmv.w.x fa0, a0 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-V-NEXT: fmv.w.x fa0, s2 -; CHECK-V-NEXT: call __extendhfsf2@plt -; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 2, e64, m2, tu, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v8, v10, 1 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: 
slli a0, a0, 1 ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: fmv.w.x fa0, s2 +; CHECK-V-NEXT: call __extendhfsf2@plt +; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz +; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s1 +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz +; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, tu, ma +; CHECK-V-NEXT: vmv.s.x v10, a0 +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 1 ; CHECK-V-NEXT: vsetivli zero, 3, e64, m2, tu, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 2 -; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 -; CHECK-V-NEXT: add a0, sp, a0 -; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s0 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 -; CHECK-V-NEXT: add a0, sp, a0 -; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 3 ; CHECK-V-NEXT: li a0, -1 @@ -4146,54 +4247,49 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) { ; CHECK-V-NEXT: sub sp, sp, a1 ; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 4 * vlenb ; CHECK-V-NEXT: lhu s0, 24(a0) -; CHECK-V-NEXT: lhu s1, 16(a0) -; CHECK-V-NEXT: lhu s2, 0(a0) -; CHECK-V-NEXT: lhu a0, 8(a0) +; CHECK-V-NEXT: lhu s1, 0(a0) +; CHECK-V-NEXT: lhu s2, 8(a0) +; CHECK-V-NEXT: lhu a0, 16(a0) ; CHECK-V-NEXT: fmv.w.x fa0, a0 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-V-NEXT: fmv.w.x fa0, s2 -; CHECK-V-NEXT: call __extendhfsf2@plt -; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 2, e64, m2, tu, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload -; CHECK-V-NEXT: vslideup.vi v8, v10, 1 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: fmv.w.x fa0, s2 +; CHECK-V-NEXT: call __extendhfsf2@plt +; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz +; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: fmv.w.x fa0, s1 +; CHECK-V-NEXT: vmv.s.x v8, a0 +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz +; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, tu, ma +; 
CHECK-V-NEXT: vmv.s.x v10, a0 +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vslideup.vi v10, v8, 1 ; CHECK-V-NEXT: vsetivli zero, 3, e64, m2, tu, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 -; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 2 -; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 -; CHECK-V-NEXT: add a0, sp, a0 -; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s0 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 -; CHECK-V-NEXT: add a0, sp, a0 -; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 3 ; CHECK-V-NEXT: li a0, -1 @@ -4766,9 +4862,9 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) { ; CHECK-V-NEXT: .cfi_offset s5, -56 ; CHECK-V-NEXT: .cfi_offset s6, -64 ; CHECK-V-NEXT: csrr a1, vlenb -; CHECK-V-NEXT: slli a1, a1, 1 +; CHECK-V-NEXT: slli a1, a1, 2 ; CHECK-V-NEXT: sub sp, sp, a1 -; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 2 * vlenb +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 4 * vlenb ; CHECK-V-NEXT: lhu s0, 56(a0) ; CHECK-V-NEXT: lhu s1, 48(a0) ; CHECK-V-NEXT: lhu s2, 40(a0) @@ -4787,63 +4883,105 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, s6 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 2, e32, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 2, e32, m1, tu, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v8, v10, 1 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s5 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 3, e32, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 3, e32, m1, tu, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 2 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s4 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 4, e32, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, tu, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: 
slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 3 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s3 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 5, e32, m2, tu, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 4 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s2 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 5 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s1 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 7, e32, m2, tu, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 6 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s0 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 7 ; CHECK-V-NEXT: lui a0, 8 @@ -4854,7 +4992,7 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) { ; CHECK-V-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-V-NEXT: vnsrl.wi v8, v10, 0 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: slli a0, a0, 2 ; CHECK-V-NEXT: add sp, sp, a0 ; CHECK-V-NEXT: ld ra, 72(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: ld s0, 64(sp) # 8-byte Folded Reload @@ -5045,9 +5183,9 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) { ; CHECK-V-NEXT: .cfi_offset s5, -56 ; CHECK-V-NEXT: .cfi_offset s6, -64 ; CHECK-V-NEXT: csrr a1, vlenb -; CHECK-V-NEXT: slli a1, a1, 1 +; CHECK-V-NEXT: slli a1, a1, 2 ; CHECK-V-NEXT: sub sp, sp, a1 -; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 
0x00, 0x1e, 0x22 # sp + 80 + 2 * vlenb +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 4 * vlenb ; CHECK-V-NEXT: lhu s0, 56(a0) ; CHECK-V-NEXT: lhu s1, 48(a0) ; CHECK-V-NEXT: lhu s2, 40(a0) @@ -5066,63 +5204,105 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, s6 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 2, e32, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 2, e32, m1, tu, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v8, v10, 1 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s5 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 3, e32, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 3, e32, m1, tu, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 2 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s4 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 4, e32, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, tu, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 3 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s3 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 5, e32, m2, tu, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 4 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s2 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 5 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: 
vs2r.v v10, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s1 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 7, e32, m2, tu, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 6 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s0 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 7 ; CHECK-V-NEXT: lui a0, 16 @@ -5131,7 +5311,7 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) { ; CHECK-V-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-V-NEXT: vnsrl.wi v8, v10, 0 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: slli a0, a0, 2 ; CHECK-V-NEXT: add sp, sp, a0 ; CHECK-V-NEXT: ld ra, 72(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: ld s0, 64(sp) # 8-byte Folded Reload @@ -5347,9 +5527,9 @@ define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) { ; CHECK-V-NEXT: .cfi_offset s5, -56 ; CHECK-V-NEXT: .cfi_offset s6, -64 ; CHECK-V-NEXT: csrr a1, vlenb -; CHECK-V-NEXT: slli a1, a1, 1 +; CHECK-V-NEXT: slli a1, a1, 2 ; CHECK-V-NEXT: sub sp, sp, a1 -; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 2 * vlenb +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 4 * vlenb ; CHECK-V-NEXT: lhu s0, 56(a0) ; CHECK-V-NEXT: lhu s1, 48(a0) ; CHECK-V-NEXT: lhu s2, 40(a0) @@ -5368,63 +5548,105 @@ define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, s6 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 2, e32, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 2, e32, m1, tu, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v8, v10, 1 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s5 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 3, e32, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 3, e32, m1, tu, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 2 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded 
Spill ; CHECK-V-NEXT: fmv.w.x fa0, s4 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 4, e32, m2, tu, ma +; CHECK-V-NEXT: vsetivli zero, 4, e32, m1, tu, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 3 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s3 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 5, e32, m2, tu, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 4 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s2 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 5 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s1 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 7, e32, m2, tu, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 6 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: fmv.w.x fa0, s0 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 -; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 7 ; CHECK-V-NEXT: lui a0, 16 @@ -5434,7 +5656,7 @@ define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) { ; CHECK-V-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-V-NEXT: vnsrl.wi v8, v10, 0 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: slli a0, a0, 2 ; CHECK-V-NEXT: add sp, sp, a0 ; CHECK-V-NEXT: ld ra, 72(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: ld s0, 64(sp) # 8-byte Folded Reload diff --git 
a/llvm/test/CodeGen/RISCV/rvv/insertelt-fp.ll b/llvm/test/CodeGen/RISCV/rvv/insertelt-fp.ll index 4bd9f7befa52a..141d1b9ed9544 100644 --- a/llvm/test/CodeGen/RISCV/rvv/insertelt-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/insertelt-fp.ll @@ -119,7 +119,7 @@ define <vscale x 8 x half> @insertelt_nxv8f16_0(<vscale x 8 x half> %v, half %el define <vscale x 8 x half> @insertelt_nxv8f16_imm(<vscale x 8 x half> %v, half %elt) { ; CHECK-LABEL: insertelt_nxv8f16_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e16, m2, tu, ma +; CHECK-NEXT: vsetivli zero, 4, e16, m1, tu, ma ; CHECK-NEXT: vfmv.s.f v10, fa0 ; CHECK-NEXT: vslideup.vi v8, v10, 3 ; CHECK-NEXT: ret @@ -153,7 +153,7 @@ define <vscale x 16 x half> @insertelt_nxv16f16_0(<vscale x 16 x half> %v, half define <vscale x 16 x half> @insertelt_nxv16f16_imm(<vscale x 16 x half> %v, half %elt) { ; CHECK-LABEL: insertelt_nxv16f16_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e16, m4, tu, ma +; CHECK-NEXT: vsetivli zero, 4, e16, m1, tu, ma ; CHECK-NEXT: vfmv.s.f v12, fa0 ; CHECK-NEXT: vslideup.vi v8, v12, 3 ; CHECK-NEXT: ret @@ -187,7 +187,7 @@ define <vscale x 32 x half> @insertelt_nxv32f16_0(<vscale x 32 x half> %v, half define <vscale x 32 x half> @insertelt_nxv32f16_imm(<vscale x 32 x half> %v, half %elt) { ; CHECK-LABEL: insertelt_nxv32f16_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e16, m8, tu, ma +; CHECK-NEXT: vsetivli zero, 4, e16, m1, tu, ma ; CHECK-NEXT: vfmv.s.f v16, fa0 ; CHECK-NEXT: vslideup.vi v8, v16, 3 ; CHECK-NEXT: ret @@ -289,7 +289,7 @@ define <vscale x 4 x float> @insertelt_nxv4f32_0(<vscale x 4 x float> %v, float define <vscale x 4 x float> @insertelt_nxv4f32_imm(<vscale x 4 x float> %v, float %elt) { ; CHECK-LABEL: insertelt_nxv4f32_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m2, tu, ma +; CHECK-NEXT: vsetivli zero, 4, e32, m1, tu, ma ; CHECK-NEXT: vfmv.s.f v10, fa0 ; CHECK-NEXT: vslideup.vi v8, v10, 3 ; CHECK-NEXT: ret @@ -323,7 +323,7 @@ define <vscale x 8 x float> @insertelt_nxv8f32_0(<vscale x 8 x float> %v, float define <vscale x 8 x float> @insertelt_nxv8f32_imm(<vscale x 8 x float> %v, float %elt) { ; CHECK-LABEL: insertelt_nxv8f32_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m4, tu, ma +; CHECK-NEXT: vsetivli zero, 4, e32, m1, tu, ma ; CHECK-NEXT: vfmv.s.f v12, fa0 ; CHECK-NEXT: vslideup.vi v8, v12, 3 ; CHECK-NEXT: ret @@ -357,7 +357,7 @@ define <vscale x 16 x float> @insertelt_nxv16f32_0(<vscale x 16 x float> %v, flo define <vscale x 16 x float> @insertelt_nxv16f32_imm(<vscale x 16 x float> %v, float %elt) { ; CHECK-LABEL: insertelt_nxv16f32_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m8, tu, ma +; CHECK-NEXT: vsetivli zero, 4, e32, m1, tu, ma ; CHECK-NEXT: vfmv.s.f v16, fa0 ; CHECK-NEXT: vslideup.vi v8, v16, 3 ; CHECK-NEXT: ret @@ -459,7 +459,7 @@ define <vscale x 4 x double> @insertelt_nxv4f64_0(<vscale x 4 x double> %v, doub define <vscale x 4 x double> @insertelt_nxv4f64_imm(<vscale x 4 x double> %v, double %elt) { ; CHECK-LABEL: insertelt_nxv4f64_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e64, m4, tu, ma +; CHECK-NEXT: vsetivli zero, 4, e64, m2, tu, ma ; CHECK-NEXT: vfmv.s.f v12, fa0 ; CHECK-NEXT: vslideup.vi v8, v12, 3 ; CHECK-NEXT: ret @@ -493,7 +493,7 @@ define <vscale x 8 x double> @insertelt_nxv8f64_0(<vscale x 8 x double> %v, doub define <vscale x 8 x double> @insertelt_nxv8f64_imm(<vscale x 8 x double> %v, double %elt) { ; CHECK-LABEL: insertelt_nxv8f64_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e64, m8, tu, ma +; CHECK-NEXT: vsetivli zero, 4, e64, m2, tu, ma ; CHECK-NEXT: vfmv.s.f v16, fa0 ; CHECK-NEXT: vslideup.vi v8, v16, 3 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/insertelt-i1.ll b/llvm/test/CodeGen/RISCV/rvv/insertelt-i1.ll index a7bd15f2a7b33..911072d9571ff 100644 --- a/llvm/test/CodeGen/RISCV/rvv/insertelt-i1.ll +++ b/llvm/test/CodeGen/RISCV/rvv/insertelt-i1.ll @@ -149,7 +149,7 @@ define <vscale x 16 x i1> @insertelt_nxv16i1(<vscale x 16 x i1> %x, i1 %elt) { ; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 ; CHECK-NEXT: vmv.s.x v10, a0 -; CHECK-NEXT: vsetivli zero, 3, e8, m2, tu, ma +; CHECK-NEXT: vsetivli zero, 3, e8, m1, tu, ma ; CHECK-NEXT:
vslideup.vi v8, v10, 2 ; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 1 @@ -184,7 +184,7 @@ define <vscale x 32 x i1> @insertelt_nxv32i1(<vscale x 32 x i1> %x, i1 %elt) { ; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 ; CHECK-NEXT: vmv.s.x v12, a0 -; CHECK-NEXT: vsetivli zero, 3, e8, m4, tu, ma +; CHECK-NEXT: vsetivli zero, 3, e8, m1, tu, ma ; CHECK-NEXT: vslideup.vi v8, v12, 2 ; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 1 @@ -219,7 +219,7 @@ define <vscale x 64 x i1> @insertelt_nxv64i1(<vscale x 64 x i1> %x, i1 %elt) { ; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 ; CHECK-NEXT: vmv.s.x v16, a0 -; CHECK-NEXT: vsetivli zero, 3, e8, m8, tu, ma +; CHECK-NEXT: vsetivli zero, 3, e8, m1, tu, ma ; CHECK-NEXT: vslideup.vi v8, v16, 2 ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 1 diff --git a/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv32.ll index 39f94eab2aa66..c56f3df66dee9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv32.ll @@ -151,7 +151,7 @@ define <vscale x 16 x i8> @insertelt_nxv16i8_0(<vscale x 16 x i8> %v, i8 signext define <vscale x 16 x i8> @insertelt_nxv16i8_imm(<vscale x 16 x i8> %v, i8 signext %elt) { ; CHECK-LABEL: insertelt_nxv16i8_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e8, m2, tu, ma +; CHECK-NEXT: vsetivli zero, 4, e8, m1, tu, ma ; CHECK-NEXT: vmv.s.x v10, a0 ; CHECK-NEXT: vslideup.vi v8, v10, 3 ; CHECK-NEXT: ret @@ -185,7 +185,7 @@ define <vscale x 32 x i8> @insertelt_nxv32i8_0(<vscale x 32 x i8> %v, i8 signext define <vscale x 32 x i8> @insertelt_nxv32i8_imm(<vscale x 32 x i8> %v, i8 signext %elt) { ; CHECK-LABEL: insertelt_nxv32i8_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e8, m4, tu, ma +; CHECK-NEXT: vsetivli zero, 4, e8, m1, tu, ma ; CHECK-NEXT: vmv.s.x v12, a0 ; CHECK-NEXT: vslideup.vi v8, v12, 3 ; CHECK-NEXT: ret @@ -219,7 +219,7 @@ define <vscale x 64 x i8> @insertelt_nxv64i8_0(<vscale x 64 x i8> %v, i8 signext define <vscale x 64 x i8> @insertelt_nxv64i8_imm(<vscale x 64 x i8> %v, i8 signext %elt) { ; CHECK-LABEL: insertelt_nxv64i8_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e8, m8, tu, ma +; CHECK-NEXT: vsetivli zero, 4, e8, m1, tu, ma ; CHECK-NEXT: vmv.s.x v16, a0 ; CHECK-NEXT: vslideup.vi v8, v16, 3 ; CHECK-NEXT: ret @@ -355,7 +355,7 @@ define <vscale x 8 x i16> @insertelt_nxv8i16_0(<vscale x 8 x i16> %v, i16 signex define <vscale x 8 x i16> @insertelt_nxv8i16_imm(<vscale x 8 x i16> %v, i16 signext %elt) { ; CHECK-LABEL: insertelt_nxv8i16_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e16, m2, tu, ma +; CHECK-NEXT: vsetivli zero, 4, e16, m1, tu, ma ; CHECK-NEXT: vmv.s.x v10, a0 ; CHECK-NEXT: vslideup.vi v8, v10, 3 ; CHECK-NEXT: ret @@ -389,7 +389,7 @@ define <vscale x 16 x i16> @insertelt_nxv16i16_0(<vscale x 16 x i16> %v, i16 sig define <vscale x 16 x i16> @insertelt_nxv16i16_imm(<vscale x 16 x i16> %v, i16 signext %elt) { ; CHECK-LABEL: insertelt_nxv16i16_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e16, m4, tu, ma +; CHECK-NEXT: vsetivli zero, 4, e16, m1, tu, ma ; CHECK-NEXT: vmv.s.x v12, a0 ; CHECK-NEXT: vslideup.vi v8, v12, 3 ; CHECK-NEXT: ret @@ -423,7 +423,7 @@ define <vscale x 32 x i16> @insertelt_nxv32i16_0(<vscale x 32 x i16> %v, i16 sig define <vscale x 32 x i16> @insertelt_nxv32i16_imm(<vscale x 32 x i16> %v, i16 signext %elt) { ; CHECK-LABEL: insertelt_nxv32i16_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e16, m8, tu, ma +; CHECK-NEXT: vsetivli zero, 4, e16, m1, tu, ma ; CHECK-NEXT: vmv.s.x v16, a0 ; CHECK-NEXT: vslideup.vi v8, v16, 3 ; CHECK-NEXT: ret @@ -525,7 +525,7 @@ define <vscale x 4 x i32> @insertelt_nxv4i32_0(<vscale x 4 x i32> %v, i32 %elt) define <vscale x 4 x i32> @insertelt_nxv4i32_imm(<vscale x 4 x i32> %v, i32 %elt) { ; CHECK-LABEL: insertelt_nxv4i32_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m2, tu, ma +; CHECK-NEXT: vsetivli zero, 4, e32, m1, tu, ma ; CHECK-NEXT: vmv.s.x v10, a0 ;
CHECK-NEXT: vslideup.vi v8, v10, 3 ; CHECK-NEXT: ret @@ -559,7 +559,7 @@ define <vscale x 8 x i32> @insertelt_nxv8i32_0(<vscale x 8 x i32> %v, i32 %elt) define <vscale x 8 x i32> @insertelt_nxv8i32_imm(<vscale x 8 x i32> %v, i32 %elt) { ; CHECK-LABEL: insertelt_nxv8i32_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m4, tu, ma +; CHECK-NEXT: vsetivli zero, 4, e32, m1, tu, ma ; CHECK-NEXT: vmv.s.x v12, a0 ; CHECK-NEXT: vslideup.vi v8, v12, 3 ; CHECK-NEXT: ret @@ -593,7 +593,7 @@ define <vscale x 16 x i32> @insertelt_nxv16i32_0(<vscale x 16 x i32> %v, i32 %el define <vscale x 16 x i32> @insertelt_nxv16i32_imm(<vscale x 16 x i32> %v, i32 %elt) { ; CHECK-LABEL: insertelt_nxv16i32_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m8, tu, ma +; CHECK-NEXT: vsetivli zero, 4, e32, m1, tu, ma ; CHECK-NEXT: vmv.s.x v16, a0 ; CHECK-NEXT: vslideup.vi v8, v16, 3 ; CHECK-NEXT: ret @@ -707,7 +707,7 @@ define <vscale x 4 x i64> @insertelt_nxv4i64_imm(<vscale x 4 x i64> %v, i64 %elt ; CHECK-NEXT: vsetivli zero, 2, e32, m4, ta, ma ; CHECK-NEXT: vslide1down.vx v12, v8, a0 ; CHECK-NEXT: vslide1down.vx v12, v12, a1 -; CHECK-NEXT: vsetivli zero, 4, e64, m4, tu, ma +; CHECK-NEXT: vsetivli zero, 4, e64, m2, tu, ma ; CHECK-NEXT: vslideup.vi v8, v12, 3 ; CHECK-NEXT: ret %r = insertelement <vscale x 4 x i64> %v, i64 %elt, i32 3 @@ -745,7 +745,7 @@ define <vscale x 8 x i64> @insertelt_nxv8i64_imm(<vscale x 8 x i64> %v, i64 %elt ; CHECK-NEXT: vsetivli zero, 2, e32, m8, ta, ma ; CHECK-NEXT: vslide1down.vx v16, v8, a0 ; CHECK-NEXT: vslide1down.vx v16, v16, a1 -; CHECK-NEXT: vsetivli zero, 4, e64, m8, tu, ma +; CHECK-NEXT: vsetivli zero, 4, e64, m2, tu, ma ; CHECK-NEXT: vslideup.vi v8, v16, 3 ; CHECK-NEXT: ret %r = insertelement <vscale x 8 x i64> %v, i64 %elt, i32 3 diff --git a/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv64.ll index 1dd00197bbbb0..44a68a5e2ca31 100644 --- a/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv64.ll @@ -151,7 +151,7 @@ define <vscale x 16 x i8> @insertelt_nxv16i8_0(<vscale x 16 x i8> %v, i8 signext define <vscale x 16 x i8> @insertelt_nxv16i8_imm(<vscale x 16 x i8> %v, i8 signext %elt) { ; CHECK-LABEL: insertelt_nxv16i8_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e8, m2, tu, ma +; CHECK-NEXT: vsetivli zero, 4, e8, m1, tu, ma ; CHECK-NEXT: vmv.s.x v10, a0 ; CHECK-NEXT: vslideup.vi v8, v10, 3 ; CHECK-NEXT: ret @@ -185,7 +185,7 @@ define <vscale x 32 x i8> @insertelt_nxv32i8_0(<vscale x 32 x i8> %v, i8 signext define <vscale x 32 x i8> @insertelt_nxv32i8_imm(<vscale x 32 x i8> %v, i8 signext %elt) { ; CHECK-LABEL: insertelt_nxv32i8_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e8, m4, tu, ma +; CHECK-NEXT: vsetivli zero, 4, e8, m1, tu, ma ; CHECK-NEXT: vmv.s.x v12, a0 ; CHECK-NEXT: vslideup.vi v8, v12, 3 ; CHECK-NEXT: ret @@ -219,7 +219,7 @@ define <vscale x 64 x i8> @insertelt_nxv64i8_0(<vscale x 64 x i8> %v, i8 signext define <vscale x 64 x i8> @insertelt_nxv64i8_imm(<vscale x 64 x i8> %v, i8 signext %elt) { ; CHECK-LABEL: insertelt_nxv64i8_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e8, m8, tu, ma +; CHECK-NEXT: vsetivli zero, 4, e8, m1, tu, ma ; CHECK-NEXT: vmv.s.x v16, a0 ; CHECK-NEXT: vslideup.vi v8, v16, 3 ; CHECK-NEXT: ret @@ -355,7 +355,7 @@ define <vscale x 8 x i16> @insertelt_nxv8i16_0(<vscale x 8 x i16> %v, i16 signex define <vscale x 8 x i16> @insertelt_nxv8i16_imm(<vscale x 8 x i16> %v, i16 signext %elt) { ; CHECK-LABEL: insertelt_nxv8i16_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e16, m2, tu, ma +; CHECK-NEXT: vsetivli zero, 4, e16, m1, tu, ma ; CHECK-NEXT: vmv.s.x v10, a0 ; CHECK-NEXT: vslideup.vi v8, v10, 3 ; CHECK-NEXT: ret @@ -389,7 +389,7 @@ define <vscale x 16 x i16> @insertelt_nxv16i16_0(<vscale x 16 x i16> %v, i16 sig define <vscale x 16 x i16> @insertelt_nxv16i16_imm(<vscale x 16 x i16> %v, i16 signext %elt) { ; CHECK-LABEL: insertelt_nxv16i16_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e16, m4, tu, ma +; CHECK-NEXT: vsetivli zero, 4, e16, m1, tu, ma ; CHECK-NEXT: vmv.s.x v12, a0 ; CHECK-NEXT: vslideup.vi v8,
v12, 3 ; CHECK-NEXT: ret @@ -423,7 +423,7 @@ define <vscale x 32 x i16> @insertelt_nxv32i16_0(<vscale x 32 x i16> %v, i16 sig define <vscale x 32 x i16> @insertelt_nxv32i16_imm(<vscale x 32 x i16> %v, i16 signext %elt) { ; CHECK-LABEL: insertelt_nxv32i16_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e16, m8, tu, ma +; CHECK-NEXT: vsetivli zero, 4, e16, m1, tu, ma ; CHECK-NEXT: vmv.s.x v16, a0 ; CHECK-NEXT: vslideup.vi v8, v16, 3 ; CHECK-NEXT: ret @@ -525,7 +525,7 @@ define <vscale x 4 x i32> @insertelt_nxv4i32_0(<vscale x 4 x i32> %v, i32 signex define <vscale x 4 x i32> @insertelt_nxv4i32_imm(<vscale x 4 x i32> %v, i32 signext %elt) { ; CHECK-LABEL: insertelt_nxv4i32_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m2, tu, ma +; CHECK-NEXT: vsetivli zero, 4, e32, m1, tu, ma ; CHECK-NEXT: vmv.s.x v10, a0 ; CHECK-NEXT: vslideup.vi v8, v10, 3 ; CHECK-NEXT: ret @@ -559,7 +559,7 @@ define <vscale x 8 x i32> @insertelt_nxv8i32_0(<vscale x 8 x i32> %v, i32 signex define <vscale x 8 x i32> @insertelt_nxv8i32_imm(<vscale x 8 x i32> %v, i32 signext %elt) { ; CHECK-LABEL: insertelt_nxv8i32_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m4, tu, ma +; CHECK-NEXT: vsetivli zero, 4, e32, m1, tu, ma ; CHECK-NEXT: vmv.s.x v12, a0 ; CHECK-NEXT: vslideup.vi v8, v12, 3 ; CHECK-NEXT: ret @@ -593,7 +593,7 @@ define <vscale x 16 x i32> @insertelt_nxv16i32_0(<vscale x 16 x i32> %v, i32 sig define <vscale x 16 x i32> @insertelt_nxv16i32_imm(<vscale x 16 x i32> %v, i32 signext %elt) { ; CHECK-LABEL: insertelt_nxv16i32_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m8, tu, ma +; CHECK-NEXT: vsetivli zero, 4, e32, m1, tu, ma ; CHECK-NEXT: vmv.s.x v16, a0 ; CHECK-NEXT: vslideup.vi v8, v16, 3 ; CHECK-NEXT: ret @@ -699,7 +699,7 @@ define <vscale x 4 x i64> @insertelt_nxv4i64_0(<vscale x 4 x i64> %v, i64 %elt) define <vscale x 4 x i64> @insertelt_nxv4i64_imm(<vscale x 4 x i64> %v, i64 %elt) { ; CHECK-LABEL: insertelt_nxv4i64_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e64, m4, tu, ma +; CHECK-NEXT: vsetivli zero, 4, e64, m2, tu, ma ; CHECK-NEXT: vmv.s.x v12, a0 ; CHECK-NEXT: vslideup.vi v8, v12, 3 ; CHECK-NEXT: ret @@ -735,7 +735,7 @@ define <vscale x 8 x i64> @insertelt_nxv8i64_0(<vscale x 8 x i64> %v, i64 %elt) define <vscale x 8 x i64> @insertelt_nxv8i64_imm(<vscale x 8 x i64> %v, i64 %elt) { ; CHECK-LABEL: insertelt_nxv8i64_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e64, m8, tu, ma +; CHECK-NEXT: vsetivli zero, 4, e64, m2, tu, ma ; CHECK-NEXT: vmv.s.x v16, a0 ; CHECK-NEXT: vslideup.vi v8, v16, 3 ; CHECK-NEXT: ret