diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index 89e017807363b..52bb10f9ba19b 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -589,8 +589,6 @@ void RISCVPassConfig::addPreEmitPass2() {
 
 void RISCVPassConfig::addMachineSSAOptimization() {
   addPass(createRISCVVectorPeepholePass());
-  // TODO: Move this to pre regalloc
-  addPass(createRISCVVMV0EliminationPass());
   addPass(createRISCVFoldMemOffsetPass());
 
   TargetPassConfig::addMachineSSAOptimization();
@@ -604,10 +602,6 @@ void RISCVPassConfig::addMachineSSAOptimization() {
 }
 
 void RISCVPassConfig::addPreRegAlloc() {
-  // TODO: Move this as late as possible before regalloc
-  if (TM->getOptLevel() == CodeGenOptLevel::None)
-    addPass(createRISCVVMV0EliminationPass());
-
   addPass(createRISCVPreRAExpandPseudoPass());
   if (TM->getOptLevel() != CodeGenOptLevel::None) {
     addPass(createRISCVMergeBaseOffsetOptPass());
@@ -621,6 +615,8 @@ void RISCVPassConfig::addPreRegAlloc() {
 
   if (TM->getOptLevel() != CodeGenOptLevel::None && EnableMachinePipeliner)
     addPass(&MachinePipelinerID);
+
+  addPass(createRISCVVMV0EliminationPass());
 }
 
 void RISCVPassConfig::addFastRegAlloc() {
diff --git a/llvm/lib/Target/RISCV/RISCVVMV0Elimination.cpp b/llvm/lib/Target/RISCV/RISCVVMV0Elimination.cpp
index ccc86da340440..9737474a18f63 100644
--- a/llvm/lib/Target/RISCV/RISCVVMV0Elimination.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVMV0Elimination.cpp
@@ -131,10 +131,9 @@ bool RISCVVMV0Elimination::runOnMachineFunction(MachineFunction &MF) {
 
       // Peek through a single copy to match what isel does.
       if (MachineInstr *SrcMI = MRI.getVRegDef(Src);
-          SrcMI->isCopy() && SrcMI->getOperand(1).getReg().isVirtual()) {
-        assert(SrcMI->getOperand(1).getSubReg() == RISCV::NoSubRegister);
+          SrcMI->isCopy() && SrcMI->getOperand(1).getReg().isVirtual() &&
+          SrcMI->getOperand(1).getSubReg() == RISCV::NoSubRegister)
         Src = SrcMI->getOperand(1).getReg();
-      }
 
       BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(RISCV::COPY), RISCV::V0)
           .addReg(Src);
diff --git a/llvm/test/CodeGen/RISCV/O0-pipeline.ll b/llvm/test/CodeGen/RISCV/O0-pipeline.ll
index a50c303819f23..f93cb65897210 100644
--- a/llvm/test/CodeGen/RISCV/O0-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O0-pipeline.ll
@@ -39,11 +39,11 @@
 ; CHECK-NEXT: RISC-V DAG->DAG Pattern Instruction Selection
 ; CHECK-NEXT: Finalize ISel and expand pseudo-instructions
 ; CHECK-NEXT: Local Stack Slot Allocation
-; CHECK-NEXT: RISC-V VMV0 Elimination
 ; CHECK-NEXT: RISC-V Pre-RA pseudo instruction expansion pass
 ; CHECK-NEXT: RISC-V Insert Read/Write CSR Pass
 ; CHECK-NEXT: RISC-V Insert Write VXRM Pass
 ; CHECK-NEXT: RISC-V Landing Pad Setup
+; CHECK-NEXT: RISC-V VMV0 Elimination
 ; CHECK-NEXT: Init Undef Pass
 ; CHECK-NEXT: Eliminate PHI nodes for register allocation
 ; CHECK-NEXT: Two-Address instruction pass
diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
index 194223eee69eb..b67fbe1b5d3cd 100644
--- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
@@ -97,7 +97,6 @@
 ; CHECK-NEXT: RISC-V DAG->DAG Pattern Instruction Selection
 ; CHECK-NEXT: Finalize ISel and expand pseudo-instructions
 ; CHECK-NEXT: RISC-V Vector Peephole Optimization
-; CHECK-NEXT: RISC-V VMV0 Elimination
 ; CHECK-NEXT: RISC-V Fold Memory Offset
 ; CHECK-NEXT: Lazy Machine Block Frequency Analysis
 ; CHECK-NEXT: Early Tail Duplication
@@ -129,6 +128,7 @@
 ; CHECK-NEXT: RISC-V Insert Read/Write CSR Pass
 ; CHECK-NEXT: RISC-V
Insert Write VXRM Pass ; CHECK-NEXT: RISC-V Landing Pad Setup +; CHECK-NEXT: RISC-V VMV0 Elimination ; CHECK-NEXT: Detect Dead Lanes ; CHECK-NEXT: Init Undef Pass ; CHECK-NEXT: Process Implicit Definitions diff --git a/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll b/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll index 1b9c78a20ec3b..039266b169ab2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll @@ -1515,40 +1515,36 @@ define @vp_ceil_vv_nxv16f64( %va, < ; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v16, v0.t +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a2, 3 ; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t -; CHECK-NEXT: addi a3, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: fsrm a2 -; CHECK-NEXT: addi a2, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: bltu a0, a1, .LBB44_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB44_2: ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8, v0.t +; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v7, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v7, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/commutable.ll b/llvm/test/CodeGen/RISCV/rvv/commutable.ll index e26c467f025bd..5f35626120178 100644 --- a/llvm/test/CodeGen/RISCV/rvv/commutable.ll +++ b/llvm/test/CodeGen/RISCV/rvv/commutable.ll @@ -26,10 +26,9 @@ define @commutable_vadd_vv_masked( %0, @llvm.riscv.vadd.mask.nxv1i64.nxv1i64( undef, %0, %1, %mask, iXLen %2, iXLen 1) %b = call @llvm.riscv.vadd.mask.nxv1i64.nxv1i64( undef, %1, %0, %mask, iXLen %2, iXLen 1) @@ -59,10 +58,9 @@ define @commutable_vand_vv_masked( %0, @llvm.riscv.vand.mask.nxv1i64.nxv1i64( undef, %0, %1, %mask, iXLen %2, iXLen 1) %b = call @llvm.riscv.vand.mask.nxv1i64.nxv1i64( undef, %1, %0, %mask, iXLen %2, iXLen 1) @@ -92,10 +90,9 @@ define @commutable_vor_vv_masked( %0, @llvm.riscv.vor.mask.nxv1i64.nxv1i64( undef, %0, %1, %mask, iXLen %2, iXLen 1) %b = call @llvm.riscv.vor.mask.nxv1i64.nxv1i64( undef, %1, %0, %mask, iXLen %2, iXLen 1) @@ -125,10 +122,9 @@ define @commutable_vxor_vv_masked( %0, @llvm.riscv.vxor.mask.nxv1i64.nxv1i64( undef, %0, %1, %mask, iXLen %2, iXLen 1) %b = call @llvm.riscv.vxor.mask.nxv1i64.nxv1i64( undef, %1, %0, %mask, iXLen %2, iXLen 1) @@ 
-158,10 +154,9 @@ define @commutable_vmseq_vv_masked( %0, @llvm.riscv.vmseq.mask.nxv1i64( undef, %0, %1, %mask, iXLen %2) %b = call @llvm.riscv.vmseq.mask.nxv1i64( undef, %1, %0, %mask, iXLen %2) @@ -191,10 +186,9 @@ define @commutable_vmsne_vv_masked( %0, @llvm.riscv.vmsne.mask.nxv1i64( undef, %0, %1, %mask, iXLen %2) %b = call @llvm.riscv.vmsne.mask.nxv1i64( undef, %1, %0, %mask, iXLen %2) @@ -224,10 +218,9 @@ define @commutable_vmin_vv_masked( %0, @llvm.riscv.vmin.mask.nxv1i64.nxv1i64( undef, %0, %1, %mask, iXLen %2, iXLen 1) %b = call @llvm.riscv.vmin.mask.nxv1i64.nxv1i64( undef, %1, %0, %mask, iXLen %2, iXLen 1) @@ -257,10 +250,9 @@ define @commutable_vminu_vv_masked( %0, @llvm.riscv.vminu.mask.nxv1i64.nxv1i64( undef, %0, %1, %mask, iXLen %2, iXLen 1) %b = call @llvm.riscv.vminu.mask.nxv1i64.nxv1i64( undef, %1, %0, %mask, iXLen %2, iXLen 1) @@ -290,10 +282,9 @@ define @commutable_vmax_vv_masked( %0, @llvm.riscv.vmax.mask.nxv1i64.nxv1i64( undef, %0, %1, %mask, iXLen %2, iXLen 1) %b = call @llvm.riscv.vmax.mask.nxv1i64.nxv1i64( undef, %1, %0, %mask, iXLen %2, iXLen 1) @@ -323,10 +314,9 @@ define @commutable_vmaxu_vv_masked( %0, @llvm.riscv.vmaxu.mask.nxv1i64.nxv1i64( undef, %0, %1, %mask, iXLen %2, iXLen 1) %b = call @llvm.riscv.vmaxu.mask.nxv1i64.nxv1i64( undef, %1, %0, %mask, iXLen %2, iXLen 1) @@ -356,10 +346,9 @@ define @commutable_vmul_vv_masked( %0, @llvm.riscv.vmul.mask.nxv1i64.nxv1i64( undef, %0, %1, %mask, iXLen %2, iXLen 1) %b = call @llvm.riscv.vmul.mask.nxv1i64.nxv1i64( undef, %1, %0, %mask, iXLen %2, iXLen 1) @@ -389,10 +378,9 @@ define @commutable_vmulh_vv_masked( %0, @llvm.riscv.vmulh.mask.nxv1i64.nxv1i64( undef, %0, %1, %mask, iXLen %2, iXLen 1) %b = call @llvm.riscv.vmulh.mask.nxv1i64.nxv1i64( undef, %1, %0, %mask, iXLen %2, iXLen 1) @@ -422,10 +410,9 @@ define @commutable_vmulhu_vv_masked( %0, @llvm.riscv.vmulhu.mask.nxv1i64.nxv1i64( undef, %0, %1, %mask, iXLen %2, iXLen 1) %b = call @llvm.riscv.vmulhu.mask.nxv1i64.nxv1i64( undef, %1, %0, %mask, iXLen %2, iXLen 1) @@ -456,9 +443,8 @@ define @commutable_vwadd_vv_masked( %0, @llvm.riscv.vwadd.mask.nxv1i64.nxv1i32.nxv1i32( undef, %0, %1, %mask, iXLen %2, iXLen 1) %b = call @llvm.riscv.vwadd.mask.nxv1i64.nxv1i32.nxv1i32( undef, %1, %0, %mask, iXLen %2, iXLen 1) @@ -489,9 +475,8 @@ define @commutable_vwaddu_vv_masked( %0, @llvm.riscv.vwaddu.mask.nxv1i64.nxv1i32.nxv1i32( undef, %0, %1, %mask, iXLen %2, iXLen 1) %b = call @llvm.riscv.vwaddu.mask.nxv1i64.nxv1i32.nxv1i32( undef, %1, %0, %mask, iXLen %2, iXLen 1) @@ -522,9 +507,8 @@ define @commutable_vwmul_vv_masked( %0, @llvm.riscv.vwmul.mask.nxv1i64.nxv1i32.nxv1i32( undef, %0, %1, %mask, iXLen %2, iXLen 1) %b = call @llvm.riscv.vwmul.mask.nxv1i64.nxv1i32.nxv1i32( undef, %1, %0, %mask, iXLen %2, iXLen 1) @@ -555,9 +539,8 @@ define @commutable_vwmulu_vv_masked( %0, @llvm.riscv.vwmulu.mask.nxv1i64.nxv1i32.nxv1i32( undef, %0, %1, %mask, iXLen %2, iXLen 1) %b = call @llvm.riscv.vwmulu.mask.nxv1i64.nxv1i32.nxv1i32( undef, %1, %0, %mask, iXLen %2, iXLen 1) @@ -637,10 +620,9 @@ define @commutable_vadc_vv( %0, @llvm.riscv.vadc.nxv1i64.nxv1i64( undef, %0, %1, %mask, iXLen %2) @@ -671,10 +653,9 @@ define @commutable_vsadd_vv_masked( %0, @llvm.riscv.vsadd.mask.nxv1i64.nxv1i64( undef, %0, %1, %mask, iXLen %2, iXLen 1) %b = call @llvm.riscv.vsadd.mask.nxv1i64.nxv1i64( undef, %1, %0, %mask, iXLen %2, iXLen 1) @@ -704,10 +685,9 @@ define @commutable_vsaddu_vv_masked( %0, @llvm.riscv.vsaddu.mask.nxv1i64.nxv1i64( undef, %0, %1, %mask, iXLen %2, iXLen 1) %b = call 
@llvm.riscv.vsaddu.mask.nxv1i64.nxv1i64( undef, %1, %0, %mask, iXLen %2, iXLen 1) @@ -739,10 +719,9 @@ define @commutable_vaadd_vv_masked( %0, @llvm.riscv.vaadd.mask.nxv1i64.nxv1i64( undef, %0, %1, %mask, iXLen 0, iXLen %2, iXLen 1) %b = call @llvm.riscv.vaadd.mask.nxv1i64.nxv1i64( undef, %1, %0, %mask, iXLen 0, iXLen %2, iXLen 1) @@ -774,10 +753,9 @@ define @commutable_vaaddu_vv_masked( %0, @llvm.riscv.vaaddu.mask.nxv1i64.nxv1i64( undef, %0, %1, %mask, iXLen 0, iXLen %2, iXLen 1) %b = call @llvm.riscv.vaaddu.mask.nxv1i64.nxv1i64( undef, %1, %0, %mask, iXLen 0, iXLen %2, iXLen 1) @@ -809,10 +787,9 @@ define @commutable_vsmul_vv_masked( %0, @llvm.riscv.vsmul.mask.nxv1i64.nxv1i64( undef, %0, %1, %mask, iXLen 0, iXLen %2, iXLen 1) %b = call @llvm.riscv.vsmul.mask.nxv1i64.nxv1i64( undef, %1, %0, %mask, iXLen 0, iXLen %2, iXLen 1) diff --git a/llvm/test/CodeGen/RISCV/rvv/copyprop.mir b/llvm/test/CodeGen/RISCV/rvv/copyprop.mir index 0b905b57f92b8..be73d4808937a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/copyprop.mir +++ b/llvm/test/CodeGen/RISCV/rvv/copyprop.mir @@ -6,9 +6,6 @@ ; CHECK-LABEL: foo: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma - ; CHECK-NEXT: vmsne.vi v0, v8, 0 - ; CHECK-NEXT: vsll.vi v8, v8, 5 - ; CHECK-NEXT: vmerge.vim v8, v8, -1, v0 ; CHECK-NEXT: sf.vc.v.x 3, 31, v9, a1 ; CHECK-NEXT: bgeu a0, zero, .LBB0_3 ; CHECK-NEXT: # %bb.1: # %entry @@ -22,7 +19,10 @@ ; CHECK-NEXT: li a2, 128 ; CHECK-NEXT: bgeu a0, a2, .LBB0_2 ; CHECK-NEXT: .LBB0_4: # %entry - ; CHECK-NEXT: vse64.v v8, (a1) + ; CHECK-NEXT: vmsne.vi v0, v8, 0 + ; CHECK-NEXT: vsll.vi v8, v8, 5 + ; CHECK-NEXT: vmerge.vim v9, v8, -1, v0 + ; CHECK-NEXT: vse64.v v9, (a1) ; CHECK-NEXT: ret entry: ret void diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll index 5f275da1740cb..cd4b19f11d160 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll @@ -1735,8 +1735,7 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: sub sp, sp, a1 ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x30, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 48 * vlenb ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 24 -; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: slli a1, a1, 4 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 48 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill @@ -1771,7 +1770,8 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: vnot.v v8, v8, v0.t ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 5 +; RV32-NEXT: li a5, 24 +; RV32-NEXT: mul a4, a4, a5 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 48 ; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill @@ -1786,12 +1786,13 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: addi a3, sp, 32 ; RV32-NEXT: vlse64.v v8, (a3), zero ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 5 +; RV32-NEXT: li a4, 24 +; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload @@ -1805,21 +1806,24 @@ define <32 x i64> @vp_cttz_v32i64(<32 
x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v24, v24, v16, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 5 +; RV32-NEXT: li a4, 24 +; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vsub.vv v24, v16, v24, v0.t ; RV32-NEXT: vand.vv v16, v24, v8, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 5 +; RV32-NEXT: li a4, 24 +; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vsrl.vi v16, v24, 2, v0.t ; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 5 +; RV32-NEXT: li a4, 24 +; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload @@ -1837,7 +1841,8 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vlse64.v v8, (a4), zero ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 5 +; RV32-NEXT: li a4, 24 +; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill @@ -1851,7 +1856,8 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: vadd.vv v8, v8, v24, v0.t ; RV32-NEXT: vand.vv v16, v8, v16, v0.t ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 5 +; RV32-NEXT: li a3, 24 +; RV32-NEXT: mul a2, a2, a3 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 48 ; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload @@ -1869,8 +1875,7 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: and a0, a0, a3 ; RV32-NEXT: vmv1r.v v0, v7 ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 24 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: slli a3, a3, 4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload @@ -1886,41 +1891,28 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v16, v24, v16, v0.t -; RV32-NEXT: vsub.vv v8, v8, v16, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 -; RV32-NEXT: mul a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vsub.vv v24, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v16, v24, v8, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a1, 40 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v16, v24, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 24 -; RV32-NEXT: mul a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vi v8, v24, 2, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 -; RV32-NEXT: 
mul a0, a0, a1 +; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vv v8, v8, v24, v0.t +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 24 +; RV32-NEXT: li a1, 40 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 @@ -1932,7 +1924,8 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: li a1, 24 +; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload @@ -3928,8 +3921,7 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: sub sp, sp, a1 ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x30, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 48 * vlenb ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 24 -; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: slli a1, a1, 4 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 48 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill @@ -3964,7 +3956,8 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: vnot.v v8, v8, v0.t ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 5 +; RV32-NEXT: li a5, 24 +; RV32-NEXT: mul a4, a4, a5 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 48 ; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill @@ -3979,12 +3972,13 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: addi a3, sp, 32 ; RV32-NEXT: vlse64.v v8, (a3), zero ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 5 +; RV32-NEXT: li a4, 24 +; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload @@ -3998,21 +3992,24 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v24, v24, v16, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 5 +; RV32-NEXT: li a4, 24 +; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vsub.vv v24, v16, v24, v0.t ; RV32-NEXT: vand.vv v16, v24, v8, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 5 +; RV32-NEXT: li a4, 24 +; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vsrl.vi v16, v24, 2, v0.t ; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 5 +; RV32-NEXT: li a4, 24 +; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload @@ -4030,7 +4027,8 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, 
i32 z ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vlse64.v v8, (a4), zero ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 5 +; RV32-NEXT: li a4, 24 +; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill @@ -4044,7 +4042,8 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: vadd.vv v8, v8, v24, v0.t ; RV32-NEXT: vand.vv v16, v8, v16, v0.t ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 5 +; RV32-NEXT: li a3, 24 +; RV32-NEXT: mul a2, a2, a3 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 48 ; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload @@ -4062,8 +4061,7 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: and a0, a0, a3 ; RV32-NEXT: vmv1r.v v0, v7 ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 24 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: slli a3, a3, 4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload @@ -4079,41 +4077,28 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v16, v24, v16, v0.t -; RV32-NEXT: vsub.vv v8, v8, v16, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 -; RV32-NEXT: mul a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vsub.vv v24, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v16, v24, v8, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a1, 40 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v16, v24, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 24 -; RV32-NEXT: mul a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vi v8, v24, 2, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vv v8, v8, v24, v0.t +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 24 +; RV32-NEXT: li a1, 40 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 @@ -4125,7 +4110,8 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: li a1, 24 +; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll index bef29dfecef4c..c885b3c03270c 100644 
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll @@ -3902,12 +3902,10 @@ define void @trunc_v6bf16(ptr %x) { ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: lui a1, 307200 ; CHECK-NEXT: fmv.w.x fa5, a1 -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v8, v10 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 -; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v10, v0.t ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu @@ -3969,10 +3967,8 @@ define void @trunc_v6f16(ptr %x) { ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: lui a1, %hi(.LCPI172_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI172_0)(a1) -; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 -; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFH-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu @@ -3986,12 +3982,10 @@ define void @trunc_v6f16(ptr %x) { ; ZVFHMIN-NEXT: vle16.v v8, (a0) ; ZVFHMIN-NEXT: lui a1, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a1 -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v10 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfcvt.rtz.x.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu @@ -4082,13 +4076,11 @@ define void @ceil_v6bf16(ptr %x) { ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: lui a1, 307200 ; CHECK-NEXT: fmv.w.x fa5, a1 -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v8, v10 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 ; CHECK-NEXT: fsrmi a1, 3 -; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t ; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -4155,11 +4147,9 @@ define void @ceil_v6f16(ptr %x) { ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: lui a1, %hi(.LCPI178_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI178_0)(a1) -; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a1, 3 -; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a1 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -4174,13 +4164,11 @@ define void @ceil_v6f16(ptr %x) { ; ZVFHMIN-NEXT: vle16.v v8, (a0) ; ZVFHMIN-NEXT: lui a1, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a1 -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v10 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 ; ZVFHMIN-NEXT: fsrmi a1, 3 -; ZVFHMIN-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: fsrm a1 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -4276,13 +4264,11 @@ define void @floor_v6bf16(ptr %x) { ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: lui a1, 307200 ; CHECK-NEXT: fmv.w.x fa5, a1 -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v8, v10 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 ; CHECK-NEXT: fsrmi a1, 2 
-; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t ; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -4349,11 +4335,9 @@ define void @floor_v6f16(ptr %x) { ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: lui a1, %hi(.LCPI184_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI184_0)(a1) -; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a1, 2 -; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a1 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -4368,13 +4352,11 @@ define void @floor_v6f16(ptr %x) { ; ZVFHMIN-NEXT: vle16.v v8, (a0) ; ZVFHMIN-NEXT: lui a1, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a1 -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v10 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 ; ZVFHMIN-NEXT: fsrmi a1, 2 -; ZVFHMIN-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: fsrm a1 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -4470,13 +4452,11 @@ define void @round_v6bf16(ptr %x) { ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: lui a1, 307200 ; CHECK-NEXT: fmv.w.x fa5, a1 -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v8, v10 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 ; CHECK-NEXT: fsrmi a1, 4 -; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t ; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -4543,11 +4523,9 @@ define void @round_v6f16(ptr %x) { ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: lui a1, %hi(.LCPI190_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI190_0)(a1) -; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: fsrmi a1, 4 -; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a1 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -4562,13 +4540,11 @@ define void @round_v6f16(ptr %x) { ; ZVFHMIN-NEXT: vle16.v v8, (a0) ; ZVFHMIN-NEXT: lui a1, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a1 -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v10 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 ; ZVFHMIN-NEXT: fsrmi a1, 4 -; ZVFHMIN-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: fsrm a1 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll index a91dee1cb245f..037ed257f4a89 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v,+m -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+v,+m -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+v,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+v,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 declare <2 x i7> @llvm.vp.trunc.v2i7.v2i16(<2 x i16>, <2 x i1>, i32) @@ -222,316 +222,645 @@ define <2 x i32> 
@vtrunc_v2i32_v2i64_unmasked(<2 x i64> %a, i32 zeroext %vl) { declare <128 x i32> @llvm.vp.trunc.v128i32.v128i64(<128 x i64>, <128 x i1>, i32) define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 zeroext %vl) { -; CHECK-LABEL: vtrunc_v128i32_v128i64: -; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: li a3, 72 -; CHECK-NEXT: mul a2, a2, a3 -; CHECK-NEXT: sub sp, sp, a2 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xc8, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 72 * vlenb -; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; CHECK-NEXT: vmv1r.v v7, v0 -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: li a3, 24 -; CHECK-NEXT: mul a2, a2, a3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 5 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: vslidedown.vi v6, v0, 8 -; CHECK-NEXT: addi a2, a1, 512 -; CHECK-NEXT: addi a3, a1, 640 -; CHECK-NEXT: addi a4, a7, -64 -; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vi v27, v6, 4 -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v8, (a3) -; CHECK-NEXT: sltu a3, a7, a4 -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v0, v27, 2 -; CHECK-NEXT: addi a3, a3, -1 -; CHECK-NEXT: and a4, a3, a4 -; CHECK-NEXT: addi a3, a4, -32 -; CHECK-NEXT: sltu a5, a4, a3 -; CHECK-NEXT: addi a5, a5, -1 -; CHECK-NEXT: and a3, a5, a3 -; CHECK-NEXT: addi a5, a3, -16 -; CHECK-NEXT: sltu a6, a3, a5 -; CHECK-NEXT: addi a6, a6, -1 -; CHECK-NEXT: and a5, a6, a5 -; CHECK-NEXT: vsetvli zero, a5, e32, m4, ta, ma -; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t -; CHECK-NEXT: csrr a5, vlenb -; CHECK-NEXT: slli a5, a5, 4 -; CHECK-NEXT: add a5, sp, a5 -; CHECK-NEXT: addi a5, a5, 16 -; CHECK-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v8, (a2) -; CHECK-NEXT: addi a5, a1, 128 -; CHECK-NEXT: li a2, 16 -; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vi v26, v7, 4 -; CHECK-NEXT: bltu a3, a2, .LBB16_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a3, 16 -; CHECK-NEXT: .LBB16_2: -; CHECK-NEXT: vmv1r.v v0, v27 -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v16, (a5) -; CHECK-NEXT: csrr a5, vlenb -; CHECK-NEXT: li a6, 56 -; CHECK-NEXT: mul a5, a5, a6 -; CHECK-NEXT: add a5, sp, a5 -; CHECK-NEXT: addi a5, a5, 16 -; CHECK-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v27, v26, 2 -; CHECK-NEXT: li a5, 64 -; CHECK-NEXT: vsetvli zero, a3, e32, m4, ta, ma -; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t -; CHECK-NEXT: csrr a3, vlenb -; CHECK-NEXT: slli a3, a3, 6 -; CHECK-NEXT: add a3, sp, a3 -; CHECK-NEXT: addi a3, a3, 16 -; CHECK-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; CHECK-NEXT: mv a6, a7 -; CHECK-NEXT: bltu a7, a5, .LBB16_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: li a6, 64 -; CHECK-NEXT: .LBB16_4: -; CHECK-NEXT: vmv1r.v v0, v27 -; CHECK-NEXT: addi a5, a1, 384 -; CHECK-NEXT: li a3, 32 -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v8, (a1) -; CHECK-NEXT: csrr t0, vlenb -; CHECK-NEXT: li t1, 48 -; CHECK-NEXT: mul t0, t0, t1 -; CHECK-NEXT: add t0, 
sp, t0 -; CHECK-NEXT: addi t0, t0, 16 -; CHECK-NEXT: vs8r.v v8, (t0) # Unknown-size Folded Spill -; CHECK-NEXT: addi t0, a6, -32 -; CHECK-NEXT: sltu a6, a6, t0 -; CHECK-NEXT: addi a6, a6, -1 -; CHECK-NEXT: and a6, a6, t0 -; CHECK-NEXT: addi t0, a6, -16 -; CHECK-NEXT: sltu t1, a6, t0 -; CHECK-NEXT: addi t1, t1, -1 -; CHECK-NEXT: and t0, t1, t0 -; CHECK-NEXT: csrr t1, vlenb -; CHECK-NEXT: li t2, 56 -; CHECK-NEXT: mul t1, t1, t2 -; CHECK-NEXT: add t1, sp, t1 -; CHECK-NEXT: addi t1, t1, 16 -; CHECK-NEXT: vl8r.v v16, (t1) # Unknown-size Folded Reload -; CHECK-NEXT: vsetvli zero, t0, e32, m4, ta, ma -; CHECK-NEXT: vnsrl.wi v8, v16, 0, v0.t -; CHECK-NEXT: csrr t0, vlenb -; CHECK-NEXT: slli t0, t0, 3 -; CHECK-NEXT: add t0, sp, t0 -; CHECK-NEXT: addi t0, t0, 16 -; CHECK-NEXT: vs8r.v v8, (t0) # Unknown-size Folded Spill -; CHECK-NEXT: bltu a6, a2, .LBB16_6 -; CHECK-NEXT: # %bb.5: -; CHECK-NEXT: li a6, 16 -; CHECK-NEXT: .LBB16_6: -; CHECK-NEXT: vmv1r.v v0, v26 -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v8, (a5) -; CHECK-NEXT: addi a5, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill -; CHECK-NEXT: addi a1, a1, 256 -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v26, v6, 2 -; CHECK-NEXT: csrr a5, vlenb -; CHECK-NEXT: li t0, 48 -; CHECK-NEXT: mul a5, a5, t0 -; CHECK-NEXT: add a5, sp, a5 -; CHECK-NEXT: addi a5, a5, 16 -; CHECK-NEXT: vl8r.v v16, (a5) # Unknown-size Folded Reload -; CHECK-NEXT: vsetvli zero, a6, e32, m4, ta, ma -; CHECK-NEXT: vnsrl.wi v8, v16, 0, v0.t -; CHECK-NEXT: csrr a5, vlenb -; CHECK-NEXT: li a6, 56 -; CHECK-NEXT: mul a5, a5, a6 -; CHECK-NEXT: add a5, sp, a5 -; CHECK-NEXT: addi a5, a5, 16 -; CHECK-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill -; CHECK-NEXT: mv a5, a4 -; CHECK-NEXT: bltu a4, a3, .LBB16_8 -; CHECK-NEXT: # %bb.7: -; CHECK-NEXT: li a5, 32 -; CHECK-NEXT: .LBB16_8: -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v16, (a1) -; CHECK-NEXT: addi a1, a5, -16 -; CHECK-NEXT: sltu a5, a5, a1 -; CHECK-NEXT: addi a5, a5, -1 -; CHECK-NEXT: and a1, a5, a1 -; CHECK-NEXT: vmv1r.v v0, v26 -; CHECK-NEXT: addi a5, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; CHECK-NEXT: vnsrl.wi v8, v24, 0, v0.t -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a5, 40 -; CHECK-NEXT: mul a1, a1, a5 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: bltu a4, a2, .LBB16_10 -; CHECK-NEXT: # %bb.9: -; CHECK-NEXT: li a4, 16 -; CHECK-NEXT: .LBB16_10: -; CHECK-NEXT: vmv1r.v v0, v6 -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v25, v7, 2 -; CHECK-NEXT: vsetvli zero, a4, e32, m4, ta, ma -; CHECK-NEXT: vnsrl.wi v8, v16, 0, v0.t -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a4, 48 -; CHECK-NEXT: mul a1, a1, a4 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: mv a1, a7 -; CHECK-NEXT: bltu a7, a3, .LBB16_12 -; CHECK-NEXT: # %bb.11: -; CHECK-NEXT: li a1, 32 -; CHECK-NEXT: .LBB16_12: -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: csrr a4, vlenb -; CHECK-NEXT: slli a4, a4, 4 -; CHECK-NEXT: add a4, sp, a4 -; CHECK-NEXT: addi a4, a4, 16 -; CHECK-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload -; CHECK-NEXT: vmv4r.v v24, v16 -; CHECK-NEXT: csrr a4, vlenb -; CHECK-NEXT: slli a4, a4, 3 -; CHECK-NEXT: add a4, sp, a4 -; CHECK-NEXT: addi a4, a4, 16 -; 
CHECK-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a4, vlenb -; CHECK-NEXT: li a5, 40 -; CHECK-NEXT: mul a4, a4, a5 -; CHECK-NEXT: add a4, sp, a4 -; CHECK-NEXT: addi a4, a4, 16 -; CHECK-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a4, vlenb -; CHECK-NEXT: li a5, 40 -; CHECK-NEXT: mul a4, a4, a5 -; CHECK-NEXT: add a4, sp, a4 -; CHECK-NEXT: addi a4, a4, 16 -; CHECK-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; CHECK-NEXT: csrr a4, vlenb -; CHECK-NEXT: slli a4, a4, 6 -; CHECK-NEXT: add a4, sp, a4 -; CHECK-NEXT: addi a4, a4, 16 -; CHECK-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload -; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; CHECK-NEXT: vslideup.vi v16, v24, 16 -; CHECK-NEXT: csrr a4, vlenb -; CHECK-NEXT: slli a4, a4, 6 -; CHECK-NEXT: add a4, sp, a4 -; CHECK-NEXT: addi a4, a4, 16 -; CHECK-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; CHECK-NEXT: addi a4, a1, -16 -; CHECK-NEXT: csrr a5, vlenb -; CHECK-NEXT: li a6, 56 -; CHECK-NEXT: mul a5, a5, a6 -; CHECK-NEXT: add a5, sp, a5 -; CHECK-NEXT: addi a5, a5, 16 -; CHECK-NEXT: vl8r.v v16, (a5) # Unknown-size Folded Reload -; CHECK-NEXT: vslideup.vi v16, v8, 16 -; CHECK-NEXT: csrr a5, vlenb -; CHECK-NEXT: li a6, 56 -; CHECK-NEXT: mul a5, a5, a6 -; CHECK-NEXT: add a5, sp, a5 -; CHECK-NEXT: addi a5, a5, 16 -; CHECK-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill -; CHECK-NEXT: csrr a5, vlenb -; CHECK-NEXT: li a6, 48 -; CHECK-NEXT: mul a5, a5, a6 -; CHECK-NEXT: add a5, sp, a5 -; CHECK-NEXT: addi a5, a5, 16 -; CHECK-NEXT: vl8r.v v8, (a5) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a5, vlenb -; CHECK-NEXT: li a6, 40 -; CHECK-NEXT: mul a5, a5, a6 -; CHECK-NEXT: add a5, sp, a5 -; CHECK-NEXT: addi a5, a5, 16 -; CHECK-NEXT: vl8r.v v16, (a5) # Unknown-size Folded Reload -; CHECK-NEXT: vslideup.vi v8, v16, 16 -; CHECK-NEXT: csrr a5, vlenb -; CHECK-NEXT: li a6, 48 -; CHECK-NEXT: mul a5, a5, a6 -; CHECK-NEXT: add a5, sp, a5 -; CHECK-NEXT: addi a5, a5, 16 -; CHECK-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill -; CHECK-NEXT: sltu a1, a1, a4 -; CHECK-NEXT: addi a1, a1, -1 -; CHECK-NEXT: and a1, a1, a4 -; CHECK-NEXT: csrr a4, vlenb -; CHECK-NEXT: li a5, 24 -; CHECK-NEXT: mul a4, a4, a5 -; CHECK-NEXT: add a4, sp, a4 -; CHECK-NEXT: addi a4, a4, 16 -; CHECK-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; CHECK-NEXT: vnsrl.wi v8, v16, 0, v0.t -; CHECK-NEXT: bltu a7, a2, .LBB16_14 -; CHECK-NEXT: # %bb.13: -; CHECK-NEXT: li a7, 16 -; CHECK-NEXT: .LBB16_14: -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 5 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vsetvli zero, a7, e32, m4, ta, ma -; CHECK-NEXT: vnsrl.wi v24, v16, 0, v0.t -; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; CHECK-NEXT: vslideup.vi v24, v8, 16 -; CHECK-NEXT: vse32.v v24, (a0) -; CHECK-NEXT: addi a1, a0, 256 -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: li a3, 48 -; CHECK-NEXT: mul a2, a2, a3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload -; CHECK-NEXT: vse32.v v8, (a1) -; CHECK-NEXT: addi a1, a0, 128 -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: li a3, 56 -; CHECK-NEXT: mul a2, a2, a3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload -; CHECK-NEXT: vse32.v v8, (a1) -; CHECK-NEXT: addi a0, 
a0, 384 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 6 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vse32.v v8, (a0) -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 72 -; CHECK-NEXT: mul a0, a0, a1 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: .cfi_def_cfa sp, 16 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: .cfi_def_cfa_offset 0 -; CHECK-NEXT: ret +; RV32-LABEL: vtrunc_v128i32_v128i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 +; RV32-NEXT: sw s0, 28(sp) # 4-byte Folded Spill +; RV32-NEXT: .cfi_offset s0, -4 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 72 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: sub sp, sp, a2 +; RV32-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0xc8, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 72 * vlenb +; RV32-NEXT: vsetivli zero, 8, e8, m1, ta, ma +; RV32-NEXT: vmv1r.v v7, v0 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 5 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 40 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vslidedown.vi v5, v0, 8 +; RV32-NEXT: vsetivli zero, 4, e8, mf2, ta, ma +; RV32-NEXT: vslidedown.vi v4, v0, 4 +; RV32-NEXT: addi a2, a7, -64 +; RV32-NEXT: vslidedown.vi v3, v5, 4 +; RV32-NEXT: sltu a3, a7, a2 +; RV32-NEXT: addi a3, a3, -1 +; RV32-NEXT: and a4, a3, a2 +; RV32-NEXT: addi a2, a4, -32 +; RV32-NEXT: sltu a3, a4, a2 +; RV32-NEXT: addi a3, a3, -1 +; RV32-NEXT: and a3, a3, a2 +; RV32-NEXT: li a2, 16 +; RV32-NEXT: addi t0, a3, -16 +; RV32-NEXT: mv a5, a3 +; RV32-NEXT: bltu a3, a2, .LBB16_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: li a5, 16 +; RV32-NEXT: .LBB16_2: +; RV32-NEXT: li t2, 64 +; RV32-NEXT: sltu t1, a3, t0 +; RV32-NEXT: mv a6, a7 +; RV32-NEXT: bltu a7, t2, .LBB16_4 +; RV32-NEXT: # %bb.3: +; RV32-NEXT: li a6, 64 +; RV32-NEXT: .LBB16_4: +; RV32-NEXT: addi t3, a1, 128 +; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV32-NEXT: vslidedown.vi v6, v4, 2 +; RV32-NEXT: addi s0, a1, 512 +; RV32-NEXT: addi t6, a1, 640 +; RV32-NEXT: vslidedown.vi v0, v3, 2 +; RV32-NEXT: addi t1, t1, -1 +; RV32-NEXT: addi t2, a1, 384 +; RV32-NEXT: vslidedown.vi v2, v5, 2 +; RV32-NEXT: li a3, 32 +; RV32-NEXT: addi t4, a6, -32 +; RV32-NEXT: sltu a6, a6, t4 +; RV32-NEXT: addi a6, a6, -1 +; RV32-NEXT: and a6, a6, t4 +; RV32-NEXT: addi t4, a6, -16 +; RV32-NEXT: sltu t5, a6, t4 +; RV32-NEXT: addi t5, t5, -1 +; RV32-NEXT: bltu a6, a2, .LBB16_6 +; RV32-NEXT: # %bb.5: +; RV32-NEXT: li a6, 16 +; RV32-NEXT: .LBB16_6: +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vle64.v v8, (s0) +; RV32-NEXT: csrr s0, vlenb +; RV32-NEXT: sw a0, 4(sp) # 4-byte Folded Spill +; RV32-NEXT: li a0, 56 +; RV32-NEXT: mul s0, s0, a0 +; RV32-NEXT: lw a0, 4(sp) # 4-byte Folded Reload +; RV32-NEXT: add s0, sp, s0 +; RV32-NEXT: addi s0, s0, 16 +; RV32-NEXT: vs8r.v v8, (s0) # Unknown-size Folded Spill +; RV32-NEXT: vle64.v v16, (t6) +; RV32-NEXT: vle64.v v8, (t3) +; RV32-NEXT: csrr t3, vlenb +; RV32-NEXT: slli t3, t3, 3 +; RV32-NEXT: add t3, sp, t3 +; RV32-NEXT: addi t3, t3, 16 +; RV32-NEXT: vs8r.v v8, (t3) # Unknown-size Folded Spill +; RV32-NEXT: vle64.v v8, (a1) +; RV32-NEXT: csrr t3, vlenb +; RV32-NEXT: li t6, 48 +; RV32-NEXT: mul t3, t3, t6 +; RV32-NEXT: add t3, 
sp, t3 +; RV32-NEXT: addi t3, t3, 16 +; RV32-NEXT: vs8r.v v8, (t3) # Unknown-size Folded Spill +; RV32-NEXT: vle64.v v8, (t2) +; RV32-NEXT: csrr t2, vlenb +; RV32-NEXT: slli t2, t2, 4 +; RV32-NEXT: add t2, sp, t2 +; RV32-NEXT: addi t2, t2, 16 +; RV32-NEXT: vs8r.v v8, (t2) # Unknown-size Folded Spill +; RV32-NEXT: and t2, t1, t0 +; RV32-NEXT: and t1, t5, t4 +; RV32-NEXT: addi a1, a1, 256 +; RV32-NEXT: mv t0, a4 +; RV32-NEXT: bltu a4, a3, .LBB16_8 +; RV32-NEXT: # %bb.7: +; RV32-NEXT: li t0, 32 +; RV32-NEXT: .LBB16_8: +; RV32-NEXT: vsetvli zero, t2, e32, m4, ta, ma +; RV32-NEXT: vnsrl.wi v8, v16, 0, v0.t +; RV32-NEXT: addi t2, sp, 16 +; RV32-NEXT: vs8r.v v8, (t2) # Unknown-size Folded Spill +; RV32-NEXT: vmv1r.v v0, v3 +; RV32-NEXT: csrr t2, vlenb +; RV32-NEXT: li t3, 56 +; RV32-NEXT: mul t2, t2, t3 +; RV32-NEXT: add t2, sp, t2 +; RV32-NEXT: addi t2, t2, 16 +; RV32-NEXT: vl8r.v v24, (t2) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a5, e32, m4, ta, ma +; RV32-NEXT: vnsrl.wi v16, v24, 0, v0.t +; RV32-NEXT: csrr a5, vlenb +; RV32-NEXT: slli a5, a5, 6 +; RV32-NEXT: add a5, sp, a5 +; RV32-NEXT: addi a5, a5, 16 +; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill +; RV32-NEXT: vmv1r.v v0, v6 +; RV32-NEXT: csrr a5, vlenb +; RV32-NEXT: slli a5, a5, 3 +; RV32-NEXT: add a5, sp, a5 +; RV32-NEXT: addi a5, a5, 16 +; RV32-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, t1, e32, m4, ta, ma +; RV32-NEXT: vnsrl.wi v16, v24, 0, v0.t +; RV32-NEXT: csrr a5, vlenb +; RV32-NEXT: li t1, 24 +; RV32-NEXT: mul a5, a5, t1 +; RV32-NEXT: add a5, sp, a5 +; RV32-NEXT: addi a5, a5, 16 +; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill +; RV32-NEXT: addi a5, t0, -16 +; RV32-NEXT: sltu t0, t0, a5 +; RV32-NEXT: addi t0, t0, -1 +; RV32-NEXT: and a5, t0, a5 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vle64.v v16, (a1) +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV32-NEXT: vslidedown.vi v6, v7, 2 +; RV32-NEXT: vmv1r.v v0, v4 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li t0, 48 +; RV32-NEXT: mul a1, a1, t0 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a6, e32, m4, ta, ma +; RV32-NEXT: vnsrl.wi v16, v24, 0, v0.t +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a6, 56 +; RV32-NEXT: mul a1, a1, a6 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vmv1r.v v0, v2 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a5, e32, m4, ta, ma +; RV32-NEXT: vnsrl.wi v24, v16, 0, v0.t +; RV32-NEXT: bltu a4, a2, .LBB16_10 +; RV32-NEXT: # %bb.9: +; RV32-NEXT: li a4, 16 +; RV32-NEXT: .LBB16_10: +; RV32-NEXT: vmv1r.v v0, v5 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a4, e32, m4, ta, ma +; RV32-NEXT: vnsrl.wi v16, v8, 0, v0.t +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a4, 48 +; RV32-NEXT: mul a1, a1, a4 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size 
Folded Spill +; RV32-NEXT: mv a1, a7 +; RV32-NEXT: bltu a7, a3, .LBB16_12 +; RV32-NEXT: # %bb.11: +; RV32-NEXT: li a1, 32 +; RV32-NEXT: .LBB16_12: +; RV32-NEXT: vmv1r.v v0, v6 +; RV32-NEXT: addi a4, sp, 16 +; RV32-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: li a5, 24 +; RV32-NEXT: mul a4, a4, a5 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 16 +; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: li a5, 24 +; RV32-NEXT: mul a4, a4, a5 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 16 +; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 6 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 16 +; RV32-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; RV32-NEXT: vslideup.vi v24, v8, 16 +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 6 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 16 +; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill +; RV32-NEXT: addi a4, a1, -16 +; RV32-NEXT: csrr a5, vlenb +; RV32-NEXT: li a6, 56 +; RV32-NEXT: mul a5, a5, a6 +; RV32-NEXT: add a5, sp, a5 +; RV32-NEXT: addi a5, a5, 16 +; RV32-NEXT: vl8r.v v8, (a5) # Unknown-size Folded Reload +; RV32-NEXT: vslideup.vi v8, v16, 16 +; RV32-NEXT: csrr a5, vlenb +; RV32-NEXT: li a6, 56 +; RV32-NEXT: mul a5, a5, a6 +; RV32-NEXT: add a5, sp, a5 +; RV32-NEXT: addi a5, a5, 16 +; RV32-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill +; RV32-NEXT: csrr a5, vlenb +; RV32-NEXT: li a6, 48 +; RV32-NEXT: mul a5, a5, a6 +; RV32-NEXT: add a5, sp, a5 +; RV32-NEXT: addi a5, a5, 16 +; RV32-NEXT: vl8r.v v8, (a5) # Unknown-size Folded Reload +; RV32-NEXT: csrr a5, vlenb +; RV32-NEXT: li a6, 24 +; RV32-NEXT: mul a5, a5, a6 +; RV32-NEXT: add a5, sp, a5 +; RV32-NEXT: addi a5, a5, 16 +; RV32-NEXT: vl8r.v v16, (a5) # Unknown-size Folded Reload +; RV32-NEXT: vslideup.vi v8, v16, 16 +; RV32-NEXT: csrr a5, vlenb +; RV32-NEXT: li a6, 48 +; RV32-NEXT: mul a5, a5, a6 +; RV32-NEXT: add a5, sp, a5 +; RV32-NEXT: addi a5, a5, 16 +; RV32-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill +; RV32-NEXT: sltu a1, a1, a4 +; RV32-NEXT: addi a1, a1, -1 +; RV32-NEXT: and a1, a1, a4 +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 5 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 16 +; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; RV32-NEXT: vnsrl.wi v8, v16, 0, v0.t +; RV32-NEXT: bltu a7, a2, .LBB16_14 +; RV32-NEXT: # %bb.13: +; RV32-NEXT: li a7, 16 +; RV32-NEXT: .LBB16_14: +; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 40 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a7, e32, m4, ta, ma +; RV32-NEXT: vnsrl.wi v16, v24, 0, v0.t +; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; RV32-NEXT: vslideup.vi v16, v8, 16 +; RV32-NEXT: vse32.v v16, (a0) +; RV32-NEXT: addi a1, a0, 256 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 48 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vse32.v v8, (a1) +; RV32-NEXT: addi a1, a0, 128 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 56 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; 
RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vse32.v v8, (a1) +; RV32-NEXT: addi a0, a0, 384 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 6 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vse32.v v8, (a0) +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 72 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: .cfi_def_cfa sp, 32 +; RV32-NEXT: lw s0, 28(sp) # 4-byte Folded Reload +; RV32-NEXT: .cfi_restore s0 +; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: .cfi_def_cfa_offset 0 +; RV32-NEXT: ret +; +; RV64-LABEL: vtrunc_v128i32_v128i64: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -48 +; RV64-NEXT: .cfi_def_cfa_offset 48 +; RV64-NEXT: sd s0, 40(sp) # 8-byte Folded Spill +; RV64-NEXT: .cfi_offset s0, -8 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: li a3, 72 +; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: sub sp, sp, a2 +; RV64-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0xc8, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 72 * vlenb +; RV64-NEXT: vsetivli zero, 8, e8, m1, ta, ma +; RV64-NEXT: vmv1r.v v7, v0 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 5 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 32 +; RV64-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: li a3, 40 +; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 32 +; RV64-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; RV64-NEXT: vslidedown.vi v5, v0, 8 +; RV64-NEXT: vsetivli zero, 4, e8, mf2, ta, ma +; RV64-NEXT: vslidedown.vi v4, v0, 4 +; RV64-NEXT: addi a2, a7, -64 +; RV64-NEXT: vslidedown.vi v3, v5, 4 +; RV64-NEXT: sltu a3, a7, a2 +; RV64-NEXT: addi a3, a3, -1 +; RV64-NEXT: and a4, a3, a2 +; RV64-NEXT: addi a2, a4, -32 +; RV64-NEXT: sltu a3, a4, a2 +; RV64-NEXT: addi a3, a3, -1 +; RV64-NEXT: and a3, a3, a2 +; RV64-NEXT: li a2, 16 +; RV64-NEXT: addi t0, a3, -16 +; RV64-NEXT: mv a5, a3 +; RV64-NEXT: bltu a3, a2, .LBB16_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: li a5, 16 +; RV64-NEXT: .LBB16_2: +; RV64-NEXT: li t2, 64 +; RV64-NEXT: sltu t1, a3, t0 +; RV64-NEXT: mv a6, a7 +; RV64-NEXT: bltu a7, t2, .LBB16_4 +; RV64-NEXT: # %bb.3: +; RV64-NEXT: li a6, 64 +; RV64-NEXT: .LBB16_4: +; RV64-NEXT: addi t3, a1, 128 +; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64-NEXT: vslidedown.vi v6, v4, 2 +; RV64-NEXT: addi s0, a1, 512 +; RV64-NEXT: addi t6, a1, 640 +; RV64-NEXT: vslidedown.vi v0, v3, 2 +; RV64-NEXT: addi t1, t1, -1 +; RV64-NEXT: addi t2, a1, 384 +; RV64-NEXT: vslidedown.vi v2, v5, 2 +; RV64-NEXT: li a3, 32 +; RV64-NEXT: addi t4, a6, -32 +; RV64-NEXT: sltu a6, a6, t4 +; RV64-NEXT: addi a6, a6, -1 +; RV64-NEXT: and a6, a6, t4 +; RV64-NEXT: addi t4, a6, -16 +; RV64-NEXT: sltu t5, a6, t4 +; RV64-NEXT: addi t5, t5, -1 +; RV64-NEXT: bltu a6, a2, .LBB16_6 +; RV64-NEXT: # %bb.5: +; RV64-NEXT: li a6, 16 +; RV64-NEXT: .LBB16_6: +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV64-NEXT: vle64.v v8, (s0) +; RV64-NEXT: csrr s0, vlenb +; RV64-NEXT: sd a0, 8(sp) # 8-byte Folded Spill +; RV64-NEXT: li a0, 56 +; RV64-NEXT: mul s0, s0, a0 +; RV64-NEXT: ld a0, 8(sp) # 8-byte Folded Reload +; RV64-NEXT: add s0, sp, s0 +; RV64-NEXT: addi s0, s0, 32 +; RV64-NEXT: vs8r.v v8, (s0) # Unknown-size Folded Spill +; RV64-NEXT: vle64.v v16, (t6) +; RV64-NEXT: vle64.v v8, (t3) +; RV64-NEXT: csrr t3, vlenb +; RV64-NEXT: slli t3, t3, 3 +; RV64-NEXT: add t3, sp, t3 +; RV64-NEXT: addi t3, t3, 32 +; RV64-NEXT: 
vs8r.v v8, (t3) # Unknown-size Folded Spill +; RV64-NEXT: vle64.v v8, (a1) +; RV64-NEXT: csrr t3, vlenb +; RV64-NEXT: li t6, 48 +; RV64-NEXT: mul t3, t3, t6 +; RV64-NEXT: add t3, sp, t3 +; RV64-NEXT: addi t3, t3, 32 +; RV64-NEXT: vs8r.v v8, (t3) # Unknown-size Folded Spill +; RV64-NEXT: vle64.v v8, (t2) +; RV64-NEXT: csrr t2, vlenb +; RV64-NEXT: slli t2, t2, 4 +; RV64-NEXT: add t2, sp, t2 +; RV64-NEXT: addi t2, t2, 32 +; RV64-NEXT: vs8r.v v8, (t2) # Unknown-size Folded Spill +; RV64-NEXT: and t2, t1, t0 +; RV64-NEXT: and t1, t5, t4 +; RV64-NEXT: addi a1, a1, 256 +; RV64-NEXT: mv t0, a4 +; RV64-NEXT: bltu a4, a3, .LBB16_8 +; RV64-NEXT: # %bb.7: +; RV64-NEXT: li t0, 32 +; RV64-NEXT: .LBB16_8: +; RV64-NEXT: vsetvli zero, t2, e32, m4, ta, ma +; RV64-NEXT: vnsrl.wi v8, v16, 0, v0.t +; RV64-NEXT: addi t2, sp, 32 +; RV64-NEXT: vs8r.v v8, (t2) # Unknown-size Folded Spill +; RV64-NEXT: vmv1r.v v0, v3 +; RV64-NEXT: csrr t2, vlenb +; RV64-NEXT: li t3, 56 +; RV64-NEXT: mul t2, t2, t3 +; RV64-NEXT: add t2, sp, t2 +; RV64-NEXT: addi t2, t2, 32 +; RV64-NEXT: vl8r.v v24, (t2) # Unknown-size Folded Reload +; RV64-NEXT: vsetvli zero, a5, e32, m4, ta, ma +; RV64-NEXT: vnsrl.wi v16, v24, 0, v0.t +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 6 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 32 +; RV64-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill +; RV64-NEXT: vmv1r.v v0, v6 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: slli a5, a5, 3 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 32 +; RV64-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload +; RV64-NEXT: vsetvli zero, t1, e32, m4, ta, ma +; RV64-NEXT: vnsrl.wi v16, v24, 0, v0.t +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: li t1, 24 +; RV64-NEXT: mul a5, a5, t1 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 32 +; RV64-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill +; RV64-NEXT: addi a5, t0, -16 +; RV64-NEXT: sltu t0, t0, a5 +; RV64-NEXT: addi t0, t0, -1 +; RV64-NEXT: and a5, t0, a5 +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV64-NEXT: vle64.v v16, (a1) +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 32 +; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64-NEXT: vslidedown.vi v6, v7, 2 +; RV64-NEXT: vmv1r.v v0, v4 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li t0, 48 +; RV64-NEXT: mul a1, a1, t0 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 32 +; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vsetvli zero, a6, e32, m4, ta, ma +; RV64-NEXT: vnsrl.wi v16, v24, 0, v0.t +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a6, 56 +; RV64-NEXT: mul a1, a1, a6 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 32 +; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vmv1r.v v0, v2 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 4 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 32 +; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vsetvli zero, a5, e32, m4, ta, ma +; RV64-NEXT: vnsrl.wi v24, v16, 0, v0.t +; RV64-NEXT: bltu a4, a2, .LBB16_10 +; RV64-NEXT: # %bb.9: +; RV64-NEXT: li a4, 16 +; RV64-NEXT: .LBB16_10: +; RV64-NEXT: vmv1r.v v0, v5 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 32 +; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vsetvli zero, a4, e32, m4, ta, ma +; RV64-NEXT: vnsrl.wi v16, v8, 0, v0.t +; 
RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a4, 48 +; RV64-NEXT: mul a1, a1, a4 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 32 +; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV64-NEXT: mv a1, a7 +; RV64-NEXT: bltu a7, a3, .LBB16_12 +; RV64-NEXT: # %bb.11: +; RV64-NEXT: li a1, 32 +; RV64-NEXT: .LBB16_12: +; RV64-NEXT: vmv1r.v v0, v6 +; RV64-NEXT: addi a4, sp, 32 +; RV64-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload +; RV64-NEXT: csrr a4, vlenb +; RV64-NEXT: li a5, 24 +; RV64-NEXT: mul a4, a4, a5 +; RV64-NEXT: add a4, sp, a4 +; RV64-NEXT: addi a4, a4, 32 +; RV64-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload +; RV64-NEXT: csrr a4, vlenb +; RV64-NEXT: li a5, 24 +; RV64-NEXT: mul a4, a4, a5 +; RV64-NEXT: add a4, sp, a4 +; RV64-NEXT: addi a4, a4, 32 +; RV64-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill +; RV64-NEXT: csrr a4, vlenb +; RV64-NEXT: slli a4, a4, 6 +; RV64-NEXT: add a4, sp, a4 +; RV64-NEXT: addi a4, a4, 32 +; RV64-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload +; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; RV64-NEXT: vslideup.vi v24, v8, 16 +; RV64-NEXT: csrr a4, vlenb +; RV64-NEXT: slli a4, a4, 6 +; RV64-NEXT: add a4, sp, a4 +; RV64-NEXT: addi a4, a4, 32 +; RV64-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill +; RV64-NEXT: addi a4, a1, -16 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: li a6, 56 +; RV64-NEXT: mul a5, a5, a6 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 32 +; RV64-NEXT: vl8r.v v8, (a5) # Unknown-size Folded Reload +; RV64-NEXT: vslideup.vi v8, v16, 16 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: li a6, 56 +; RV64-NEXT: mul a5, a5, a6 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 32 +; RV64-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: li a6, 48 +; RV64-NEXT: mul a5, a5, a6 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 32 +; RV64-NEXT: vl8r.v v8, (a5) # Unknown-size Folded Reload +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: li a6, 24 +; RV64-NEXT: mul a5, a5, a6 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 32 +; RV64-NEXT: vl8r.v v16, (a5) # Unknown-size Folded Reload +; RV64-NEXT: vslideup.vi v8, v16, 16 +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: li a6, 48 +; RV64-NEXT: mul a5, a5, a6 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 32 +; RV64-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill +; RV64-NEXT: sltu a1, a1, a4 +; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: and a1, a1, a4 +; RV64-NEXT: csrr a4, vlenb +; RV64-NEXT: slli a4, a4, 5 +; RV64-NEXT: add a4, sp, a4 +; RV64-NEXT: addi a4, a4, 32 +; RV64-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload +; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; RV64-NEXT: vnsrl.wi v8, v16, 0, v0.t +; RV64-NEXT: bltu a7, a2, .LBB16_14 +; RV64-NEXT: # %bb.13: +; RV64-NEXT: li a7, 16 +; RV64-NEXT: .LBB16_14: +; RV64-NEXT: vmv1r.v v0, v7 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 40 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 32 +; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vsetvli zero, a7, e32, m4, ta, ma +; RV64-NEXT: vnsrl.wi v16, v24, 0, v0.t +; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; RV64-NEXT: vslideup.vi v16, v8, 16 +; RV64-NEXT: vse32.v v16, (a0) +; RV64-NEXT: addi a1, a0, 256 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: li a3, 48 +; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 32 +; RV64-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; RV64-NEXT: vse32.v v8, 
(a1) +; RV64-NEXT: addi a1, a0, 128 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: li a3, 56 +; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 32 +; RV64-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; RV64-NEXT: vse32.v v8, (a1) +; RV64-NEXT: addi a0, a0, 384 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 6 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 32 +; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vse32.v v8, (a0) +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 72 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: .cfi_def_cfa sp, 48 +; RV64-NEXT: ld s0, 40(sp) # 8-byte Folded Reload +; RV64-NEXT: .cfi_restore s0 +; RV64-NEXT: addi sp, sp, 48 +; RV64-NEXT: .cfi_def_cfa_offset 0 +; RV64-NEXT: ret %v = call <128 x i32> @llvm.vp.trunc.v128i32.v128i64(<128 x i64> %a, <128 x i1> %m, i32 %vl) ret <128 x i32> %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll index 6c9989775f790..8e2e8f3fb0dec 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll @@ -402,29 +402,29 @@ define <33 x double> @vpload_v33f64(ptr %ptr, <33 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a3, 32 ; CHECK-NEXT: .LBB32_2: -; CHECK-NEXT: addi a4, a3, -16 -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v0, v8, 2 -; CHECK-NEXT: sltu a3, a3, a4 -; CHECK-NEXT: addi a3, a3, -1 -; CHECK-NEXT: and a3, a3, a4 +; CHECK-NEXT: addi a5, a3, -16 ; CHECK-NEXT: addi a4, a1, 128 -; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v16, (a4), v0.t -; CHECK-NEXT: addi a3, a2, -32 -; CHECK-NEXT: sltu a4, a2, a3 -; CHECK-NEXT: addi a4, a4, -1 -; CHECK-NEXT: and a4, a4, a3 +; CHECK-NEXT: addi a7, a2, -32 +; CHECK-NEXT: sltu a3, a3, a5 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a6, a3, a5 +; CHECK-NEXT: sltu a3, a2, a7 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a5, a3, a7 ; CHECK-NEXT: li a3, 16 -; CHECK-NEXT: bltu a4, a3, .LBB32_4 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v0, v8, 2 +; CHECK-NEXT: bltu a5, a3, .LBB32_4 ; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: li a4, 16 +; CHECK-NEXT: li a5, 16 ; CHECK-NEXT: .LBB32_4: +; CHECK-NEXT: vsetvli zero, a6, e64, m8, ta, ma +; CHECK-NEXT: vle64.v v16, (a4), v0.t ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v8, 4 -; CHECK-NEXT: addi a5, a1, 256 -; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v24, (a5), v0.t +; CHECK-NEXT: addi a4, a1, 256 +; CHECK-NEXT: vsetvli zero, a5, e64, m8, ta, ma +; CHECK-NEXT: vle64.v v24, (a4), v0.t ; CHECK-NEXT: bltu a2, a3, .LBB32_6 ; CHECK-NEXT: # %bb.5: ; CHECK-NEXT: li a2, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll index 557882ee31d4c..984bc5b2c7352 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll @@ -26,9 +26,10 @@ define void @vselect_vv_v6i32(ptr %a, ptr %b, ptr %cc, ptr %z) { ; RV32-NEXT: vslide1down.vx v10, v10, a4 ; RV32-NEXT: vslide1down.vx v10, v10, a2 ; RV32-NEXT: vslidedown.vi v10, v10, 2 +; RV32-NEXT: vsetivli zero, 6, e8, mf2, ta, ma ; RV32-NEXT: vand.vi v10, v10, 1 ; RV32-NEXT: vmsne.vi v0, v10, 0 -; RV32-NEXT: vsetivli zero, 6, e32, m2, tu, mu +; RV32-NEXT: vsetvli zero, zero, e32, m2, 
tu, mu ; RV32-NEXT: vle32.v v8, (a0), v0.t ; RV32-NEXT: vse32.v v8, (a3) ; RV32-NEXT: ret @@ -56,9 +57,10 @@ define void @vselect_vv_v6i32(ptr %a, ptr %b, ptr %cc, ptr %z) { ; RV64-NEXT: vslide1down.vx v10, v10, a4 ; RV64-NEXT: vslide1down.vx v10, v10, a2 ; RV64-NEXT: vslidedown.vi v10, v10, 2 +; RV64-NEXT: vsetivli zero, 6, e8, mf2, ta, ma ; RV64-NEXT: vand.vi v10, v10, 1 ; RV64-NEXT: vmsne.vi v0, v10, 0 -; RV64-NEXT: vsetivli zero, 6, e32, m2, tu, mu +; RV64-NEXT: vsetvli zero, zero, e32, m2, tu, mu ; RV64-NEXT: vle32.v v8, (a0), v0.t ; RV64-NEXT: vse32.v v8, (a3) ; RV64-NEXT: ret @@ -94,9 +96,10 @@ define void @vselect_vx_v6i32(i32 %a, ptr %b, ptr %cc, ptr %z) { ; RV32-NEXT: vslide1down.vx v10, v10, a4 ; RV32-NEXT: vslide1down.vx v10, v10, a2 ; RV32-NEXT: vslidedown.vi v10, v10, 2 +; RV32-NEXT: vsetivli zero, 6, e8, mf2, ta, ma ; RV32-NEXT: vand.vi v10, v10, 1 ; RV32-NEXT: vmsne.vi v0, v10, 0 -; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32-NEXT: vmerge.vxm v8, v8, a0, v0 ; RV32-NEXT: vse32.v v8, (a3) ; RV32-NEXT: ret @@ -124,9 +127,10 @@ define void @vselect_vx_v6i32(i32 %a, ptr %b, ptr %cc, ptr %z) { ; RV64-NEXT: vslide1down.vx v10, v10, a4 ; RV64-NEXT: vslide1down.vx v10, v10, a2 ; RV64-NEXT: vslidedown.vi v10, v10, 2 +; RV64-NEXT: vsetivli zero, 6, e8, mf2, ta, ma ; RV64-NEXT: vand.vi v10, v10, 1 ; RV64-NEXT: vmsne.vi v0, v10, 0 -; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma +; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV64-NEXT: vmerge.vxm v8, v8, a0, v0 ; RV64-NEXT: vse32.v v8, (a3) ; RV64-NEXT: ret @@ -163,9 +167,10 @@ define void @vselect_vi_v6i32(ptr %b, ptr %cc, ptr %z) { ; RV32-NEXT: vslide1down.vx v10, v10, a3 ; RV32-NEXT: vslide1down.vx v10, v10, a1 ; RV32-NEXT: vslidedown.vi v10, v10, 2 +; RV32-NEXT: vsetivli zero, 6, e8, mf2, ta, ma ; RV32-NEXT: vand.vi v10, v10, 1 ; RV32-NEXT: vmsne.vi v0, v10, 0 -; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32-NEXT: vmerge.vim v8, v8, -1, v0 ; RV32-NEXT: vse32.v v8, (a2) ; RV32-NEXT: ret @@ -193,9 +198,10 @@ define void @vselect_vi_v6i32(ptr %b, ptr %cc, ptr %z) { ; RV64-NEXT: vslide1down.vx v10, v10, a3 ; RV64-NEXT: vslide1down.vx v10, v10, a1 ; RV64-NEXT: vslidedown.vi v10, v10, 2 +; RV64-NEXT: vsetivli zero, 6, e8, mf2, ta, ma ; RV64-NEXT: vand.vi v10, v10, 1 ; RV64-NEXT: vmsne.vi v0, v10, 0 -; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma +; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV64-NEXT: vmerge.vim v8, v8, -1, v0 ; RV64-NEXT: vse32.v v8, (a2) ; RV64-NEXT: ret @@ -231,9 +237,10 @@ define void @vselect_vv_v6f32(ptr %a, ptr %b, ptr %cc, ptr %z) { ; RV32-NEXT: vslide1down.vx v10, v10, a4 ; RV32-NEXT: vslide1down.vx v10, v10, a2 ; RV32-NEXT: vslidedown.vi v10, v10, 2 +; RV32-NEXT: vsetivli zero, 6, e8, mf2, ta, ma ; RV32-NEXT: vand.vi v10, v10, 1 ; RV32-NEXT: vmsne.vi v0, v10, 0 -; RV32-NEXT: vsetivli zero, 6, e32, m2, tu, mu +; RV32-NEXT: vsetvli zero, zero, e32, m2, tu, mu ; RV32-NEXT: vle32.v v8, (a0), v0.t ; RV32-NEXT: vse32.v v8, (a3) ; RV32-NEXT: ret @@ -261,9 +268,10 @@ define void @vselect_vv_v6f32(ptr %a, ptr %b, ptr %cc, ptr %z) { ; RV64-NEXT: vslide1down.vx v10, v10, a4 ; RV64-NEXT: vslide1down.vx v10, v10, a2 ; RV64-NEXT: vslidedown.vi v10, v10, 2 +; RV64-NEXT: vsetivli zero, 6, e8, mf2, ta, ma ; RV64-NEXT: vand.vi v10, v10, 1 ; RV64-NEXT: vmsne.vi v0, v10, 0 -; RV64-NEXT: vsetivli zero, 6, e32, m2, tu, mu +; RV64-NEXT: vsetvli zero, zero, e32, m2, tu, mu ; RV64-NEXT: vle32.v v8, (a0), v0.t ; 
RV64-NEXT: vse32.v v8, (a3) ; RV64-NEXT: ret @@ -299,9 +307,10 @@ define void @vselect_vx_v6f32(float %a, ptr %b, ptr %cc, ptr %z) { ; RV32-NEXT: vslide1down.vx v10, v10, a3 ; RV32-NEXT: vslide1down.vx v10, v10, a1 ; RV32-NEXT: vslidedown.vi v10, v10, 2 +; RV32-NEXT: vsetivli zero, 6, e8, mf2, ta, ma ; RV32-NEXT: vand.vi v10, v10, 1 ; RV32-NEXT: vmsne.vi v0, v10, 0 -; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32-NEXT: vfmerge.vfm v8, v8, fa0, v0 ; RV32-NEXT: vse32.v v8, (a2) ; RV32-NEXT: ret @@ -329,9 +338,10 @@ define void @vselect_vx_v6f32(float %a, ptr %b, ptr %cc, ptr %z) { ; RV64-NEXT: vslide1down.vx v10, v10, a3 ; RV64-NEXT: vslide1down.vx v10, v10, a1 ; RV64-NEXT: vslidedown.vi v10, v10, 2 +; RV64-NEXT: vsetivli zero, 6, e8, mf2, ta, ma ; RV64-NEXT: vand.vi v10, v10, 1 ; RV64-NEXT: vmsne.vi v0, v10, 0 -; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma +; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV64-NEXT: vfmerge.vfm v8, v8, fa0, v0 ; RV64-NEXT: vse32.v v8, (a2) ; RV64-NEXT: ret @@ -368,9 +378,10 @@ define void @vselect_vfpzero_v6f32(ptr %b, ptr %cc, ptr %z) { ; RV32-NEXT: vslide1down.vx v10, v10, a3 ; RV32-NEXT: vslide1down.vx v10, v10, a1 ; RV32-NEXT: vslidedown.vi v10, v10, 2 +; RV32-NEXT: vsetivli zero, 6, e8, mf2, ta, ma ; RV32-NEXT: vand.vi v10, v10, 1 ; RV32-NEXT: vmsne.vi v0, v10, 0 -; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32-NEXT: vmerge.vim v8, v8, 0, v0 ; RV32-NEXT: vse32.v v8, (a2) ; RV32-NEXT: ret @@ -398,9 +409,10 @@ define void @vselect_vfpzero_v6f32(ptr %b, ptr %cc, ptr %z) { ; RV64-NEXT: vslide1down.vx v10, v10, a3 ; RV64-NEXT: vslide1down.vx v10, v10, a1 ; RV64-NEXT: vslidedown.vi v10, v10, 2 +; RV64-NEXT: vsetivli zero, 6, e8, mf2, ta, ma ; RV64-NEXT: vand.vi v10, v10, 1 ; RV64-NEXT: vmsne.vi v0, v10, 0 -; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma +; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV64-NEXT: vmerge.vim v8, v8, 0, v0 ; RV64-NEXT: vse32.v v8, (a2) ; RV64-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll b/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll index f9b5095c9af1d..9b5bde2814fda 100644 --- a/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll @@ -1515,40 +1515,36 @@ define @vp_floor_nxv16f64( %va, @vfmax_vv_nxv16f64( %va, @vfmin_vv_nxv16f64( %va, @fshl_v16i64( %a, @fshl_v16i64( %a, @fshl_v16i64( %a, @fshl_v16i64( %a, @fshl_v16i64( %a, @fshl_v16i64( %a, @vp_nearbyint_nxv16f64( %va, ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb ; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma -; CHECK-NEXT: vmv1r.v v7, v0 +; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: lui a2, %hi(.LCPI44_0) ; CHECK-NEXT: srli a3, a1, 3 ; CHECK-NEXT: fld fa5, %lo(.LCPI44_0)(a2) ; CHECK-NEXT: sub a2, a0, a1 -; CHECK-NEXT: vslidedown.vx v6, v0, a3 +; CHECK-NEXT: vslidedown.vx v25, v0, a3 ; CHECK-NEXT: sltu a3, a0, a2 ; CHECK-NEXT: addi a3, a3, -1 ; CHECK-NEXT: and a2, a3, a2 -; CHECK-NEXT: 
vmv1r.v v0, v6 +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v16, v0.t +; CHECK-NEXT: vfabs.v v8, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t +; CHECK-NEXT: vmflt.vf v25, v8, fa5, v0.t ; CHECK-NEXT: frflags a2 -; CHECK-NEXT: vmv1r.v v0, v6 +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: addi a3, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t ; CHECK-NEXT: fsflags a2 -; CHECK-NEXT: addi a2, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 ; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: bltu a0, a1, .LBB44_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB44_2: -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v7, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v24, v16, fa5, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: fsflags a0 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: .cfi_def_cfa sp, 16 ; CHECK-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/round-vp.ll b/llvm/test/CodeGen/RISCV/rvv/round-vp.ll index 39744dcecd718..bc4b3ad7f79f2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/round-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/round-vp.ll @@ -1515,40 +1515,36 @@ define @vp_round_nxv16f64( %va, @vp_roundeven_nxv16f64( %va, ; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v16, v0.t +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a2, 0 ; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t -; CHECK-NEXT: addi a3, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: fsrm a2 -; CHECK-NEXT: addi a2, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; CHECK-NEXT: vs8r.v 
v16, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: bltu a0, a1, .LBB44_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB44_2: ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8, v0.t +; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v7, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v7, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll b/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll index 1300d8cd64ebb..75615fe0fe759 100644 --- a/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll @@ -1515,40 +1515,36 @@ define @vp_roundtozero_nxv16f64( %v ; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v16, v0.t +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a2, 1 ; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t -; CHECK-NEXT: addi a3, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: fsrm a2 -; CHECK-NEXT: addi a2, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: bltu a0, a1, .LBB44_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB44_2: ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8, v0.t +; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v7, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v7, v24, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll index e6272701a6033..91442e2b75682 100644 --- a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll @@ -1398,8 +1398,6 @@ define @fcmp_oeq_vv_nxv64bf16( 
%va, @fcmp_oeq_vv_nxv64bf16( %va, @fcmp_oeq_vv_nxv64bf16( %va, @fcmp_oeq_vv_nxv64f16( %va, @fcmp_oeq_vv_nxv64f16( %va, @fcmp_oeq_vv_nxv64f16( %va, %data, <16 x i8> %mask, i8 %passthru) { ; CHECK-LABEL: extract_last_i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vmsne.vi v0, v9, 0 -; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vcpop.m a1, v0 -; CHECK-NEXT: vid.v v9, v0.t ; CHECK-NEXT: beqz a1, .LBB0_2 ; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu +; CHECK-NEXT: vid.v v9, v0.t ; CHECK-NEXT: vredmaxu.vs v9, v9, v9 ; CHECK-NEXT: vmv.x.s a0, v9 ; CHECK-NEXT: andi a0, a0, 255 @@ -29,12 +30,12 @@ define i16 @extract_last_i16(<8 x i16> %data, <8 x i16> %mask, i16 %passthru) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vmsne.vi v0, v9, 0 -; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, mu -; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vcpop.m a1, v0 -; CHECK-NEXT: vid.v v9, v0.t ; CHECK-NEXT: beqz a1, .LBB1_2 ; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, mu +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vid.v v9, v0.t ; CHECK-NEXT: vredmaxu.vs v9, v9, v9 ; CHECK-NEXT: vmv.x.s a0, v9 ; CHECK-NEXT: andi a0, a0, 255 @@ -53,12 +54,12 @@ define i32 @extract_last_i32(<4 x i32> %data, <4 x i32> %mask, i32 %passthru) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vmsne.vi v0, v9, 0 -; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, mu -; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vcpop.m a1, v0 -; CHECK-NEXT: vid.v v9, v0.t ; CHECK-NEXT: beqz a1, .LBB2_2 ; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, mu +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vid.v v9, v0.t ; CHECK-NEXT: vredmaxu.vs v9, v9, v9 ; CHECK-NEXT: vmv.x.s a0, v9 ; CHECK-NEXT: andi a0, a0, 255 @@ -77,14 +78,14 @@ define i64 @extract_last_i64(<2 x i64> %data, <2 x i64> %mask, i64 %passthru) { ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vmsne.vi v0, v9, 0 -; RV32-NEXT: vsetvli zero, zero, e8, mf8, ta, mu -; RV32-NEXT: vmv.v.i v9, 0 ; RV32-NEXT: vcpop.m a2, v0 -; RV32-NEXT: vid.v v9, v0.t ; RV32-NEXT: beqz a2, .LBB3_2 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: vredmaxu.vs v9, v9, v9 +; RV32-NEXT: vsetvli zero, zero, e8, mf8, ta, mu +; RV32-NEXT: vmv.v.i v9, 0 ; RV32-NEXT: li a1, 32 +; RV32-NEXT: vid.v v9, v0.t +; RV32-NEXT: vredmaxu.vs v9, v9, v9 ; RV32-NEXT: vmv.x.s a0, v9 ; RV32-NEXT: andi a0, a0, 255 ; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, ma @@ -100,12 +101,12 @@ define i64 @extract_last_i64(<2 x i64> %data, <2 x i64> %mask, i64 %passthru) { ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV64-NEXT: vmsne.vi v0, v9, 0 -; RV64-NEXT: vsetvli zero, zero, e8, mf8, ta, mu -; RV64-NEXT: vmv.v.i v9, 0 ; RV64-NEXT: vcpop.m a1, v0 -; RV64-NEXT: vid.v v9, v0.t ; RV64-NEXT: beqz a1, .LBB3_2 ; RV64-NEXT: # %bb.1: +; RV64-NEXT: vsetvli zero, zero, e8, mf8, ta, mu +; RV64-NEXT: vmv.v.i v9, 0 +; RV64-NEXT: vid.v v9, v0.t ; RV64-NEXT: vredmaxu.vs v9, v9, v9 ; RV64-NEXT: vmv.x.s a0, v9 ; RV64-NEXT: andi a0, a0, 255 @@ -124,12 +125,12 @@ define float @extract_last_float(<4 x float> %data, <4 x i32> %mask, float %pass ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vmsne.vi v0, v9, 0 -; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, mu -; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: vid.v v9, v0.t ; CHECK-NEXT: beqz 
a0, .LBB4_2 ; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, mu +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vid.v v9, v0.t ; CHECK-NEXT: vredmaxu.vs v9, v9, v9 ; CHECK-NEXT: vmv.x.s a0, v9 ; CHECK-NEXT: andi a0, a0, 255 @@ -148,12 +149,12 @@ define double @extract_last_double(<2 x double> %data, <2 x i64> %mask, double % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-NEXT: vmsne.vi v0, v9, 0 -; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, mu -; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: vid.v v9, v0.t ; CHECK-NEXT: beqz a0, .LBB5_2 ; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, mu +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vid.v v9, v0.t ; CHECK-NEXT: vredmaxu.vs v9, v9, v9 ; CHECK-NEXT: vmv.x.s a0, v9 ; CHECK-NEXT: andi a0, a0, 255 @@ -170,12 +171,13 @@ define double @extract_last_double(<2 x double> %data, <2 x i64> %mask, double % define i8 @extract_last_i8_scalable( %data, %mask, i8 %passthru) { ; CHECK-LABEL: extract_last_i8_scalable: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, mu -; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma ; CHECK-NEXT: vcpop.m a1, v0 -; CHECK-NEXT: vid.v v10, v0.t ; CHECK-NEXT: beqz a1, .LBB6_2 ; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, mu +; CHECK-NEXT: vid.v v10, v0.t ; CHECK-NEXT: vredmaxu.vs v10, v10, v10 ; CHECK-NEXT: vmv.x.s a0, v10 ; CHECK-NEXT: andi a0, a0, 255 @@ -191,12 +193,13 @@ define i8 @extract_last_i8_scalable( %data, define i16 @extract_last_i16_scalable( %data, %mask, i16 %passthru) { ; CHECK-LABEL: extract_last_i16_scalable: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, mu -; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; CHECK-NEXT: vcpop.m a1, v0 -; CHECK-NEXT: vid.v v10, v0.t ; CHECK-NEXT: beqz a1, .LBB7_2 ; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu +; CHECK-NEXT: vid.v v10, v0.t ; CHECK-NEXT: vredmaxu.vs v10, v10, v10 ; CHECK-NEXT: vmv.x.s a0, v10 ; CHECK-NEXT: andi a0, a0, 255 @@ -212,12 +215,13 @@ define i16 @extract_last_i16_scalable( %data, %data, %mask, i32 %passthru) { ; CHECK-LABEL: extract_last_i32_scalable: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, mu -; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; CHECK-NEXT: vcpop.m a1, v0 -; CHECK-NEXT: vid.v v10, v0.t ; CHECK-NEXT: beqz a1, .LBB8_2 ; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, mu +; CHECK-NEXT: vid.v v10, v0.t ; CHECK-NEXT: vredmaxu.vs v10, v10, v10 ; CHECK-NEXT: vmv.x.s a0, v10 ; CHECK-NEXT: andi a0, a0, 255 @@ -233,14 +237,15 @@ define i32 @extract_last_i32_scalable( %data, %data, %mask, i64 %passthru) { ; RV32-LABEL: extract_last_i64_scalable: ; RV32: # %bb.0: -; RV32-NEXT: vsetvli a2, zero, e8, mf4, ta, mu -; RV32-NEXT: vmv.v.i v10, 0 +; RV32-NEXT: vsetvli a2, zero, e8, mf4, ta, ma ; RV32-NEXT: vcpop.m a2, v0 -; RV32-NEXT: vid.v v10, v0.t ; RV32-NEXT: beqz a2, .LBB9_2 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: vredmaxu.vs v10, v10, v10 +; RV32-NEXT: vmv.v.i v10, 0 ; RV32-NEXT: li a1, 32 +; RV32-NEXT: vsetvli zero, zero, e8, mf4, ta, mu +; RV32-NEXT: vid.v v10, v0.t +; RV32-NEXT: vredmaxu.vs v10, v10, v10 ; RV32-NEXT: vmv.x.s a0, v10 ; RV32-NEXT: andi a0, a0, 255 ; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, ma @@ -254,12 +259,13 @@ define i64 
@extract_last_i64_scalable( %data, %data, %data, %mask, float %passthru) { ; CHECK-LABEL: extract_last_float_scalable: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, mu -; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma ; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: vid.v v10, v0.t ; CHECK-NEXT: beqz a0, .LBB10_2 ; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, mu +; CHECK-NEXT: vid.v v10, v0.t ; CHECK-NEXT: vredmaxu.vs v10, v10, v10 ; CHECK-NEXT: vmv.x.s a0, v10 ; CHECK-NEXT: andi a0, a0, 255 @@ -296,12 +303,13 @@ define float @extract_last_float_scalable( %data, %data, %mask, double %passthru) { ; CHECK-LABEL: extract_last_double_scalable: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, mu -; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma ; CHECK-NEXT: vcpop.m a0, v0 -; CHECK-NEXT: vid.v v10, v0.t ; CHECK-NEXT: beqz a0, .LBB11_2 ; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, mu +; CHECK-NEXT: vid.v v10, v0.t ; CHECK-NEXT: vredmaxu.vs v10, v10, v10 ; CHECK-NEXT: vmv.x.s a0, v10 ; CHECK-NEXT: andi a0, a0, 255 diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-reassociations.ll b/llvm/test/CodeGen/RISCV/rvv/vector-reassociations.ll index 6435c1c14e061..fd1dbab2362a7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vector-reassociations.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vector-reassociations.ll @@ -222,9 +222,9 @@ define @vadd_vv_mask_negative( %0, @llvm.riscv.vadd.mask.nxv1i8.nxv1i8( diff --git a/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll index 9e78bbdc4f441..6831d1fb63cae 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll @@ -429,16 +429,16 @@ define @vfadd_vv_nxv32bf16( %va, @vfadd_vv_nxv32f16( %va, @vfdiv_vv_nxv32bf16( %va, @vfdiv_vv_nxv32f16( %va, @vfmsub_vv_nxv32f16_unmasked( %v ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vsetvli a2, zero, e8, m4, ta, ma -; ZVFHMIN-NEXT: vmv8r.v v24, v8 -; ZVFHMIN-NEXT: vl8re16.v v8, (a0) +; ZVFHMIN-NEXT: vl8re16.v v24, (a0) ; ZVFHMIN-NEXT: lui a2, 8 +; ZVFHMIN-NEXT: vsetvli a0, zero, e8, m4, ta, ma ; ZVFHMIN-NEXT: vmset.m v16 ; ZVFHMIN-NEXT: csrr a3, vlenb ; ZVFHMIN-NEXT: slli a0, a3, 1 @@ -8516,25 +8515,25 @@ define @vfmsub_vv_nxv32f16_unmasked( %v ; ZVFHMIN-NEXT: vslidedown.vx v0, v16, a3 ; ZVFHMIN-NEXT: sltu a3, a1, a4 ; ZVFHMIN-NEXT: vsetvli zero, a1, e16, m8, ta, ma -; ZVFHMIN-NEXT: vxor.vx v16, v8, a2 +; ZVFHMIN-NEXT: vxor.vx v16, v24, a2 ; ZVFHMIN-NEXT: addi a3, a3, -1 ; ZVFHMIN-NEXT: and a3, a3, a4 -; ZVFHMIN-NEXT: vmv4r.v v8, v16 -; ZVFHMIN-NEXT: addi a2, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20, v0.t ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20, v0.t +; ZVFHMIN-NEXT: addi a2, sp, 16 +; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vmv4r.v v16, v8 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 4 ; ZVFHMIN-NEXT: add a2, sp, a2 ; 
ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v28, v0.t +; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 3 ; ZVFHMIN-NEXT: mv a3, a2 @@ -8543,35 +8542,32 @@ define @vfmsub_vv_nxv32f16_unmasked( %v ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t -; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a2, a2, 3 -; ZVFHMIN-NEXT: add a2, sp, a2 -; ZVFHMIN-NEXT: addi a2, a2, 16 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t +; ZVFHMIN-NEXT: addi a2, sp, 16 ; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v24, v16, v8, v0.t +; ZVFHMIN-NEXT: vfmadd.vv v16, v24, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24, v0.t +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16, v0.t ; ZVFHMIN-NEXT: bltu a1, a0, .LBB281_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a1, a0 ; ZVFHMIN-NEXT: .LBB281_2: -; ZVFHMIN-NEXT: addi a0, sp, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vsetvli zero, a1, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16 ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vsetvli zero, a1, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16 +; ZVFHMIN-NEXT: addi a0, sp, 16 ; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 4 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 +; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16 ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 ; ZVFHMIN-NEXT: mv a1, a0 @@ -8579,15 +8575,12 @@ define @vfmsub_vv_nxv32f16_unmasked( %v ; ZVFHMIN-NEXT: add a0, a0, a1 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v24 -; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 3 -; ZVFHMIN-NEXT: add a0, sp, a0 -; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v16 +; ZVFHMIN-NEXT: addi a0, sp, 16 +; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v0, v16, v24 +; ZVFHMIN-NEXT: vfmadd.vv v0, v24, v16 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v0 ; ZVFHMIN-NEXT: csrr a0, vlenb @@ -10079,36 +10072,34 @@ define @vfnmadd_vf_nxv32f16_unmasked( % ; ZVFHMIN-NEXT: addi a3, a3, -1 ; ZVFHMIN-NEXT: and a3, a3, a4 ; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: slli a4, a4, 4 +; ZVFHMIN-NEXT: slli a4, a4, 3 +; ZVFHMIN-NEXT: mv a5, a4 +; ZVFHMIN-NEXT: slli a4, a4, 1 +; ZVFHMIN-NEXT: add a4, a4, a5 ; ZVFHMIN-NEXT: add a4, sp, a4 ; ZVFHMIN-NEXT: addi a4, a4, 16 ; ZVFHMIN-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill ; 
ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20, v0.t -; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: slli a4, a4, 3 -; ZVFHMIN-NEXT: add a4, sp, a4 -; ZVFHMIN-NEXT: addi a4, a4, 16 +; ZVFHMIN-NEXT: addi a4, sp, 16 ; ZVFHMIN-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a4, vlenb ; ZVFHMIN-NEXT: slli a4, a4, 3 -; ZVFHMIN-NEXT: mv a5, a4 -; ZVFHMIN-NEXT: slli a4, a4, 1 -; ZVFHMIN-NEXT: add a4, a4, a5 ; ZVFHMIN-NEXT: add a4, sp, a4 ; ZVFHMIN-NEXT: addi a4, a4, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t ; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m8, ta, ma ; ZVFHMIN-NEXT: vmv.v.x v8, a2 -; ZVFHMIN-NEXT: addi a2, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t +; ZVFHMIN-NEXT: vmv4r.v v16, v8 ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a2, a2, 3 +; ZVFHMIN-NEXT: slli a2, a2, 4 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 +; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t +; ZVFHMIN-NEXT: addi a2, sp, 16 ; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v16, v24, v8, v0.t @@ -10119,34 +10110,31 @@ define @vfnmadd_vf_nxv32f16_unmasked( % ; ZVFHMIN-NEXT: mv a0, a1 ; ZVFHMIN-NEXT: .LBB292_2: ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 4 +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: mv a2, a1 +; ZVFHMIN-NEXT: slli a1, a1, 1 +; ZVFHMIN-NEXT: add a1, a1, a2 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 ; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 -; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 3 -; ZVFHMIN-NEXT: add a0, sp, a0 -; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: addi a0, sp, 16 ; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 -; ZVFHMIN-NEXT: mv a1, a0 -; ZVFHMIN-NEXT: slli a0, a0, 1 -; ZVFHMIN-NEXT: add a0, a0, a1 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v0 -; ZVFHMIN-NEXT: addi a0, sp, 16 ; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v16 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16 ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: slli a0, a0, 4 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 ; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v16 +; ZVFHMIN-NEXT: addi a0, sp, 16 +; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v0, v24, v16 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma @@ -10307,6 +10295,7 @@ define @vfnmadd_vf_nxv32f16_neg_splat( ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v7, v0 +; ZVFHMIN-NEXT: vmv8r.v v24, v16 ; ZVFHMIN-NEXT: csrr a1, vlenb ; ZVFHMIN-NEXT: slli a1, a1, 5 ; 
ZVFHMIN-NEXT: add a1, sp, a1 @@ -10315,11 +10304,11 @@ define @vfnmadd_vf_nxv32f16_neg_splat( ; ZVFHMIN-NEXT: fmv.x.h a1, fa0 ; ZVFHMIN-NEXT: lui a2, 8 ; ZVFHMIN-NEXT: csrr a3, vlenb -; ZVFHMIN-NEXT: vmv.v.x v24, a1 +; ZVFHMIN-NEXT: vmv.v.x v16, a1 ; ZVFHMIN-NEXT: slli a1, a3, 1 ; ZVFHMIN-NEXT: srli a3, a3, 2 -; ZVFHMIN-NEXT: vxor.vx v8, v24, a2, v0.t -; ZVFHMIN-NEXT: vxor.vx v16, v16, a2, v0.t +; ZVFHMIN-NEXT: vxor.vx v8, v16, a2, v0.t +; ZVFHMIN-NEXT: vxor.vx v24, v24, a2, v0.t ; ZVFHMIN-NEXT: sub a2, a0, a1 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a3 @@ -10330,14 +10319,15 @@ define @vfnmadd_vf_nxv32f16_neg_splat( ; ZVFHMIN-NEXT: slli a3, a3, 4 ; ZVFHMIN-NEXT: add a3, sp, a3 ; ZVFHMIN-NEXT: addi a3, a3, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v28, v0.t ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vmv4r.v v16, v8 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 3 ; ZVFHMIN-NEXT: mv a3, a2 @@ -10345,7 +10335,7 @@ define @vfnmadd_vf_nxv32f16_neg_splat( ; ZVFHMIN-NEXT: add a2, a2, a3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a2, a2, 5 @@ -10481,7 +10471,7 @@ define @vfnmadd_vf_nxv32f16_neg_splat_commute( @vfnmadd_vf_nxv32f16_neg_splat_commute( @vfnmadd_vf_nxv32f16_neg_splat_commute( @vfnmadd_vf_nxv32f16_neg_splat_commute( @vfnmsub_vf_nxv32f16_neg_splat_unmasked_commute( @vfnmsub_vf_nxv32f16_neg_splat_unmasked_commute( @vfnmsub_vf_nxv32f16_neg_splat_unmasked_commute( @vfnmsub_vf_nxv32f16_neg_splat_unmasked_commute( @vfmax_vv_nxv32bf16( %va, @vfmax_vv_nxv32f16( %va, @vfmin_vv_nxv32bf16( %va, @vfmin_vv_nxv32f16( %va, @vfmul_vv_nxv32f16( %va, @llvm.vp.fptrunc.nxv16f64.nxv16f32( @vfptrunc_nxv16f32_nxv16f64( %a, %m, i32 zeroext %vl) { ; CHECK-LABEL: vfptrunc_nxv16f32_nxv16f64: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma -; CHECK-NEXT: vmv1r.v v7, v0 -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 3 ; CHECK-NEXT: sub a3, a0, a1 @@ -113,24 +105,16 @@ define @vfptrunc_nxv16f32_nxv16f64( ; CHECK-NEXT: sltu a2, a0, a3 ; CHECK-NEXT: addi a2, a2, -1 ; CHECK-NEXT: and a2, a2, a3 -; CHECK-NEXT: addi a3, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a2, e32, m4, ta, ma -; CHECK-NEXT: vfncvt.f.f.w v20, v24, v0.t +; CHECK-NEXT: vfncvt.f.f.w v28, v16, v0.t ; CHECK-NEXT: bltu a0, a1, .LBB7_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB7_2: -; CHECK-NEXT: vmv1r.v v0, v7 +; 
CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vfncvt.f.f.w v16, v8, v0.t -; CHECK-NEXT: vmv8r.v v8, v16 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: .cfi_def_cfa sp, 16 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: vfncvt.f.f.w v24, v8, v0.t +; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: ret %v = call @llvm.vp.fptrunc.nxv16f64.nxv16f32( %a, %m, i32 %vl) ret %v @@ -144,58 +128,68 @@ define @vfptrunc_nxv32f32_nxv32f64( ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: li a3, 24 +; CHECK-NEXT: mul a1, a1, a3 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb ; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma -; CHECK-NEXT: vmv1r.v v7, v0 -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: srli a3, a1, 3 -; CHECK-NEXT: srli a5, a1, 2 -; CHECK-NEXT: slli a6, a1, 3 -; CHECK-NEXT: slli a4, a1, 1 -; CHECK-NEXT: vslidedown.vx v16, v0, a5 -; CHECK-NEXT: add a6, a0, a6 -; CHECK-NEXT: sub a5, a2, a4 -; CHECK-NEXT: vl8re64.v v24, (a6) -; CHECK-NEXT: sltu a6, a2, a5 -; CHECK-NEXT: addi a6, a6, -1 -; CHECK-NEXT: and a5, a6, a5 -; CHECK-NEXT: sub a6, a5, a1 -; CHECK-NEXT: sltu a7, a5, a6 -; CHECK-NEXT: addi a7, a7, -1 ; CHECK-NEXT: vl8re64.v v8, (a0) -; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vx v0, v16, a3 -; CHECK-NEXT: and a0, a7, a6 -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vfncvt.f.f.w v20, v24, v0.t -; CHECK-NEXT: bltu a5, a1, .LBB8_2 +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: srli a5, a1, 3 +; CHECK-NEXT: slli a4, a1, 3 +; CHECK-NEXT: slli a3, a1, 1 +; CHECK-NEXT: add a6, a0, a4 +; CHECK-NEXT: sub a0, a2, a3 +; CHECK-NEXT: sltu a4, a2, a0 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: and a0, a4, a0 +; CHECK-NEXT: sub a4, a0, a1 +; CHECK-NEXT: sltu a7, a0, a4 +; CHECK-NEXT: addi a7, a7, -1 +; CHECK-NEXT: and a4, a7, a4 +; CHECK-NEXT: srli a7, a1, 2 +; CHECK-NEXT: vl8re64.v v8, (a6) +; CHECK-NEXT: vslidedown.vx v16, v0, a7 +; CHECK-NEXT: vsetvli a6, zero, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vx v25, v0, a5 +; CHECK-NEXT: vslidedown.vx v0, v16, a5 +; CHECK-NEXT: bltu a0, a1, .LBB8_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a5, a1 +; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB8_2: +; CHECK-NEXT: vsetvli zero, a4, e32, m4, ta, ma +; CHECK-NEXT: vfncvt.f.f.w v20, v8, v0.t ; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vx v6, v7, a3 -; CHECK-NEXT: vsetvli zero, a5, e32, m4, ta, ma +; CHECK-NEXT: addi a4, sp, 16 +; CHECK-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli 
zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfncvt.f.f.w v16, v8, v0.t -; CHECK-NEXT: bltu a2, a4, .LBB8_4 +; CHECK-NEXT: bltu a2, a3, .LBB8_4 ; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: mv a2, a4 +; CHECK-NEXT: mv a2, a3 ; CHECK-NEXT: .LBB8_4: ; CHECK-NEXT: sub a0, a2, a1 ; CHECK-NEXT: sltu a3, a2, a0 ; CHECK-NEXT: addi a3, a3, -1 ; CHECK-NEXT: and a0, a3, a0 -; CHECK-NEXT: vmv1r.v v0, v6 -; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 3 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 ; CHECK-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfncvt.f.f.w v28, v8, v0.t @@ -203,9 +197,9 @@ define @vfptrunc_nxv32f32_nxv32f64( ; CHECK-NEXT: # %bb.5: ; CHECK-NEXT: mv a2, a1 ; CHECK-NEXT: .LBB8_6: -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload @@ -213,7 +207,8 @@ define @vfptrunc_nxv32f32_nxv32f64( ; CHECK-NEXT: vfncvt.f.f.w v24, v8, v0.t ; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: .cfi_def_cfa sp, 16 ; CHECK-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll index 059408a1c9c3f..056c7557440e0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll @@ -391,16 +391,16 @@ define @vfsub_vv_nxv32bf16( %va, @vfsub_vv_nxv32f16( %va, % ; RV32: # %bb.0: ; RV32-NEXT: vsetvli a2, zero, e8, mf4, ta, ma ; RV32-NEXT: vmv1r.v v8, v0 +; RV32-NEXT: slli a2, a1, 1 ; RV32-NEXT: vmv.v.i v9, 0 -; RV32-NEXT: li a2, -1 +; RV32-NEXT: li a1, -1 ; RV32-NEXT: vsetvli a3, zero, e8, mf2, ta, ma ; RV32-NEXT: vmv.v.i v10, 0 ; RV32-NEXT: csrr a3, vlenb @@ -340,21 +341,19 @@ define i32 @masked_load_store_factor2_v2_shared_mask_extract( % ; RV32-NEXT: vmerge.vim v11, v9, 1, v0 ; RV32-NEXT: srli a3, a3, 2 ; RV32-NEXT: vwaddu.vv v12, v11, v11 -; RV32-NEXT: vwmaccu.vx v12, a2, v11 +; RV32-NEXT: vwmaccu.vx v12, a1, v11 ; RV32-NEXT: vmsne.vi v0, v12, 0 -; RV32-NEXT: vsetvli a2, zero, e8, mf2, ta, ma +; RV32-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; RV32-NEXT: vslidedown.vx v11, v12, a3 ; RV32-NEXT: vmerge.vim v10, v10, 1, v0 -; RV32-NEXT: vsetvli a2, zero, e8, mf4, ta, ma +; RV32-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; RV32-NEXT: vmsne.vi v0, v11, 0 -; RV32-NEXT: add a2, a3, a3 +; RV32-NEXT: add a1, a3, a3 ; RV32-NEXT: vmerge.vim v9, v9, 1, v0 -; RV32-NEXT: vsetvli zero, a2, e8, mf2, ta, ma +; RV32-NEXT: vsetvli zero, a1, e8, mf2, ta, ma ; RV32-NEXT: vslideup.vx v10, v9, a3 -; RV32-NEXT: vsetvli a2, zero, e8, mf2, ta, ma +; RV32-NEXT: vsetvli zero, a2, e8, mf2, ta, ma ; RV32-NEXT: vmsne.vi v0, v10, 0 -; RV32-NEXT: slli a2, a1, 1 -; RV32-NEXT: vsetvli zero, a2, e32, m2, ta, ma ; RV32-NEXT: vle32.v v10, (a0), v0.t ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetvli a3, zero, e32, m1, ta, ma @@ -676,6 +675,7 @@ define {, } @not_same_mask( ; RV32-NEXT: vsetvli a2, zero, e8, mf4, ta, ma ; RV32-NEXT: vmv1r.v v9, v0 ; RV32-NEXT: vmv1r.v v0, v8 +; RV32-NEXT: slli a1, a1, 1 ; RV32-NEXT: vmv.v.i v8, 0 ; RV32-NEXT: li a2, -1 ; RV32-NEXT: vsetvli a3, zero, e8, mf2, ta, ma @@ -698,10 +698,8 @@ define {, } @not_same_mask( ; RV32-NEXT: vmerge.vim v8, 
v8, 1, v0 ; RV32-NEXT: vsetvli zero, a2, e8, mf2, ta, ma ; RV32-NEXT: vslideup.vx v10, v8, a3 -; RV32-NEXT: vsetvli a2, zero, e8, mf2, ta, ma +; RV32-NEXT: vsetvli zero, a1, e8, mf2, ta, ma ; RV32-NEXT: vmsne.vi v0, v10, 0 -; RV32-NEXT: slli a1, a1, 1 -; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, ma ; RV32-NEXT: vle32.v v10, (a0), v0.t ; RV32-NEXT: li a0, 32 ; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll index 1007d1ce649cc..68e7297605be2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll @@ -304,12 +304,12 @@ define @vpgather_baseidx_nxv32i8(ptr %base, @vpload_nxv17f64(ptr %ptr, ptr %out, %v, %val, %m, i32 zeroext %evl) { ; CHECK-LABEL: vreduce_fminimum_nxv4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v10, fa0 -; CHECK-NEXT: feq.s a1, fa0, fa0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vfredmin.vs v10, v8, v10, v0.t -; CHECK-NEXT: vmfne.vv v11, v8, v8, v0.t -; CHECK-NEXT: vcpop.m a0, v11, v0.t +; CHECK-NEXT: vmfne.vv v10, v8, v8, v0.t +; CHECK-NEXT: feq.s a1, fa0, fa0 +; CHECK-NEXT: vcpop.m a2, v10, v0.t ; CHECK-NEXT: xori a1, a1, 1 -; CHECK-NEXT: or a0, a0, a1 -; CHECK-NEXT: beqz a0, .LBB22_2 +; CHECK-NEXT: or a1, a2, a1 +; CHECK-NEXT: beqz a1, .LBB22_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: lui a0, 523264 ; CHECK-NEXT: fmv.w.x fa0, a0 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB22_2: +; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; CHECK-NEXT: vfmv.s.f v10, fa0 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfredmin.vs v10, v8, v10, v0.t ; CHECK-NEXT: vfmv.f.s fa0, v10 ; CHECK-NEXT: ret %s = call float @llvm.vp.reduce.fminimum.nxv4f32(float %start, %val, %m, i32 %evl) @@ -371,21 +372,22 @@ define float @vreduce_fminimum_nxv4f32(float %start, %val, define float @vreduce_fmaximum_nxv4f32(float %start, %val, %m, i32 zeroext %evl) { ; CHECK-LABEL: vreduce_fmaximum_nxv4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v10, fa0 -; CHECK-NEXT: feq.s a1, fa0, fa0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vfredmax.vs v10, v8, v10, v0.t -; CHECK-NEXT: vmfne.vv v11, v8, v8, v0.t -; CHECK-NEXT: vcpop.m a0, v11, v0.t +; CHECK-NEXT: vmfne.vv v10, v8, v8, v0.t +; CHECK-NEXT: feq.s a1, fa0, fa0 +; CHECK-NEXT: vcpop.m a2, v10, v0.t ; CHECK-NEXT: xori a1, a1, 1 -; CHECK-NEXT: or a0, a0, a1 -; CHECK-NEXT: beqz a0, .LBB23_2 +; CHECK-NEXT: or a1, a2, a1 +; CHECK-NEXT: beqz a1, .LBB23_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: lui a0, 523264 ; CHECK-NEXT: fmv.w.x fa0, a0 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB23_2: +; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; CHECK-NEXT: vfmv.s.f v10, fa0 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfredmax.vs v10, v8, v10, v0.t ; CHECK-NEXT: vfmv.f.s fa0, v10 ; CHECK-NEXT: ret %s = call float @llvm.vp.reduce.fmaximum.nxv4f32(float %start, %val, %m, i32 %evl) @@ -421,21 +423,22 @@ define float @vreduce_fmaximum_nnan_nxv4f32(float %start, % define float @vreduce_fminimum_v4f32(float %start, <4 x float> %val, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vreduce_fminimum_v4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: feq.s a1, fa0, fa0 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; CHECK-NEXT: vfredmin.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmfne.vv v8, v8, v8, v0.t -; 
CHECK-NEXT: vcpop.m a0, v8, v0.t +; CHECK-NEXT: vmfne.vv v9, v8, v8, v0.t +; CHECK-NEXT: feq.s a1, fa0, fa0 +; CHECK-NEXT: vcpop.m a2, v9, v0.t ; CHECK-NEXT: xori a1, a1, 1 -; CHECK-NEXT: or a0, a0, a1 -; CHECK-NEXT: beqz a0, .LBB26_2 +; CHECK-NEXT: or a1, a2, a1 +; CHECK-NEXT: beqz a1, .LBB26_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: lui a0, 523264 ; CHECK-NEXT: fmv.w.x fa0, a0 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB26_2: +; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vfmv.s.f v9, fa0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vfredmin.vs v9, v8, v9, v0.t ; CHECK-NEXT: vfmv.f.s fa0, v9 ; CHECK-NEXT: ret %s = call float @llvm.vp.reduce.fminimum.v4f32(float %start, <4 x float> %val, <4 x i1> %m, i32 %evl) @@ -445,21 +448,22 @@ define float @vreduce_fminimum_v4f32(float %start, <4 x float> %val, <4 x i1> %m define float @vreduce_fmaximum_v4f32(float %start, <4 x float> %val, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vreduce_fmaximum_v4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 -; CHECK-NEXT: feq.s a1, fa0, fa0 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; CHECK-NEXT: vfredmax.vs v9, v8, v9, v0.t -; CHECK-NEXT: vmfne.vv v8, v8, v8, v0.t -; CHECK-NEXT: vcpop.m a0, v8, v0.t +; CHECK-NEXT: vmfne.vv v9, v8, v8, v0.t +; CHECK-NEXT: feq.s a1, fa0, fa0 +; CHECK-NEXT: vcpop.m a2, v9, v0.t ; CHECK-NEXT: xori a1, a1, 1 -; CHECK-NEXT: or a0, a0, a1 -; CHECK-NEXT: beqz a0, .LBB27_2 +; CHECK-NEXT: or a1, a2, a1 +; CHECK-NEXT: beqz a1, .LBB27_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: lui a0, 523264 ; CHECK-NEXT: fmv.w.x fa0, a0 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB27_2: +; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vfmv.s.f v9, fa0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vfredmax.vs v9, v8, v9, v0.t ; CHECK-NEXT: vfmv.f.s fa0, v9 ; CHECK-NEXT: ret %s = call float @llvm.vp.reduce.fmaximum.v4f32(float %start, <4 x float> %val, <4 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll index fd5bf4ebcede8..32d24778d7327 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll @@ -285,58 +285,68 @@ define @vtrunc_nxv32i64_nxv32i32( %a, @vtrunc_nxv32i64_nxv32i32( %a, @vtrunc_nxv32i64_nxv32i32( %a,