diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index c56877b9fcfe4..1e725fcfac72d 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -20,6 +20,7 @@
 #include "RISCVSelectionDAGInfo.h"
 #include "RISCVSubtarget.h"
 #include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/MemoryLocation.h"
 #include "llvm/Analysis/ValueTracking.h"
@@ -15502,6 +15503,32 @@ static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
   return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
 }
 
+// Try to expand a multiply to a sequence of shifts and add/subs,
+// for a machine without a native mul instruction.
+static SDValue expandMulToNAFSequence(SDNode *N, SelectionDAG &DAG,
+                                      uint64_t MulAmt) {
+  SDLoc DL(N);
+  EVT VT = N->getValueType(0);
+  const uint64_t BitWidth = VT.getFixedSizeInBits();
+
+  SDValue Result = DAG.getConstant(0, DL, N->getValueType(0));
+  SDValue N0 = N->getOperand(0);
+
+  // Find the non-adjacent form (NAF) of the multiplier.
+  for (uint64_t E = MulAmt, I = 0; E && I < BitWidth; ++I, E >>= 1) {
+    if (E & 1) {
+      bool IsAdd = (E & 3) == 1;
+      E -= IsAdd ? 1 : -1;
+      SDValue ShiftVal = DAG.getNode(ISD::SHL, DL, VT, N0,
+                                     DAG.getShiftAmountConstant(I, VT, DL));
+      ISD::NodeType AddSubOp = IsAdd ? ISD::ADD : ISD::SUB;
+      Result = DAG.getNode(AddSubOp, DL, VT, Result, ShiftVal);
+    }
+  }
+
+  return Result;
+}
+
 // X * (2^N +/- 2^M) -> (add/sub (shl X, C1), (shl X, C2))
 static SDValue expandMulToAddOrSubOfShl(SDNode *N, SelectionDAG &DAG,
                                         uint64_t MulAmt) {
@@ -15537,21 +15564,24 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
   if (DAG.getMachineFunction().getFunction().hasMinSize())
     return SDValue();
 
-  if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
-    return SDValue();
-
   if (VT != Subtarget.getXLenVT())
     return SDValue();
 
-  const bool HasShlAdd = Subtarget.hasStdExtZba() ||
-                         Subtarget.hasVendorXTHeadBa() ||
-                         Subtarget.hasVendorXAndesPerf();
+  bool ShouldExpandMul =
+      (!DCI.isBeforeLegalize() && !DCI.isCalledByLegalizer()) ||
+      !Subtarget.hasStdExtZmmul();
+  if (!ShouldExpandMul)
+    return SDValue();
 
   ConstantSDNode *CNode = dyn_cast<ConstantSDNode>(N->getOperand(1));
   if (!CNode)
     return SDValue();
   uint64_t MulAmt = CNode->getZExtValue();
 
+  const bool HasShlAdd = Subtarget.hasStdExtZba() ||
+                         Subtarget.hasVendorXTHeadBa() ||
+                         Subtarget.hasVendorXAndesPerf();
+
   // WARNING: The code below is knowingly incorrect with regards to undef semantics.
   // We're adding additional uses of X here, and in principle, we should be freezing
   // X before doing so.
However, adding freeze here causes real regressions, and no @@ -15689,6 +15719,9 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG, if (SDValue V = expandMulToAddOrSubOfShl(N, DAG, MulAmt)) return V; + if (!Subtarget.hasStdExtZmmul()) + return expandMulToNAFSequence(N, DAG, MulAmt); + return SDValue(); } diff --git a/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll index 3a7d31253b05d..8b9d602dcde83 100644 --- a/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll +++ b/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll @@ -262,20 +262,33 @@ define i32 @test_cttz_i32(i32 %a) nounwind { ; RV64I-NEXT: sext.w a1, a0 ; RV64I-NEXT: beqz a1, .LBB2_2 ; RV64I-NEXT: # %bb.1: # %cond.false -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: neg a1, a0 +; RV64I-NEXT: negw a1, a0 ; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: lui a1, 30667 -; RV64I-NEXT: addiw a1, a1, 1329 -; RV64I-NEXT: call __muldi3 +; RV64I-NEXT: slli a1, a0, 6 +; RV64I-NEXT: slli a2, a0, 8 +; RV64I-NEXT: slli a3, a0, 10 +; RV64I-NEXT: slli a4, a0, 12 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: slli a2, a0, 16 +; RV64I-NEXT: subw a3, a3, a4 +; RV64I-NEXT: slli a4, a0, 18 +; RV64I-NEXT: subw a2, a2, a4 +; RV64I-NEXT: slli a4, a0, 4 +; RV64I-NEXT: subw a4, a0, a4 +; RV64I-NEXT: add a1, a4, a1 +; RV64I-NEXT: slli a4, a0, 14 +; RV64I-NEXT: subw a3, a3, a4 +; RV64I-NEXT: slli a4, a0, 23 +; RV64I-NEXT: subw a2, a2, a4 +; RV64I-NEXT: slli a0, a0, 27 +; RV64I-NEXT: add a1, a1, a3 +; RV64I-NEXT: add a0, a2, a0 +; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: srliw a0, a0, 27 ; RV64I-NEXT: lui a1, %hi(.LCPI2_0) ; RV64I-NEXT: addi a1, a1, %lo(.LCPI2_0) ; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: lbu a0, 0(a0) -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; RV64I-NEXT: .LBB2_2: ; RV64I-NEXT: li a0, 32 @@ -730,20 +743,33 @@ define i32 @test_cttz_i32_zero_undef(i32 %a) nounwind { ; ; RV64I-LABEL: test_cttz_i32_zero_undef: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: neg a1, a0 +; RV64I-NEXT: negw a1, a0 ; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: lui a1, 30667 -; RV64I-NEXT: addiw a1, a1, 1329 -; RV64I-NEXT: call __muldi3 +; RV64I-NEXT: slli a1, a0, 6 +; RV64I-NEXT: slli a2, a0, 8 +; RV64I-NEXT: slli a3, a0, 10 +; RV64I-NEXT: slli a4, a0, 12 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: slli a2, a0, 16 +; RV64I-NEXT: subw a3, a3, a4 +; RV64I-NEXT: slli a4, a0, 18 +; RV64I-NEXT: subw a2, a2, a4 +; RV64I-NEXT: slli a4, a0, 4 +; RV64I-NEXT: subw a4, a0, a4 +; RV64I-NEXT: add a1, a4, a1 +; RV64I-NEXT: slli a4, a0, 14 +; RV64I-NEXT: subw a3, a3, a4 +; RV64I-NEXT: slli a4, a0, 23 +; RV64I-NEXT: subw a2, a2, a4 +; RV64I-NEXT: slli a0, a0, 27 +; RV64I-NEXT: add a1, a1, a3 +; RV64I-NEXT: add a0, a2, a0 +; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: srliw a0, a0, 27 ; RV64I-NEXT: lui a1, %hi(.LCPI6_0) ; RV64I-NEXT: addi a1, a1, %lo(.LCPI6_0) ; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: lbu a0, 0(a0) -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; ; RV32M-LABEL: test_cttz_i32_zero_undef: diff --git a/llvm/test/CodeGen/RISCV/ctz_zero_return_test.ll b/llvm/test/CodeGen/RISCV/ctz_zero_return_test.ll index 03a6a6b1c4b7d..33907e10730a7 100644 --- a/llvm/test/CodeGen/RISCV/ctz_zero_return_test.ll +++ b/llvm/test/CodeGen/RISCV/ctz_zero_return_test.ll @@ -162,27 +162,38 @@ define i64 @ctz_dereferencing_pointer_zext(ptr %b) nounwind 
{ ; ; RV64I-LABEL: ctz_dereferencing_pointer_zext: ; RV64I: # %bb.0: # %entry -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: lw s0, 0(a0) -; RV64I-NEXT: neg a0, s0 -; RV64I-NEXT: and a0, s0, a0 -; RV64I-NEXT: lui a1, 30667 -; RV64I-NEXT: addiw a1, a1, 1329 -; RV64I-NEXT: call __muldi3 -; RV64I-NEXT: srliw a0, a0, 27 -; RV64I-NEXT: lui a1, %hi(.LCPI1_0) -; RV64I-NEXT: addi a1, a1, %lo(.LCPI1_0) -; RV64I-NEXT: add a0, a1, a0 -; RV64I-NEXT: lbu a0, 0(a0) -; RV64I-NEXT: seqz a1, s0 -; RV64I-NEXT: addi a1, a1, -1 -; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: lw a0, 0(a0) +; RV64I-NEXT: negw a1, a0 +; RV64I-NEXT: and a1, a0, a1 +; RV64I-NEXT: slli a2, a1, 6 +; RV64I-NEXT: slli a3, a1, 8 +; RV64I-NEXT: slli a4, a1, 10 +; RV64I-NEXT: slli a5, a1, 12 +; RV64I-NEXT: add a2, a2, a3 +; RV64I-NEXT: slli a3, a1, 16 +; RV64I-NEXT: subw a4, a4, a5 +; RV64I-NEXT: slli a5, a1, 18 +; RV64I-NEXT: subw a3, a3, a5 +; RV64I-NEXT: slli a5, a1, 4 +; RV64I-NEXT: subw a5, a1, a5 +; RV64I-NEXT: add a2, a5, a2 +; RV64I-NEXT: slli a5, a1, 14 +; RV64I-NEXT: subw a4, a4, a5 +; RV64I-NEXT: slli a5, a1, 23 +; RV64I-NEXT: subw a3, a3, a5 +; RV64I-NEXT: slli a1, a1, 27 +; RV64I-NEXT: add a2, a2, a4 +; RV64I-NEXT: add a1, a3, a1 +; RV64I-NEXT: add a1, a2, a1 +; RV64I-NEXT: srliw a1, a1, 27 +; RV64I-NEXT: lui a2, %hi(.LCPI1_0) +; RV64I-NEXT: addi a2, a2, %lo(.LCPI1_0) +; RV64I-NEXT: add a1, a2, a1 +; RV64I-NEXT: lbu a1, 0(a1) +; RV64I-NEXT: seqz a0, a0 +; RV64I-NEXT: addi a0, a0, -1 +; RV64I-NEXT: and a0, a1, a0 ; RV64I-NEXT: andi a0, a0, 31 -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret @@ -237,27 +248,37 @@ define signext i32 @ctz1(i32 signext %x) nounwind { ; ; RV64I-LABEL: ctz1: ; RV64I: # %bb.0: # %entry -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: neg a0, a0 -; RV64I-NEXT: and a0, s0, a0 -; RV64I-NEXT: lui a1, 30667 -; RV64I-NEXT: addiw a1, a1, 1329 -; RV64I-NEXT: call __muldi3 -; RV64I-NEXT: srliw a0, a0, 27 -; RV64I-NEXT: lui a1, %hi(.LCPI2_0) -; RV64I-NEXT: addi a1, a1, %lo(.LCPI2_0) -; RV64I-NEXT: add a0, a1, a0 -; RV64I-NEXT: lbu a0, 0(a0) -; RV64I-NEXT: seqz a1, s0 -; RV64I-NEXT: addi a1, a1, -1 -; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: negw a1, a0 +; RV64I-NEXT: and a1, a0, a1 +; RV64I-NEXT: slli a2, a1, 6 +; RV64I-NEXT: slli a3, a1, 8 +; RV64I-NEXT: slli a4, a1, 10 +; RV64I-NEXT: slli a5, a1, 12 +; RV64I-NEXT: add a2, a2, a3 +; RV64I-NEXT: slli a3, a1, 16 +; RV64I-NEXT: subw a4, a4, a5 +; RV64I-NEXT: slli a5, a1, 18 +; RV64I-NEXT: subw a3, a3, a5 +; RV64I-NEXT: slli a5, a1, 4 +; RV64I-NEXT: subw a5, a1, a5 +; RV64I-NEXT: add a2, a5, a2 +; RV64I-NEXT: slli a5, a1, 14 +; RV64I-NEXT: subw a4, a4, a5 +; RV64I-NEXT: slli a5, a1, 23 +; RV64I-NEXT: subw a3, a3, a5 +; RV64I-NEXT: slli a1, a1, 27 +; RV64I-NEXT: add a2, a2, a4 +; RV64I-NEXT: add a1, a3, a1 +; RV64I-NEXT: add a1, a2, a1 +; RV64I-NEXT: srliw a1, a1, 27 +; RV64I-NEXT: lui a2, %hi(.LCPI2_0) +; RV64I-NEXT: addi a2, a2, %lo(.LCPI2_0) +; RV64I-NEXT: add a1, a2, a1 +; RV64I-NEXT: lbu a1, 0(a1) +; RV64I-NEXT: seqz a0, a0 +; RV64I-NEXT: addi a0, a0, -1 +; RV64I-NEXT: and a0, a1, a0 ; RV64I-NEXT: andi a0, a0, 31 -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 
16 ; RV64I-NEXT: ret @@ -310,27 +331,37 @@ define signext i32 @ctz1_flipped(i32 signext %x) nounwind { ; ; RV64I-LABEL: ctz1_flipped: ; RV64I: # %bb.0: # %entry -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: neg a0, a0 -; RV64I-NEXT: and a0, s0, a0 -; RV64I-NEXT: lui a1, 30667 -; RV64I-NEXT: addiw a1, a1, 1329 -; RV64I-NEXT: call __muldi3 -; RV64I-NEXT: srliw a0, a0, 27 -; RV64I-NEXT: lui a1, %hi(.LCPI3_0) -; RV64I-NEXT: addi a1, a1, %lo(.LCPI3_0) -; RV64I-NEXT: add a0, a1, a0 -; RV64I-NEXT: lbu a0, 0(a0) -; RV64I-NEXT: seqz a1, s0 -; RV64I-NEXT: addi a1, a1, -1 -; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: negw a1, a0 +; RV64I-NEXT: and a1, a0, a1 +; RV64I-NEXT: slli a2, a1, 6 +; RV64I-NEXT: slli a3, a1, 8 +; RV64I-NEXT: slli a4, a1, 10 +; RV64I-NEXT: slli a5, a1, 12 +; RV64I-NEXT: add a2, a2, a3 +; RV64I-NEXT: slli a3, a1, 16 +; RV64I-NEXT: subw a4, a4, a5 +; RV64I-NEXT: slli a5, a1, 18 +; RV64I-NEXT: subw a3, a3, a5 +; RV64I-NEXT: slli a5, a1, 4 +; RV64I-NEXT: subw a5, a1, a5 +; RV64I-NEXT: add a2, a5, a2 +; RV64I-NEXT: slli a5, a1, 14 +; RV64I-NEXT: subw a4, a4, a5 +; RV64I-NEXT: slli a5, a1, 23 +; RV64I-NEXT: subw a3, a3, a5 +; RV64I-NEXT: slli a1, a1, 27 +; RV64I-NEXT: add a2, a2, a4 +; RV64I-NEXT: add a1, a3, a1 +; RV64I-NEXT: add a1, a2, a1 +; RV64I-NEXT: srliw a1, a1, 27 +; RV64I-NEXT: lui a2, %hi(.LCPI3_0) +; RV64I-NEXT: addi a2, a2, %lo(.LCPI3_0) +; RV64I-NEXT: add a1, a2, a1 +; RV64I-NEXT: lbu a1, 0(a1) +; RV64I-NEXT: seqz a0, a0 +; RV64I-NEXT: addi a0, a0, -1 +; RV64I-NEXT: and a0, a1, a0 ; RV64I-NEXT: andi a0, a0, 31 -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret @@ -381,20 +412,33 @@ define signext i32 @ctz2(i32 signext %x) nounwind { ; RV64I: # %bb.0: # %entry ; RV64I-NEXT: beqz a0, .LBB4_2 ; RV64I-NEXT: # %bb.1: # %cond.false -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: neg a1, a0 +; RV64I-NEXT: negw a1, a0 ; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: lui a1, 30667 -; RV64I-NEXT: addiw a1, a1, 1329 -; RV64I-NEXT: call __muldi3 +; RV64I-NEXT: slli a1, a0, 6 +; RV64I-NEXT: slli a2, a0, 8 +; RV64I-NEXT: slli a3, a0, 10 +; RV64I-NEXT: slli a4, a0, 12 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: slli a2, a0, 16 +; RV64I-NEXT: subw a3, a3, a4 +; RV64I-NEXT: slli a4, a0, 18 +; RV64I-NEXT: subw a2, a2, a4 +; RV64I-NEXT: slli a4, a0, 4 +; RV64I-NEXT: subw a4, a0, a4 +; RV64I-NEXT: add a1, a4, a1 +; RV64I-NEXT: slli a4, a0, 14 +; RV64I-NEXT: subw a3, a3, a4 +; RV64I-NEXT: slli a4, a0, 23 +; RV64I-NEXT: subw a2, a2, a4 +; RV64I-NEXT: slli a0, a0, 27 +; RV64I-NEXT: add a1, a1, a3 +; RV64I-NEXT: add a0, a2, a0 +; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: srliw a0, a0, 27 ; RV64I-NEXT: lui a1, %hi(.LCPI4_0) ; RV64I-NEXT: addi a1, a1, %lo(.LCPI4_0) ; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: lbu a0, 0(a0) -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; RV64I-NEXT: .LBB4_2: ; RV64I-NEXT: li a0, 32 @@ -446,20 +490,33 @@ define signext i32 @ctz3(i32 signext %x) nounwind { ; RV64I: # %bb.0: # %entry ; RV64I-NEXT: beqz a0, .LBB5_2 ; RV64I-NEXT: # %bb.1: # %cond.false -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: neg a1, a0 +; RV64I-NEXT: negw a1, a0 ; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: lui a1, 30667 -; 
RV64I-NEXT: addiw a1, a1, 1329 -; RV64I-NEXT: call __muldi3 +; RV64I-NEXT: slli a1, a0, 6 +; RV64I-NEXT: slli a2, a0, 8 +; RV64I-NEXT: slli a3, a0, 10 +; RV64I-NEXT: slli a4, a0, 12 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: slli a2, a0, 16 +; RV64I-NEXT: subw a3, a3, a4 +; RV64I-NEXT: slli a4, a0, 18 +; RV64I-NEXT: subw a2, a2, a4 +; RV64I-NEXT: slli a4, a0, 4 +; RV64I-NEXT: subw a4, a0, a4 +; RV64I-NEXT: add a1, a4, a1 +; RV64I-NEXT: slli a4, a0, 14 +; RV64I-NEXT: subw a3, a3, a4 +; RV64I-NEXT: slli a4, a0, 23 +; RV64I-NEXT: subw a2, a2, a4 +; RV64I-NEXT: slli a0, a0, 27 +; RV64I-NEXT: add a1, a1, a3 +; RV64I-NEXT: add a0, a2, a0 +; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: srliw a0, a0, 27 ; RV64I-NEXT: lui a1, %hi(.LCPI5_0) ; RV64I-NEXT: addi a1, a1, %lo(.LCPI5_0) ; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: lbu a0, 0(a0) -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; RV64I-NEXT: .LBB5_2: ; RV64I-NEXT: li a0, 32 @@ -767,27 +824,37 @@ define signext i32 @ctz5(i32 signext %x) nounwind { ; ; RV64I-LABEL: ctz5: ; RV64I: # %bb.0: # %entry -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: neg a0, a0 -; RV64I-NEXT: and a0, s0, a0 -; RV64I-NEXT: lui a1, 30667 -; RV64I-NEXT: addiw a1, a1, 1329 -; RV64I-NEXT: call __muldi3 -; RV64I-NEXT: srliw a0, a0, 27 -; RV64I-NEXT: lui a1, %hi(.LCPI8_0) -; RV64I-NEXT: addi a1, a1, %lo(.LCPI8_0) -; RV64I-NEXT: add a0, a1, a0 -; RV64I-NEXT: lbu a0, 0(a0) -; RV64I-NEXT: seqz a1, s0 -; RV64I-NEXT: addi a1, a1, -1 -; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: negw a1, a0 +; RV64I-NEXT: and a1, a0, a1 +; RV64I-NEXT: slli a2, a1, 6 +; RV64I-NEXT: slli a3, a1, 8 +; RV64I-NEXT: slli a4, a1, 10 +; RV64I-NEXT: slli a5, a1, 12 +; RV64I-NEXT: add a2, a2, a3 +; RV64I-NEXT: slli a3, a1, 16 +; RV64I-NEXT: subw a4, a4, a5 +; RV64I-NEXT: slli a5, a1, 18 +; RV64I-NEXT: subw a3, a3, a5 +; RV64I-NEXT: slli a5, a1, 4 +; RV64I-NEXT: subw a5, a1, a5 +; RV64I-NEXT: add a2, a5, a2 +; RV64I-NEXT: slli a5, a1, 14 +; RV64I-NEXT: subw a4, a4, a5 +; RV64I-NEXT: slli a5, a1, 23 +; RV64I-NEXT: subw a3, a3, a5 +; RV64I-NEXT: slli a1, a1, 27 +; RV64I-NEXT: add a2, a2, a4 +; RV64I-NEXT: add a1, a3, a1 +; RV64I-NEXT: add a1, a2, a1 +; RV64I-NEXT: srliw a1, a1, 27 +; RV64I-NEXT: lui a2, %hi(.LCPI8_0) +; RV64I-NEXT: addi a2, a2, %lo(.LCPI8_0) +; RV64I-NEXT: add a1, a2, a1 +; RV64I-NEXT: lbu a1, 0(a1) +; RV64I-NEXT: seqz a0, a0 +; RV64I-NEXT: addi a0, a0, -1 +; RV64I-NEXT: and a0, a1, a0 ; RV64I-NEXT: andi a0, a0, 31 -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret @@ -840,27 +907,37 @@ define signext i32 @ctz6(i32 signext %x) nounwind { ; ; RV64I-LABEL: ctz6: ; RV64I: # %bb.0: # %entry -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: neg a0, a0 -; RV64I-NEXT: and a0, s0, a0 -; RV64I-NEXT: lui a1, 30667 -; RV64I-NEXT: addiw a1, a1, 1329 -; RV64I-NEXT: call __muldi3 -; RV64I-NEXT: srliw a0, a0, 27 -; RV64I-NEXT: lui a1, %hi(.LCPI9_0) -; RV64I-NEXT: addi a1, a1, %lo(.LCPI9_0) -; RV64I-NEXT: add a0, a1, a0 -; RV64I-NEXT: lbu a0, 0(a0) -; RV64I-NEXT: seqz a1, s0 -; RV64I-NEXT: addi a1, a1, -1 -; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: negw a1, a0 +; RV64I-NEXT: and a1, a0, a1 +; RV64I-NEXT: slli a2, a1, 6 
+; RV64I-NEXT: slli a3, a1, 8 +; RV64I-NEXT: slli a4, a1, 10 +; RV64I-NEXT: slli a5, a1, 12 +; RV64I-NEXT: add a2, a2, a3 +; RV64I-NEXT: slli a3, a1, 16 +; RV64I-NEXT: subw a4, a4, a5 +; RV64I-NEXT: slli a5, a1, 18 +; RV64I-NEXT: subw a3, a3, a5 +; RV64I-NEXT: slli a5, a1, 4 +; RV64I-NEXT: subw a5, a1, a5 +; RV64I-NEXT: add a2, a5, a2 +; RV64I-NEXT: slli a5, a1, 14 +; RV64I-NEXT: subw a4, a4, a5 +; RV64I-NEXT: slli a5, a1, 23 +; RV64I-NEXT: subw a3, a3, a5 +; RV64I-NEXT: slli a1, a1, 27 +; RV64I-NEXT: add a2, a2, a4 +; RV64I-NEXT: add a1, a3, a1 +; RV64I-NEXT: add a1, a2, a1 +; RV64I-NEXT: srliw a1, a1, 27 +; RV64I-NEXT: lui a2, %hi(.LCPI9_0) +; RV64I-NEXT: addi a2, a2, %lo(.LCPI9_0) +; RV64I-NEXT: add a1, a2, a1 +; RV64I-NEXT: lbu a1, 0(a1) +; RV64I-NEXT: seqz a0, a0 +; RV64I-NEXT: addi a0, a0, -1 +; RV64I-NEXT: and a0, a1, a0 ; RV64I-NEXT: andi a0, a0, 31 -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret @@ -918,28 +995,39 @@ define signext i32 @globalVar() nounwind { ; ; RV64I-LABEL: globalVar: ; RV64I: # %bb.0: # %entry -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill ; RV64I-NEXT: lui a0, %hi(global_x) -; RV64I-NEXT: lw s0, %lo(global_x)(a0) -; RV64I-NEXT: neg a0, s0 -; RV64I-NEXT: and a0, s0, a0 -; RV64I-NEXT: lui a1, 30667 -; RV64I-NEXT: addiw a1, a1, 1329 -; RV64I-NEXT: call __muldi3 -; RV64I-NEXT: srliw a0, a0, 27 -; RV64I-NEXT: lui a1, %hi(.LCPI10_0) -; RV64I-NEXT: addi a1, a1, %lo(.LCPI10_0) -; RV64I-NEXT: add a0, a1, a0 -; RV64I-NEXT: lbu a0, 0(a0) -; RV64I-NEXT: seqz a1, s0 -; RV64I-NEXT: addi a1, a1, -1 -; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: lw a0, %lo(global_x)(a0) +; RV64I-NEXT: negw a1, a0 +; RV64I-NEXT: and a1, a0, a1 +; RV64I-NEXT: slli a2, a1, 6 +; RV64I-NEXT: slli a3, a1, 8 +; RV64I-NEXT: slli a4, a1, 10 +; RV64I-NEXT: slli a5, a1, 12 +; RV64I-NEXT: add a2, a2, a3 +; RV64I-NEXT: slli a3, a1, 16 +; RV64I-NEXT: subw a4, a4, a5 +; RV64I-NEXT: slli a5, a1, 18 +; RV64I-NEXT: subw a3, a3, a5 +; RV64I-NEXT: slli a5, a1, 4 +; RV64I-NEXT: subw a5, a1, a5 +; RV64I-NEXT: add a2, a5, a2 +; RV64I-NEXT: slli a5, a1, 14 +; RV64I-NEXT: subw a4, a4, a5 +; RV64I-NEXT: slli a5, a1, 23 +; RV64I-NEXT: subw a3, a3, a5 +; RV64I-NEXT: slli a1, a1, 27 +; RV64I-NEXT: add a2, a2, a4 +; RV64I-NEXT: add a1, a3, a1 +; RV64I-NEXT: add a1, a2, a1 +; RV64I-NEXT: srliw a1, a1, 27 +; RV64I-NEXT: lui a2, %hi(.LCPI10_0) +; RV64I-NEXT: addi a2, a2, %lo(.LCPI10_0) +; RV64I-NEXT: add a1, a2, a1 +; RV64I-NEXT: lbu a1, 0(a1) +; RV64I-NEXT: seqz a0, a0 +; RV64I-NEXT: addi a0, a0, -1 +; RV64I-NEXT: and a0, a1, a0 ; RV64I-NEXT: andi a0, a0, 31 -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/mul-expand.ll b/llvm/test/CodeGen/RISCV/mul-expand.ll index 5bb74bc184d8b..a75a7355fa407 100644 --- a/llvm/test/CodeGen/RISCV/mul-expand.ll +++ b/llvm/test/CodeGen/RISCV/mul-expand.ll @@ -7,17 +7,30 @@ define i32 @muli32_0x555(i32 %a) nounwind { ; RV32I-LABEL: muli32_0x555: ; RV32I: # %bb.0: -; RV32I-NEXT: li a1, 1365 -; RV32I-NEXT: tail __mulsi3 +; RV32I-NEXT: slli a1, a0, 2 +; RV32I-NEXT: slli a2, a0, 4 +; RV32I-NEXT: slli a3, a0, 6 +; RV32I-NEXT: add a2, a2, a3 +; RV32I-NEXT: slli a3, a0, 8 +; RV32I-NEXT: add a1, a0, a1 +; RV32I-NEXT: slli a0, a0, 10 +; RV32I-NEXT: add a1, a1, a2 +; RV32I-NEXT: add 
a0, a3, a0 +; RV32I-NEXT: add a0, a1, a0 +; RV32I-NEXT: ret ; ; RV64I-LABEL: muli32_0x555: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: li a1, 1365 -; RV64I-NEXT: call __muldi3 -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: slli a1, a0, 2 +; RV64I-NEXT: slli a2, a0, 4 +; RV64I-NEXT: slli a3, a0, 6 +; RV64I-NEXT: add a2, a2, a3 +; RV64I-NEXT: slli a3, a0, 8 +; RV64I-NEXT: add a1, a0, a1 +; RV64I-NEXT: slli a0, a0, 10 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: add a0, a3, a0 +; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: ret %a1 = mul i32 %a, 1365 ret i32 %a1 @@ -37,8 +50,17 @@ define i64 @muli64_0x555(i64 %a) nounwind { ; ; RV64I-LABEL: muli64_0x555: ; RV64I: # %bb.0: -; RV64I-NEXT: li a1, 1365 -; RV64I-NEXT: tail __muldi3 +; RV64I-NEXT: slli a1, a0, 2 +; RV64I-NEXT: slli a2, a0, 4 +; RV64I-NEXT: slli a3, a0, 6 +; RV64I-NEXT: add a2, a2, a3 +; RV64I-NEXT: slli a3, a0, 8 +; RV64I-NEXT: add a1, a0, a1 +; RV64I-NEXT: slli a0, a0, 10 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: add a0, a3, a0 +; RV64I-NEXT: add a0, a1, a0 +; RV64I-NEXT: ret %a1 = mul i64 %a, 1365 ret i64 %a1 } @@ -46,19 +68,70 @@ define i64 @muli64_0x555(i64 %a) nounwind { define i32 @muli32_0x33333333(i32 %a) nounwind { ; RV32I-LABEL: muli32_0x33333333: ; RV32I: # %bb.0: -; RV32I-NEXT: lui a1, 209715 -; RV32I-NEXT: addi a1, a1, 819 -; RV32I-NEXT: tail __mulsi3 +; RV32I-NEXT: slli a1, a0, 4 +; RV32I-NEXT: slli a2, a0, 6 +; RV32I-NEXT: slli a3, a0, 8 +; RV32I-NEXT: slli a4, a0, 10 +; RV32I-NEXT: slli a5, a0, 14 +; RV32I-NEXT: sub a1, a1, a2 +; RV32I-NEXT: slli a2, a0, 16 +; RV32I-NEXT: sub a3, a3, a4 +; RV32I-NEXT: slli a4, a0, 22 +; RV32I-NEXT: sub a5, a5, a2 +; RV32I-NEXT: slli a2, a0, 24 +; RV32I-NEXT: sub a4, a4, a2 +; RV32I-NEXT: slli a2, a0, 2 +; RV32I-NEXT: sub a2, a2, a0 +; RV32I-NEXT: sub a2, a2, a1 +; RV32I-NEXT: slli a1, a0, 12 +; RV32I-NEXT: add a1, a3, a1 +; RV32I-NEXT: slli a3, a0, 18 +; RV32I-NEXT: add a3, a5, a3 +; RV32I-NEXT: slli a5, a0, 26 +; RV32I-NEXT: add a4, a4, a5 +; RV32I-NEXT: sub a2, a2, a1 +; RV32I-NEXT: slli a1, a0, 20 +; RV32I-NEXT: sub a3, a3, a1 +; RV32I-NEXT: slli a1, a0, 28 +; RV32I-NEXT: sub a4, a4, a1 +; RV32I-NEXT: slli a0, a0, 30 +; RV32I-NEXT: add a2, a2, a3 +; RV32I-NEXT: add a0, a4, a0 +; RV32I-NEXT: add a0, a2, a0 +; RV32I-NEXT: ret ; ; RV64I-LABEL: muli32_0x33333333: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: lui a1, 209715 -; RV64I-NEXT: addiw a1, a1, 819 -; RV64I-NEXT: call __muldi3 -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: slli a1, a0, 4 +; RV64I-NEXT: slli a2, a0, 6 +; RV64I-NEXT: slli a3, a0, 8 +; RV64I-NEXT: slli a4, a0, 10 +; RV64I-NEXT: slli a5, a0, 14 +; RV64I-NEXT: sub a1, a1, a2 +; RV64I-NEXT: slli a2, a0, 16 +; RV64I-NEXT: sub a3, a3, a4 +; RV64I-NEXT: slli a4, a0, 22 +; RV64I-NEXT: sub a5, a5, a2 +; RV64I-NEXT: slli a2, a0, 24 +; RV64I-NEXT: sub a4, a4, a2 +; RV64I-NEXT: slli a2, a0, 2 +; RV64I-NEXT: sub a2, a2, a0 +; RV64I-NEXT: sub a2, a2, a1 +; RV64I-NEXT: slli a1, a0, 12 +; RV64I-NEXT: add a1, a3, a1 +; RV64I-NEXT: slli a3, a0, 18 +; RV64I-NEXT: add a3, a5, a3 +; RV64I-NEXT: slli a5, a0, 26 +; RV64I-NEXT: add a4, a4, a5 +; RV64I-NEXT: sub a2, a2, a1 +; RV64I-NEXT: slli a1, a0, 20 +; RV64I-NEXT: sub a3, a3, a1 +; RV64I-NEXT: slli a1, a0, 28 +; RV64I-NEXT: sub a4, a4, a1 +; RV64I-NEXT: slli a0, a0, 30 +; RV64I-NEXT: add a2, a2, a3 
+; RV64I-NEXT: add a0, a4, a0 +; RV64I-NEXT: add a0, a2, a0 ; RV64I-NEXT: ret %a1 = mul i32 %a, 858993459 ret i32 %a1 @@ -79,9 +152,37 @@ define i64 @muli64_0x33333333(i64 %a) nounwind { ; ; RV64I-LABEL: muli64_0x33333333: ; RV64I: # %bb.0: -; RV64I-NEXT: lui a1, 209715 -; RV64I-NEXT: addiw a1, a1, 819 -; RV64I-NEXT: tail __muldi3 +; RV64I-NEXT: slli a1, a0, 4 +; RV64I-NEXT: slli a2, a0, 6 +; RV64I-NEXT: slli a3, a0, 8 +; RV64I-NEXT: slli a4, a0, 10 +; RV64I-NEXT: slli a5, a0, 14 +; RV64I-NEXT: sub a1, a1, a2 +; RV64I-NEXT: slli a2, a0, 16 +; RV64I-NEXT: sub a3, a3, a4 +; RV64I-NEXT: slli a4, a0, 22 +; RV64I-NEXT: sub a5, a5, a2 +; RV64I-NEXT: slli a2, a0, 24 +; RV64I-NEXT: sub a4, a4, a2 +; RV64I-NEXT: slli a2, a0, 2 +; RV64I-NEXT: sub a2, a2, a0 +; RV64I-NEXT: sub a2, a2, a1 +; RV64I-NEXT: slli a1, a0, 12 +; RV64I-NEXT: add a1, a3, a1 +; RV64I-NEXT: slli a3, a0, 18 +; RV64I-NEXT: add a3, a5, a3 +; RV64I-NEXT: slli a5, a0, 26 +; RV64I-NEXT: add a4, a4, a5 +; RV64I-NEXT: sub a2, a2, a1 +; RV64I-NEXT: slli a1, a0, 20 +; RV64I-NEXT: sub a3, a3, a1 +; RV64I-NEXT: slli a1, a0, 28 +; RV64I-NEXT: sub a4, a4, a1 +; RV64I-NEXT: slli a0, a0, 30 +; RV64I-NEXT: add a2, a2, a3 +; RV64I-NEXT: add a0, a4, a0 +; RV64I-NEXT: add a0, a2, a0 +; RV64I-NEXT: ret %a1 = mul i64 %a, 858993459 ret i64 %a1 } @@ -89,19 +190,72 @@ define i64 @muli64_0x33333333(i64 %a) nounwind { define i32 @muli32_0xaaaaaaaa(i32 %a) nounwind { ; RV32I-LABEL: muli32_0xaaaaaaaa: ; RV32I: # %bb.0: -; RV32I-NEXT: lui a1, 699051 -; RV32I-NEXT: addi a1, a1, -1366 -; RV32I-NEXT: tail __mulsi3 +; RV32I-NEXT: slli a1, a0, 3 +; RV32I-NEXT: slli a2, a0, 1 +; RV32I-NEXT: slli a3, a0, 5 +; RV32I-NEXT: slli a4, a0, 7 +; RV32I-NEXT: slli a5, a0, 9 +; RV32I-NEXT: slli a6, a0, 11 +; RV32I-NEXT: add a1, a2, a1 +; RV32I-NEXT: slli a2, a0, 15 +; RV32I-NEXT: add a3, a3, a4 +; RV32I-NEXT: slli a4, a0, 17 +; RV32I-NEXT: add a5, a5, a6 +; RV32I-NEXT: slli a6, a0, 23 +; RV32I-NEXT: add a2, a2, a4 +; RV32I-NEXT: slli a4, a0, 25 +; RV32I-NEXT: add a4, a6, a4 +; RV32I-NEXT: add a1, a1, a3 +; RV32I-NEXT: slli a3, a0, 13 +; RV32I-NEXT: add a3, a5, a3 +; RV32I-NEXT: slli a5, a0, 19 +; RV32I-NEXT: add a2, a2, a5 +; RV32I-NEXT: slli a5, a0, 27 +; RV32I-NEXT: add a4, a4, a5 +; RV32I-NEXT: add a1, a1, a3 +; RV32I-NEXT: slli a3, a0, 21 +; RV32I-NEXT: add a2, a2, a3 +; RV32I-NEXT: slli a3, a0, 29 +; RV32I-NEXT: add a3, a4, a3 +; RV32I-NEXT: slli a0, a0, 31 +; RV32I-NEXT: add a1, a1, a2 +; RV32I-NEXT: add a0, a3, a0 +; RV32I-NEXT: add a0, a1, a0 +; RV32I-NEXT: ret ; ; RV64I-LABEL: muli32_0xaaaaaaaa: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: lui a1, 699051 -; RV64I-NEXT: addiw a1, a1, -1366 -; RV64I-NEXT: call __muldi3 -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: slli a1, a0, 3 +; RV64I-NEXT: slli a2, a0, 1 +; RV64I-NEXT: slli a3, a0, 5 +; RV64I-NEXT: slli a4, a0, 7 +; RV64I-NEXT: slli a5, a0, 9 +; RV64I-NEXT: slli a6, a0, 11 +; RV64I-NEXT: add a1, a2, a1 +; RV64I-NEXT: slli a2, a0, 15 +; RV64I-NEXT: add a3, a3, a4 +; RV64I-NEXT: slli a4, a0, 17 +; RV64I-NEXT: add a5, a5, a6 +; RV64I-NEXT: slli a6, a0, 23 +; RV64I-NEXT: add a2, a2, a4 +; RV64I-NEXT: slli a4, a0, 25 +; RV64I-NEXT: add a4, a6, a4 +; RV64I-NEXT: add a1, a1, a3 +; RV64I-NEXT: slli a3, a0, 13 +; RV64I-NEXT: add a3, a5, a3 +; RV64I-NEXT: slli a5, a0, 19 +; RV64I-NEXT: add a2, a2, a5 +; RV64I-NEXT: slli a5, a0, 27 +; RV64I-NEXT: add a4, a4, a5 +; RV64I-NEXT: add a1, a1, a3 +; RV64I-NEXT: 
slli a3, a0, 21 +; RV64I-NEXT: add a2, a2, a3 +; RV64I-NEXT: slli a3, a0, 29 +; RV64I-NEXT: add a3, a4, a3 +; RV64I-NEXT: slli a0, a0, 31 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: sub a0, a3, a0 +; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: ret %a1 = mul i32 %a, -1431655766 ret i32 %a1 @@ -122,10 +276,38 @@ define i64 @muli64_0xaaaaaaaa(i64 %a) nounwind { ; ; RV64I-LABEL: muli64_0xaaaaaaaa: ; RV64I: # %bb.0: -; RV64I-NEXT: lui a1, 349525 -; RV64I-NEXT: addiw a1, a1, 1365 -; RV64I-NEXT: slli a1, a1, 1 -; RV64I-NEXT: tail __muldi3 +; RV64I-NEXT: slli a1, a0, 3 +; RV64I-NEXT: slli a2, a0, 1 +; RV64I-NEXT: slli a3, a0, 5 +; RV64I-NEXT: slli a4, a0, 7 +; RV64I-NEXT: slli a5, a0, 9 +; RV64I-NEXT: slli a6, a0, 11 +; RV64I-NEXT: add a1, a2, a1 +; RV64I-NEXT: slli a2, a0, 15 +; RV64I-NEXT: add a3, a3, a4 +; RV64I-NEXT: slli a4, a0, 17 +; RV64I-NEXT: add a5, a5, a6 +; RV64I-NEXT: slli a6, a0, 23 +; RV64I-NEXT: add a2, a2, a4 +; RV64I-NEXT: slli a4, a0, 25 +; RV64I-NEXT: add a4, a6, a4 +; RV64I-NEXT: add a1, a1, a3 +; RV64I-NEXT: slli a3, a0, 13 +; RV64I-NEXT: add a3, a5, a3 +; RV64I-NEXT: slli a5, a0, 19 +; RV64I-NEXT: add a2, a2, a5 +; RV64I-NEXT: slli a5, a0, 27 +; RV64I-NEXT: add a4, a4, a5 +; RV64I-NEXT: add a1, a1, a3 +; RV64I-NEXT: slli a3, a0, 21 +; RV64I-NEXT: add a2, a2, a3 +; RV64I-NEXT: slli a3, a0, 29 +; RV64I-NEXT: add a3, a4, a3 +; RV64I-NEXT: slli a0, a0, 31 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: add a0, a3, a0 +; RV64I-NEXT: add a0, a1, a0 +; RV64I-NEXT: ret %a1 = mul i64 %a, 2863311530 ret i64 %a1 } @@ -171,19 +353,36 @@ define i64 @muli64_0x0fffffff(i64 %a) nounwind { define i32 @muli32_0xf0f0f0f0(i32 %a) nounwind { ; RV32I-LABEL: muli32_0xf0f0f0f0: ; RV32I: # %bb.0: -; RV32I-NEXT: lui a1, 986895 -; RV32I-NEXT: addi a1, a1, 240 -; RV32I-NEXT: tail __mulsi3 +; RV32I-NEXT: slli a1, a0, 4 +; RV32I-NEXT: slli a2, a0, 8 +; RV32I-NEXT: slli a3, a0, 12 +; RV32I-NEXT: slli a4, a0, 16 +; RV32I-NEXT: sub a2, a2, a1 +; RV32I-NEXT: slli a1, a0, 20 +; RV32I-NEXT: sub a3, a3, a4 +; RV32I-NEXT: slli a4, a0, 24 +; RV32I-NEXT: sub a1, a1, a4 +; RV32I-NEXT: slli a0, a0, 28 +; RV32I-NEXT: sub a2, a2, a3 +; RV32I-NEXT: add a0, a1, a0 +; RV32I-NEXT: sub a0, a2, a0 +; RV32I-NEXT: ret ; ; RV64I-LABEL: muli32_0xf0f0f0f0: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: lui a1, 986895 -; RV64I-NEXT: addiw a1, a1, 240 -; RV64I-NEXT: call __muldi3 -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: slli a1, a0, 4 +; RV64I-NEXT: slli a2, a0, 8 +; RV64I-NEXT: slli a3, a0, 12 +; RV64I-NEXT: slli a4, a0, 16 +; RV64I-NEXT: sub a2, a2, a1 +; RV64I-NEXT: slli a1, a0, 20 +; RV64I-NEXT: sub a3, a3, a4 +; RV64I-NEXT: slli a4, a0, 24 +; RV64I-NEXT: sub a1, a1, a4 +; RV64I-NEXT: slli a0, a0, 28 +; RV64I-NEXT: sub a2, a2, a3 +; RV64I-NEXT: add a0, a1, a0 +; RV64I-NEXT: sub a0, a2, a0 ; RV64I-NEXT: ret %a1 = mul i32 %a, -252645136 ret i32 %a1 @@ -204,10 +403,22 @@ define i64 @muli64_0xf0f0f0f0(i64 %a) nounwind { ; ; RV64I-LABEL: muli64_0xf0f0f0f0: ; RV64I: # %bb.0: -; RV64I-NEXT: lui a1, 61681 -; RV64I-NEXT: addiw a1, a1, -241 -; RV64I-NEXT: slli a1, a1, 4 -; RV64I-NEXT: tail __muldi3 +; RV64I-NEXT: slli a1, a0, 4 +; RV64I-NEXT: slli a2, a0, 8 +; RV64I-NEXT: slli a3, a0, 12 +; RV64I-NEXT: slli a4, a0, 16 +; RV64I-NEXT: sub a2, a2, a1 +; RV64I-NEXT: slli a1, a0, 20 +; RV64I-NEXT: sub a3, a3, a4 +; RV64I-NEXT: slli a4, a0, 24 +; RV64I-NEXT: sub a1, a1, a4 +; RV64I-NEXT: sub a2, a2, a3 +; RV64I-NEXT: slli a3, 
a0, 28 +; RV64I-NEXT: add a1, a1, a3 +; RV64I-NEXT: sub a2, a2, a1 +; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: add a0, a2, a0 +; RV64I-NEXT: ret %a1 = mul i64 %a, 4042322160 ret i64 %a1 } @@ -215,19 +426,28 @@ define i64 @muli64_0xf0f0f0f0(i64 %a) nounwind { define i32 @muli32_0xf7f7f7f7(i32 %a) nounwind { ; RV32I-LABEL: muli32_0xf7f7f7f7: ; RV32I: # %bb.0: -; RV32I-NEXT: lui a1, 1015679 -; RV32I-NEXT: addi a1, a1, 2039 -; RV32I-NEXT: tail __mulsi3 +; RV32I-NEXT: slli a1, a0, 3 +; RV32I-NEXT: slli a2, a0, 11 +; RV32I-NEXT: slli a3, a0, 19 +; RV32I-NEXT: add a1, a0, a1 +; RV32I-NEXT: slli a0, a0, 27 +; RV32I-NEXT: add a2, a2, a3 +; RV32I-NEXT: add a1, a1, a2 +; RV32I-NEXT: neg a0, a0 +; RV32I-NEXT: sub a0, a0, a1 +; RV32I-NEXT: ret ; ; RV64I-LABEL: muli32_0xf7f7f7f7: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: lui a1, 1015679 -; RV64I-NEXT: addiw a1, a1, 2039 -; RV64I-NEXT: call __muldi3 -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: slli a1, a0, 3 +; RV64I-NEXT: slli a2, a0, 11 +; RV64I-NEXT: slli a3, a0, 19 +; RV64I-NEXT: add a1, a0, a1 +; RV64I-NEXT: slli a0, a0, 27 +; RV64I-NEXT: add a2, a2, a3 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: neg a0, a0 +; RV64I-NEXT: sub a0, a0, a1 ; RV64I-NEXT: ret %a1 = mul i32 %a, -134744073 ret i32 %a1 @@ -248,11 +468,17 @@ define i64 @muli64_0xf7f7f7f7(i64 %a) nounwind { ; ; RV64I-LABEL: muli64_0xf7f7f7f7: ; RV64I: # %bb.0: -; RV64I-NEXT: lui a1, 248 -; RV64I-NEXT: addiw a1, a1, -129 -; RV64I-NEXT: slli a1, a1, 12 -; RV64I-NEXT: addi a1, a1, 2039 -; RV64I-NEXT: tail __muldi3 +; RV64I-NEXT: slli a1, a0, 3 +; RV64I-NEXT: slli a2, a0, 11 +; RV64I-NEXT: slli a3, a0, 19 +; RV64I-NEXT: add a2, a2, a3 +; RV64I-NEXT: slli a3, a0, 27 +; RV64I-NEXT: add a1, a0, a1 +; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: sub a0, a0, a3 +; RV64I-NEXT: sub a0, a0, a1 +; RV64I-NEXT: ret %a1 = mul i64 %a, 4160223223 ret i64 %a1 } @@ -405,19 +631,44 @@ define i64 @muli64_0x7fffffff(i64 %a) nounwind { define i32 @muli32_0xdeadbeef(i32 %a) nounwind { ; RV32I-LABEL: muli32_0xdeadbeef: ; RV32I: # %bb.0: -; RV32I-NEXT: lui a1, 912092 -; RV32I-NEXT: addi a1, a1, -273 -; RV32I-NEXT: tail __mulsi3 +; RV32I-NEXT: slli a1, a0, 8 +; RV32I-NEXT: slli a2, a0, 14 +; RV32I-NEXT: slli a3, a0, 17 +; RV32I-NEXT: slli a4, a0, 20 +; RV32I-NEXT: add a1, a1, a2 +; RV32I-NEXT: slli a2, a0, 24 +; RV32I-NEXT: add a3, a3, a4 +; RV32I-NEXT: slli a4, a0, 29 +; RV32I-NEXT: add a2, a2, a4 +; RV32I-NEXT: slli a4, a0, 4 +; RV32I-NEXT: add a4, a0, a4 +; RV32I-NEXT: add a1, a4, a1 +; RV32I-NEXT: slli a0, a0, 22 +; RV32I-NEXT: add a0, a3, a0 +; RV32I-NEXT: add a0, a1, a0 +; RV32I-NEXT: neg a1, a2 +; RV32I-NEXT: sub a0, a1, a0 +; RV32I-NEXT: ret ; ; RV64I-LABEL: muli32_0xdeadbeef: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: lui a1, 912092 -; RV64I-NEXT: addiw a1, a1, -273 -; RV64I-NEXT: call __muldi3 -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: slli a1, a0, 8 +; RV64I-NEXT: slli a2, a0, 14 +; RV64I-NEXT: slli a3, a0, 17 +; RV64I-NEXT: slli a4, a0, 20 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: slli a2, a0, 24 +; RV64I-NEXT: add a3, a3, a4 +; RV64I-NEXT: slli a4, a0, 29 +; RV64I-NEXT: add a2, a2, a4 +; RV64I-NEXT: slli a4, a0, 4 +; RV64I-NEXT: add a4, a0, a4 +; RV64I-NEXT: add a1, a4, a1 +; RV64I-NEXT: slli a0, a0, 22 +; RV64I-NEXT: add a0, 
a3, a0 +; RV64I-NEXT: add a0, a1, a0 +; RV64I-NEXT: neg a1, a2 +; RV64I-NEXT: sub a0, a1, a0 ; RV64I-NEXT: ret %a1 = mul i32 %a, -559038737 ret i32 %a1 @@ -438,10 +689,25 @@ define i64 @muli64_0xdeadbeef(i64 %a) nounwind { ; ; RV64I-LABEL: muli64_0xdeadbeef: ; RV64I: # %bb.0: -; RV64I-NEXT: lui a1, 228023 -; RV64I-NEXT: slli a1, a1, 2 -; RV64I-NEXT: addi a1, a1, -273 -; RV64I-NEXT: tail __muldi3 +; RV64I-NEXT: slli a1, a0, 8 +; RV64I-NEXT: slli a2, a0, 14 +; RV64I-NEXT: slli a3, a0, 17 +; RV64I-NEXT: slli a4, a0, 20 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: slli a2, a0, 24 +; RV64I-NEXT: add a3, a3, a4 +; RV64I-NEXT: slli a4, a0, 29 +; RV64I-NEXT: add a2, a2, a4 +; RV64I-NEXT: slli a4, a0, 4 +; RV64I-NEXT: add a4, a0, a4 +; RV64I-NEXT: add a1, a4, a1 +; RV64I-NEXT: slli a4, a0, 22 +; RV64I-NEXT: add a3, a3, a4 +; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: add a1, a1, a3 +; RV64I-NEXT: sub a0, a0, a2 +; RV64I-NEXT: sub a0, a0, a1 +; RV64I-NEXT: ret %a1 = mul i64 %a, 3735928559 ret i64 %a1 } @@ -449,19 +715,52 @@ define i64 @muli64_0xdeadbeef(i64 %a) nounwind { define i32 @muli32_0x12345678(i32 %a) nounwind { ; RV32I-LABEL: muli32_0x12345678: ; RV32I: # %bb.0: -; RV32I-NEXT: lui a1, 74565 -; RV32I-NEXT: addi a1, a1, 1656 -; RV32I-NEXT: tail __mulsi3 +; RV32I-NEXT: slli a1, a0, 3 +; RV32I-NEXT: slli a2, a0, 7 +; RV32I-NEXT: slli a3, a0, 9 +; RV32I-NEXT: slli a4, a0, 11 +; RV32I-NEXT: slli a5, a0, 13 +; RV32I-NEXT: sub a2, a2, a1 +; RV32I-NEXT: slli a1, a0, 15 +; RV32I-NEXT: add a3, a3, a4 +; RV32I-NEXT: slli a4, a0, 20 +; RV32I-NEXT: sub a5, a5, a1 +; RV32I-NEXT: slli a1, a0, 22 +; RV32I-NEXT: sub a4, a4, a1 +; RV32I-NEXT: sub a2, a2, a3 +; RV32I-NEXT: slli a1, a0, 18 +; RV32I-NEXT: sub a5, a5, a1 +; RV32I-NEXT: slli a1, a0, 25 +; RV32I-NEXT: sub a4, a4, a1 +; RV32I-NEXT: slli a0, a0, 28 +; RV32I-NEXT: sub a2, a2, a5 +; RV32I-NEXT: sub a4, a4, a0 +; RV32I-NEXT: sub a0, a2, a4 +; RV32I-NEXT: ret ; ; RV64I-LABEL: muli32_0x12345678: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: lui a1, 74565 -; RV64I-NEXT: addiw a1, a1, 1656 -; RV64I-NEXT: call __muldi3 -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: slli a1, a0, 3 +; RV64I-NEXT: slli a2, a0, 7 +; RV64I-NEXT: slli a3, a0, 9 +; RV64I-NEXT: slli a4, a0, 11 +; RV64I-NEXT: slli a5, a0, 13 +; RV64I-NEXT: sub a2, a2, a1 +; RV64I-NEXT: slli a1, a0, 15 +; RV64I-NEXT: add a3, a3, a4 +; RV64I-NEXT: slli a4, a0, 20 +; RV64I-NEXT: sub a5, a5, a1 +; RV64I-NEXT: slli a1, a0, 22 +; RV64I-NEXT: sub a4, a4, a1 +; RV64I-NEXT: sub a2, a2, a3 +; RV64I-NEXT: slli a1, a0, 18 +; RV64I-NEXT: sub a5, a5, a1 +; RV64I-NEXT: slli a1, a0, 25 +; RV64I-NEXT: sub a4, a4, a1 +; RV64I-NEXT: slli a0, a0, 28 +; RV64I-NEXT: sub a2, a2, a5 +; RV64I-NEXT: sub a4, a4, a0 +; RV64I-NEXT: sub a0, a2, a4 ; RV64I-NEXT: ret %a1 = mul i32 %a, 305419896 ret i32 %a1 @@ -482,9 +781,28 @@ define i64 @muli64_0x12345678(i64 %a) nounwind { ; ; RV64I-LABEL: muli64_0x12345678: ; RV64I: # %bb.0: -; RV64I-NEXT: lui a1, 74565 -; RV64I-NEXT: addiw a1, a1, 1656 -; RV64I-NEXT: tail __muldi3 +; RV64I-NEXT: slli a1, a0, 3 +; RV64I-NEXT: slli a2, a0, 7 +; RV64I-NEXT: slli a3, a0, 9 +; RV64I-NEXT: slli a4, a0, 11 +; RV64I-NEXT: slli a5, a0, 13 +; RV64I-NEXT: sub a2, a2, a1 +; RV64I-NEXT: slli a1, a0, 15 +; RV64I-NEXT: add a3, a3, a4 +; RV64I-NEXT: slli a4, a0, 20 +; RV64I-NEXT: sub a5, a5, a1 +; RV64I-NEXT: slli a1, a0, 22 +; RV64I-NEXT: sub a4, a4, a1 +; RV64I-NEXT: sub a2, a2, a3 +; 
RV64I-NEXT: slli a1, a0, 18 +; RV64I-NEXT: sub a5, a5, a1 +; RV64I-NEXT: slli a1, a0, 25 +; RV64I-NEXT: sub a4, a4, a1 +; RV64I-NEXT: slli a0, a0, 28 +; RV64I-NEXT: sub a2, a2, a5 +; RV64I-NEXT: sub a4, a4, a0 +; RV64I-NEXT: sub a0, a2, a4 +; RV64I-NEXT: ret %a1 = mul i64 %a, 305419896 ret i64 %a1 } diff --git a/llvm/test/CodeGen/RISCV/mul.ll b/llvm/test/CodeGen/RISCV/mul.ll index a65ea088df50c..27d5eaa032522 100644 --- a/llvm/test/CodeGen/RISCV/mul.ll +++ b/llvm/test/CodeGen/RISCV/mul.ll @@ -464,11 +464,45 @@ define i32 @mulhu_constant(i32 %a) nounwind { ret i32 %4 } +define i32 @muli32_p10(i32 %a) nounwind { +; RV32I-LABEL: muli32_p10: +; RV32I: # %bb.0: +; RV32I-NEXT: slli a1, a0, 1 +; RV32I-NEXT: slli a0, a0, 3 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: ret +; +; RV32IM-LABEL: muli32_p10: +; RV32IM: # %bb.0: +; RV32IM-NEXT: slli a1, a0, 1 +; RV32IM-NEXT: slli a0, a0, 3 +; RV32IM-NEXT: add a0, a0, a1 +; RV32IM-NEXT: ret +; +; RV64I-LABEL: muli32_p10: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a1, a0, 1 +; RV64I-NEXT: slli a0, a0, 3 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64IM-LABEL: muli32_p10: +; RV64IM: # %bb.0: +; RV64IM-NEXT: slli a1, a0, 1 +; RV64IM-NEXT: slli a0, a0, 3 +; RV64IM-NEXT: addw a0, a0, a1 +; RV64IM-NEXT: ret + %1 = mul i32 %a, 10 + ret i32 %1 +} + define i32 @muli32_p14(i32 %a) nounwind { ; RV32I-LABEL: muli32_p14: ; RV32I: # %bb.0: -; RV32I-NEXT: li a1, 14 -; RV32I-NEXT: tail __mulsi3 +; RV32I-NEXT: slli a1, a0, 1 +; RV32I-NEXT: slli a0, a0, 4 +; RV32I-NEXT: sub a0, a0, a1 +; RV32I-NEXT: ret ; ; RV32IM-LABEL: muli32_p14: ; RV32IM: # %bb.0: @@ -497,8 +531,10 @@ define i32 @muli32_p14(i32 %a) nounwind { define i32 @muli32_p18(i32 %a) nounwind { ; RV32I-LABEL: muli32_p18: ; RV32I: # %bb.0: -; RV32I-NEXT: li a1, 18 -; RV32I-NEXT: tail __mulsi3 +; RV32I-NEXT: slli a1, a0, 1 +; RV32I-NEXT: slli a0, a0, 4 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: ret ; ; RV32IM-LABEL: muli32_p18: ; RV32IM: # %bb.0: @@ -527,8 +563,10 @@ define i32 @muli32_p18(i32 %a) nounwind { define i32 @muli32_p28(i32 %a) nounwind { ; RV32I-LABEL: muli32_p28: ; RV32I: # %bb.0: -; RV32I-NEXT: li a1, 28 -; RV32I-NEXT: tail __mulsi3 +; RV32I-NEXT: slli a1, a0, 2 +; RV32I-NEXT: slli a0, a0, 5 +; RV32I-NEXT: sub a0, a0, a1 +; RV32I-NEXT: ret ; ; RV32IM-LABEL: muli32_p28: ; RV32IM: # %bb.0: @@ -557,8 +595,10 @@ define i32 @muli32_p28(i32 %a) nounwind { define i32 @muli32_p30(i32 %a) nounwind { ; RV32I-LABEL: muli32_p30: ; RV32I: # %bb.0: -; RV32I-NEXT: li a1, 30 -; RV32I-NEXT: tail __mulsi3 +; RV32I-NEXT: slli a1, a0, 1 +; RV32I-NEXT: slli a0, a0, 5 +; RV32I-NEXT: sub a0, a0, a1 +; RV32I-NEXT: ret ; ; RV32IM-LABEL: muli32_p30: ; RV32IM: # %bb.0: @@ -587,8 +627,10 @@ define i32 @muli32_p30(i32 %a) nounwind { define i32 @muli32_p34(i32 %a) nounwind { ; RV32I-LABEL: muli32_p34: ; RV32I: # %bb.0: -; RV32I-NEXT: li a1, 34 -; RV32I-NEXT: tail __mulsi3 +; RV32I-NEXT: slli a1, a0, 1 +; RV32I-NEXT: slli a0, a0, 5 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: ret ; ; RV32IM-LABEL: muli32_p34: ; RV32IM: # %bb.0: @@ -617,8 +659,10 @@ define i32 @muli32_p34(i32 %a) nounwind { define i32 @muli32_p36(i32 %a) nounwind { ; RV32I-LABEL: muli32_p36: ; RV32I: # %bb.0: -; RV32I-NEXT: li a1, 36 -; RV32I-NEXT: tail __mulsi3 +; RV32I-NEXT: slli a1, a0, 2 +; RV32I-NEXT: slli a0, a0, 5 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: ret ; ; RV32IM-LABEL: muli32_p36: ; RV32IM: # %bb.0: @@ -647,8 +691,10 @@ define i32 @muli32_p36(i32 %a) nounwind { define i32 @muli32_p56(i32 %a) nounwind { ; RV32I-LABEL: muli32_p56: ; RV32I: 
# %bb.0: -; RV32I-NEXT: li a1, 56 -; RV32I-NEXT: tail __mulsi3 +; RV32I-NEXT: slli a1, a0, 3 +; RV32I-NEXT: slli a0, a0, 6 +; RV32I-NEXT: sub a0, a0, a1 +; RV32I-NEXT: ret ; ; RV32IM-LABEL: muli32_p56: ; RV32IM: # %bb.0: @@ -677,8 +723,10 @@ define i32 @muli32_p56(i32 %a) nounwind { define i32 @muli32_p60(i32 %a) nounwind { ; RV32I-LABEL: muli32_p60: ; RV32I: # %bb.0: -; RV32I-NEXT: li a1, 60 -; RV32I-NEXT: tail __mulsi3 +; RV32I-NEXT: slli a1, a0, 2 +; RV32I-NEXT: slli a0, a0, 6 +; RV32I-NEXT: sub a0, a0, a1 +; RV32I-NEXT: ret ; ; RV32IM-LABEL: muli32_p60: ; RV32IM: # %bb.0: @@ -707,8 +755,10 @@ define i32 @muli32_p60(i32 %a) nounwind { define i32 @muli32_p62(i32 %a) nounwind { ; RV32I-LABEL: muli32_p62: ; RV32I: # %bb.0: -; RV32I-NEXT: li a1, 62 -; RV32I-NEXT: tail __mulsi3 +; RV32I-NEXT: slli a1, a0, 1 +; RV32I-NEXT: slli a0, a0, 6 +; RV32I-NEXT: sub a0, a0, a1 +; RV32I-NEXT: ret ; ; RV32IM-LABEL: muli32_p62: ; RV32IM: # %bb.0: @@ -762,6 +812,38 @@ define i32 @muli32_p65(i32 %a) nounwind { ret i32 %1 } +define i32 @muli32_p66(i32 %a) nounwind { +; RV32I-LABEL: muli32_p66: +; RV32I: # %bb.0: +; RV32I-NEXT: slli a1, a0, 1 +; RV32I-NEXT: slli a0, a0, 6 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: ret +; +; RV32IM-LABEL: muli32_p66: +; RV32IM: # %bb.0: +; RV32IM-NEXT: slli a1, a0, 1 +; RV32IM-NEXT: slli a0, a0, 6 +; RV32IM-NEXT: add a0, a0, a1 +; RV32IM-NEXT: ret +; +; RV64I-LABEL: muli32_p66: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a1, a0, 1 +; RV64I-NEXT: slli a0, a0, 6 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64IM-LABEL: muli32_p66: +; RV64IM: # %bb.0: +; RV64IM-NEXT: slli a1, a0, 1 +; RV64IM-NEXT: slli a0, a0, 6 +; RV64IM-NEXT: addw a0, a0, a1 +; RV64IM-NEXT: ret + %1 = mul i32 %a, 66 + ret i32 %1 +} + define i32 @muli32_p63(i32 %a) nounwind { ; RV32I-LABEL: muli32_p63: ; RV32I: # %bb.0: @@ -895,8 +977,10 @@ define i64 @muli64_p72(i64 %a) nounwind { ; ; RV64I-LABEL: muli64_p72: ; RV64I: # %bb.0: -; RV64I-NEXT: li a1, 72 -; RV64I-NEXT: tail __muldi3 +; RV64I-NEXT: slli a1, a0, 3 +; RV64I-NEXT: slli a0, a0, 6 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: ret ; ; RV64IM-LABEL: muli64_p72: ; RV64IM: # %bb.0: @@ -908,6 +992,48 @@ define i64 @muli64_p72(i64 %a) nounwind { ret i64 %1 } +define i64 @muli64_p68(i64 %a) nounwind { +; RV32I-LABEL: muli64_p68: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a2, 68 +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: call __muldi3 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IM-LABEL: muli64_p68: +; RV32IM: # %bb.0: +; RV32IM-NEXT: li a2, 68 +; RV32IM-NEXT: slli a3, a1, 2 +; RV32IM-NEXT: slli a1, a1, 6 +; RV32IM-NEXT: add a1, a1, a3 +; RV32IM-NEXT: slli a3, a0, 2 +; RV32IM-NEXT: mulhu a2, a0, a2 +; RV32IM-NEXT: slli a0, a0, 6 +; RV32IM-NEXT: add a1, a2, a1 +; RV32IM-NEXT: add a0, a0, a3 +; RV32IM-NEXT: ret +; +; RV64I-LABEL: muli64_p68: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a1, a0, 2 +; RV64I-NEXT: slli a0, a0, 6 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64IM-LABEL: muli64_p68: +; RV64IM: # %bb.0: +; RV64IM-NEXT: slli a1, a0, 2 +; RV64IM-NEXT: slli a0, a0, 6 +; RV64IM-NEXT: add a0, a0, a1 +; RV64IM-NEXT: ret + %1 = mul i64 %a, 68 + ret i64 %1 +} + define i32 @muli32_m63(i32 %a) nounwind { ; RV32I-LABEL: muli32_m63: ; RV32I: # %bb.0: @@ -1058,8 +1184,10 @@ define i64 @muli64_m65(i64 %a) nounwind { define i32 @muli32_p384(i32 %a) nounwind { ; RV32I-LABEL: muli32_p384: ; RV32I: # %bb.0: -; 
RV32I-NEXT: li a1, 384 -; RV32I-NEXT: tail __mulsi3 +; RV32I-NEXT: slli a1, a0, 7 +; RV32I-NEXT: slli a0, a0, 9 +; RV32I-NEXT: sub a0, a0, a1 +; RV32I-NEXT: ret ; ; RV32IM-LABEL: muli32_p384: ; RV32IM: # %bb.0: @@ -1088,8 +1216,10 @@ define i32 @muli32_p384(i32 %a) nounwind { define i32 @muli32_p12288(i32 %a) nounwind { ; RV32I-LABEL: muli32_p12288: ; RV32I: # %bb.0: -; RV32I-NEXT: lui a1, 3 -; RV32I-NEXT: tail __mulsi3 +; RV32I-NEXT: slli a1, a0, 12 +; RV32I-NEXT: slli a0, a0, 14 +; RV32I-NEXT: sub a0, a0, a1 +; RV32I-NEXT: ret ; ; RV32IM-LABEL: muli32_p12288: ; RV32IM: # %bb.0: @@ -1214,9 +1344,11 @@ define i32 @muli32_m3840(i32 %a) nounwind { define i32 @muli32_m4352(i32 %a) nounwind { ; RV32I-LABEL: muli32_m4352: ; RV32I: # %bb.0: -; RV32I-NEXT: li a1, -17 -; RV32I-NEXT: slli a1, a1, 8 -; RV32I-NEXT: tail __mulsi3 +; RV32I-NEXT: slli a1, a0, 12 +; RV32I-NEXT: slli a0, a0, 8 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: neg a0, a0 +; RV32I-NEXT: ret ; ; RV32IM-LABEL: muli32_m4352: ; RV32IM: # %bb.0: @@ -1227,13 +1359,10 @@ define i32 @muli32_m4352(i32 %a) nounwind { ; ; RV64I-LABEL: muli32_m4352: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: li a1, -17 -; RV64I-NEXT: slli a1, a1, 8 -; RV64I-NEXT: call __muldi3 -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: slli a1, a0, 12 +; RV64I-NEXT: slli a0, a0, 8 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: neg a0, a0 ; RV64I-NEXT: ret ; ; RV64IM-LABEL: muli32_m4352: @@ -1368,9 +1497,11 @@ define i64 @muli64_m4352(i64 %a) nounwind { ; ; RV64I-LABEL: muli64_m4352: ; RV64I: # %bb.0: -; RV64I-NEXT: li a1, -17 -; RV64I-NEXT: slli a1, a1, 8 -; RV64I-NEXT: tail __muldi3 +; RV64I-NEXT: slli a1, a0, 12 +; RV64I-NEXT: slli a0, a0, 8 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: neg a0, a0 +; RV64I-NEXT: ret ; ; RV64IM-LABEL: muli64_m4352: ; RV64IM: # %bb.0: @@ -1459,10 +1590,10 @@ define i128 @muli128_m3840(i128 %a) nounwind { ; RV32I-NEXT: sltu a7, a5, a4 ; RV32I-NEXT: sub a6, a6, t2 ; RV32I-NEXT: mv t1, a7 -; RV32I-NEXT: beq t0, a3, .LBB40_2 +; RV32I-NEXT: beq t0, a3, .LBB43_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: sltu t1, t0, a3 -; RV32I-NEXT: .LBB40_2: +; RV32I-NEXT: .LBB43_2: ; RV32I-NEXT: sub a2, a2, a1 ; RV32I-NEXT: sub a1, t0, a3 ; RV32I-NEXT: sub a5, a5, a4 @@ -1573,10 +1704,10 @@ define i128 @muli128_m63(i128 %a) nounwind { ; RV32I-NEXT: sltu a7, a3, a6 ; RV32I-NEXT: or t0, t0, a5 ; RV32I-NEXT: mv a5, a7 -; RV32I-NEXT: beq a4, t0, .LBB41_2 +; RV32I-NEXT: beq a4, t0, .LBB44_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: sltu a5, a4, t0 -; RV32I-NEXT: .LBB41_2: +; RV32I-NEXT: .LBB44_2: ; RV32I-NEXT: srli t1, a4, 26 ; RV32I-NEXT: slli t2, a2, 6 ; RV32I-NEXT: srli t3, a2, 26 @@ -2001,8 +2132,10 @@ define i64 @muland_demand(i64 %x) nounwind { ; RV64I-NEXT: li a1, -29 ; RV64I-NEXT: srli a1, a1, 2 ; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: li a1, 12 -; RV64I-NEXT: tail __muldi3 +; RV64I-NEXT: slli a1, a0, 2 +; RV64I-NEXT: slli a0, a0, 4 +; RV64I-NEXT: sub a0, a0, a1 +; RV64I-NEXT: ret ; ; RV64IM-LABEL: muland_demand: ; RV64IM: # %bb.0: @@ -2037,9 +2170,10 @@ define i64 @mulzext_demand(i32 signext %x) nounwind { ; ; RV64I-LABEL: mulzext_demand: ; RV64I: # %bb.0: -; RV64I-NEXT: li a1, 3 -; RV64I-NEXT: slli a1, a1, 32 -; RV64I-NEXT: tail __muldi3 +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: slli a0, a0, 34 +; RV64I-NEXT: sub a0, a0, a1 +; RV64I-NEXT: ret ; ; RV64IM-LABEL: mulzext_demand: ; RV64IM: # %bb.0: @@ -2056,8 +2190,20 @@ define i32 
@mulfshl_demand(i32 signext %x) nounwind { ; RV32I-LABEL: mulfshl_demand: ; RV32I: # %bb.0: ; RV32I-NEXT: srli a0, a0, 11 -; RV32I-NEXT: lui a1, 92808 -; RV32I-NEXT: tail __mulsi3 +; RV32I-NEXT: slli a1, a0, 19 +; RV32I-NEXT: slli a2, a0, 15 +; RV32I-NEXT: slli a3, a0, 21 +; RV32I-NEXT: slli a4, a0, 23 +; RV32I-NEXT: add a1, a2, a1 +; RV32I-NEXT: slli a2, a0, 25 +; RV32I-NEXT: add a3, a3, a4 +; RV32I-NEXT: slli a4, a0, 27 +; RV32I-NEXT: add a2, a2, a4 +; RV32I-NEXT: slli a0, a0, 29 +; RV32I-NEXT: add a1, a1, a3 +; RV32I-NEXT: sub a2, a2, a0 +; RV32I-NEXT: sub a0, a1, a2 +; RV32I-NEXT: ret ; ; RV32IM-LABEL: mulfshl_demand: ; RV32IM: # %bb.0: @@ -2068,13 +2214,20 @@ define i32 @mulfshl_demand(i32 signext %x) nounwind { ; ; RV64I-LABEL: mulfshl_demand: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: srliw a0, a0, 11 -; RV64I-NEXT: lui a1, 92808 -; RV64I-NEXT: call __muldi3 -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: slli a1, a0, 19 +; RV64I-NEXT: slli a2, a0, 15 +; RV64I-NEXT: slli a3, a0, 21 +; RV64I-NEXT: slli a4, a0, 23 +; RV64I-NEXT: add a1, a2, a1 +; RV64I-NEXT: slli a2, a0, 25 +; RV64I-NEXT: add a3, a3, a4 +; RV64I-NEXT: slli a4, a0, 27 +; RV64I-NEXT: add a2, a2, a4 +; RV64I-NEXT: slli a0, a0, 29 +; RV64I-NEXT: add a1, a1, a3 +; RV64I-NEXT: sub a2, a2, a0 +; RV64I-NEXT: sub a0, a1, a2 ; RV64I-NEXT: ret ; ; RV64IM-LABEL: mulfshl_demand: @@ -2091,8 +2244,20 @@ define i32 @mulfshl_demand(i32 signext %x) nounwind { define i32 @mulor_demand(i32 signext %x, i32 signext %y) nounwind { ; RV32I-LABEL: mulor_demand: ; RV32I: # %bb.0: -; RV32I-NEXT: lui a1, 92808 -; RV32I-NEXT: tail __mulsi3 +; RV32I-NEXT: slli a1, a0, 19 +; RV32I-NEXT: slli a2, a0, 15 +; RV32I-NEXT: slli a3, a0, 21 +; RV32I-NEXT: slli a4, a0, 23 +; RV32I-NEXT: add a1, a2, a1 +; RV32I-NEXT: slli a2, a0, 25 +; RV32I-NEXT: add a3, a3, a4 +; RV32I-NEXT: slli a4, a0, 27 +; RV32I-NEXT: add a2, a2, a4 +; RV32I-NEXT: slli a0, a0, 29 +; RV32I-NEXT: add a1, a1, a3 +; RV32I-NEXT: sub a2, a2, a0 +; RV32I-NEXT: sub a0, a1, a2 +; RV32I-NEXT: ret ; ; RV32IM-LABEL: mulor_demand: ; RV32IM: # %bb.0: @@ -2102,12 +2267,19 @@ define i32 @mulor_demand(i32 signext %x, i32 signext %y) nounwind { ; ; RV64I-LABEL: mulor_demand: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: lui a1, 92808 -; RV64I-NEXT: call __muldi3 -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: slli a1, a0, 19 +; RV64I-NEXT: slli a2, a0, 15 +; RV64I-NEXT: slli a3, a0, 21 +; RV64I-NEXT: slli a4, a0, 23 +; RV64I-NEXT: add a1, a2, a1 +; RV64I-NEXT: slli a2, a0, 25 +; RV64I-NEXT: add a3, a3, a4 +; RV64I-NEXT: slli a4, a0, 27 +; RV64I-NEXT: add a2, a2, a4 +; RV64I-NEXT: slli a0, a0, 29 +; RV64I-NEXT: add a1, a1, a3 +; RV64I-NEXT: sub a2, a2, a0 +; RV64I-NEXT: sub a0, a1, a2 ; RV64I-NEXT: ret ; ; RV64IM-LABEL: mulor_demand: diff --git a/llvm/test/CodeGen/RISCV/rv64xtheadbb.ll b/llvm/test/CodeGen/RISCV/rv64xtheadbb.ll index d9f7d36127293..10ef3357d4783 100644 --- a/llvm/test/CodeGen/RISCV/rv64xtheadbb.ll +++ b/llvm/test/CodeGen/RISCV/rv64xtheadbb.ll @@ -357,20 +357,33 @@ define signext i32 @cttz_i32(i32 signext %a) nounwind { ; RV64I: # %bb.0: ; RV64I-NEXT: beqz a0, .LBB6_2 ; RV64I-NEXT: # %bb.1: # %cond.false -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: neg a1, a0 +; RV64I-NEXT: negw a1, a0 ; RV64I-NEXT: and 
a0, a0, a1 -; RV64I-NEXT: lui a1, 30667 -; RV64I-NEXT: addiw a1, a1, 1329 -; RV64I-NEXT: call __muldi3 +; RV64I-NEXT: slli a1, a0, 6 +; RV64I-NEXT: slli a2, a0, 8 +; RV64I-NEXT: slli a3, a0, 10 +; RV64I-NEXT: slli a4, a0, 12 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: slli a2, a0, 16 +; RV64I-NEXT: subw a3, a3, a4 +; RV64I-NEXT: slli a4, a0, 18 +; RV64I-NEXT: subw a2, a2, a4 +; RV64I-NEXT: slli a4, a0, 4 +; RV64I-NEXT: subw a4, a0, a4 +; RV64I-NEXT: add a1, a4, a1 +; RV64I-NEXT: slli a4, a0, 14 +; RV64I-NEXT: subw a3, a3, a4 +; RV64I-NEXT: slli a4, a0, 23 +; RV64I-NEXT: subw a2, a2, a4 +; RV64I-NEXT: slli a0, a0, 27 +; RV64I-NEXT: add a1, a1, a3 +; RV64I-NEXT: add a0, a2, a0 +; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: srliw a0, a0, 27 ; RV64I-NEXT: lui a1, %hi(.LCPI6_0) ; RV64I-NEXT: addi a1, a1, %lo(.LCPI6_0) ; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: lbu a0, 0(a0) -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; RV64I-NEXT: .LBB6_2: ; RV64I-NEXT: li a0, 32 @@ -397,20 +410,33 @@ define signext i32 @cttz_i32(i32 signext %a) nounwind { define signext i32 @cttz_zero_undef_i32(i32 signext %a) nounwind { ; RV64I-LABEL: cttz_zero_undef_i32: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: neg a1, a0 +; RV64I-NEXT: negw a1, a0 ; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: lui a1, 30667 -; RV64I-NEXT: addiw a1, a1, 1329 -; RV64I-NEXT: call __muldi3 +; RV64I-NEXT: slli a1, a0, 6 +; RV64I-NEXT: slli a2, a0, 8 +; RV64I-NEXT: slli a3, a0, 10 +; RV64I-NEXT: slli a4, a0, 12 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: slli a2, a0, 16 +; RV64I-NEXT: subw a3, a3, a4 +; RV64I-NEXT: slli a4, a0, 18 +; RV64I-NEXT: subw a2, a2, a4 +; RV64I-NEXT: slli a4, a0, 4 +; RV64I-NEXT: subw a4, a0, a4 +; RV64I-NEXT: add a1, a4, a1 +; RV64I-NEXT: slli a4, a0, 14 +; RV64I-NEXT: subw a3, a3, a4 +; RV64I-NEXT: slli a4, a0, 23 +; RV64I-NEXT: subw a2, a2, a4 +; RV64I-NEXT: slli a0, a0, 27 +; RV64I-NEXT: add a1, a1, a3 +; RV64I-NEXT: add a0, a2, a0 +; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: srliw a0, a0, 27 ; RV64I-NEXT: lui a1, %hi(.LCPI7_0) ; RV64I-NEXT: addi a1, a1, %lo(.LCPI7_0) ; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: lbu a0, 0(a0) -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; ; RV64XTHEADBB-LABEL: cttz_zero_undef_i32: @@ -429,26 +455,36 @@ define signext i32 @cttz_zero_undef_i32(i32 signext %a) nounwind { define signext i32 @findFirstSet_i32(i32 signext %a) nounwind { ; RV64I-LABEL: findFirstSet_i32: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: neg a0, a0 -; RV64I-NEXT: and a0, s0, a0 -; RV64I-NEXT: lui a1, 30667 -; RV64I-NEXT: addiw a1, a1, 1329 -; RV64I-NEXT: call __muldi3 -; RV64I-NEXT: srliw a0, a0, 27 -; RV64I-NEXT: lui a1, %hi(.LCPI8_0) -; RV64I-NEXT: addi a1, a1, %lo(.LCPI8_0) -; RV64I-NEXT: add a0, a1, a0 -; RV64I-NEXT: lbu a0, 0(a0) -; RV64I-NEXT: snez a1, s0 -; RV64I-NEXT: addi a1, a1, -1 -; RV64I-NEXT: or a0, a1, a0 -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: negw a1, a0 +; RV64I-NEXT: and a1, a0, a1 +; RV64I-NEXT: slli a2, a1, 6 +; RV64I-NEXT: slli a3, a1, 8 +; RV64I-NEXT: slli a4, a1, 10 +; RV64I-NEXT: slli a5, a1, 12 +; RV64I-NEXT: add a2, a2, a3 +; RV64I-NEXT: slli a3, a1, 16 +; 
RV64I-NEXT: subw a4, a4, a5 +; RV64I-NEXT: slli a5, a1, 18 +; RV64I-NEXT: subw a3, a3, a5 +; RV64I-NEXT: slli a5, a1, 4 +; RV64I-NEXT: subw a5, a1, a5 +; RV64I-NEXT: add a2, a5, a2 +; RV64I-NEXT: slli a5, a1, 14 +; RV64I-NEXT: subw a4, a4, a5 +; RV64I-NEXT: slli a5, a1, 23 +; RV64I-NEXT: subw a3, a3, a5 +; RV64I-NEXT: slli a1, a1, 27 +; RV64I-NEXT: add a2, a2, a4 +; RV64I-NEXT: add a1, a3, a1 +; RV64I-NEXT: add a1, a2, a1 +; RV64I-NEXT: srliw a1, a1, 27 +; RV64I-NEXT: lui a2, %hi(.LCPI8_0) +; RV64I-NEXT: addi a2, a2, %lo(.LCPI8_0) +; RV64I-NEXT: add a1, a2, a1 +; RV64I-NEXT: lbu a1, 0(a1) +; RV64I-NEXT: snez a0, a0 +; RV64I-NEXT: addi a0, a0, -1 +; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: ret ; ; RV64XTHEADBB-LABEL: findFirstSet_i32: @@ -472,27 +508,37 @@ define signext i32 @findFirstSet_i32(i32 signext %a) nounwind { define signext i32 @ffs_i32(i32 signext %a) nounwind { ; RV64I-LABEL: ffs_i32: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: neg a0, a0 -; RV64I-NEXT: and a0, s0, a0 -; RV64I-NEXT: lui a1, 30667 -; RV64I-NEXT: addiw a1, a1, 1329 -; RV64I-NEXT: call __muldi3 -; RV64I-NEXT: srliw a0, a0, 27 -; RV64I-NEXT: lui a1, %hi(.LCPI9_0) -; RV64I-NEXT: addi a1, a1, %lo(.LCPI9_0) -; RV64I-NEXT: add a0, a1, a0 -; RV64I-NEXT: lbu a0, 0(a0) -; RV64I-NEXT: seqz a1, s0 -; RV64I-NEXT: addi a0, a0, 1 -; RV64I-NEXT: addi a1, a1, -1 -; RV64I-NEXT: and a0, a1, a0 -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: negw a1, a0 +; RV64I-NEXT: and a1, a0, a1 +; RV64I-NEXT: slli a2, a1, 6 +; RV64I-NEXT: slli a3, a1, 8 +; RV64I-NEXT: slli a4, a1, 10 +; RV64I-NEXT: slli a5, a1, 12 +; RV64I-NEXT: add a2, a2, a3 +; RV64I-NEXT: slli a3, a1, 16 +; RV64I-NEXT: subw a4, a4, a5 +; RV64I-NEXT: slli a5, a1, 18 +; RV64I-NEXT: subw a3, a3, a5 +; RV64I-NEXT: slli a5, a1, 4 +; RV64I-NEXT: subw a5, a1, a5 +; RV64I-NEXT: add a2, a5, a2 +; RV64I-NEXT: slli a5, a1, 14 +; RV64I-NEXT: subw a4, a4, a5 +; RV64I-NEXT: slli a5, a1, 23 +; RV64I-NEXT: subw a3, a3, a5 +; RV64I-NEXT: add a2, a2, a4 +; RV64I-NEXT: lui a4, %hi(.LCPI9_0) +; RV64I-NEXT: addi a4, a4, %lo(.LCPI9_0) +; RV64I-NEXT: slli a1, a1, 27 +; RV64I-NEXT: add a1, a3, a1 +; RV64I-NEXT: add a1, a2, a1 +; RV64I-NEXT: srliw a1, a1, 27 +; RV64I-NEXT: add a1, a4, a1 +; RV64I-NEXT: lbu a1, 0(a1) +; RV64I-NEXT: seqz a0, a0 +; RV64I-NEXT: addi a1, a1, 1 +; RV64I-NEXT: addi a0, a0, -1 +; RV64I-NEXT: and a0, a0, a1 ; RV64I-NEXT: ret ; ; RV64XTHEADBB-LABEL: ffs_i32: diff --git a/llvm/test/CodeGen/RISCV/rv64zbb.ll b/llvm/test/CodeGen/RISCV/rv64zbb.ll index 17eb0817d548a..3cd1931b6ae4c 100644 --- a/llvm/test/CodeGen/RISCV/rv64zbb.ll +++ b/llvm/test/CodeGen/RISCV/rv64zbb.ll @@ -347,20 +347,33 @@ define signext i32 @cttz_i32(i32 signext %a) nounwind { ; RV64I: # %bb.0: ; RV64I-NEXT: beqz a0, .LBB6_2 ; RV64I-NEXT: # %bb.1: # %cond.false -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: neg a1, a0 +; RV64I-NEXT: negw a1, a0 ; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: lui a1, 30667 -; RV64I-NEXT: addiw a1, a1, 1329 -; RV64I-NEXT: call __muldi3 +; RV64I-NEXT: slli a1, a0, 6 +; RV64I-NEXT: slli a2, a0, 8 +; RV64I-NEXT: slli a3, a0, 10 +; RV64I-NEXT: slli a4, a0, 12 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: slli a2, a0, 16 +; RV64I-NEXT: subw a3, a3, a4 +; RV64I-NEXT: slli a4, a0, 18 +; RV64I-NEXT: subw 
a2, a2, a4 +; RV64I-NEXT: slli a4, a0, 4 +; RV64I-NEXT: subw a4, a0, a4 +; RV64I-NEXT: add a1, a4, a1 +; RV64I-NEXT: slli a4, a0, 14 +; RV64I-NEXT: subw a3, a3, a4 +; RV64I-NEXT: slli a4, a0, 23 +; RV64I-NEXT: subw a2, a2, a4 +; RV64I-NEXT: slli a0, a0, 27 +; RV64I-NEXT: add a1, a1, a3 +; RV64I-NEXT: add a0, a2, a0 +; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: srliw a0, a0, 27 ; RV64I-NEXT: lui a1, %hi(.LCPI6_0) ; RV64I-NEXT: addi a1, a1, %lo(.LCPI6_0) ; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: lbu a0, 0(a0) -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; RV64I-NEXT: .LBB6_2: ; RV64I-NEXT: li a0, 32 @@ -377,20 +390,33 @@ define signext i32 @cttz_i32(i32 signext %a) nounwind { define signext i32 @cttz_zero_undef_i32(i32 signext %a) nounwind { ; RV64I-LABEL: cttz_zero_undef_i32: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: neg a1, a0 +; RV64I-NEXT: negw a1, a0 ; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: lui a1, 30667 -; RV64I-NEXT: addiw a1, a1, 1329 -; RV64I-NEXT: call __muldi3 +; RV64I-NEXT: slli a1, a0, 6 +; RV64I-NEXT: slli a2, a0, 8 +; RV64I-NEXT: slli a3, a0, 10 +; RV64I-NEXT: slli a4, a0, 12 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: slli a2, a0, 16 +; RV64I-NEXT: subw a3, a3, a4 +; RV64I-NEXT: slli a4, a0, 18 +; RV64I-NEXT: subw a2, a2, a4 +; RV64I-NEXT: slli a4, a0, 4 +; RV64I-NEXT: subw a4, a0, a4 +; RV64I-NEXT: add a1, a4, a1 +; RV64I-NEXT: slli a4, a0, 14 +; RV64I-NEXT: subw a3, a3, a4 +; RV64I-NEXT: slli a4, a0, 23 +; RV64I-NEXT: subw a2, a2, a4 +; RV64I-NEXT: slli a0, a0, 27 +; RV64I-NEXT: add a1, a1, a3 +; RV64I-NEXT: add a0, a2, a0 +; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: srliw a0, a0, 27 ; RV64I-NEXT: lui a1, %hi(.LCPI7_0) ; RV64I-NEXT: addi a1, a1, %lo(.LCPI7_0) ; RV64I-NEXT: add a0, a1, a0 ; RV64I-NEXT: lbu a0, 0(a0) -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; ; RV64ZBB-LABEL: cttz_zero_undef_i32: @@ -404,26 +430,36 @@ define signext i32 @cttz_zero_undef_i32(i32 signext %a) nounwind { define signext i32 @findFirstSet_i32(i32 signext %a) nounwind { ; RV64I-LABEL: findFirstSet_i32: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: neg a0, a0 -; RV64I-NEXT: and a0, s0, a0 -; RV64I-NEXT: lui a1, 30667 -; RV64I-NEXT: addiw a1, a1, 1329 -; RV64I-NEXT: call __muldi3 -; RV64I-NEXT: srliw a0, a0, 27 -; RV64I-NEXT: lui a1, %hi(.LCPI8_0) -; RV64I-NEXT: addi a1, a1, %lo(.LCPI8_0) -; RV64I-NEXT: add a0, a1, a0 -; RV64I-NEXT: lbu a0, 0(a0) -; RV64I-NEXT: snez a1, s0 -; RV64I-NEXT: addi a1, a1, -1 -; RV64I-NEXT: or a0, a1, a0 -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: negw a1, a0 +; RV64I-NEXT: and a1, a0, a1 +; RV64I-NEXT: slli a2, a1, 6 +; RV64I-NEXT: slli a3, a1, 8 +; RV64I-NEXT: slli a4, a1, 10 +; RV64I-NEXT: slli a5, a1, 12 +; RV64I-NEXT: add a2, a2, a3 +; RV64I-NEXT: slli a3, a1, 16 +; RV64I-NEXT: subw a4, a4, a5 +; RV64I-NEXT: slli a5, a1, 18 +; RV64I-NEXT: subw a3, a3, a5 +; RV64I-NEXT: slli a5, a1, 4 +; RV64I-NEXT: subw a5, a1, a5 +; RV64I-NEXT: add a2, a5, a2 +; RV64I-NEXT: slli a5, a1, 14 +; RV64I-NEXT: subw a4, a4, a5 +; RV64I-NEXT: slli a5, a1, 23 +; RV64I-NEXT: subw a3, a3, a5 +; RV64I-NEXT: slli a1, a1, 27 +; RV64I-NEXT: add a2, a2, a4 +; RV64I-NEXT: 
add a1, a3, a1 +; RV64I-NEXT: add a1, a2, a1 +; RV64I-NEXT: srliw a1, a1, 27 +; RV64I-NEXT: lui a2, %hi(.LCPI8_0) +; RV64I-NEXT: addi a2, a2, %lo(.LCPI8_0) +; RV64I-NEXT: add a1, a2, a1 +; RV64I-NEXT: lbu a1, 0(a1) +; RV64I-NEXT: snez a0, a0 +; RV64I-NEXT: addi a0, a0, -1 +; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: ret ; ; RV64ZBB-LABEL: findFirstSet_i32: @@ -442,27 +478,37 @@ define signext i32 @findFirstSet_i32(i32 signext %a) nounwind { define signext i32 @ffs_i32(i32 signext %a) nounwind { ; RV64I-LABEL: ffs_i32: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: neg a0, a0 -; RV64I-NEXT: and a0, s0, a0 -; RV64I-NEXT: lui a1, 30667 -; RV64I-NEXT: addiw a1, a1, 1329 -; RV64I-NEXT: call __muldi3 -; RV64I-NEXT: srliw a0, a0, 27 -; RV64I-NEXT: lui a1, %hi(.LCPI9_0) -; RV64I-NEXT: addi a1, a1, %lo(.LCPI9_0) -; RV64I-NEXT: add a0, a1, a0 -; RV64I-NEXT: lbu a0, 0(a0) -; RV64I-NEXT: seqz a1, s0 -; RV64I-NEXT: addi a0, a0, 1 -; RV64I-NEXT: addi a1, a1, -1 -; RV64I-NEXT: and a0, a1, a0 -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: negw a1, a0 +; RV64I-NEXT: and a1, a0, a1 +; RV64I-NEXT: slli a2, a1, 6 +; RV64I-NEXT: slli a3, a1, 8 +; RV64I-NEXT: slli a4, a1, 10 +; RV64I-NEXT: slli a5, a1, 12 +; RV64I-NEXT: add a2, a2, a3 +; RV64I-NEXT: slli a3, a1, 16 +; RV64I-NEXT: subw a4, a4, a5 +; RV64I-NEXT: slli a5, a1, 18 +; RV64I-NEXT: subw a3, a3, a5 +; RV64I-NEXT: slli a5, a1, 4 +; RV64I-NEXT: subw a5, a1, a5 +; RV64I-NEXT: add a2, a5, a2 +; RV64I-NEXT: slli a5, a1, 14 +; RV64I-NEXT: subw a4, a4, a5 +; RV64I-NEXT: slli a5, a1, 23 +; RV64I-NEXT: subw a3, a3, a5 +; RV64I-NEXT: add a2, a2, a4 +; RV64I-NEXT: lui a4, %hi(.LCPI9_0) +; RV64I-NEXT: addi a4, a4, %lo(.LCPI9_0) +; RV64I-NEXT: slli a1, a1, 27 +; RV64I-NEXT: add a1, a3, a1 +; RV64I-NEXT: add a1, a2, a1 +; RV64I-NEXT: srliw a1, a1, 27 +; RV64I-NEXT: add a1, a4, a1 +; RV64I-NEXT: lbu a1, 0(a1) +; RV64I-NEXT: seqz a0, a0 +; RV64I-NEXT: addi a1, a1, 1 +; RV64I-NEXT: addi a0, a0, -1 +; RV64I-NEXT: and a0, a0, a1 ; RV64I-NEXT: ret ; ; RV64ZBB-LABEL: ffs_i32: diff --git a/llvm/test/CodeGen/RISCV/rvv/known-never-zero.ll b/llvm/test/CodeGen/RISCV/rvv/known-never-zero.ll index 7990c1c1eabc2..4d9a6aeaad2ef 100644 --- a/llvm/test/CodeGen/RISCV/rvv/known-never-zero.ll +++ b/llvm/test/CodeGen/RISCV/rvv/known-never-zero.ll @@ -9,26 +9,35 @@ define i32 @vscale_known_nonzero() { ; CHECK-LABEL: vscale_known_nonzero: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; CHECK-NEXT: .cfi_offset ra, -8 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 3 -; CHECK-NEXT: neg a1, a0 +; CHECK-NEXT: negw a1, a0 ; CHECK-NEXT: and a0, a0, a1 -; CHECK-NEXT: lui a1, 30667 -; CHECK-NEXT: addiw a1, a1, 1329 -; CHECK-NEXT: call __muldi3 +; CHECK-NEXT: slli a1, a0, 6 +; CHECK-NEXT: slli a2, a0, 8 +; CHECK-NEXT: slli a3, a0, 10 +; CHECK-NEXT: slli a4, a0, 12 +; CHECK-NEXT: add a1, a1, a2 +; CHECK-NEXT: slli a2, a0, 16 +; CHECK-NEXT: subw a3, a3, a4 +; CHECK-NEXT: slli a4, a0, 18 +; CHECK-NEXT: subw a2, a2, a4 +; CHECK-NEXT: slli a4, a0, 4 +; CHECK-NEXT: subw a4, a0, a4 +; CHECK-NEXT: add a1, a4, a1 +; CHECK-NEXT: slli a4, a0, 14 +; CHECK-NEXT: subw a3, a3, a4 +; CHECK-NEXT: slli a4, a0, 23 +; CHECK-NEXT: subw a2, a2, a4 +; CHECK-NEXT: slli a0, a0, 27 +; 
CHECK-NEXT: add a1, a1, a3 +; CHECK-NEXT: add a0, a2, a0 +; CHECK-NEXT: add a0, a1, a0 ; CHECK-NEXT: srliw a0, a0, 27 ; CHECK-NEXT: lui a1, %hi(.LCPI0_0) ; CHECK-NEXT: addi a1, a1, %lo(.LCPI0_0) ; CHECK-NEXT: add a0, a1, a0 ; CHECK-NEXT: lbu a0, 0(a0) -; CHECK-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; CHECK-NEXT: .cfi_restore ra -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: .cfi_def_cfa_offset 0 ; CHECK-NEXT: ret %x = call i32 @llvm.vscale() %r = call i32 @llvm.cttz.i32(i32 %x, i1 false) diff --git a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll index c6503813aeed2..17a09bf7dbe6c 100644 --- a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll +++ b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll @@ -9,40 +9,62 @@ define i1 @test_srem_odd(i29 %X) nounwind { ; RV32-LABEL: test_srem_odd: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32-NEXT: lui a1, 128424 -; RV32-NEXT: addi a1, a1, 331 -; RV32-NEXT: call __mulsi3 -; RV32-NEXT: lui a1, 662 -; RV32-NEXT: addi a1, a1, -83 -; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: lui a1, 1324 -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: srli a0, a0, 3 -; RV32-NEXT: addi a1, a1, -165 -; RV32-NEXT: sltu a0, a0, a1 -; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: slli a1, a0, 4 +; RV32-NEXT: slli a2, a0, 6 +; RV32-NEXT: slli a3, a0, 8 +; RV32-NEXT: slli a4, a0, 15 +; RV32-NEXT: add a1, a1, a2 +; RV32-NEXT: slli a2, a0, 19 +; RV32-NEXT: add a3, a3, a4 +; RV32-NEXT: slli a4, a0, 21 +; RV32-NEXT: add a2, a2, a4 +; RV32-NEXT: slli a4, a0, 2 +; RV32-NEXT: add a4, a0, a4 +; RV32-NEXT: sub a1, a1, a4 +; RV32-NEXT: slli a4, a0, 17 +; RV32-NEXT: add a3, a3, a4 +; RV32-NEXT: slli a0, a0, 23 +; RV32-NEXT: add a0, a2, a0 +; RV32-NEXT: lui a2, 662 +; RV32-NEXT: add a1, a1, a3 +; RV32-NEXT: lui a3, 1324 +; RV32-NEXT: addi a2, a2, -83 +; RV32-NEXT: sub a0, a0, a2 +; RV32-NEXT: sub a1, a1, a0 +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: srli a1, a1, 3 +; RV32-NEXT: addi a0, a3, -165 +; RV32-NEXT: sltu a0, a1, a0 ; RV32-NEXT: ret ; ; RV64-LABEL: test_srem_odd: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -16 -; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64-NEXT: lui a1, 128424 -; RV64-NEXT: addiw a1, a1, 331 -; RV64-NEXT: call __muldi3 -; RV64-NEXT: lui a1, 662 -; RV64-NEXT: addi a1, a1, -83 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: lui a1, 1324 -; RV64-NEXT: slli a0, a0, 35 -; RV64-NEXT: srli a0, a0, 35 -; RV64-NEXT: addiw a1, a1, -165 -; RV64-NEXT: sltu a0, a0, a1 -; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: slli a1, a0, 4 +; RV64-NEXT: slli a2, a0, 6 +; RV64-NEXT: slli a3, a0, 8 +; RV64-NEXT: slli a4, a0, 15 +; RV64-NEXT: add a1, a1, a2 +; RV64-NEXT: slli a2, a0, 19 +; RV64-NEXT: add a3, a3, a4 +; RV64-NEXT: slli a4, a0, 21 +; RV64-NEXT: add a2, a2, a4 +; RV64-NEXT: slli a4, a0, 2 +; RV64-NEXT: add a4, a0, a4 +; RV64-NEXT: subw a1, a1, a4 +; RV64-NEXT: slli a4, a0, 17 +; RV64-NEXT: add a3, a3, a4 +; RV64-NEXT: slli a0, a0, 23 +; RV64-NEXT: add a0, a2, a0 +; RV64-NEXT: lui a2, 662 +; RV64-NEXT: add a1, a1, a3 +; RV64-NEXT: lui a3, 1324 +; RV64-NEXT: addi a2, a2, -83 +; RV64-NEXT: subw a0, a0, a2 +; RV64-NEXT: subw a1, a1, a0 +; RV64-NEXT: slli a1, a1, 35 +; RV64-NEXT: srli a1, a1, 35 +; RV64-NEXT: addiw a0, a3, -165 +; RV64-NEXT: sltu a0, a1, a0 ; RV64-NEXT: ret ; ; RV32M-LABEL: test_srem_odd: @@ -382,65 +404,122 @@ define void @test_srem_vec(ptr %X) 
nounwind { ; RV64-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64-NEXT: sd s3, 8(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s4, 0(sp) # 8-byte Folded Spill ; RV64-NEXT: mv s0, a0 ; RV64-NEXT: lbu a0, 12(a0) -; RV64-NEXT: ld a1, 0(s0) -; RV64-NEXT: lwu a2, 8(s0) +; RV64-NEXT: ld s3, 0(s0) +; RV64-NEXT: lwu a1, 8(s0) ; RV64-NEXT: slli a0, a0, 32 -; RV64-NEXT: srli a3, a1, 2 -; RV64-NEXT: or a0, a2, a0 -; RV64-NEXT: slli a2, a2, 62 -; RV64-NEXT: slli a1, a1, 31 -; RV64-NEXT: or a2, a2, a3 -; RV64-NEXT: slli s1, a0, 29 -; RV64-NEXT: srai a0, a2, 31 -; RV64-NEXT: srai s1, s1, 31 -; RV64-NEXT: srai s2, a1, 31 +; RV64-NEXT: srli a2, s3, 2 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: slli a1, a1, 62 +; RV64-NEXT: or a1, a1, a2 +; RV64-NEXT: slli a2, s3, 31 +; RV64-NEXT: slli a3, a0, 29 +; RV64-NEXT: srai a0, a1, 31 +; RV64-NEXT: srai s2, a3, 31 +; RV64-NEXT: srai s4, a2, 31 ; RV64-NEXT: li a1, 7 ; RV64-NEXT: call __moddi3 -; RV64-NEXT: mv s3, a0 -; RV64-NEXT: li a1, -5 -; RV64-NEXT: mv a0, s1 -; RV64-NEXT: call __moddi3 ; RV64-NEXT: mv s1, a0 -; RV64-NEXT: lui a0, 699051 -; RV64-NEXT: addiw a1, a0, -1365 -; RV64-NEXT: slli a0, a1, 32 -; RV64-NEXT: add a1, a1, a0 +; RV64-NEXT: li a1, -5 ; RV64-NEXT: mv a0, s2 -; RV64-NEXT: call __muldi3 -; RV64-NEXT: lui a1, %hi(.LCPI3_0) -; RV64-NEXT: addi s1, s1, -2 -; RV64-NEXT: addi s3, s3, -1 -; RV64-NEXT: ld a1, %lo(.LCPI3_0)(a1) -; RV64-NEXT: seqz a2, s1 -; RV64-NEXT: seqz a3, s3 -; RV64-NEXT: addi a3, a3, -1 -; RV64-NEXT: addi a2, a2, -1 -; RV64-NEXT: slli a4, a2, 2 -; RV64-NEXT: slli a5, a3, 31 -; RV64-NEXT: srli a5, a5, 62 -; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: or a4, a5, a4 -; RV64-NEXT: slli a5, a0, 63 -; RV64-NEXT: srli a0, a0, 1 -; RV64-NEXT: or a0, a0, a5 -; RV64-NEXT: slli a2, a2, 29 -; RV64-NEXT: slli a3, a3, 33 -; RV64-NEXT: srli a2, a2, 61 -; RV64-NEXT: sltu a0, a1, a0 -; RV64-NEXT: neg a0, a0 -; RV64-NEXT: slli a0, a0, 31 -; RV64-NEXT: srli a0, a0, 31 -; RV64-NEXT: or a0, a0, a3 -; RV64-NEXT: sd a0, 0(s0) +; RV64-NEXT: call __moddi3 +; RV64-NEXT: slli a1, s4, 4 +; RV64-NEXT: slli a2, s4, 6 +; RV64-NEXT: slli a3, s4, 8 +; RV64-NEXT: slli a4, s4, 10 +; RV64-NEXT: slli a5, s4, 14 +; RV64-NEXT: slli a6, s4, 16 +; RV64-NEXT: slli a7, s4, 22 +; RV64-NEXT: add a1, a1, a2 +; RV64-NEXT: slli a2, s4, 24 +; RV64-NEXT: add a3, a3, a4 +; RV64-NEXT: slli a4, s3, 32 +; RV64-NEXT: add a5, a5, a6 +; RV64-NEXT: slli a6, s3, 34 +; RV64-NEXT: add a2, a7, a2 +; RV64-NEXT: slli a7, s3, 48 +; RV64-NEXT: add a4, a4, a6 +; RV64-NEXT: slli a6, s3, 50 +; RV64-NEXT: add a6, a7, a6 +; RV64-NEXT: slli a7, s4, 2 +; RV64-NEXT: add a7, s4, a7 +; RV64-NEXT: add a1, a7, a1 +; RV64-NEXT: slli a7, s4, 12 +; RV64-NEXT: add a3, a3, a7 +; RV64-NEXT: slli a7, s4, 18 +; RV64-NEXT: add a5, a5, a7 +; RV64-NEXT: slli a7, s4, 26 +; RV64-NEXT: add a2, a2, a7 +; RV64-NEXT: slli a7, s3, 36 +; RV64-NEXT: add a4, a4, a7 +; RV64-NEXT: slli a7, s3, 52 +; RV64-NEXT: add a6, a6, a7 +; RV64-NEXT: add a1, a1, a3 +; RV64-NEXT: slli a3, s4, 20 +; RV64-NEXT: add a3, a5, a3 +; RV64-NEXT: slli a5, s4, 28 +; RV64-NEXT: add a2, a2, a5 +; RV64-NEXT: slli a5, s3, 38 +; RV64-NEXT: add a4, a4, a5 +; RV64-NEXT: slli a5, s3, 54 +; RV64-NEXT: add a5, a6, a5 +; RV64-NEXT: add a1, a1, a3 +; RV64-NEXT: slli s4, s4, 30 +; RV64-NEXT: add a2, a2, s4 +; RV64-NEXT: slli a3, s3, 40 +; RV64-NEXT: add a3, a4, a3 +; RV64-NEXT: slli a4, s3, 56 +; RV64-NEXT: add a4, a5, a4 +; RV64-NEXT: slli a5, s3, 42 +; RV64-NEXT: add a1, a1, a2 +; RV64-NEXT: slli a2, s3, 58 +; RV64-NEXT: addi 
a0, a0, -2 +; RV64-NEXT: addi s1, s1, -1 +; RV64-NEXT: seqz a0, a0 +; RV64-NEXT: seqz a6, s1 +; RV64-NEXT: addi a6, a6, -1 +; RV64-NEXT: addi a0, a0, -1 +; RV64-NEXT: add a3, a3, a5 +; RV64-NEXT: slli a5, a0, 2 +; RV64-NEXT: add a2, a4, a2 +; RV64-NEXT: slli a4, a6, 31 +; RV64-NEXT: srli a4, a4, 62 +; RV64-NEXT: or a4, a4, a5 +; RV64-NEXT: slli a5, s3, 44 +; RV64-NEXT: add a3, a3, a5 +; RV64-NEXT: slli a5, s3, 60 +; RV64-NEXT: add a2, a2, a5 +; RV64-NEXT: slli a5, s3, 46 +; RV64-NEXT: add a3, a3, a5 +; RV64-NEXT: slli s3, s3, 62 +; RV64-NEXT: add a2, a2, s3 +; RV64-NEXT: lui a5, %hi(.LCPI3_0) +; RV64-NEXT: ld a5, %lo(.LCPI3_0)(a5) +; RV64-NEXT: slli a0, a0, 29 +; RV64-NEXT: slli a6, a6, 33 +; RV64-NEXT: srli a0, a0, 61 +; RV64-NEXT: add a1, a1, a3 +; RV64-NEXT: sub a2, a5, a2 +; RV64-NEXT: sub a2, a2, a1 +; RV64-NEXT: slli a1, a2, 63 +; RV64-NEXT: srli a2, a2, 1 +; RV64-NEXT: or a1, a2, a1 +; RV64-NEXT: sltu a1, a5, a1 +; RV64-NEXT: neg a1, a1 +; RV64-NEXT: slli a1, a1, 31 +; RV64-NEXT: srli a1, a1, 31 +; RV64-NEXT: or a1, a1, a6 +; RV64-NEXT: sd a1, 0(s0) ; RV64-NEXT: sw a4, 8(s0) -; RV64-NEXT: sb a2, 12(s0) +; RV64-NEXT: sb a0, 12(s0) ; RV64-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64-NEXT: ld s1, 24(sp) # 8-byte Folded Reload ; RV64-NEXT: ld s2, 16(sp) # 8-byte Folded Reload ; RV64-NEXT: ld s3, 8(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s4, 0(sp) # 8-byte Folded Reload ; RV64-NEXT: addi sp, sp, 48 ; RV64-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll index c73a18c8869d5..46e250710f9c1 100644 --- a/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll +++ b/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll @@ -9,30 +9,40 @@ define i1 @test_urem_odd(i13 %X) nounwind { ; RV32-LABEL: test_urem_odd: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32-NEXT: lui a1, 1 -; RV32-NEXT: addi a1, a1, -819 -; RV32-NEXT: call __mulsi3 +; RV32-NEXT: slli a1, a0, 4 +; RV32-NEXT: slli a2, a0, 6 +; RV32-NEXT: slli a3, a0, 8 +; RV32-NEXT: sub a1, a1, a2 +; RV32-NEXT: slli a2, a0, 10 +; RV32-NEXT: sub a3, a3, a2 +; RV32-NEXT: slli a2, a0, 2 +; RV32-NEXT: sub a2, a0, a2 +; RV32-NEXT: slli a0, a0, 12 +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: add a0, a3, a0 +; RV32-NEXT: add a0, a1, a0 ; RV32-NEXT: slli a0, a0, 19 ; RV32-NEXT: srli a0, a0, 19 ; RV32-NEXT: sltiu a0, a0, 1639 -; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: test_urem_odd: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -16 -; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64-NEXT: lui a1, 1 -; RV64-NEXT: addiw a1, a1, -819 -; RV64-NEXT: call __muldi3 +; RV64-NEXT: slli a1, a0, 4 +; RV64-NEXT: slli a2, a0, 6 +; RV64-NEXT: slli a3, a0, 8 +; RV64-NEXT: subw a1, a1, a2 +; RV64-NEXT: slli a2, a0, 10 +; RV64-NEXT: subw a3, a3, a2 +; RV64-NEXT: slli a2, a0, 2 +; RV64-NEXT: subw a2, a0, a2 +; RV64-NEXT: slli a0, a0, 12 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a0, a3, a0 +; RV64-NEXT: add a0, a1, a0 ; RV64-NEXT: slli a0, a0, 51 ; RV64-NEXT: srli a0, a0, 51 ; RV64-NEXT: sltiu a0, a0, 1639 -; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret ; ; RV32M-LABEL: test_urem_odd: @@ -82,42 +92,64 @@ define i1 @test_urem_odd(i13 %X) nounwind { define i1 @test_urem_even(i27 %X) nounwind { ; RV32-LABEL: test_urem_even: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, 
sp, -16 -; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32-NEXT: lui a1, 28087 -; RV32-NEXT: addi a1, a1, -585 -; RV32-NEXT: call __mulsi3 +; RV32-NEXT: slli a1, a0, 6 +; RV32-NEXT: slli a2, a0, 9 +; RV32-NEXT: slli a3, a0, 12 +; RV32-NEXT: slli a4, a0, 15 +; RV32-NEXT: add a1, a1, a2 +; RV32-NEXT: slli a2, a0, 21 +; RV32-NEXT: add a3, a3, a4 +; RV32-NEXT: slli a4, a0, 24 +; RV32-NEXT: add a2, a2, a4 +; RV32-NEXT: slli a4, a0, 3 +; RV32-NEXT: add a4, a0, a4 +; RV32-NEXT: add a1, a4, a1 +; RV32-NEXT: slli a4, a0, 18 +; RV32-NEXT: add a3, a3, a4 +; RV32-NEXT: slli a0, a0, 27 +; RV32-NEXT: sub a0, a0, a2 +; RV32-NEXT: lui a2, 2341 +; RV32-NEXT: add a1, a1, a3 +; RV32-NEXT: sub a0, a0, a1 ; RV32-NEXT: slli a1, a0, 26 ; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: srli a0, a0, 6 ; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: lui a1, 2341 ; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: srli a0, a0, 5 -; RV32-NEXT: addi a1, a1, -1755 +; RV32-NEXT: addi a1, a2, -1755 ; RV32-NEXT: sltu a0, a0, a1 -; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: test_urem_even: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -16 -; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64-NEXT: lui a1, 28087 -; RV64-NEXT: addiw a1, a1, -585 -; RV64-NEXT: call __muldi3 +; RV64-NEXT: slli a1, a0, 6 +; RV64-NEXT: slli a2, a0, 9 +; RV64-NEXT: slli a3, a0, 12 +; RV64-NEXT: slli a4, a0, 15 +; RV64-NEXT: add a1, a1, a2 +; RV64-NEXT: slli a2, a0, 21 +; RV64-NEXT: add a3, a3, a4 +; RV64-NEXT: slli a4, a0, 24 +; RV64-NEXT: add a2, a2, a4 +; RV64-NEXT: slli a4, a0, 3 +; RV64-NEXT: add a4, a0, a4 +; RV64-NEXT: add a1, a4, a1 +; RV64-NEXT: slli a4, a0, 18 +; RV64-NEXT: add a3, a3, a4 +; RV64-NEXT: slli a0, a0, 27 +; RV64-NEXT: subw a0, a0, a2 +; RV64-NEXT: lui a2, 2341 +; RV64-NEXT: add a1, a1, a3 +; RV64-NEXT: subw a0, a0, a1 ; RV64-NEXT: slli a1, a0, 26 ; RV64-NEXT: slli a0, a0, 37 ; RV64-NEXT: srli a0, a0, 38 ; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: lui a1, 2341 ; RV64-NEXT: slli a0, a0, 37 ; RV64-NEXT: srli a0, a0, 37 -; RV64-NEXT: addiw a1, a1, -1755 +; RV64-NEXT: addiw a1, a2, -1755 ; RV64-NEXT: sltu a0, a0, a1 -; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret ; ; RV32M-LABEL: test_urem_even: @@ -256,28 +288,32 @@ define i1 @test_urem_odd_setne(i4 %X) nounwind { define i1 @test_urem_negative_odd(i9 %X) nounwind { ; RV32-LABEL: test_urem_negative_odd: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32-NEXT: li a1, 307 -; RV32-NEXT: call __mulsi3 +; RV32-NEXT: slli a1, a0, 2 +; RV32-NEXT: slli a2, a0, 4 +; RV32-NEXT: slli a3, a0, 6 +; RV32-NEXT: sub a1, a1, a0 +; RV32-NEXT: sub a2, a2, a3 +; RV32-NEXT: sub a1, a1, a2 +; RV32-NEXT: slli a0, a0, 8 +; RV32-NEXT: add a0, a1, a0 ; RV32-NEXT: andi a0, a0, 511 ; RV32-NEXT: sltiu a0, a0, 2 ; RV32-NEXT: xori a0, a0, 1 -; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: test_urem_negative_odd: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -16 -; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64-NEXT: li a1, 307 -; RV64-NEXT: call __muldi3 +; RV64-NEXT: slli a1, a0, 2 +; RV64-NEXT: slli a2, a0, 4 +; RV64-NEXT: slli a3, a0, 6 +; RV64-NEXT: subw a1, a1, a0 +; RV64-NEXT: subw a2, a2, a3 +; RV64-NEXT: subw a1, a1, a2 +; RV64-NEXT: slli a0, a0, 8 +; RV64-NEXT: add a0, a1, a0 ; RV64-NEXT: andi a0, a0, 511 ; RV64-NEXT: sltiu a0, a0, 2 ; RV64-NEXT: xori a0, a0, 1 -; RV64-NEXT: ld ra, 
8(sp) # 8-byte Folded Reload -; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret ; ; RV32M-LABEL: test_urem_negative_odd: @@ -323,117 +359,127 @@ define i1 @test_urem_negative_odd(i9 %X) nounwind { define void @test_urem_vec(ptr %X) nounwind { ; RV32-LABEL: test_urem_vec: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s1, 20(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s4, 8(sp) # 4-byte Folded Spill -; RV32-NEXT: mv s0, a0 -; RV32-NEXT: lbu a0, 4(a0) -; RV32-NEXT: lw a1, 0(s0) -; RV32-NEXT: slli a0, a0, 10 -; RV32-NEXT: srli s1, a1, 22 -; RV32-NEXT: or s1, s1, a0 -; RV32-NEXT: srli s2, a1, 11 -; RV32-NEXT: andi a0, a1, 2047 -; RV32-NEXT: li a1, 683 -; RV32-NEXT: call __mulsi3 -; RV32-NEXT: slli a1, a0, 10 -; RV32-NEXT: slli a0, a0, 21 -; RV32-NEXT: srli a0, a0, 22 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: andi a0, a0, 2047 -; RV32-NEXT: sltiu s3, a0, 342 -; RV32-NEXT: li a1, 819 -; RV32-NEXT: mv a0, s1 -; RV32-NEXT: call __mulsi3 -; RV32-NEXT: addi a0, a0, -1638 -; RV32-NEXT: andi a0, a0, 2047 -; RV32-NEXT: sltiu s1, a0, 2 -; RV32-NEXT: xori s4, s1, 1 -; RV32-NEXT: li a1, 1463 -; RV32-NEXT: mv a0, s2 -; RV32-NEXT: call __mulsi3 -; RV32-NEXT: addi a0, a0, -1463 -; RV32-NEXT: addi s3, s3, -1 -; RV32-NEXT: addi s1, s1, -1 -; RV32-NEXT: andi a0, a0, 2047 -; RV32-NEXT: andi a1, s3, 2047 -; RV32-NEXT: slli s1, s1, 22 -; RV32-NEXT: sltiu a0, a0, 293 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: andi a0, a0, 2047 -; RV32-NEXT: slli a0, a0, 11 -; RV32-NEXT: or a0, a0, s1 -; RV32-NEXT: or a0, a1, a0 -; RV32-NEXT: sw a0, 0(s0) -; RV32-NEXT: sb s4, 4(s0) -; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s1, 20(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s2, 16(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s3, 12(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s4, 8(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: lbu a1, 4(a0) +; RV32-NEXT: lw a2, 0(a0) +; RV32-NEXT: slli a1, a1, 10 +; RV32-NEXT: srli a3, a2, 22 +; RV32-NEXT: srli a4, a2, 11 +; RV32-NEXT: andi a2, a2, 2047 +; RV32-NEXT: or a1, a3, a1 +; RV32-NEXT: slli a3, a2, 2 +; RV32-NEXT: slli a5, a2, 4 +; RV32-NEXT: slli a6, a2, 6 +; RV32-NEXT: slli a7, a2, 8 +; RV32-NEXT: slli t0, a2, 10 +; RV32-NEXT: addi a4, a4, -1 +; RV32-NEXT: addi a1, a1, -2 +; RV32-NEXT: add a2, a2, a3 +; RV32-NEXT: add a5, a5, a6 +; RV32-NEXT: sub a3, t0, a7 +; RV32-NEXT: slli a6, a4, 3 +; RV32-NEXT: slli a7, a4, 6 +; RV32-NEXT: slli t0, a4, 9 +; RV32-NEXT: add a2, a2, a5 +; RV32-NEXT: slli a5, a1, 2 +; RV32-NEXT: add a4, a4, a6 +; RV32-NEXT: slli a6, a1, 4 +; RV32-NEXT: add a7, a7, t0 +; RV32-NEXT: slli t0, a1, 6 +; RV32-NEXT: sub a6, a6, t0 +; RV32-NEXT: slli t0, a1, 8 +; RV32-NEXT: sub a5, a5, a1 +; RV32-NEXT: slli a1, a1, 10 +; RV32-NEXT: sub a1, t0, a1 +; RV32-NEXT: sub a3, a3, a2 +; RV32-NEXT: add a4, a4, a7 +; RV32-NEXT: sub a2, a5, a6 +; RV32-NEXT: slli a5, a3, 10 +; RV32-NEXT: slli a3, a3, 21 +; RV32-NEXT: neg a4, a4 +; RV32-NEXT: sub a2, a2, a1 +; RV32-NEXT: srli a3, a3, 22 +; RV32-NEXT: andi a1, a4, 2047 +; RV32-NEXT: andi a2, a2, 2047 +; RV32-NEXT: or a3, a3, a5 +; RV32-NEXT: sltiu a1, a1, 293 +; RV32-NEXT: sltiu a2, a2, 2 +; RV32-NEXT: andi a3, a3, 2047 +; RV32-NEXT: addi a1, a1, -1 +; RV32-NEXT: xori a4, a2, 1 +; RV32-NEXT: sltiu a3, a3, 342 +; RV32-NEXT: addi a2, a2, 
-1 +; RV32-NEXT: andi a1, a1, 2047 +; RV32-NEXT: addi a3, a3, -1 +; RV32-NEXT: slli a1, a1, 11 +; RV32-NEXT: slli a2, a2, 22 +; RV32-NEXT: andi a3, a3, 2047 +; RV32-NEXT: or a1, a1, a2 +; RV32-NEXT: or a1, a3, a1 +; RV32-NEXT: sw a1, 0(a0) +; RV32-NEXT: sb a4, 4(a0) ; RV32-NEXT: ret ; ; RV64-LABEL: test_urem_vec: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -48 -; RV64-NEXT: sd ra, 40(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s0, 32(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s1, 24(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s2, 16(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; RV64-NEXT: mv s0, a0 -; RV64-NEXT: lbu a0, 4(a0) -; RV64-NEXT: lwu a1, 0(s0) -; RV64-NEXT: slli a0, a0, 32 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: srli s1, a0, 22 -; RV64-NEXT: srli s2, a0, 11 -; RV64-NEXT: andi a0, a0, 2047 -; RV64-NEXT: li a1, 683 -; RV64-NEXT: call __muldi3 -; RV64-NEXT: slli a1, a0, 10 -; RV64-NEXT: slli a0, a0, 53 -; RV64-NEXT: srli a0, a0, 54 -; RV64-NEXT: or a0, a0, a1 -; RV64-NEXT: andi a0, a0, 2047 -; RV64-NEXT: sltiu s3, a0, 342 -; RV64-NEXT: li a1, 1463 -; RV64-NEXT: mv a0, s2 -; RV64-NEXT: call __muldi3 -; RV64-NEXT: addi a0, a0, -1463 -; RV64-NEXT: andi a0, a0, 2047 -; RV64-NEXT: sltiu s2, a0, 293 -; RV64-NEXT: li a1, 819 -; RV64-NEXT: mv a0, s1 -; RV64-NEXT: call __muldi3 -; RV64-NEXT: addi a0, a0, -1638 -; RV64-NEXT: addi s3, s3, -1 -; RV64-NEXT: addi s2, s2, -1 -; RV64-NEXT: andi a0, a0, 2047 -; RV64-NEXT: andi a1, s3, 2047 -; RV64-NEXT: andi a2, s2, 2047 -; RV64-NEXT: sltiu a0, a0, 2 +; RV64-NEXT: lbu a1, 4(a0) +; RV64-NEXT: lwu a2, 0(a0) +; RV64-NEXT: slli a1, a1, 32 +; RV64-NEXT: or a1, a2, a1 +; RV64-NEXT: srli a2, a1, 22 +; RV64-NEXT: srli a3, a1, 11 +; RV64-NEXT: andi a1, a1, 2047 +; RV64-NEXT: slli a4, a1, 2 +; RV64-NEXT: slli a5, a1, 4 +; RV64-NEXT: slli a6, a1, 6 +; RV64-NEXT: slli a7, a1, 8 +; RV64-NEXT: slli t0, a1, 10 +; RV64-NEXT: addi a3, a3, -1 +; RV64-NEXT: addi a2, a2, -2 +; RV64-NEXT: add a1, a1, a4 +; RV64-NEXT: add a5, a5, a6 +; RV64-NEXT: subw a4, t0, a7 +; RV64-NEXT: slli a6, a3, 3 +; RV64-NEXT: slli a7, a3, 6 +; RV64-NEXT: slli t0, a3, 9 +; RV64-NEXT: add a1, a1, a5 +; RV64-NEXT: slli a5, a2, 2 +; RV64-NEXT: add a3, a3, a6 +; RV64-NEXT: slli a6, a2, 4 +; RV64-NEXT: add a7, a7, t0 +; RV64-NEXT: slli t0, a2, 6 +; RV64-NEXT: subw a6, a6, t0 +; RV64-NEXT: slli t0, a2, 8 +; RV64-NEXT: subw a5, a5, a2 +; RV64-NEXT: slli a2, a2, 10 +; RV64-NEXT: subw a2, t0, a2 +; RV64-NEXT: subw a4, a4, a1 +; RV64-NEXT: add a3, a3, a7 +; RV64-NEXT: subw a1, a5, a6 +; RV64-NEXT: slli a5, a4, 10 +; RV64-NEXT: slli a4, a4, 53 +; RV64-NEXT: negw a3, a3 +; RV64-NEXT: subw a1, a1, a2 +; RV64-NEXT: srli a4, a4, 54 +; RV64-NEXT: andi a2, a3, 2047 +; RV64-NEXT: andi a1, a1, 2047 +; RV64-NEXT: or a4, a4, a5 +; RV64-NEXT: sltiu a2, a2, 293 +; RV64-NEXT: sltiu a1, a1, 2 +; RV64-NEXT: andi a3, a4, 2047 +; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: addi a2, a2, -1 +; RV64-NEXT: sltiu a3, a3, 342 +; RV64-NEXT: andi a2, a2, 2047 +; RV64-NEXT: slli a1, a1, 22 +; RV64-NEXT: addi a3, a3, -1 ; RV64-NEXT: slli a2, a2, 11 -; RV64-NEXT: addi a0, a0, -1 -; RV64-NEXT: slli a0, a0, 22 -; RV64-NEXT: or a0, a2, a0 -; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: slli a1, a0, 31 -; RV64-NEXT: srli a1, a1, 63 -; RV64-NEXT: sw a0, 0(s0) -; RV64-NEXT: sb a1, 4(s0) -; RV64-NEXT: ld ra, 40(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s0, 32(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s1, 24(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s2, 16(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s3, 8(sp) # 
8-byte Folded Reload -; RV64-NEXT: addi sp, sp, 48 +; RV64-NEXT: andi a3, a3, 2047 +; RV64-NEXT: or a1, a2, a1 +; RV64-NEXT: or a1, a3, a1 +; RV64-NEXT: slli a2, a1, 31 +; RV64-NEXT: srli a2, a2, 63 +; RV64-NEXT: sw a1, 0(a0) +; RV64-NEXT: sb a2, 4(a0) ; RV64-NEXT: ret ; ; RV32M-LABEL: test_urem_vec: diff --git a/llvm/test/CodeGen/RISCV/xqccmp-additional-stack.ll b/llvm/test/CodeGen/RISCV/xqccmp-additional-stack.ll index 14e6b9bddd0a0..c73d836c45ca3 100644 --- a/llvm/test/CodeGen/RISCV/xqccmp-additional-stack.ll +++ b/llvm/test/CodeGen/RISCV/xqccmp-additional-stack.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ; RUN: llc -mtriple=riscv32 -mattr=+experimental-xqccmp,+e -target-abi ilp32e -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV32 +declare i32 @__mulsi3(i32, i32) + define ptr @func(ptr %s, i32 %_c, ptr %incdec.ptr, i1 %0, i8 %conv14) #0 { ; RV32-LABEL: func: ; RV32: # %bb.0: # %entry @@ -45,8 +47,8 @@ while.body: ; preds = %while.body, %entry br i1 %0, label %while.body, label %while.end while.end: ; preds = %while.body - %or5 = mul i32 %_c, 16843009 - store i32 %or5, ptr null, align 4 + %mul_result = call i32 @__mulsi3(i32 %_c, i32 16843009) + store i32 %mul_result, ptr null, align 4 %1 = and i32 %n.addr.042, 1 %scevgep = getelementptr i8, ptr %incdec.ptr, i32 %1 store i8 %conv14, ptr %scevgep, align 1 diff --git a/llvm/test/CodeGen/RISCV/zcmp-additional-stack.ll b/llvm/test/CodeGen/RISCV/zcmp-additional-stack.ll index c98b9b80378fd..601780e346a0a 100644 --- a/llvm/test/CodeGen/RISCV/zcmp-additional-stack.ll +++ b/llvm/test/CodeGen/RISCV/zcmp-additional-stack.ll @@ -1,5 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ; RUN: llc -mtriple=riscv32 -mattr=+zcmp,+e -target-abi ilp32e -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV32 + +declare i32 @__mulsi3(i32, i32) + define ptr @func(ptr %s, i32 %_c, ptr %incdec.ptr, i1 %0, i8 %conv14) #0 { ; RV32-LABEL: func: ; RV32: # %bb.0: # %entry @@ -44,8 +47,8 @@ while.body: ; preds = %while.body, %entry br i1 %0, label %while.body, label %while.end while.end: ; preds = %while.body - %or5 = mul i32 %_c, 16843009 - store i32 %or5, ptr null, align 4 + %mul_result = call i32 @__mulsi3(i32 %_c, i32 16843009) + store i32 %mul_result, ptr null, align 4 %1 = and i32 %n.addr.042, 1 %scevgep = getelementptr i8, ptr %incdec.ptr, i32 %1 store i8 %conv14, ptr %scevgep, align 1
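
A quick way to sanity-check the RV64I sequences in the cttz/ffs tests above: the removed "lui a1, 30667; addiw a1, a1, 1329; call __muldi3" multiplied by the de Bruijn constant 0x077CB531 (30667 * 4096 + 1329), and the shift amounts that now appear in the checks (4, 6, 8, 10, 12, 14, 16, 18, 23, 27) are exactly the non-zero digits of that constant's non-adjacent form. The standalone host program below is not part of the patch; it recomputes those signed shift terms with the standard NAF recurrence so the expansion can be verified by hand (the main() wrapper and output format are mine, added purely for illustration).

// Standalone sketch (not part of the patch): recompute the shift/add-sub
// terms for the cttz de Bruijn multiplier 0x077CB531 using the standard
// non-adjacent-form recurrence, and confirm they sum back to the constant.
#include <cstdint>
#include <cstdio>

int main() {
  const uint64_t MulAmt = 0x077CB531; // 30667 * 4096 + 1329
  uint64_t Rebuilt = 0;               // wrapping sum of the signed terms
  for (uint64_t E = MulAmt, I = 0; E; ++I, E >>= 1) {
    if (E & 1) {
      bool IsAdd = (E & 3) == 1;      // isolated 1 bit -> add, run of 1s -> subtract
      E -= IsAdd ? 1 : -1;            // clear the low bit (or carry past the run)
      Rebuilt += IsAdd ? (1ULL << I) : -(1ULL << I);
      std::printf("%c x << %llu\n", IsAdd ? '+' : '-', (unsigned long long)I);
    }
  }
  std::printf("rebuilt = 0x%llx (matches: %d)\n",
              (unsigned long long)Rebuilt, Rebuilt == MulAmt);
}

Running it lists +x<<0, -x<<4, +x<<6, +x<<8, +x<<10, -x<<12, -x<<14, +x<<16, -x<<18, -x<<23, +x<<27, which is the same set of terms the new RV64I code folds together with add/subw; the srem/urem checks above decompose their own multipliers the same way.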