Skip to content

[RISCV] Expand constant multiplication for targets without M extension #137195

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
May 16, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 39 additions & 6 deletions llvm/lib/Target/RISCV/RISCVISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include "RISCVSelectionDAGInfo.h"
#include "RISCVSubtarget.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/ValueTracking.h"
Expand Down Expand Up @@ -15502,6 +15503,32 @@ static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
}

// Lower a multiplication by a constant into a chain of shifts plus
// adds/subtracts, for targets that lack a native mul instruction.
// The constant is decomposed into its non-adjacent form (NAF), which
// guarantees no two consecutive signed digits are nonzero and thus
// minimizes the number of add/sub operations emitted.
static SDValue expandMulToNAFSequence(SDNode *N, SelectionDAG &DAG,
                                      uint64_t MulAmt) {
  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  const uint64_t BitWidth = VT.getFixedSizeInBits();

  SDValue Result = DAG.getConstant(0, DL, VT);
  SDValue X = N->getOperand(0);

  // Scan the multiplier from the least significant bit upward, emitting
  // one shifted add or subtract per nonzero NAF digit.
  uint64_t Remaining = MulAmt;
  for (uint64_t Pos = 0; Remaining != 0 && Pos < BitWidth;
       ++Pos, Remaining >>= 1) {
    if ((Remaining & 1) == 0)
      continue;
    // Digit is +1 when the next-higher bit is clear, -1 when it is set;
    // in the -1 case the increment below carries into the higher bits.
    const bool Positive = (Remaining & 3) == 1;
    Remaining += Positive ? -1 : 1;
    SDValue Shifted = DAG.getNode(ISD::SHL, DL, VT, X,
                                  DAG.getShiftAmountConstant(Pos, VT, DL));
    Result = DAG.getNode(Positive ? ISD::ADD : ISD::SUB, DL, VT, Result,
                         Shifted);
  }

  return Result;
}

// X * (2^N +/- 2^M) -> (add/sub (shl X, C1), (shl X, C2))
static SDValue expandMulToAddOrSubOfShl(SDNode *N, SelectionDAG &DAG,
uint64_t MulAmt) {
Expand Down Expand Up @@ -15537,21 +15564,24 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
if (DAG.getMachineFunction().getFunction().hasMinSize())
return SDValue();

if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
return SDValue();

if (VT != Subtarget.getXLenVT())
return SDValue();

const bool HasShlAdd = Subtarget.hasStdExtZba() ||
Subtarget.hasVendorXTHeadBa() ||
Subtarget.hasVendorXAndesPerf();
bool ShouldExpandMul =
(!DCI.isBeforeLegalize() && !DCI.isCalledByLegalizer()) ||
!Subtarget.hasStdExtZmmul();
if (!ShouldExpandMul)
return SDValue();

ConstantSDNode *CNode = dyn_cast<ConstantSDNode>(N->getOperand(1));
if (!CNode)
return SDValue();
uint64_t MulAmt = CNode->getZExtValue();

const bool HasShlAdd = Subtarget.hasStdExtZba() ||
Subtarget.hasVendorXTHeadBa() ||
Subtarget.hasVendorXAndesPerf();

// WARNING: The code below is knowingly incorrect with regards to undef semantics.
// We're adding additional uses of X here, and in principle, we should be freezing
// X before doing so. However, adding freeze here causes real regressions, and no
Expand Down Expand Up @@ -15689,6 +15719,9 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
if (SDValue V = expandMulToAddOrSubOfShl(N, DAG, MulAmt))
return V;

if (!Subtarget.hasStdExtZmmul())
return expandMulToNAFSequence(N, DAG, MulAmt);

return SDValue();
}

Expand Down
58 changes: 42 additions & 16 deletions llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll
Original file line number Diff line number Diff line change
Expand Up @@ -262,20 +262,33 @@ define i32 @test_cttz_i32(i32 %a) nounwind {
; RV64I-NEXT: sext.w a1, a0
; RV64I-NEXT: beqz a1, .LBB2_2
; RV64I-NEXT: # %bb.1: # %cond.false
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: neg a1, a0
; RV64I-NEXT: negw a1, a0
; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: lui a1, 30667
; RV64I-NEXT: addiw a1, a1, 1329
; RV64I-NEXT: call __muldi3
; RV64I-NEXT: slli a1, a0, 6
; RV64I-NEXT: slli a2, a0, 8
; RV64I-NEXT: slli a3, a0, 10
; RV64I-NEXT: slli a4, a0, 12
; RV64I-NEXT: add a1, a1, a2
; RV64I-NEXT: slli a2, a0, 16
; RV64I-NEXT: subw a3, a3, a4
; RV64I-NEXT: slli a4, a0, 18
; RV64I-NEXT: subw a2, a2, a4
; RV64I-NEXT: slli a4, a0, 4
; RV64I-NEXT: subw a4, a0, a4
; RV64I-NEXT: add a1, a4, a1
; RV64I-NEXT: slli a4, a0, 14
; RV64I-NEXT: subw a3, a3, a4
; RV64I-NEXT: slli a4, a0, 23
; RV64I-NEXT: subw a2, a2, a4
; RV64I-NEXT: slli a0, a0, 27
; RV64I-NEXT: add a1, a1, a3
; RV64I-NEXT: add a0, a2, a0
; RV64I-NEXT: add a0, a1, a0
; RV64I-NEXT: srliw a0, a0, 27
; RV64I-NEXT: lui a1, %hi(.LCPI2_0)
; RV64I-NEXT: addi a1, a1, %lo(.LCPI2_0)
; RV64I-NEXT: add a0, a1, a0
; RV64I-NEXT: lbu a0, 0(a0)
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
; RV64I-NEXT: .LBB2_2:
; RV64I-NEXT: li a0, 32
Expand Down Expand Up @@ -730,20 +743,33 @@ define i32 @test_cttz_i32_zero_undef(i32 %a) nounwind {
;
; RV64I-LABEL: test_cttz_i32_zero_undef:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: neg a1, a0
; RV64I-NEXT: negw a1, a0
; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: lui a1, 30667
; RV64I-NEXT: addiw a1, a1, 1329
; RV64I-NEXT: call __muldi3
; RV64I-NEXT: slli a1, a0, 6
; RV64I-NEXT: slli a2, a0, 8
; RV64I-NEXT: slli a3, a0, 10
; RV64I-NEXT: slli a4, a0, 12
; RV64I-NEXT: add a1, a1, a2
; RV64I-NEXT: slli a2, a0, 16
; RV64I-NEXT: subw a3, a3, a4
; RV64I-NEXT: slli a4, a0, 18
; RV64I-NEXT: subw a2, a2, a4
; RV64I-NEXT: slli a4, a0, 4
; RV64I-NEXT: subw a4, a0, a4
; RV64I-NEXT: add a1, a4, a1
; RV64I-NEXT: slli a4, a0, 14
; RV64I-NEXT: subw a3, a3, a4
; RV64I-NEXT: slli a4, a0, 23
; RV64I-NEXT: subw a2, a2, a4
; RV64I-NEXT: slli a0, a0, 27
; RV64I-NEXT: add a1, a1, a3
; RV64I-NEXT: add a0, a2, a0
; RV64I-NEXT: add a0, a1, a0
; RV64I-NEXT: srliw a0, a0, 27
; RV64I-NEXT: lui a1, %hi(.LCPI6_0)
; RV64I-NEXT: addi a1, a1, %lo(.LCPI6_0)
; RV64I-NEXT: add a0, a1, a0
; RV64I-NEXT: lbu a0, 0(a0)
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
; RV64I-NEXT: ret
;
; RV32M-LABEL: test_cttz_i32_zero_undef:
Expand Down
Loading
Loading