Skip to content

[LoongArch] Optimize for immediate value materialization using BSTRINS_D instruction #106332

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 24 additions & 6 deletions llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1291,14 +1291,32 @@ void LoongArchAsmParser::emitLoadImm(MCInst &Inst, SMLoc IDLoc,
Imm = SignExtend64<32>(Imm);

for (LoongArchMatInt::Inst &Inst : LoongArchMatInt::generateInstSeq(Imm)) {
unsigned Opc = Inst.Opc;
if (Opc == LoongArch::LU12I_W)
Out.emitInstruction(MCInstBuilder(Opc).addReg(DestReg).addImm(Inst.Imm),
getSTI());
else
switch (Inst.Opc) {
case LoongArch::LU12I_W:
Out.emitInstruction(
MCInstBuilder(Opc).addReg(DestReg).addReg(SrcReg).addImm(Inst.Imm),
MCInstBuilder(Inst.Opc).addReg(DestReg).addImm(Inst.Imm), getSTI());
break;
case LoongArch::ADDI_W:
case LoongArch::ORI:
case LoongArch::LU32I_D:
case LoongArch::LU52I_D:
Out.emitInstruction(
MCInstBuilder(Inst.Opc).addReg(DestReg).addReg(SrcReg).addImm(
Inst.Imm),
getSTI());
break;
case LoongArch::BSTRINS_D:
Out.emitInstruction(MCInstBuilder(Inst.Opc)
.addReg(DestReg)
.addReg(SrcReg)
.addReg(SrcReg)
.addImm(Inst.Imm >> 32)
.addImm(Inst.Imm & 0xFF),
getSTI());
break;
default:
llvm_unreachable("unexpected opcode generated by LoongArchMatInt");
}
SrcReg = DestReg;
}
}
Expand Down
22 changes: 19 additions & 3 deletions llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,10 +62,26 @@ void LoongArchDAGToDAGISel::Select(SDNode *Node) {
// The instructions in the sequence are handled here.
for (LoongArchMatInt::Inst &Inst : LoongArchMatInt::generateInstSeq(Imm)) {
SDValue SDImm = CurDAG->getTargetConstant(Inst.Imm, DL, GRLenVT);
if (Inst.Opc == LoongArch::LU12I_W)
Result = CurDAG->getMachineNode(LoongArch::LU12I_W, DL, GRLenVT, SDImm);
else
switch (Inst.Opc) {
case LoongArch::LU12I_W:
Result = CurDAG->getMachineNode(Inst.Opc, DL, GRLenVT, SDImm);
break;
case LoongArch::ADDI_W:
case LoongArch::ORI:
case LoongArch::LU32I_D:
case LoongArch::LU52I_D:
Result = CurDAG->getMachineNode(Inst.Opc, DL, GRLenVT, SrcReg, SDImm);
break;
case LoongArch::BSTRINS_D:
Result = CurDAG->getMachineNode(
Inst.Opc, DL, GRLenVT,
{SrcReg, SrcReg,
CurDAG->getTargetConstant(Inst.Imm >> 32, DL, GRLenVT),
CurDAG->getTargetConstant(Inst.Imm & 0xFF, DL, GRLenVT)});
break;
default:
llvm_unreachable("unexpected opcode generated by LoongArchMatInt");
}
SrcReg = SDValue(Result, 0);
}

Expand Down
8 changes: 8 additions & 0 deletions llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,14 @@ void LoongArchInstrInfo::movImm(MachineBasicBlock &MBB,
.addImm(Inst.Imm)
.setMIFlag(Flag);
break;
case LoongArch::BSTRINS_D:
BuildMI(MBB, MBBI, DL, get(Inst.Opc), DstReg)
.addReg(SrcReg, RegState::Kill)
.addReg(SrcReg, RegState::Kill)
.addImm(Inst.Imm >> 32)
.addImm(Inst.Imm & 0xFF)
.setMIFlag(Flag);
break;
default:
assert(false && "Unknown insn emitted by LoongArchMatInt");
}
Expand Down
73 changes: 73 additions & 0 deletions llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMatInt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,13 @@ LoongArchMatInt::InstSeq LoongArchMatInt::generateInstSeq(int64_t Val) {
const int64_t Lo12 = Val & 0xFFF;
InstSeq Insts;

// LU52I_D used for: Bits[63:52] | Bits[51:0].
if (Highest12 != 0 && SignExtend64<52>(Val) == 0) {
Insts.push_back(Inst(LoongArch::LU52I_D, SignExtend64<12>(Highest12)));
return Insts;
}

// lo32
if (Hi20 == 0)
Insts.push_back(Inst(LoongArch::ORI, Lo12));
else if (SignExtend32<1>(Lo12 >> 11) == SignExtend32<20>(Hi20))
Expand All @@ -41,11 +43,82 @@ LoongArchMatInt::InstSeq LoongArchMatInt::generateInstSeq(int64_t Val) {
Insts.push_back(Inst(LoongArch::ORI, Lo12));
}

// hi32
// Higher20
if (SignExtend32<1>(Hi20 >> 19) != SignExtend32<20>(Higher20))
Insts.push_back(Inst(LoongArch::LU32I_D, SignExtend64<20>(Higher20)));

// Highest12
if (SignExtend32<1>(Higher20 >> 19) != SignExtend32<12>(Highest12))
Insts.push_back(Inst(LoongArch::LU52I_D, SignExtend64<12>(Highest12)));

size_t N = Insts.size();
if (N < 3)
return Insts;

// When the sequence contains more than 2 instructions, we have the
// opportunity to optimize it using the BSTRINS_D instruction. The scenarios
// are as follows:
//
// N of Insts = 3
// 1. ORI + LU32I_D + LU52I_D => ORI + BSTRINS_D, TmpVal = ORI
// 2. ADDI_W + LU32I_D + LU52I_D => ADDI_W + BSTRINS_D, TmpVal = ADDI_W
// 3. LU12I_W + ORI + LU32I_D => ORI + BSTRINS_D, TmpVal = ORI
// 4. LU12I_W + LU32I_D + LU52I_D => LU12I_W + BSTRINS_D, TmpVal = LU12I_W
//
// N of Insts = 4
// 5. LU12I_W + ORI + LU32I_D + LU52I_D => LU12I_W + ORI + BSTRINS_D
// => ORI + LU52I_D + BSTRINS_D
// TmpVal = (LU12I_W | ORI) or (ORI | LU52I_D)
// The BSTRINS_D instruction will use the `TmpVal` to construct the `Val`.
uint64_t TmpVal1 = 0;
uint64_t TmpVal2 = 0;
switch (Insts[0].Opc) {
default:
llvm_unreachable("unexpected opcode");
break;
case LoongArch::LU12I_W:
if (Insts[1].Opc == LoongArch::ORI) {
TmpVal1 = Insts[1].Imm;
if (N == 3)
break;
TmpVal2 = Insts[3].Imm << 52 | TmpVal1;
}
TmpVal1 |= Insts[0].Imm << 12;
break;
case LoongArch::ORI:
case LoongArch::ADDI_W:
TmpVal1 = Insts[0].Imm;
break;
}

for (uint64_t Msb = 32; Msb < 64; ++Msb) {
uint64_t HighMask = ~((1ULL << (Msb + 1)) - 1);
for (uint64_t Lsb = Msb; Lsb > 0; --Lsb) {
uint64_t LowMask = (1ULL << Lsb) - 1;
uint64_t Mask = HighMask | LowMask;
uint64_t LsbToZero = TmpVal1 & ((1UL << (Msb - Lsb + 1)) - 1);
uint64_t MsbToLsb = LsbToZero << Lsb;
if ((MsbToLsb | (TmpVal1 & Mask)) == (uint64_t)Val) {
if (Insts[1].Opc == LoongArch::ORI && N == 3)
Insts[0] = Insts[1];
Insts.pop_back_n(2);
Insts.push_back(Inst(LoongArch::BSTRINS_D, Msb << 32 | Lsb));
return Insts;
}
if (TmpVal2 != 0) {
LsbToZero = TmpVal2 & ((1UL << (Msb - Lsb + 1)) - 1);
MsbToLsb = LsbToZero << Lsb;
if ((MsbToLsb | (TmpVal2 & Mask)) == (uint64_t)Val) {
Insts[0] = Insts[1];
Insts[1] = Insts[3];
Insts.pop_back_n(2);
Insts.push_back(Inst(LoongArch::BSTRINS_D, Msb << 32 | Lsb));
return Insts;
}
}
}
}

return Insts;
}
1 change: 1 addition & 0 deletions llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMatInt.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ namespace llvm {
namespace LoongArchMatInt {
/// A single (opcode, immediate) step of an immediate-materialization sequence.
struct Inst {
  /// Opcode of the instruction to emit.
  unsigned Opc;
  /// Immediate operand of Opc. When Opc == BSTRINS_D the two bit positions
  /// are packed into this one field as MSB << 32 | LSB.
  int64_t Imm;

  Inst(unsigned Opcode, int64_t Immediate) : Opc(Opcode), Imm(Immediate) {}
};
Expand Down
12 changes: 4 additions & 8 deletions llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll
Original file line number Diff line number Diff line change
Expand Up @@ -338,14 +338,12 @@ define i64 @test_ctpop_i64(i64 %a) nounwind {
; LA64-NEXT: srli.d $a1, $a0, 1
; LA64-NEXT: lu12i.w $a2, 349525
; LA64-NEXT: ori $a2, $a2, 1365
; LA64-NEXT: lu32i.d $a2, 349525
; LA64-NEXT: lu52i.d $a2, $a2, 1365
; LA64-NEXT: bstrins.d $a2, $a2, 62, 32
; LA64-NEXT: and $a1, $a1, $a2
; LA64-NEXT: sub.d $a0, $a0, $a1
; LA64-NEXT: lu12i.w $a1, 209715
; LA64-NEXT: ori $a1, $a1, 819
; LA64-NEXT: lu32i.d $a1, 209715
; LA64-NEXT: lu52i.d $a1, $a1, 819
; LA64-NEXT: bstrins.d $a1, $a1, 61, 32
; LA64-NEXT: and $a2, $a0, $a1
; LA64-NEXT: srli.d $a0, $a0, 2
; LA64-NEXT: and $a0, $a0, $a1
Expand All @@ -354,13 +352,11 @@ define i64 @test_ctpop_i64(i64 %a) nounwind {
; LA64-NEXT: add.d $a0, $a0, $a1
; LA64-NEXT: lu12i.w $a1, 61680
; LA64-NEXT: ori $a1, $a1, 3855
; LA64-NEXT: lu32i.d $a1, -61681
; LA64-NEXT: lu52i.d $a1, $a1, 240
; LA64-NEXT: bstrins.d $a1, $a1, 59, 32
; LA64-NEXT: and $a0, $a0, $a1
; LA64-NEXT: lu12i.w $a1, 4112
; LA64-NEXT: ori $a1, $a1, 257
; LA64-NEXT: lu32i.d $a1, 65793
; LA64-NEXT: lu52i.d $a1, $a1, 16
; LA64-NEXT: bstrins.d $a1, $a1, 56, 32
; LA64-NEXT: mul.d $a0, $a0, $a1
; LA64-NEXT: srli.d $a0, $a0, 56
; LA64-NEXT: ret
Expand Down
25 changes: 9 additions & 16 deletions llvm/test/CodeGen/LoongArch/imm.ll
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,7 @@ define i64 @imm0008000000000fff() {
; CHECK-LABEL: imm0008000000000fff:
; CHECK: # %bb.0:
; CHECK-NEXT: ori $a0, $zero, 4095
; CHECK-NEXT: lu32i.d $a0, -524288
; CHECK-NEXT: lu52i.d $a0, $a0, 0
; CHECK-NEXT: bstrins.d $a0, $a0, 51, 51
; CHECK-NEXT: ret
ret i64 2251799813689343
}
Expand Down Expand Up @@ -168,9 +167,8 @@ define i64 @imm0008000080000800() {
define i64 @imm14000000a() {
; CHECK-LABEL: imm14000000a:
; CHECK: # %bb.0:
; CHECK-NEXT: lu12i.w $a0, 262144
; CHECK-NEXT: ori $a0, $a0, 10
; CHECK-NEXT: lu32i.d $a0, 1
; CHECK-NEXT: ori $a0, $zero, 10
; CHECK-NEXT: bstrins.d $a0, $a0, 32, 29
; CHECK-NEXT: ret
ret i64 5368709130
}
Expand All @@ -179,8 +177,7 @@ define i64 @imm0fff000000000fff() {
; CHECK-LABEL: imm0fff000000000fff:
; CHECK: # %bb.0:
; CHECK-NEXT: ori $a0, $zero, 4095
; CHECK-NEXT: lu32i.d $a0, -65536
; CHECK-NEXT: lu52i.d $a0, $a0, 255
; CHECK-NEXT: bstrins.d $a0, $a0, 59, 48
; CHECK-NEXT: ret
ret i64 1152640029630140415
}
Expand All @@ -189,8 +186,7 @@ define i64 @immffecffffffffffec() {
; CHECK-LABEL: immffecffffffffffec:
; CHECK: # %bb.0:
; CHECK-NEXT: addi.w $a0, $zero, -20
; CHECK-NEXT: lu32i.d $a0, -196609
; CHECK-NEXT: lu52i.d $a0, $a0, -2
; CHECK-NEXT: bstrins.d $a0, $a0, 52, 48
; CHECK-NEXT: ret
ret i64 -5348024557502484
}
Expand All @@ -199,8 +195,7 @@ define i64 @imm1c000000700000() {
; CHECK-LABEL: imm1c000000700000:
; CHECK: # %bb.0:
; CHECK-NEXT: lu12i.w $a0, 1792
; CHECK-NEXT: lu32i.d $a0, -262144
; CHECK-NEXT: lu52i.d $a0, $a0, 1
; CHECK-NEXT: bstrins.d $a0, $a0, 52, 30
; CHECK-NEXT: ret
ret i64 7881299355238400
}
Expand All @@ -210,19 +205,17 @@ define i64 @immf0f0f0f0f0f0f0f0() {
; CHECK: # %bb.0:
; CHECK-NEXT: lu12i.w $a0, -61681
; CHECK-NEXT: ori $a0, $a0, 240
; CHECK-NEXT: lu32i.d $a0, 61680
; CHECK-NEXT: lu52i.d $a0, $a0, -241
; CHECK-NEXT: bstrins.d $a0, $a0, 59, 32
; CHECK-NEXT: ret
ret i64 -1085102592571150096
}

define i64 @imm110000014000000a() {
; CHECK-LABEL: imm110000014000000a:
; CHECK: # %bb.0:
; CHECK-NEXT: lu12i.w $a0, 262144
; CHECK-NEXT: ori $a0, $a0, 10
; CHECK-NEXT: lu32i.d $a0, 1
; CHECK-NEXT: ori $a0, $zero, 10
; CHECK-NEXT: lu52i.d $a0, $a0, 272
; CHECK-NEXT: bstrins.d $a0, $a0, 32, 29
; CHECK-NEXT: ret
ret i64 1224979104013484042
}
10 changes: 4 additions & 6 deletions llvm/test/CodeGen/LoongArch/ir-instruction/load-store.ll
Original file line number Diff line number Diff line change
Expand Up @@ -973,9 +973,8 @@ define i64 @ld_sd_constant(i64 %a) nounwind {
; LA64NOPIC-LABEL: ld_sd_constant:
; LA64NOPIC: # %bb.0:
; LA64NOPIC-NEXT: lu12i.w $a1, -136485
; LA64NOPIC-NEXT: ori $a1, $a1, 3823
; LA64NOPIC-NEXT: lu32i.d $a1, -147729
; LA64NOPIC-NEXT: lu52i.d $a2, $a1, -534
; LA64NOPIC-NEXT: ori $a2, $a1, 3823
; LA64NOPIC-NEXT: bstrins.d $a2, $a2, 61, 32
; LA64NOPIC-NEXT: ld.d $a1, $a2, 0
; LA64NOPIC-NEXT: st.d $a0, $a2, 0
; LA64NOPIC-NEXT: move $a0, $a1
Expand All @@ -984,9 +983,8 @@ define i64 @ld_sd_constant(i64 %a) nounwind {
; LA64PIC-LABEL: ld_sd_constant:
; LA64PIC: # %bb.0:
; LA64PIC-NEXT: lu12i.w $a1, -136485
; LA64PIC-NEXT: ori $a1, $a1, 3823
; LA64PIC-NEXT: lu32i.d $a1, -147729
; LA64PIC-NEXT: lu52i.d $a2, $a1, -534
; LA64PIC-NEXT: ori $a2, $a1, 3823
; LA64PIC-NEXT: bstrins.d $a2, $a2, 61, 32
; LA64PIC-NEXT: ld.d $a1, $a2, 0
; LA64PIC-NEXT: st.d $a0, $a2, 0
; LA64PIC-NEXT: move $a0, $a1
Expand Down
6 changes: 2 additions & 4 deletions llvm/test/CodeGen/LoongArch/merge-base-offset.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1128,8 +1128,7 @@ define dso_local ptr @load_addr_offset_614750729487779976() nounwind {
; LA64-NEXT: addi.d $a0, $a0, %pc_lo12(g_a64)
; LA64-NEXT: lu12i.w $a1, 279556
; LA64-NEXT: ori $a1, $a1, 1088
; LA64-NEXT: lu32i.d $a1, 17472
; LA64-NEXT: lu52i.d $a1, $a1, 1092
; LA64-NEXT: bstrins.d $a1, $a1, 62, 32
; LA64-NEXT: add.d $a0, $a0, $a1
; LA64-NEXT: ret
;
Expand All @@ -1142,8 +1141,7 @@ define dso_local ptr @load_addr_offset_614750729487779976() nounwind {
; LA64-LARGE-NEXT: add.d $a0, $a1, $a0
; LA64-LARGE-NEXT: lu12i.w $a1, 279556
; LA64-LARGE-NEXT: ori $a1, $a1, 1088
; LA64-LARGE-NEXT: lu32i.d $a1, 17472
; LA64-LARGE-NEXT: lu52i.d $a1, $a1, 1092
; LA64-LARGE-NEXT: bstrins.d $a1, $a1, 62, 32
; LA64-LARGE-NEXT: add.d $a0, $a0, $a1
; LA64-LARGE-NEXT: ret
entry:
Expand Down
Loading
Loading