Skip to content

Commit b49e586

Browse files
[AIE2p] Use multi-slot pseudo for const COPY with unique def
1 parent 0a73921 commit b49e586

File tree

4 files changed: 41 additions (+41) and 16 deletions (-16).

llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.cpp

Lines changed: 22 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -557,10 +557,19 @@ void AIE2PInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
557557

558558
if (AIE2P::mMvSclSrcRegClass.contains(SrcReg) &&
559559
AIE2P::mMvSclDstRegClass.contains(DstReg)) {
560-
// Build MultiSlotPseudo in preference
561-
const unsigned MOVSclOpcode = getScalarMovOpcode(DstReg, SrcReg);
562-
BuildMI(MBB, MBBI, DL, get(MOVSclOpcode), DstReg)
563-
.addReg(SrcReg, getKillRegState(KillSrc));
560+
if (MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
561+
MI && MI->isMoveImmediate()) {
562+
// Try modifying scalar move to pseudo immediate move.
563+
const int64_t Imm = MI->getOperand(1).getImm();
564+
APInt ImmVal = APInt(64, Imm);
565+
auto OpCode = getConstantMovOpcode(MRI, DstReg, ImmVal);
566+
BuildMI(MBB, MBBI, DL, get(OpCode), DstReg).addImm(Imm);
567+
} else {
568+
// Build MultiSlotPseudo in preference
569+
const unsigned MOVSclOpcode = getScalarMovOpcode(DstReg, SrcReg);
570+
BuildMI(MBB, MBBI, DL, get(MOVSclOpcode), DstReg)
571+
.addReg(SrcReg, getKillRegState(KillSrc));
572+
}
564573
} else if ((AIE2P::eLRegClass.contains(SrcReg)) &&
565574
(AIE2P::eLRegClass.contains(DstReg))) {
566575
BuildMI(MBB, MBBI, DL, get(AIE2P::MOV_alu_mv_mv_mv_scl),
@@ -1138,11 +1147,15 @@ unsigned AIE2PInstrInfo::getConstantMovOpcode(MachineRegisterInfo &MRI,
11381147
unsigned int ImmSize = Val.getSignificantBits();
11391148

11401149
const TargetRegisterClass *DstRegClass = nullptr;
1141-
const RegClassOrRegBank &RCB = MRI.getRegClassOrRegBank(Reg);
1142-
if (const RegisterBank *RB = RCB.dyn_cast<const RegisterBank *>())
1143-
DstRegClass = &TRI->getMinClassForRegBank(*RB, MRI.getType(Reg));
1144-
if (auto *TRC = RCB.dyn_cast<const TargetRegisterClass *>())
1145-
DstRegClass = TRC;
1150+
if (Register::isVirtualRegister(Reg)) {
1151+
const RegClassOrRegBank &RCB = MRI.getRegClassOrRegBank(Reg);
1152+
if (const RegisterBank *RB = RCB.dyn_cast<const RegisterBank *>())
1153+
DstRegClass = &TRI->getMinClassForRegBank(*RB, MRI.getType(Reg));
1154+
if (auto *TRC = RCB.dyn_cast<const TargetRegisterClass *>())
1155+
DstRegClass = TRC;
1156+
} else {
1157+
DstRegClass = TRI->getMinimalPhysRegClass(Reg);
1158+
}
11461159
assert(DstRegClass != nullptr && "RC cannot be null");
11471160
if (ImmSize <= 11) {
11481161
if (regClassMatches(AIE2P::mAluCgRegClass, DstRegClass, Reg))

llvm/test/CodeGen/AIE/aie2p/end-to-end/conv2d_bfp16_convert.ll

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -23,9 +23,9 @@ define weak_odr dso_local void @convert_bf16_to_bfp16(ptr noalias %in, ptr noali
2323
; CHECK-NEXT: nop
2424
; CHECK-NEXT: nop
2525
; CHECK-NEXT: movx r24, #0
26-
; CHECK-NEXT: mova dj0, #0; mov r26, r24
27-
; CHECK-NEXT: vldb.fill.512 [p0, lf0, r24]; mov dj1, dj0
28-
; CHECK-NEXT: movs dc1, dj0; vldb.pop.512 x0, [p0, lf0, r24]; mov dn1, dn0
26+
; CHECK-NEXT: mov r26, r24
27+
; CHECK-NEXT: mova dj0, #0; vldb.fill.512 [p0, lf0, r24]
28+
; CHECK-NEXT: mova dc1, #0; vldb.pop.512 x0, [p0, lf0, r24]; movs dj1, dj0; mov dn1, dn0
2929
; CHECK-NEXT: vldb.pop.512.2d x2, [p0, lf0, r24, d1]
3030
; CHECK-NEXT: nop
3131
; CHECK-NEXT: vldb.fill.512 [p0, lf0, r24]
@@ -35,7 +35,7 @@ define weak_odr dso_local void @convert_bf16_to_bfp16(ptr noalias %in, ptr noali
3535
; CHECK-NEXT: nopa ; vldb.fill.512 [p0, lf0, r24]; nops ; nopxm ; nopv
3636
; CHECK-NEXT: nopa ; vldb.pop.512 x0, [p0, lf0, r24]; nops ; nopx ; vconv.fp32.bf16 cml0, x0; nopv
3737
; CHECK-NEXT: nopa ; vldb.pop.512.2d x2, [p0, lf0, r24, d1]; nops ; nopx ; vconv.fp32.bf16 cmh0, x2; nopv
38-
; CHECK-NEXT: nopa ; nopb ; movs dc0, dj0; nopx ; mov p2, p1; nopv
38+
; CHECK-NEXT: mova dc0, #0; nopb ; nops ; nopx ; mov p2, p1; nopv
3939
; CHECK-NEXT: // implicit-def: $sf
4040
; CHECK-NEXT: .p2align 4
4141
; CHECK-NEXT: .LBB0_1: // %for.body

llvm/test/CodeGen/AIE/aie2p/end-to-end/gelu-templated.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -52,10 +52,10 @@ define void @gelu_fn(ptr noalias %ifm, ptr noalias %ofm, ptr nonnull align 64 de
5252
; CHECK-NEXT: nop
5353
; CHECK-NEXT: vconv.bf16.fp32 x5, cml1
5454
; CHECK-NEXT: vconv.bf16.fp32 x8, cml4; movxm ls, #.LBB0_1; vmul.f dm4, x10, x4, r2
55-
; CHECK-NEXT: mova r3, #0; nopb ; vconv.bf16.fp32 x7, cml2; movxm le, #.L_LEnd0; vmul.f dm4, x5, x4, r2
56-
; CHECK-NEXT: vconv.bf16.fp32 x5, cml3; mov s0, r3; vadd.f dm2, dm1, dm2, r0
55+
; CHECK-NEXT: vconv.bf16.fp32 x7, cml2; movxm le, #.L_LEnd0; vmul.f dm4, x5, x4, r2
56+
; CHECK-NEXT: vconv.bf16.fp32 x5, cml3; mov s0, #0; vadd.f dm2, dm1, dm2, r0
5757
; CHECK-NEXT: vmov cml2, cml0; vmul.f dm3, x7, x2, r2
58-
; CHECK-NEXT: vlda.conv.fp32.bf16 cml1, [p0], #64; vfloor.s32.bf16 x1, wl8, s0; movx r4, #-5; vmul.f dm4, x5, x4, r2
58+
; CHECK-NEXT: vlda.conv.fp32.bf16 cml1, [p0], #64; nopb ; vfloor.s32.bf16 x1, wl8, s0; movx r4, #-5; mov r3, #0; vmul.f dm4, x5, x4, r2
5959
; CHECK-NEXT: vfloor.s32.bf16 x3, wh8, s0; lshl r4, r1, r4; vbcst.16 x6, r3
6060
; CHECK-NEXT: mova r1, #2; vconv.bf16.fp32 x10, cml4; add.nc lc, r4, #-7
6161
; CHECK-NEXT: nopa ; nopb ; nops ; nopx ; vshuffle x1, x1, x3, r1; nopv

llvm/test/CodeGen/AIE/aie2p/postrapseudos/pseudomove.mir

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,18 @@ body: |
4747
$r1 = COPY $r0
4848
...
4949

50+
---
51+
name: COPY_GPR_const
52+
alignment: 16
53+
body: |
54+
bb.0 (align 16):
55+
; CHECK-LABEL: name: COPY_GPR_const
56+
; CHECK: $r0 = MOV_RLC_imm11_pseudo 10
57+
; CHECK-NEXT: $r1 = MOV_RLC_imm11_pseudo 10
58+
$r0 = MOV_RLC_imm11_pseudo 10
59+
$r1 = COPY $r0
60+
...
61+
5062
---
5163
name: COPY_non_GPR
5264
alignment: 16

0 commit comments

Comments (0)