Skip to content

[AIE2p] Use multi-slot pseudo for const COPY with unique def #454

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: aie-public
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 22 additions & 9 deletions llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -557,10 +557,19 @@ void AIE2PInstrInfo::copyPhysReg(MachineBasicBlock &MBB,

if (AIE2P::mMvSclSrcRegClass.contains(SrcReg) &&
AIE2P::mMvSclDstRegClass.contains(DstReg)) {
// Build MultiSlotPseudo in preference
const unsigned MOVSclOpcode = getScalarMovOpcode(DstReg, SrcReg);
BuildMI(MBB, MBBI, DL, get(MOVSclOpcode), DstReg)
.addReg(SrcReg, getKillRegState(KillSrc));
if (MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
MI && MI->isMoveImmediate()) {
// Try modifying scalar move to pseudo immediate move.
const int64_t Imm = MI->getOperand(1).getImm();
APInt ImmVal = APInt(64, Imm);
auto OpCode = getConstantMovOpcode(MRI, DstReg, ImmVal);
BuildMI(MBB, MBBI, DL, get(OpCode), DstReg).addImm(Imm);
} else {
// Build MultiSlotPseudo in preference
const unsigned MOVSclOpcode = getScalarMovOpcode(DstReg, SrcReg);
BuildMI(MBB, MBBI, DL, get(MOVSclOpcode), DstReg)
.addReg(SrcReg, getKillRegState(KillSrc));
}
} else if ((AIE2P::eLRegClass.contains(SrcReg)) &&
(AIE2P::eLRegClass.contains(DstReg))) {
BuildMI(MBB, MBBI, DL, get(AIE2P::MOV_alu_mv_mv_mv_scl),
Expand Down Expand Up @@ -1138,11 +1147,15 @@ unsigned AIE2PInstrInfo::getConstantMovOpcode(MachineRegisterInfo &MRI,
unsigned int ImmSize = Val.getSignificantBits();

const TargetRegisterClass *DstRegClass = nullptr;
const RegClassOrRegBank &RCB = MRI.getRegClassOrRegBank(Reg);
if (const RegisterBank *RB = RCB.dyn_cast<const RegisterBank *>())
DstRegClass = &TRI->getMinClassForRegBank(*RB, MRI.getType(Reg));
if (auto *TRC = RCB.dyn_cast<const TargetRegisterClass *>())
DstRegClass = TRC;
if (Register::isVirtualRegister(Reg)) {
const RegClassOrRegBank &RCB = MRI.getRegClassOrRegBank(Reg);
if (const RegisterBank *RB = RCB.dyn_cast<const RegisterBank *>())
DstRegClass = &TRI->getMinClassForRegBank(*RB, MRI.getType(Reg));
if (auto *TRC = RCB.dyn_cast<const TargetRegisterClass *>())
DstRegClass = TRC;
} else {
DstRegClass = TRI->getMinimalPhysRegClass(Reg);
}
assert(DstRegClass != nullptr && "RC cannot be null");
if (ImmSize <= 11) {
if (regClassMatches(AIE2P::mAluCgRegClass, DstRegClass, Reg))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,9 @@ define weak_odr dso_local void @convert_bf16_to_bfp16(ptr noalias %in, ptr noali
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: movx r24, #0
; CHECK-NEXT: mova dj0, #0; mov r26, r24
; CHECK-NEXT: vldb.fill.512 [p0, lf0, r24]; mov dj1, dj0
; CHECK-NEXT: movs dc1, dj0; vldb.pop.512 x0, [p0, lf0, r24]; mov dn1, dn0
; CHECK-NEXT: mov r26, r24
; CHECK-NEXT: mova dj0, #0; vldb.fill.512 [p0, lf0, r24]
; CHECK-NEXT: mova dc1, #0; vldb.pop.512 x0, [p0, lf0, r24]; movs dj1, dj0; mov dn1, dn0
; CHECK-NEXT: vldb.pop.512.2d x2, [p0, lf0, r24, d1]
; CHECK-NEXT: nop
; CHECK-NEXT: vldb.fill.512 [p0, lf0, r24]
Expand All @@ -35,7 +35,7 @@ define weak_odr dso_local void @convert_bf16_to_bfp16(ptr noalias %in, ptr noali
; CHECK-NEXT: nopa ; vldb.fill.512 [p0, lf0, r24]; nops ; nopxm ; nopv
; CHECK-NEXT: nopa ; vldb.pop.512 x0, [p0, lf0, r24]; nops ; nopx ; vconv.fp32.bf16 cml0, x0; nopv
; CHECK-NEXT: nopa ; vldb.pop.512.2d x2, [p0, lf0, r24, d1]; nops ; nopx ; vconv.fp32.bf16 cmh0, x2; nopv
; CHECK-NEXT: nopa ; nopb ; movs dc0, dj0; nopx ; mov p2, p1; nopv
; CHECK-NEXT: mova dc0, #0; nopb ; nops ; nopx ; mov p2, p1; nopv
; CHECK-NEXT: // implicit-def: $sf
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB0_1: // %for.body
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/AIE/aie2p/end-to-end/gelu-templated.ll
Original file line number Diff line number Diff line change
Expand Up @@ -52,10 +52,10 @@ define void @gelu_fn(ptr noalias %ifm, ptr noalias %ofm, ptr nonnull align 64 de
; CHECK-NEXT: nop
; CHECK-NEXT: vconv.bf16.fp32 x5, cml1
; CHECK-NEXT: vconv.bf16.fp32 x8, cml4; movxm ls, #.LBB0_1; vmul.f dm4, x10, x4, r2
; CHECK-NEXT: mova r3, #0; nopb ; vconv.bf16.fp32 x7, cml2; movxm le, #.L_LEnd0; vmul.f dm4, x5, x4, r2
; CHECK-NEXT: vconv.bf16.fp32 x5, cml3; mov s0, r3; vadd.f dm2, dm1, dm2, r0
; CHECK-NEXT: vconv.bf16.fp32 x7, cml2; movxm le, #.L_LEnd0; vmul.f dm4, x5, x4, r2
; CHECK-NEXT: vconv.bf16.fp32 x5, cml3; mov s0, #0; vadd.f dm2, dm1, dm2, r0
; CHECK-NEXT: vmov cml2, cml0; vmul.f dm3, x7, x2, r2
; CHECK-NEXT: vlda.conv.fp32.bf16 cml1, [p0], #64; vfloor.s32.bf16 x1, wl8, s0; movx r4, #-5; vmul.f dm4, x5, x4, r2
; CHECK-NEXT: vlda.conv.fp32.bf16 cml1, [p0], #64; nopb ; vfloor.s32.bf16 x1, wl8, s0; movx r4, #-5; mov r3, #0; vmul.f dm4, x5, x4, r2
; CHECK-NEXT: vfloor.s32.bf16 x3, wh8, s0; lshl r4, r1, r4; vbcst.16 x6, r3
; CHECK-NEXT: mova r1, #2; vconv.bf16.fp32 x10, cml4; add.nc lc, r4, #-7
; CHECK-NEXT: nopa ; nopb ; nops ; nopx ; vshuffle x1, x1, x3, r1; nopv
Expand Down
12 changes: 12 additions & 0 deletions llvm/test/CodeGen/AIE/aie2p/postrapseudos/pseudomove.mir
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,18 @@ body: |
$r1 = COPY $r0
...

---
# Checks COPY expansion when the source register was set by an immediate move:
# per the CHECK lines below, the COPY from $r0 into $r1 is expected to become a
# second MOV_RLC_imm11_pseudo of the same constant (10), not a register move.
name: COPY_GPR_const
alignment: 16
body: |
bb.0 (align 16):
; CHECK-LABEL: name: COPY_GPR_const
; CHECK: $r0 = MOV_RLC_imm11_pseudo 10
; CHECK-NEXT: $r1 = MOV_RLC_imm11_pseudo 10
$r0 = MOV_RLC_imm11_pseudo 10
$r1 = COPY $r0
...

---
name: COPY_non_GPR
alignment: 16
Expand Down