Skip to content

[AMDGPU][NFCI] Reorder AGPRs to allow skipping over them #105633

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 19 additions & 3 deletions llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3365,6 +3365,24 @@ SIRegisterInfo::getConstrainedRegClassForOperand(const MachineOperand &MO,
return nullptr;
}

unsigned SIRegisterInfo::getNumSupportedRegs(const MachineFunction &MF) const {
#ifndef NDEBUG
for (unsigned K = AMDGPU::AGPR0; K < AMDGPU::NUM_TARGET_REGS; ++K) {
// Skip lo16 registers, they're "fake" and don't have a regclass assigned.
if (K >= AMDGPU::AGPR0_HI16 && K <= AMDGPU::AGPR255_HI16)
continue;
if (!isAGPR(MF.getRegInfo(), K))
report_fatal_error("register at index " + Twine(K) + " is not an AGPR!");
}
#endif

// Don't include AGPRs on functions that don't have them.
// This cuts about 4000 register (almost half of all registers) off.
return ST.hasMAIInsts() && MF.getInfo<SIMachineFunctionInfo>()->usesAGPRs(MF)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would hope hasMAIInsts is redundant with usesAGPRs, but this check is a mess anyway

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Plus I think this hook shouldn't exist and RegisterClassInfo is underutilized

? AMDGPU::NUM_TARGET_REGS
: AMDGPU::AGPR0;
}

MCRegister SIRegisterInfo::getVCC() const {
return isWave32 ? AMDGPU::VCC_LO : AMDGPU::VCC;
}
Expand Down Expand Up @@ -3456,9 +3474,7 @@ MCPhysReg SIRegisterInfo::get32BitRegister(MCPhysReg Reg) const {
AMDGPU::AGPR_32RegClass } ) {
if (MCPhysReg Super = getMatchingSuperReg(Reg, AMDGPU::lo16, &RC))
return Super;
}
if (MCPhysReg Super = getMatchingSuperReg(Reg, AMDGPU::hi16,
&AMDGPU::VGPR_32RegClass)) {
if (MCPhysReg Super = getMatchingSuperReg(Reg, AMDGPU::hi16, &RC))
return Super;
}

Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/AMDGPU/SIRegisterInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -342,6 +342,8 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo {
getConstrainedRegClassForOperand(const MachineOperand &MO,
const MachineRegisterInfo &MRI) const override;

unsigned getNumSupportedRegs(const MachineFunction &MF) const override;

const TargetRegisterClass *getBoolRC() const {
return isWave32 ? &AMDGPU::SReg_32RegClass
: &AMDGPU::SReg_64RegClass;
Expand Down
9 changes: 8 additions & 1 deletion llvm/lib/Target/AMDGPU/SIRegisterInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -159,10 +159,13 @@ class SIRegisterClass <string n, list<ValueType> rTypes, int Align, dag rList>

multiclass SIRegLoHi16 <string n, bits<8> regIdx, bit ArtificialHigh = 1,
bit isVGPR = 0, bit isAGPR = 0> {
def _LO16 : SIReg<n#".l", regIdx, isVGPR, isAGPR>;
def _LO16 : SIReg<n#".l", regIdx, isVGPR, isAGPR> {
let PositionOrder = !if(isAGPR, 1, 0);
}
def _HI16 : SIReg<!if(ArtificialHigh, "", n#".h"), regIdx, isVGPR, isAGPR,
/* isHi16 */ 1> {
let isArtificial = ArtificialHigh;
let PositionOrder = !if(isAGPR, 1, 0);
}
def "" : RegisterWithSubRegs<n, [!cast<Register>(NAME#"_LO16"),
!cast<Register>(NAME#"_HI16")]> {
Expand All @@ -174,6 +177,7 @@ multiclass SIRegLoHi16 <string n, bits<8> regIdx, bit ArtificialHigh = 1,
let HWEncoding{9} = isAGPR;

int Index = !cast<int>(regIdx);
let PositionOrder = !if(isAGPR, 1, 0);
}
}

Expand Down Expand Up @@ -699,6 +703,8 @@ def AGPR_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16,
}
} // End HasAGPR = 1

let PositionOrder = 1 in {

// AGPR 64-bit registers
def AGPR_64 : SIRegisterTuples<getSubRegs<2>.ret, AGPR_32, 255, 1, 2, "a">;

Expand Down Expand Up @@ -737,6 +743,7 @@ def AGPR_512 : SIRegisterTuples<getSubRegs<16>.ret, AGPR_32, 255, 1, 16, "a">;

// AGPR 1024-bit registers
def AGPR_1024 : SIRegisterTuples<getSubRegs<32>.ret, AGPR_32, 255, 1, 32, "a">;
}

//===----------------------------------------------------------------------===//
// Register classes used as source and destination
Expand Down
60 changes: 30 additions & 30 deletions llvm/test/CodeGen/AMDGPU/accvgpr-spill-scc-clobber.mir

Large diffs are not rendered by default.

84 changes: 42 additions & 42 deletions llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -421,7 +421,7 @@ body: |
; GFX900-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc, implicit killed $vcc
;
; GFX90A-LABEL: name: materialize_fi_s_mov_b32_offset_64_live_scc_live_vcc_no_vgpr
; GFX90A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15, $sgpr4, $sgpr5, $vgpr0, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63
; GFX90A: liveins: $sgpr4, $sgpr5, $vgpr0, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63, $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr40, implicit $exec
; GFX90A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr41, implicit $exec
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,8 @@ body: |
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15_sub16:av_1024_align2 = COPY [[COPY]].sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15_sub16 {
; CHECK-NEXT: internal [[COPY1]].sub17_lo16_sub17_hi16_sub18_lo16_sub18_hi16_sub19_lo16_sub19_hi16_sub20_lo16_sub20_hi16_sub21_lo16_sub21_hi16_sub22_lo16_sub22_hi16_sub23_lo16_sub23_hi16_sub24_lo16_sub24_hi16_sub25_lo16_sub25_hi16_sub26_lo16_sub26_hi16_sub27_lo16_sub27_hi16_sub28_lo16_sub28_hi16:av_1024_align2 = COPY [[COPY]].sub17_lo16_sub17_hi16_sub18_lo16_sub18_hi16_sub19_lo16_sub19_hi16_sub20_lo16_sub20_hi16_sub21_lo16_sub21_hi16_sub22_lo16_sub22_hi16_sub23_lo16_sub23_hi16_sub24_lo16_sub24_hi16_sub25_lo16_sub25_hi16_sub26_lo16_sub26_hi16_sub27_lo16_sub27_hi16_sub28_lo16_sub28_hi16
; CHECK-NEXT: internal [[COPY1]].sub29_sub30_sub31:av_1024_align2 = COPY [[COPY]].sub29_sub30_sub31
; CHECK-NEXT: internal [[COPY1]].sub20_lo16_sub20_hi16_sub21_lo16_sub21_hi16_sub22_lo16_sub22_hi16_sub23_lo16_sub23_hi16_sub24_lo16_sub24_hi16_sub25_lo16_sub25_hi16_sub26_lo16_sub26_hi16_sub27_lo16_sub27_hi16_sub28_lo16_sub28_hi16_sub29_lo16_sub29_hi16_sub30_lo16_sub30_hi16_sub31_lo16_sub31_hi16:av_1024_align2 = COPY [[COPY]].sub20_lo16_sub20_hi16_sub21_lo16_sub21_hi16_sub22_lo16_sub22_hi16_sub23_lo16_sub23_hi16_sub24_lo16_sub24_hi16_sub25_lo16_sub25_hi16_sub26_lo16_sub26_hi16_sub27_lo16_sub27_hi16_sub28_lo16_sub28_hi16_sub29_lo16_sub29_hi16_sub30_lo16_sub30_hi16_sub31_lo16_sub31_hi16
; CHECK-NEXT: internal [[COPY1]].sub17_sub18_sub19:av_1024_align2 = COPY [[COPY]].sub17_sub18_sub19
; CHECK-NEXT: }
; CHECK-NEXT: [[COPY1:%[0-9]+]].sub0:av_1024_align2 = IMPLICIT_DEF
; CHECK-NEXT: S_NOP 0, implicit [[COPY1]].sub0
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/frame-index.mir
Original file line number Diff line number Diff line change
Expand Up @@ -518,7 +518,7 @@ body: |
; GFX900-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc
;
; GFX90A-LABEL: name: materialize_fi_s_mov_b32_offset_0_live_scc__no_free_vgprs
; GFX90A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15, $sgpr4, $sgpr5, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39
; GFX90A: liveins: $sgpr4, $sgpr5, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr40, implicit $exec
; GFX90A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr41, implicit $exec
Expand Down Expand Up @@ -732,7 +732,7 @@ body: |
; GFX900-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc
;
; GFX90A-LABEL: name: materialize_fi_s_mov_b32_offset_96_live_scc__no_free_vgprs
; GFX90A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15, $sgpr4, $sgpr5, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63
; GFX90A: liveins: $sgpr4, $sgpr5, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63, $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr40, implicit $exec
; GFX90A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr41, implicit $exec
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 {
;
; PEI-GFX908-LABEL: name: partial_copy
; PEI-GFX908: bb.0 (%ir-block.0):
; PEI-GFX908-NEXT: liveins: $agpr4, $sgpr4_sgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr7
; PEI-GFX908-NEXT: liveins: $sgpr4_sgpr5, $agpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr7
; PEI-GFX908-NEXT: {{ $}}
; PEI-GFX908-NEXT: $sgpr8_sgpr9_sgpr10_sgpr11 = COPY killed $sgpr0_sgpr1_sgpr2_sgpr3
; PEI-GFX908-NEXT: $sgpr8 = S_ADD_U32 $sgpr8, $sgpr7, implicit-def $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
Expand Down Expand Up @@ -77,7 +77,7 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 {
;
; PEI-GFX90A-LABEL: name: partial_copy
; PEI-GFX90A: bb.0 (%ir-block.0):
; PEI-GFX90A-NEXT: liveins: $agpr4, $sgpr4_sgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr7
; PEI-GFX90A-NEXT: liveins: $sgpr4_sgpr5, $agpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr7
; PEI-GFX90A-NEXT: {{ $}}
; PEI-GFX90A-NEXT: $sgpr8_sgpr9_sgpr10_sgpr11 = COPY killed $sgpr0_sgpr1_sgpr2_sgpr3
; PEI-GFX90A-NEXT: $sgpr8 = S_ADD_U32 $sgpr8, $sgpr7, implicit-def $scc, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ body: |
liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47, $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63

; GFX908-LABEL: name: regalloc_introduces_s_to_a_copy
; GFX908: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47, $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63, $vgpr32_vgpr33_vgpr34_vgpr35, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr7
; GFX908: liveins: $vgpr32_vgpr33_vgpr34_vgpr35, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47, $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr7
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $sgpr7, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
; GFX908-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
Expand Down
Loading
Loading