Skip to content

Commit 00dcbc3

Browse files
[AIE2P] Use ShouldAllocateLiveInterval to focus allocation on 3D virtual registers that are used by 3D instructions
1 parent ea7345e commit 00dcbc3

File tree

1 file changed

+170
-1
lines changed

1 file changed

+170
-1
lines changed

llvm/lib/Target/AIE/aie2p/AIE2PTargetMachine.cpp

Lines changed: 170 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
#include "AIE2PTargetMachine.h"
1616
#include "AIE2PTargetTransformInfo.h"
17+
#include "llvm/CodeGen/LiveInterval.h"
1718
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
1819

1920
using namespace llvm;
@@ -66,6 +67,173 @@ void AIE2PPassConfig::addPreRegBankSelect() {
6667
}
6768
}
6869

70+
/// Live-interval filter: returns true when at least one non-debug use of the
/// register behind \p LI is an instruction whose opcode appears in the 3D
/// opcode list below, and false otherwise (including when there are no
/// non-debug uses at all).
///
/// \param MRI Register info queried for the non-debug uses of the register.
/// \param TII Not read by this function; kept so the signature stays as is.
/// \param LI  Live interval whose register is inspected; must not be null
///            since it is dereferenced unconditionally.
static bool onlyAllocateLIwith3DInstruction(MachineRegisterInfo &MRI,
                                            const TargetInstrInfo &TII,
                                            const LiveInterval *LI) {
  // Classifies a raw opcode: true for every opcode enumerated below.
  //
  // NOTE(review): the list contains VLDA_3D_UPS_2x_dmw_lda_ups_w2b_upsSign1
  // (and its _split form) but no matching ..._w2b_upsSign0 entry, whereas
  // every other UPS/SRS/PACK/UNPACK opcode is listed with both Sign0 and
  // Sign1 — confirm whether that opcode exists and was left out on purpose.
  const auto Is3DOpcode = [](unsigned Opcode) -> bool {
    switch (Opcode) {
    // Base opcodes.
    case AIE2P::LDA_3D_dms_lda:
    case AIE2P::LDA_3D_dmv_lda_q:
    case AIE2P::LDA_3D_s16:
    case AIE2P::LDA_3D_s8:
    case AIE2P::LDA_3D_u16:
    case AIE2P::LDA_3D_u8:
    case AIE2P::LDA_TM_3D:
    case AIE2P::ST_3D_dms_sts:
    case AIE2P::ST_3D_dmv_sts_q:
    case AIE2P::ST_3D_s16:
    case AIE2P::ST_3D_s8:
    case AIE2P::ST_TM_3D:
    case AIE2P::VLDA_3D_128:
    case AIE2P::VLDA_3D_CONV_fp32_bf16_dmw_lda_ups_bf:
    case AIE2P::VLDA_3D_CONV_fp32_bf16_dmx_lda_ups_bf:
    case AIE2P::VLDA_3D_dmw_lda_w:
    case AIE2P::VLDA_3D_dmx_lda_bm:
    case AIE2P::VLDA_3D_dmx_lda_fifohl:
    case AIE2P::VLDA_3D_dmx_lda_x:
    case AIE2P::VLDB_3D_128:
    case AIE2P::VLDB_3D_UNPACK_dmw_ldb_unpack_unpackSign0:
    case AIE2P::VLDB_3D_UNPACK_dmw_ldb_unpack_unpackSign1:
    case AIE2P::VLDB_3D_UNPACK_dmx_ldb_unpack_unpackSign0:
    case AIE2P::VLDB_3D_UNPACK_dmx_ldb_unpack_unpackSign1:
    case AIE2P::VLDB_3D_dmw_ldb:
    case AIE2P::VLDB_3D_dmx_ldb_x:
    case AIE2P::VST_3D_128:
    case AIE2P::VST_3D_CONV_bf16_fp32_dmw_sts_srs_bf:
    case AIE2P::VST_3D_CONV_bf16_fp32_dmx_sts_srs_bf:
    case AIE2P::VST_3D_PACK_dmw_sts_pack_packSign0:
    case AIE2P::VST_3D_PACK_dmw_sts_pack_packSign1:
    case AIE2P::VST_3D_PACK_dmx_sts_pack_packSign0:
    case AIE2P::VST_3D_PACK_dmx_sts_pack_packSign1:
    case AIE2P::VST_3D_dmw_sts_w:
    case AIE2P::VST_3D_dmx_sts_bm:
    case AIE2P::VST_3D_dmx_sts_fifohl:
    case AIE2P::VST_3D_dmx_sts_x:
    case AIE2P::VLD_3D_w_pseudo:
    case AIE2P::VLD_3D_x_pseudo:
    case AIE2P::VLD_3D_128_pseudo:
    case AIE2P::PADDA_3D:
    case AIE2P::PADDB_3D:
    case AIE2P::PADDS_3D:
    case AIE2P::PADD_3D_pseudo:
    case AIE2P::VLDA_3D_UPS_2x_dmw_lda_ups_w2b_upsSign1:
    case AIE2P::VLDA_3D_UPS_2x_dmx_lda_ups_x2c_upsSign0:
    case AIE2P::VLDA_3D_UPS_2x_dmx_lda_ups_x2c_upsSign1:
    case AIE2P::VLDA_3D_UPS_4x_dmw_lda_ups_w2c_upsSign0:
    case AIE2P::VLDA_3D_UPS_4x_dmw_lda_ups_w2c_upsSign1:
    case AIE2P::VLDA_3D_UPS_4x_dmx_lda_ups_x2d_upsSign0:
    case AIE2P::VLDA_3D_UPS_4x_dmx_lda_ups_x2d_upsSign1:
    case AIE2P::VST_3D_SRS_2x_dm_sts_srs_cm_srsSign0:
    case AIE2P::VST_3D_SRS_2x_dm_sts_srs_cm_srsSign1:
    case AIE2P::VST_3D_SRS_2x_dmw_sts_srs_bm_srsSign0:
    case AIE2P::VST_3D_SRS_2x_dmw_sts_srs_bm_srsSign1:
    case AIE2P::VST_3D_SRS_4x_dm_sts_srs_cm_srsSign0:
    case AIE2P::VST_3D_SRS_4x_dm_sts_srs_cm_srsSign1:
    case AIE2P::VST_3D_SRS_4x_dmx_sts_srs_dm_srsSign0:
    case AIE2P::VST_3D_SRS_4x_dmx_sts_srs_dm_srsSign1:
    case AIE2P::VST_FLUSH_512_3D:
    case AIE2P::VST_FLUSH_512_CONV_3D:
    case AIE2P::VLDA_POP_512_3D:
    case AIE2P::VLDA_POP_544_3D:
    case AIE2P::VLDA_POP_576_3D:
    case AIE2P::VLDA_POP_640_3D:
    case AIE2P::VLDA_POP_704_3D:
    case AIE2P::VLDB_POP_512_3D:
    case AIE2P::VLDB_POP_544_3D:
    case AIE2P::VLDB_POP_576_3D:
    case AIE2P::VLDB_POP_640_3D:
    case AIE2P::VLDB_POP_704_3D:
    case AIE2P::VLD_POP_512_3D_pseudo:
    case AIE2P::VLD_POP_544_3D_pseudo:
    case AIE2P::VLD_POP_576_3D_pseudo:
    case AIE2P::VLD_POP_640_3D_pseudo:
    case AIE2P::VLD_POP_704_3D_pseudo:
    // `_split`-suffixed counterparts of the opcodes above, in the same order.
    case AIE2P::LDA_3D_dms_lda_split:
    case AIE2P::LDA_3D_dmv_lda_q_split:
    case AIE2P::LDA_3D_s16_split:
    case AIE2P::LDA_3D_s8_split:
    case AIE2P::LDA_3D_u16_split:
    case AIE2P::LDA_3D_u8_split:
    case AIE2P::LDA_TM_3D_split:
    case AIE2P::ST_3D_dms_sts_split:
    case AIE2P::ST_3D_dmv_sts_q_split:
    case AIE2P::ST_3D_s16_split:
    case AIE2P::ST_3D_s8_split:
    case AIE2P::ST_TM_3D_split:
    case AIE2P::VLDA_3D_128_split:
    case AIE2P::VLDA_3D_CONV_fp32_bf16_dmw_lda_ups_bf_split:
    case AIE2P::VLDA_3D_CONV_fp32_bf16_dmx_lda_ups_bf_split:
    case AIE2P::VLDA_3D_dmw_lda_w_split:
    case AIE2P::VLDA_3D_dmx_lda_bm_split:
    case AIE2P::VLDA_3D_dmx_lda_fifohl_split:
    case AIE2P::VLDA_3D_dmx_lda_x_split:
    case AIE2P::VLDB_3D_128_split:
    case AIE2P::VLDB_3D_UNPACK_dmw_ldb_unpack_unpackSign0_split:
    case AIE2P::VLDB_3D_UNPACK_dmw_ldb_unpack_unpackSign1_split:
    case AIE2P::VLDB_3D_UNPACK_dmx_ldb_unpack_unpackSign0_split:
    case AIE2P::VLDB_3D_UNPACK_dmx_ldb_unpack_unpackSign1_split:
    case AIE2P::VLDB_3D_dmw_ldb_split:
    case AIE2P::VLDB_3D_dmx_ldb_x_split:
    case AIE2P::VST_3D_128_split:
    case AIE2P::VST_3D_CONV_bf16_fp32_dmw_sts_srs_bf_split:
    case AIE2P::VST_3D_CONV_bf16_fp32_dmx_sts_srs_bf_split:
    case AIE2P::VST_3D_PACK_dmw_sts_pack_packSign0_split:
    case AIE2P::VST_3D_PACK_dmw_sts_pack_packSign1_split:
    case AIE2P::VST_3D_PACK_dmx_sts_pack_packSign0_split:
    case AIE2P::VST_3D_PACK_dmx_sts_pack_packSign1_split:
    case AIE2P::VST_3D_dmw_sts_w_split:
    case AIE2P::VST_3D_dmx_sts_bm_split:
    case AIE2P::VST_3D_dmx_sts_fifohl_split:
    case AIE2P::VST_3D_dmx_sts_x_split:
    case AIE2P::VLD_3D_w_pseudo_split:
    case AIE2P::VLD_3D_x_pseudo_split:
    case AIE2P::VLD_3D_128_pseudo_split:
    case AIE2P::PADDA_3D_split:
    case AIE2P::PADDB_3D_split:
    case AIE2P::PADDS_3D_split:
    case AIE2P::PADD_3D_pseudo_split:
    case AIE2P::VLDA_3D_UPS_2x_dmw_lda_ups_w2b_upsSign1_split:
    case AIE2P::VLDA_3D_UPS_2x_dmx_lda_ups_x2c_upsSign0_split:
    case AIE2P::VLDA_3D_UPS_2x_dmx_lda_ups_x2c_upsSign1_split:
    case AIE2P::VLDA_3D_UPS_4x_dmw_lda_ups_w2c_upsSign0_split:
    case AIE2P::VLDA_3D_UPS_4x_dmw_lda_ups_w2c_upsSign1_split:
    case AIE2P::VLDA_3D_UPS_4x_dmx_lda_ups_x2d_upsSign0_split:
    case AIE2P::VLDA_3D_UPS_4x_dmx_lda_ups_x2d_upsSign1_split:
    case AIE2P::VST_3D_SRS_2x_dm_sts_srs_cm_srsSign0_split:
    case AIE2P::VST_3D_SRS_2x_dm_sts_srs_cm_srsSign1_split:
    case AIE2P::VST_3D_SRS_2x_dmw_sts_srs_bm_srsSign0_split:
    case AIE2P::VST_3D_SRS_2x_dmw_sts_srs_bm_srsSign1_split:
    case AIE2P::VST_3D_SRS_4x_dm_sts_srs_cm_srsSign0_split:
    case AIE2P::VST_3D_SRS_4x_dm_sts_srs_cm_srsSign1_split:
    case AIE2P::VST_3D_SRS_4x_dmx_sts_srs_dm_srsSign0_split:
    case AIE2P::VST_3D_SRS_4x_dmx_sts_srs_dm_srsSign1_split:
    case AIE2P::VST_FLUSH_512_3D_split:
    case AIE2P::VST_FLUSH_512_CONV_3D_split:
    case AIE2P::VLDA_POP_512_3D_split:
    case AIE2P::VLDA_POP_544_3D_split:
    case AIE2P::VLDA_POP_576_3D_split:
    case AIE2P::VLDA_POP_640_3D_split:
    case AIE2P::VLDA_POP_704_3D_split:
    case AIE2P::VLDB_POP_512_3D_split:
    case AIE2P::VLDB_POP_544_3D_split:
    case AIE2P::VLDB_POP_576_3D_split:
    case AIE2P::VLDB_POP_640_3D_split:
    case AIE2P::VLDB_POP_704_3D_split:
    case AIE2P::VLD_POP_512_3D_pseudo_split:
    case AIE2P::VLD_POP_544_3D_pseudo_split:
    case AIE2P::VLD_POP_576_3D_pseudo_split:
    case AIE2P::VLD_POP_640_3D_pseudo_split:
    case AIE2P::VLD_POP_704_3D_pseudo_split:
      return true;
    default:
      return false;
    }
  };

  // Stop at the first non-debug user that matches; an empty use list yields
  // false.
  for (const MachineInstr &UseMI : MRI.use_nodbg_instructions(LI->reg())) {
    if (Is3DOpcode(UseMI.getOpcode()))
      return true;
  }
  return false;
}
236+
69237
static bool onlyAllocate3DRegisters(const TargetRegisterInfo &TRI,
70238
const TargetRegisterClass &RC) {
71239
return AIE2P::eDSRegClass.hasSubClassEq(&RC);
@@ -97,7 +265,8 @@ bool AIE2PPassConfig::addRegAssignAndRewriteOptimized() {
97265
if (AllocateMRegsFirst)
98266
addPass(createGreedyRegisterAllocator(onlyAllocateMRegisters));
99267
if (EnableStagedRA) {
100-
addPass(createGreedyRegisterAllocator(onlyAllocate3DRegisters));
268+
addPass(createGreedyRegisterAllocator(onlyAllocate3DRegisters,
269+
onlyAllocateLIwith3DInstruction));
101270
addPass(createAIESuperRegRewriter());
102271
addPass(createGreedyRegisterAllocator(onlyAllocate3D2DRegisters));
103272
addPass(createAIESuperRegRewriter());

0 commit comments

Comments
 (0)