|
14 | 14 |
|
15 | 15 | #include "AIE2PTargetMachine.h"
|
16 | 16 | #include "AIE2PTargetTransformInfo.h"
|
| 17 | +#include "llvm/CodeGen/LiveInterval.h" |
17 | 18 | #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
|
18 | 19 |
|
19 | 20 | using namespace llvm;
|
@@ -66,6 +67,173 @@ void AIE2PPassConfig::addPreRegBankSelect() {
|
66 | 67 | }
|
67 | 68 | }
|
68 | 69 |
|
| 70 | +static bool onlyAllocateLIwith3DInstruction(MachineRegisterInfo &MRI, |
| 71 | + const TargetInstrInfo &TII, |
| 72 | + const LiveInterval *LI) { |
| 73 | + const Register Reg = LI->reg(); |
| 74 | + return std::any_of( |
| 75 | + MRI.use_nodbg_instructions(Reg).begin(), |
| 76 | + MRI.use_nodbg_instructions(Reg).end(), [&](const MachineInstr &MI) { |
| 77 | + switch (MI.getOpcode()) { |
| 78 | + case AIE2P::LDA_3D_dms_lda: |
| 79 | + case AIE2P::LDA_3D_dmv_lda_q: |
| 80 | + case AIE2P::LDA_3D_s16: |
| 81 | + case AIE2P::LDA_3D_s8: |
| 82 | + case AIE2P::LDA_3D_u16: |
| 83 | + case AIE2P::LDA_3D_u8: |
| 84 | + case AIE2P::LDA_TM_3D: |
| 85 | + case AIE2P::ST_3D_dms_sts: |
| 86 | + case AIE2P::ST_3D_dmv_sts_q: |
| 87 | + case AIE2P::ST_3D_s16: |
| 88 | + case AIE2P::ST_3D_s8: |
| 89 | + case AIE2P::ST_TM_3D: |
| 90 | + case AIE2P::VLDA_3D_128: |
| 91 | + case AIE2P::VLDA_3D_CONV_fp32_bf16_dmw_lda_ups_bf: |
| 92 | + case AIE2P::VLDA_3D_CONV_fp32_bf16_dmx_lda_ups_bf: |
| 93 | + case AIE2P::VLDA_3D_dmw_lda_w: |
| 94 | + case AIE2P::VLDA_3D_dmx_lda_bm: |
| 95 | + case AIE2P::VLDA_3D_dmx_lda_fifohl: |
| 96 | + case AIE2P::VLDA_3D_dmx_lda_x: |
| 97 | + case AIE2P::VLDB_3D_128: |
| 98 | + case AIE2P::VLDB_3D_UNPACK_dmw_ldb_unpack_unpackSign0: |
| 99 | + case AIE2P::VLDB_3D_UNPACK_dmw_ldb_unpack_unpackSign1: |
| 100 | + case AIE2P::VLDB_3D_UNPACK_dmx_ldb_unpack_unpackSign0: |
| 101 | + case AIE2P::VLDB_3D_UNPACK_dmx_ldb_unpack_unpackSign1: |
| 102 | + case AIE2P::VLDB_3D_dmw_ldb: |
| 103 | + case AIE2P::VLDB_3D_dmx_ldb_x: |
| 104 | + case AIE2P::VST_3D_128: |
| 105 | + case AIE2P::VST_3D_CONV_bf16_fp32_dmw_sts_srs_bf: |
| 106 | + case AIE2P::VST_3D_CONV_bf16_fp32_dmx_sts_srs_bf: |
| 107 | + case AIE2P::VST_3D_PACK_dmw_sts_pack_packSign0: |
| 108 | + case AIE2P::VST_3D_PACK_dmw_sts_pack_packSign1: |
| 109 | + case AIE2P::VST_3D_PACK_dmx_sts_pack_packSign0: |
| 110 | + case AIE2P::VST_3D_PACK_dmx_sts_pack_packSign1: |
| 111 | + case AIE2P::VST_3D_dmw_sts_w: |
| 112 | + case AIE2P::VST_3D_dmx_sts_bm: |
| 113 | + case AIE2P::VST_3D_dmx_sts_fifohl: |
| 114 | + case AIE2P::VST_3D_dmx_sts_x: |
| 115 | + case AIE2P::VLD_3D_w_pseudo: |
| 116 | + case AIE2P::VLD_3D_x_pseudo: |
| 117 | + case AIE2P::VLD_3D_128_pseudo: |
| 118 | + case AIE2P::PADDA_3D: |
| 119 | + case AIE2P::PADDB_3D: |
| 120 | + case AIE2P::PADDS_3D: |
| 121 | + case AIE2P::PADD_3D_pseudo: |
| 122 | + case AIE2P::VLDA_3D_UPS_2x_dmw_lda_ups_w2b_upsSign1: |
| 123 | + case AIE2P::VLDA_3D_UPS_2x_dmx_lda_ups_x2c_upsSign0: |
| 124 | + case AIE2P::VLDA_3D_UPS_2x_dmx_lda_ups_x2c_upsSign1: |
| 125 | + case AIE2P::VLDA_3D_UPS_4x_dmw_lda_ups_w2c_upsSign0: |
| 126 | + case AIE2P::VLDA_3D_UPS_4x_dmw_lda_ups_w2c_upsSign1: |
| 127 | + case AIE2P::VLDA_3D_UPS_4x_dmx_lda_ups_x2d_upsSign0: |
| 128 | + case AIE2P::VLDA_3D_UPS_4x_dmx_lda_ups_x2d_upsSign1: |
| 129 | + case AIE2P::VST_3D_SRS_2x_dm_sts_srs_cm_srsSign0: |
| 130 | + case AIE2P::VST_3D_SRS_2x_dm_sts_srs_cm_srsSign1: |
| 131 | + case AIE2P::VST_3D_SRS_2x_dmw_sts_srs_bm_srsSign0: |
| 132 | + case AIE2P::VST_3D_SRS_2x_dmw_sts_srs_bm_srsSign1: |
| 133 | + case AIE2P::VST_3D_SRS_4x_dm_sts_srs_cm_srsSign0: |
| 134 | + case AIE2P::VST_3D_SRS_4x_dm_sts_srs_cm_srsSign1: |
| 135 | + case AIE2P::VST_3D_SRS_4x_dmx_sts_srs_dm_srsSign0: |
| 136 | + case AIE2P::VST_3D_SRS_4x_dmx_sts_srs_dm_srsSign1: |
| 137 | + case AIE2P::VST_FLUSH_512_3D: |
| 138 | + case AIE2P::VST_FLUSH_512_CONV_3D: |
| 139 | + case AIE2P::VLDA_POP_512_3D: |
| 140 | + case AIE2P::VLDA_POP_544_3D: |
| 141 | + case AIE2P::VLDA_POP_576_3D: |
| 142 | + case AIE2P::VLDA_POP_640_3D: |
| 143 | + case AIE2P::VLDA_POP_704_3D: |
| 144 | + case AIE2P::VLDB_POP_512_3D: |
| 145 | + case AIE2P::VLDB_POP_544_3D: |
| 146 | + case AIE2P::VLDB_POP_576_3D: |
| 147 | + case AIE2P::VLDB_POP_640_3D: |
| 148 | + case AIE2P::VLDB_POP_704_3D: |
| 149 | + case AIE2P::VLD_POP_512_3D_pseudo: |
| 150 | + case AIE2P::VLD_POP_544_3D_pseudo: |
| 151 | + case AIE2P::VLD_POP_576_3D_pseudo: |
| 152 | + case AIE2P::VLD_POP_640_3D_pseudo: |
| 153 | + case AIE2P::VLD_POP_704_3D_pseudo: |
| 154 | + case AIE2P::LDA_3D_dms_lda_split: |
| 155 | + case AIE2P::LDA_3D_dmv_lda_q_split: |
| 156 | + case AIE2P::LDA_3D_s16_split: |
| 157 | + case AIE2P::LDA_3D_s8_split: |
| 158 | + case AIE2P::LDA_3D_u16_split: |
| 159 | + case AIE2P::LDA_3D_u8_split: |
| 160 | + case AIE2P::LDA_TM_3D_split: |
| 161 | + case AIE2P::ST_3D_dms_sts_split: |
| 162 | + case AIE2P::ST_3D_dmv_sts_q_split: |
| 163 | + case AIE2P::ST_3D_s16_split: |
| 164 | + case AIE2P::ST_3D_s8_split: |
| 165 | + case AIE2P::ST_TM_3D_split: |
| 166 | + case AIE2P::VLDA_3D_128_split: |
| 167 | + case AIE2P::VLDA_3D_CONV_fp32_bf16_dmw_lda_ups_bf_split: |
| 168 | + case AIE2P::VLDA_3D_CONV_fp32_bf16_dmx_lda_ups_bf_split: |
| 169 | + case AIE2P::VLDA_3D_dmw_lda_w_split: |
| 170 | + case AIE2P::VLDA_3D_dmx_lda_bm_split: |
| 171 | + case AIE2P::VLDA_3D_dmx_lda_fifohl_split: |
| 172 | + case AIE2P::VLDA_3D_dmx_lda_x_split: |
| 173 | + case AIE2P::VLDB_3D_128_split: |
| 174 | + case AIE2P::VLDB_3D_UNPACK_dmw_ldb_unpack_unpackSign0_split: |
| 175 | + case AIE2P::VLDB_3D_UNPACK_dmw_ldb_unpack_unpackSign1_split: |
| 176 | + case AIE2P::VLDB_3D_UNPACK_dmx_ldb_unpack_unpackSign0_split: |
| 177 | + case AIE2P::VLDB_3D_UNPACK_dmx_ldb_unpack_unpackSign1_split: |
| 178 | + case AIE2P::VLDB_3D_dmw_ldb_split: |
| 179 | + case AIE2P::VLDB_3D_dmx_ldb_x_split: |
| 180 | + case AIE2P::VST_3D_128_split: |
| 181 | + case AIE2P::VST_3D_CONV_bf16_fp32_dmw_sts_srs_bf_split: |
| 182 | + case AIE2P::VST_3D_CONV_bf16_fp32_dmx_sts_srs_bf_split: |
| 183 | + case AIE2P::VST_3D_PACK_dmw_sts_pack_packSign0_split: |
| 184 | + case AIE2P::VST_3D_PACK_dmw_sts_pack_packSign1_split: |
| 185 | + case AIE2P::VST_3D_PACK_dmx_sts_pack_packSign0_split: |
| 186 | + case AIE2P::VST_3D_PACK_dmx_sts_pack_packSign1_split: |
| 187 | + case AIE2P::VST_3D_dmw_sts_w_split: |
| 188 | + case AIE2P::VST_3D_dmx_sts_bm_split: |
| 189 | + case AIE2P::VST_3D_dmx_sts_fifohl_split: |
| 190 | + case AIE2P::VST_3D_dmx_sts_x_split: |
| 191 | + case AIE2P::VLD_3D_w_pseudo_split: |
| 192 | + case AIE2P::VLD_3D_x_pseudo_split: |
| 193 | + case AIE2P::VLD_3D_128_pseudo_split: |
| 194 | + case AIE2P::PADDA_3D_split: |
| 195 | + case AIE2P::PADDB_3D_split: |
| 196 | + case AIE2P::PADDS_3D_split: |
| 197 | + case AIE2P::PADD_3D_pseudo_split: |
| 198 | + case AIE2P::VLDA_3D_UPS_2x_dmw_lda_ups_w2b_upsSign1_split: |
| 199 | + case AIE2P::VLDA_3D_UPS_2x_dmx_lda_ups_x2c_upsSign0_split: |
| 200 | + case AIE2P::VLDA_3D_UPS_2x_dmx_lda_ups_x2c_upsSign1_split: |
| 201 | + case AIE2P::VLDA_3D_UPS_4x_dmw_lda_ups_w2c_upsSign0_split: |
| 202 | + case AIE2P::VLDA_3D_UPS_4x_dmw_lda_ups_w2c_upsSign1_split: |
| 203 | + case AIE2P::VLDA_3D_UPS_4x_dmx_lda_ups_x2d_upsSign0_split: |
| 204 | + case AIE2P::VLDA_3D_UPS_4x_dmx_lda_ups_x2d_upsSign1_split: |
| 205 | + case AIE2P::VST_3D_SRS_2x_dm_sts_srs_cm_srsSign0_split: |
| 206 | + case AIE2P::VST_3D_SRS_2x_dm_sts_srs_cm_srsSign1_split: |
| 207 | + case AIE2P::VST_3D_SRS_2x_dmw_sts_srs_bm_srsSign0_split: |
| 208 | + case AIE2P::VST_3D_SRS_2x_dmw_sts_srs_bm_srsSign1_split: |
| 209 | + case AIE2P::VST_3D_SRS_4x_dm_sts_srs_cm_srsSign0_split: |
| 210 | + case AIE2P::VST_3D_SRS_4x_dm_sts_srs_cm_srsSign1_split: |
| 211 | + case AIE2P::VST_3D_SRS_4x_dmx_sts_srs_dm_srsSign0_split: |
| 212 | + case AIE2P::VST_3D_SRS_4x_dmx_sts_srs_dm_srsSign1_split: |
| 213 | + case AIE2P::VST_FLUSH_512_3D_split: |
| 214 | + case AIE2P::VST_FLUSH_512_CONV_3D_split: |
| 215 | + case AIE2P::VLDA_POP_512_3D_split: |
| 216 | + case AIE2P::VLDA_POP_544_3D_split: |
| 217 | + case AIE2P::VLDA_POP_576_3D_split: |
| 218 | + case AIE2P::VLDA_POP_640_3D_split: |
| 219 | + case AIE2P::VLDA_POP_704_3D_split: |
| 220 | + case AIE2P::VLDB_POP_512_3D_split: |
| 221 | + case AIE2P::VLDB_POP_544_3D_split: |
| 222 | + case AIE2P::VLDB_POP_576_3D_split: |
| 223 | + case AIE2P::VLDB_POP_640_3D_split: |
| 224 | + case AIE2P::VLDB_POP_704_3D_split: |
| 225 | + case AIE2P::VLD_POP_512_3D_pseudo_split: |
| 226 | + case AIE2P::VLD_POP_544_3D_pseudo_split: |
| 227 | + case AIE2P::VLD_POP_576_3D_pseudo_split: |
| 228 | + case AIE2P::VLD_POP_640_3D_pseudo_split: |
| 229 | + case AIE2P::VLD_POP_704_3D_pseudo_split: |
| 230 | + return true; |
| 231 | + default: |
| 232 | + return false; |
| 233 | + } |
| 234 | + }); |
| 235 | +} |
| 236 | + |
69 | 237 | static bool onlyAllocate3DRegisters(const TargetRegisterInfo &TRI,
|
70 | 238 | const TargetRegisterClass &RC) {
|
71 | 239 | return AIE2P::eDSRegClass.hasSubClassEq(&RC);
|
@@ -97,7 +265,8 @@ bool AIE2PPassConfig::addRegAssignAndRewriteOptimized() {
|
97 | 265 | if (AllocateMRegsFirst)
|
98 | 266 | addPass(createGreedyRegisterAllocator(onlyAllocateMRegisters));
|
99 | 267 | if (EnableStagedRA) {
|
100 |
| - addPass(createGreedyRegisterAllocator(onlyAllocate3DRegisters)); |
| 268 | + addPass(createGreedyRegisterAllocator(onlyAllocate3DRegisters, |
| 269 | + onlyAllocateLIwith3DInstruction)); |
101 | 270 | addPass(createAIESuperRegRewriter());
|
102 | 271 | addPass(createGreedyRegisterAllocator(onlyAllocate3D2DRegisters));
|
103 | 272 | addPass(createAIESuperRegRewriter());
|
|
0 commit comments