Skip to content

Commit c338200

Browse files
[AIEX] Enhance combiner to support post-inc-load requiring moving multiple instructions
1 parent 0626e02 commit c338200

File tree

6 files changed

+620
-283
lines changed

6 files changed

+620
-283
lines changed

llvm/lib/Target/AIE/AIECombinerHelper.cpp

Lines changed: 112 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,11 @@ static cl::opt<bool> EnableOffsetCombine(
4242
static cl::opt<bool> EnablePostIncCombine(
4343
"aie-postinc-combine", cl::Hidden, cl::init(true),
4444
cl::desc("Enable combines of load and stores with post increments"));
45+
// Hidden command-line switch, enabled by default. findPostIncMatch consults it
// before trying canDelayMemOpWithExtraInstr, i.e. before forming a
// post-increment combine that needs extra instructions moved out of the way.
static cl::opt<bool> EnablePostIncCombineMultipleInst(
46+
"aie-postinc-combine-move-multiple-instr", cl::Hidden, cl::init(true),
47+
cl::desc(
48+
"Enable combines of load and stores with post increments requiring "
49+
"multiple instructions to be moved"));
4550
static cl::opt<bool> EnableGreedyAddressCombine(
4651
"aie-greedy-address-combines", cl::Hidden, cl::init(false),
4752
cl::desc("Enable greedy combines without checking for later uses of the "
@@ -213,8 +218,11 @@ MachineInstr *findPreIncMatch(MachineInstr &MemI, MachineRegisterInfo &MRI,
213218
Register Addr = MemI.getOperand(1).getReg();
214219
MachineInstr *AddrDef = getDefIgnoringCopies(Addr, MRI);
215220
if (AddrDef->getOpcode() == TargetOpcode::G_PTR_ADD) {
216-
MatchData = {AddrDef, TII.getOffsetMemOpcode(MemI.getOpcode()), &MemI,
217-
/*ExtraInstrsToMove=*/{},
221+
MatchData = {AddrDef,
222+
TII.getOffsetMemOpcode(MemI.getOpcode()),
223+
&MemI,
224+
/*ExtraInstrsToMoveBefore=*/{},
225+
/*ExtraInstrsToMoveAfter=*/{},
218226
/*RemoveInstr=*/false};
219227
return AddrDef;
220228
}
@@ -277,6 +285,67 @@ bool llvm::canDelayMemOp(MachineInstr &MemI, MachineInstr &Dest,
277285
return none_of(InstrRange, UnsafeToMovePast);
278286
}
279287

288+
/// Collects all dependent instructions between SrcMI and DestMI.
289+
/// This function iterates over all definitions in SrcMI and collects all
290+
/// instructions that use these definitions and are within the range from
291+
/// SrcMI to DestMI. The collected instructions are stored in the
292+
/// DependentInstrs vector. It is expected that the SrcMI & DestMI are in the BB
293+
///
294+
/// \param SrcMI The source machine instruction to start the range.
295+
/// \param DestMI The destination machine instruction to end the range.
296+
/// \param MRI The machine register info used to query register uses.
297+
/// \param DependentInstrs A set to store the collected dependent
298+
/// instructions in dominating order.
299+
void DependentInstrInRange(MachineInstr &SrcMI, MachineInstr &DestMI,
300+
MachineRegisterInfo &MRI,
301+
DependentInstrSet &DependentInstrs) {
302+
if (SrcMI.getParent() != DestMI.getParent())
303+
return;
304+
const auto SrcMIIter = SrcMI.getIterator();
305+
const auto DestMIIter = DestMI.getIterator();
306+
const auto SrcToDestDistance = std::distance(SrcMIIter, DestMIIter);
307+
for (auto &Defs : SrcMI.all_defs()) {
308+
const Register Reg = Defs.getReg();
309+
for (auto &Use : MRI.use_nodbg_instructions(Reg)) {
310+
if (Use.getParent() != DestMI.getParent())
311+
continue;
312+
auto UseIter = Use.getIterator();
313+
if (std::distance(SrcMIIter, UseIter) > SrcToDestDistance)
314+
break;
315+
DependentInstrs.insert(&Use);
316+
DependentInstrInRange(Use, DestMI, MRI, DependentInstrs);
317+
}
318+
}
319+
}
320+
321+
/// \return true and \a DependentInstrs set to move below Dest, if \a MemI
322+
/// can be moved just before \a Dest in order to allow post-increment combining
323+
bool llvm::canDelayMemOpWithExtraInstr(MachineInstr &MemI, MachineInstr &Dest,
324+
MachineRegisterInfo &MRI,
325+
DependentInstrSet &DependentInstrs) {
326+
if (MemI.getParent() != Dest.getParent())
327+
return false;
328+
DependentInstrInRange(MemI, Dest, MRI, DependentInstrs);
329+
330+
if (DependentInstrs.empty())
331+
return true;
332+
333+
// Check if we can move the dependent instructions after Dest in order to
334+
// enable post-increment combining.
335+
if (!canDelayMemOp(**DependentInstrs.begin(), Dest, MRI))
336+
return false;
337+
338+
// Check if we can move the rest of the dependent instructions after the
339+
// previous one, this inherently checks if we can move the dependent
340+
// instruction below the Dest instruction
341+
for (auto Iter = DependentInstrs.begin(), NextIter = std::next(Iter);
342+
NextIter != DependentInstrs.end(); ++Iter, ++NextIter)
343+
if (!canDelayMemOp(**NextIter, **Iter, MRI))
344+
return false;
345+
346+
return true;
347+
}
348+
280349
/// \return true if \a Dest can be moved just after \a MemI in order to allow
281350
/// combining
282351
bool llvm::canAdvanceOp(MachineInstr &MemI, MachineInstr &Dest,
@@ -497,25 +566,43 @@ MachineInstr *findPostIncMatch(MachineInstr &MemI, MachineRegisterInfo &MRI,
497566
})) {
498567
continue;
499568
}
500-
MatchData = {&PtrInc, *CombinedOpcode, &MemI,
501-
/*ExtraInstrsToMove=*/{},
569+
MatchData = {&PtrInc,
570+
*CombinedOpcode,
571+
&MemI,
572+
/*ExtraInstrsToMoveBefore=*/{},
573+
/*ExtraInstrsToMoveAfter=*/{},
502574
/*RemoveInstr=*/true};
503575
// The offset of the PtrInc might be defined after MemI, in this case we
504576
// want to verify if it would be possible to insert the combined
505577
// instruction at the PtrInc instead of the location of MemI. Instruction
506578
// with side effects are also blocking: Loads, stores, calls, instructions
507579
// with side effects cannot be moved.
508-
// TODO: try move other instructions that block us from combining
509580
} else if (canDelayMemOp(MemI, PtrAddInsertLoc, MRI)) {
510581
// If Definition of the offset is a G_CONSTANT we have to move that
511582
// instruction up
512583
MatchData = {
513584
&PtrInc, *CombinedOpcode, &PtrAddInsertLoc,
514-
/*ExtraInstrsToMove=*/
585+
/*ExtraInstrsToMoveBefore=*/
515586
findConstantOffsetsToMove(PtrInc, PtrAddInsertLoc, MRI, Helper),
587+
/*ExtraInstrsToMoveAfter=*/{},
516588
/*RemoveInstr=*/true};
589+
// When the offset of the PtrInc might be defined after MemI, we may want
590+
// to move some instruction below the PtrInc to allow the combine.
591+
} else if (DependentInstrSet DependentInstrToMoveBelow(Helper);
592+
EnablePostIncCombineMultipleInst &&
593+
canDelayMemOpWithExtraInstr(MemI, PtrInc, MRI,
594+
DependentInstrToMoveBelow)) {
595+
std::vector<MachineInstr *> ExtraInstrsToMoveAfter(
596+
DependentInstrToMoveBelow.begin(), DependentInstrToMoveBelow.end());
597+
MatchData = {&PtrInc,
598+
*CombinedOpcode,
599+
&PtrInc,
600+
/*ExtraInstrsToMoveBefore=*/{},
601+
/*ExtraInstrsToMoveAfter=*/ExtraInstrsToMoveAfter,
602+
/*RemoveInstr=*/true};
603+
517604
} else {
518-
LLVM_DEBUG(dbgs() << " Ignoring candidate " << PtrInc);
605+
LLVM_DEBUG(dbgs() << " Ignoring candidate for PostInc " << PtrInc);
519606
continue;
520607
}
521608
// Only combine postIncs if we know that the original pointer is not used
@@ -552,9 +639,24 @@ void llvm::applyLdStInc(MachineInstr &MI, MachineRegisterInfo &MRI,
552639
// Debug Loc: Debug Loc of LOAD STORE: MI
553640
B.setDebugLoc(MI.getDebugLoc());
554641
auto NewInstr = B.buildInstr(MatchData.CombinedInstrOpcode);
555-
for (auto *Instr : MatchData.ExtraInstrsToMove) {
556-
Instr->moveBefore(NewInstr);
557-
}
642+
643+
// Move the instructions before the NewInstr
644+
auto MoveInstrsBefore = [&NewInstr](const auto &InstrsToMoveBefore) {
645+
std::for_each(InstrsToMoveBefore.begin(), InstrsToMoveBefore.end(),
646+
[&NewInstr](auto *MI) { MI->moveBefore(NewInstr); });
647+
};
648+
MoveInstrsBefore(MatchData.ExtraInstrsToMoveBefore);
649+
650+
// Move the instructions after the NewInstr
651+
auto MoveInstrsAfter = [&NewInstr](const auto &InstrsToMoveAfter) {
652+
const auto &NewInstrIter = NewInstr.getInstr()->getIterator();
653+
std::for_each(InstrsToMoveAfter.begin(), InstrsToMoveAfter.end(),
654+
[&NewInstrIter](auto *MI) {
655+
MI->moveBefore(&*std::next(NewInstrIter));
656+
});
657+
};
658+
MoveInstrsAfter(MatchData.ExtraInstrsToMoveAfter);
659+
558660
if (MI.mayLoad())
559661
NewInstr.addDef(MI.getOperand(0).getReg() /* Loaded value */);
560662
if (MatchData.RemoveInstr)

llvm/lib/Target/AIE/AIECombinerHelper.h

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,9 @@ struct AIELoadStoreCombineMatchData {
2828
/// before this Instruction
2929
MachineInstr *CombinedInsertPoint;
3030
/// Additional instructions to be moved just before Instr
31-
std::vector<MachineInstr *> ExtraInstrsToMove;
31+
std::vector<MachineInstr *> ExtraInstrsToMoveBefore;
32+
/// Additional instructions to be moved just after Instr
33+
std::vector<MachineInstr *> ExtraInstrsToMoveAfter;
3234
/// Should Instr (the PtrAdd) be removed after the combine was applied
3335
bool RemoveInstr;
3436
};
@@ -39,6 +41,16 @@ struct ShuffleMaskValidity {
3941
SmallVector<unsigned, 4> MaskExceptions;
4042
};
4143

44+
// Custom comparator for std::set based on dominance relation
45+
struct DominanceComparator {
46+
CombinerHelper &Helper;
47+
DominanceComparator(CombinerHelper &Helper) : Helper(Helper) {}
48+
bool operator()(MachineInstr *A, MachineInstr *B) const {
49+
return Helper.dominates(*B, *A);
50+
}
51+
};
52+
using DependentInstrSet = std::set<MachineInstr *, DominanceComparator>;
53+
4254
struct FrequentIndexResult {
4355
unsigned FrequentIdx;
4456
unsigned NonMatchingCount;
@@ -132,6 +144,12 @@ bool matchShuffleToExtractBroadcast(MachineInstr &MI, MachineRegisterInfo &MRI,
132144
/// post-increment combining
133145
bool canDelayMemOp(MachineInstr &MemI, MachineInstr &Dest,
134146
MachineRegisterInfo &MRI);
147+
/// \return true if \a MemI can be moved just before \a Dest in order to allow
148+
/// post-increment combining, and return the additional instructions in \a
149+
/// DependentInstrs to be moved just after \a Dest
150+
bool canDelayMemOpWithExtraInstr(MachineInstr &MemI, MachineInstr &Dest,
151+
MachineRegisterInfo &MRI,
152+
DependentInstrSet &DependentInstrs);
135153
/// \return true if \a Dest can be moved just after \a MemI in order to allow
136154
/// combining
137155
bool canAdvanceOp(MachineInstr &MemI, MachineInstr &Dest,

llvm/test/CodeGen/AIE/GlobalISel/combine-loads-stores.mir

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -212,20 +212,19 @@ body: |
212212
...
213213

214214
---
215-
name: load_not_to_postinc_cannot_move_offset
215+
name: load_to_postinc_move_inst_below_PTR_ADD
216216
body: |
217217
bb.0:
218218
liveins: $p0
219-
; CHECK-LABEL: name: load_not_to_postinc_cannot_move_offset
219+
; CHECK-LABEL: name: load_to_postinc_move_inst_below_PTR_ADD
220220
; CHECK: liveins: $p0
221221
; CHECK-NEXT: {{ $}}
222222
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0
223-
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32))
224-
; CHECK-NEXT: $r0 = COPY [[LOAD]](s32)
225223
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
226224
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s20) = G_TRUNC [[C]](s32)
227-
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[TRUNC]](s20)
228-
; CHECK-NEXT: $p0 = COPY [[PTR_ADD]](p0)
225+
; CHECK-NEXT: [[AIE_POSTINC_LOAD:%[0-9]+]]:_(s32), [[AIE_POSTINC_LOAD1:%[0-9]+]]:_(p0) = G_AIE_POSTINC_LOAD [[COPY]], [[TRUNC]](s20) :: (load (s32))
226+
; CHECK-NEXT: $r0 = COPY [[AIE_POSTINC_LOAD]](s32)
227+
; CHECK-NEXT: $p0 = COPY [[AIE_POSTINC_LOAD1]](p0)
229228
%0:_(p0) = COPY $p0
230229
%3:_(s32) = G_LOAD %0 :: (load (s32))
231230
$r0 = COPY %3
@@ -421,22 +420,21 @@ body: |
421420
...
422421

423422
---
424-
name: store_not_to_postinc_cannot_move_offset
423+
name: store_to_postinc_move_store_near_to_ptr_add
425424
body: |
426425
bb.0:
427426
liveins: $p0, $r0
428-
; CHECK-LABEL: name: store_not_to_postinc_cannot_move_offset
427+
; CHECK-LABEL: name: store_to_postinc_move_store_near_to_ptr_add
429428
; CHECK: liveins: $p0, $r0
430429
; CHECK-NEXT: {{ $}}
431430
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0
432431
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $r0
433-
; CHECK-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p0) :: (store (s32))
434432
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY $p1
435433
; CHECK-NEXT: G_STORE [[COPY1]](s32), [[COPY2]](p0) :: (store (s32))
436434
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
437435
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s20) = G_TRUNC [[C]](s32)
438-
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[TRUNC]](s20)
439-
; CHECK-NEXT: $p0 = COPY [[PTR_ADD]](p0)
436+
; CHECK-NEXT: [[AIE_POSTINC_STORE:%[0-9]+]]:_(p0) = G_AIE_POSTINC_STORE [[COPY1]](s32), [[COPY]], [[TRUNC]](s20) :: (store (s32))
437+
; CHECK-NEXT: $p0 = COPY [[AIE_POSTINC_STORE]](p0)
440438
%0:_(p0) = COPY $p0
441439
%1:_(s32) = COPY $r0
442440
G_STORE %1, %0 :: (store (s32))

0 commit comments

Comments
 (0)