@@ -42,6 +42,11 @@ static cl::opt<bool> EnableOffsetCombine(
42
42
/// Hidden command-line switch, enabled by default, that gates the combines of
/// loads and stores with post-increments.
/// The option name must not carry leading whitespace, otherwise the flag
/// cannot be spelled as `-aie-postinc-combine` on the command line.
static cl::opt<bool> EnablePostIncCombine(
    "aie-postinc-combine", cl::Hidden, cl::init(true),
    cl::desc("Enable combines of load and stores with post increments"));
45
+ static cl::opt<bool > EnablePostIncCombineMultipleInst (
46
+ " aie-postinc-combine-move-multiple-instr" , cl::Hidden, cl::init(true ),
47
+ cl::desc(
48
+ " Enable combines of load and stores with post increments requiring "
49
+ " multiple instructions to be moved" ));
45
50
static cl::opt<bool > EnableGreedyAddressCombine (
46
51
" aie-greedy-address-combines" , cl::Hidden, cl::init(false ),
47
52
cl::desc(" Enable greedy combines without checking for later uses of the "
@@ -213,8 +218,11 @@ MachineInstr *findPreIncMatch(MachineInstr &MemI, MachineRegisterInfo &MRI,
213
218
Register Addr = MemI.getOperand (1 ).getReg ();
214
219
MachineInstr *AddrDef = getDefIgnoringCopies (Addr, MRI);
215
220
if (AddrDef->getOpcode () == TargetOpcode::G_PTR_ADD) {
216
- MatchData = {AddrDef, TII.getOffsetMemOpcode (MemI.getOpcode ()), &MemI,
217
- /* ExtraInstrsToMove=*/ {},
221
+ MatchData = {AddrDef,
222
+ TII.getOffsetMemOpcode (MemI.getOpcode ()),
223
+ &MemI,
224
+ /* ExtraInstrsToMoveBefore=*/ {},
225
+ /* ExtraInstrsToMoveAfter=*/ {},
218
226
/* RemoveInstr=*/ false };
219
227
return AddrDef;
220
228
}
@@ -277,6 +285,67 @@ bool llvm::canDelayMemOp(MachineInstr &MemI, MachineInstr &Dest,
277
285
return none_of (InstrRange, UnsafeToMovePast);
278
286
}
279
287
288
+ // / Collects all dependent instructions between SrcMI and DestMI.
289
+ // / This function iterates over all definitions in SrcMI and collects all
290
+ // / instructions that use these definitions and are within the range from
291
+ // / SrcMI to DestMI. The collected instructions are stored in the
292
+ // / DependentInstrs vector. It is expected that the SrcMI & DestMI are in the BB
293
+ // /
294
+ // / \param SrcMI The source machine instruction to start the range.
295
+ // / \param DestMI The destination machine instruction to end the range.
296
+ // / \param MRI The machine register info used to query register uses.
297
+ // / \param DependentInstrs A set to store the collected dependent
298
+ // / instructions in dominating order.
299
+ void DependentInstrInRange (MachineInstr &SrcMI, MachineInstr &DestMI,
300
+ MachineRegisterInfo &MRI,
301
+ DependentInstrSet &DependentInstrs) {
302
+ if (SrcMI.getParent () != DestMI.getParent ())
303
+ return ;
304
+ const auto SrcMIIter = SrcMI.getIterator ();
305
+ const auto DestMIIter = DestMI.getIterator ();
306
+ const auto SrcToDestDistance = std::distance (SrcMIIter, DestMIIter);
307
+ for (auto &Defs : SrcMI.all_defs ()) {
308
+ const Register Reg = Defs.getReg ();
309
+ for (auto &Use : MRI.use_nodbg_instructions (Reg)) {
310
+ if (Use.getParent () != DestMI.getParent ())
311
+ continue ;
312
+ auto UseIter = Use.getIterator ();
313
+ if (std::distance (SrcMIIter, UseIter) > SrcToDestDistance)
314
+ break ;
315
+ DependentInstrs.insert (&Use);
316
+ DependentInstrInRange (Use, DestMI, MRI, DependentInstrs);
317
+ }
318
+ }
319
+ }
320
+
321
+ // / \return true and \a DependentInstrs set to move below Dest, if \a MemI
322
+ // / can be moved just before \a Dest in order to allow post-increment combining
323
+ bool llvm::canDelayMemOpWithExtraInstr (MachineInstr &MemI, MachineInstr &Dest,
324
+ MachineRegisterInfo &MRI,
325
+ DependentInstrSet &DependentInstrs) {
326
+ if (MemI.getParent () != Dest.getParent ())
327
+ return false ;
328
+ DependentInstrInRange (MemI, Dest, MRI, DependentInstrs);
329
+
330
+ if (DependentInstrs.empty ())
331
+ return true ;
332
+
333
+ // Check if we can move the dependent instructions after Dest in order to
334
+ // enable post-increment combining.
335
+ if (!canDelayMemOp (**DependentInstrs.begin (), Dest, MRI))
336
+ return false ;
337
+
338
+ // Check if we can move the rest of the dependent instructions after the
339
+ // previous one, this inherently checks if we can move the dependent
340
+ // instruction below the Dest instruction
341
+ for (auto Iter = DependentInstrs.begin (), NextIter = std::next (Iter);
342
+ NextIter != DependentInstrs.end (); ++Iter, ++NextIter)
343
+ if (!canDelayMemOp (**NextIter, **Iter, MRI))
344
+ return false ;
345
+
346
+ return true ;
347
+ }
348
+
280
349
// / \return true if \a Dest can be moved just after \a MemI in order to allow
281
350
// / combining
282
351
bool llvm::canAdvanceOp (MachineInstr &MemI, MachineInstr &Dest,
@@ -497,25 +566,43 @@ MachineInstr *findPostIncMatch(MachineInstr &MemI, MachineRegisterInfo &MRI,
497
566
})) {
498
567
continue ;
499
568
}
500
- MatchData = {&PtrInc, *CombinedOpcode, &MemI,
501
- /* ExtraInstrsToMove=*/ {},
569
+ MatchData = {&PtrInc,
570
+ *CombinedOpcode,
571
+ &MemI,
572
+ /* ExtraInstrsToMoveBefore=*/ {},
573
+ /* ExtraInstrsToMoveAfter=*/ {},
502
574
/* RemoveInstr=*/ true };
503
575
// The offset of the PtrInc might be defined after MemI, in this case we
504
576
// want to verify if it would be possible to insert the combined
505
577
// instruction at the PtrInc instead of the location of MemI. Instruction
506
578
// with side effects are also blocking: Loads, stores, calls, instructions
507
579
// with side effects cannot be moved.
508
- // TODO: try move other instructions that block us from combining
509
580
} else if (canDelayMemOp (MemI, PtrAddInsertLoc, MRI)) {
510
581
// If Definition of the offset is a G_CONSTANT we have to move that
511
582
// instruction up
512
583
MatchData = {
513
584
&PtrInc, *CombinedOpcode, &PtrAddInsertLoc,
514
- /* ExtraInstrsToMove =*/
585
+ /* ExtraInstrsToMoveBefore =*/
515
586
findConstantOffsetsToMove (PtrInc, PtrAddInsertLoc, MRI, Helper),
587
+ /* ExtraInstrsToMoveAfter=*/ {},
516
588
/* RemoveInstr=*/ true };
589
+ // When the offset of the PtrInc might be defined after MemI, we may want
590
+ // to move some instruction below the PtrInc to allow the combine.
591
+ } else if (DependentInstrSet DependentInstrToMoveBelow (Helper);
592
+ EnablePostIncCombineMultipleInst &&
593
+ canDelayMemOpWithExtraInstr (MemI, PtrInc, MRI,
594
+ DependentInstrToMoveBelow)) {
595
+ std::vector<MachineInstr *> ExtraInstrsToMoveAfter (
596
+ DependentInstrToMoveBelow.begin (), DependentInstrToMoveBelow.end ());
597
+ MatchData = {&PtrInc,
598
+ *CombinedOpcode,
599
+ &PtrInc,
600
+ /* ExtraInstrsToMoveBefore=*/ {},
601
+ /* ExtraInstrsToMoveAfter=*/ ExtraInstrsToMoveAfter,
602
+ /* RemoveInstr=*/ true };
603
+
517
604
} else {
518
- LLVM_DEBUG (dbgs () << " Ignoring candidate " << PtrInc);
605
+ LLVM_DEBUG (dbgs () << " Ignoring candidate for PostInc " << PtrInc);
519
606
continue ;
520
607
}
521
608
// Only combine postIncs if we know that the original pointer is not used
@@ -552,9 +639,24 @@ void llvm::applyLdStInc(MachineInstr &MI, MachineRegisterInfo &MRI,
552
639
// Debug Loc: Debug Loc of LOAD STORE: MI
553
640
B.setDebugLoc (MI.getDebugLoc ());
554
641
auto NewInstr = B.buildInstr (MatchData.CombinedInstrOpcode );
555
- for (auto *Instr : MatchData.ExtraInstrsToMove ) {
556
- Instr->moveBefore (NewInstr);
557
- }
642
+
643
+ // Move the instructions before the NewInstr
644
+ auto MoveInstrsBefore = [&NewInstr](const auto &InstrsToMoveBefore) {
645
+ std::for_each (InstrsToMoveBefore.begin (), InstrsToMoveBefore.end (),
646
+ [&NewInstr](auto *MI) { MI->moveBefore (NewInstr); });
647
+ };
648
+ MoveInstrsBefore (MatchData.ExtraInstrsToMoveBefore );
649
+
650
+ // Move the instructions after the NewInstr
651
+ auto MoveInstrsAfter = [&NewInstr](const auto &InstrsToMoveAfter) {
652
+ const auto &NewInstrIter = NewInstr.getInstr ()->getIterator ();
653
+ std::for_each (InstrsToMoveAfter.begin (), InstrsToMoveAfter.end (),
654
+ [&NewInstrIter](auto *MI) {
655
+ MI->moveBefore (&*std::next (NewInstrIter));
656
+ });
657
+ };
658
+ MoveInstrsAfter (MatchData.ExtraInstrsToMoveAfter );
659
+
558
660
if (MI.mayLoad ())
559
661
NewInstr.addDef (MI.getOperand (0 ).getReg () /* Loaded value */ );
560
662
if (MatchData.RemoveInstr )
0 commit comments