-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[SimplifyIndVar] Push more users to worklist for simplifyUsers #93598
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
d1a22f3
e0d9d00
3511e43
aeac9e8
06970c7
522ba6e
db8fb22
8a92def
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -62,13 +62,18 @@ namespace { | |
bool Changed = false; | ||
bool RunUnswitching = false; | ||
|
||
// When following the def-use chains, it can go outside the loop. | ||
// Strict upper bound on number of traversed out-of-loop blocks. | ||
unsigned MaxDepthOutOfLoop; | ||
|
||
public: | ||
SimplifyIndvar(Loop *Loop, ScalarEvolution *SE, DominatorTree *DT, | ||
LoopInfo *LI, const TargetTransformInfo *TTI, | ||
SCEVExpander &Rewriter, | ||
SmallVectorImpl<WeakTrackingVH> &Dead) | ||
SmallVectorImpl<WeakTrackingVH> &Dead, | ||
unsigned MaxDepthOutOfLoop = 1) | ||
: L(Loop), LI(LI), SE(SE), DT(DT), TTI(TTI), Rewriter(Rewriter), | ||
DeadInsts(Dead) { | ||
DeadInsts(Dead), MaxDepthOutOfLoop(MaxDepthOutOfLoop) { | ||
assert(LI && "IV simplification requires LoopInfo"); | ||
} | ||
|
||
|
@@ -80,10 +85,11 @@ namespace { | |
/// all simplifications to users of an IV. | ||
void simplifyUsers(PHINode *CurrIV, IVVisitor *V = nullptr); | ||
|
||
void pushIVUsers(Instruction *Def, | ||
SmallPtrSet<Instruction *, 16> &Simplified, | ||
SmallVectorImpl<std::pair<Instruction *, Instruction *>> | ||
&SimpleIVUsers); | ||
void pushIVUsers( | ||
Instruction *Def, SmallPtrSet<Instruction *, 16> &Simplified, | ||
SmallVectorImpl<std::tuple<Instruction *, Instruction *, unsigned>> | ||
&SimpleIVUsers, | ||
unsigned OutOfLoopChainCounter); | ||
|
||
Value *foldIVUser(Instruction *UseInst, Instruction *IVOperand); | ||
|
||
|
@@ -514,8 +520,8 @@ bool SimplifyIndvar::eliminateTrunc(TruncInst *TI) { | |
!DT->isReachableFromEntry(cast<Instruction>(U)->getParent())) | ||
continue; | ||
ICmpInst *ICI = dyn_cast<ICmpInst>(U); | ||
if (!ICI) return false; | ||
assert(L->contains(ICI->getParent()) && "LCSSA form broken?"); | ||
if (!ICI) | ||
return false; | ||
if (!(ICI->getOperand(0) == TI && L->isLoopInvariant(ICI->getOperand(1))) && | ||
!(ICI->getOperand(1) == TI && L->isLoopInvariant(ICI->getOperand(0)))) | ||
return false; | ||
|
@@ -548,7 +554,7 @@ bool SimplifyIndvar::eliminateTrunc(TruncInst *TI) { | |
}; | ||
// Replace all comparisons against trunc with comparisons against IV. | ||
for (auto *ICI : ICmpUsers) { | ||
bool IsSwapped = L->isLoopInvariant(ICI->getOperand(0)); | ||
bool IsSwapped = ICI->getOperand(0) != TI; | ||
auto *Op1 = IsSwapped ? ICI->getOperand(0) : ICI->getOperand(1); | ||
IRBuilder<> Builder(ICI); | ||
Value *Ext = nullptr; | ||
|
@@ -846,7 +852,9 @@ bool SimplifyIndvar::strengthenRightShift(BinaryOperator *BO, | |
/// Add all uses of Def to the current IV's worklist. | ||
void SimplifyIndvar::pushIVUsers( | ||
Instruction *Def, SmallPtrSet<Instruction *, 16> &Simplified, | ||
SmallVectorImpl<std::pair<Instruction *, Instruction *>> &SimpleIVUsers) { | ||
SmallVectorImpl<std::tuple<Instruction *, Instruction *, unsigned>> | ||
&SimpleIVUsers, | ||
unsigned OutOfLoopChainCounter) { | ||
for (User *U : Def->users()) { | ||
Instruction *UI = cast<Instruction>(U); | ||
|
||
|
@@ -857,16 +865,22 @@ void SimplifyIndvar::pushIVUsers( | |
if (UI == Def) | ||
continue; | ||
|
||
// Only change the current Loop, do not change the other parts (e.g. other | ||
// Loops). | ||
if (!L->contains(UI)) | ||
// Avoid adding Defs that SCEV expand to themselves, e.g. the LoopPhis | ||
// of the outer loops. | ||
if (!DT->dominates(L->getHeader(), UI->getParent())) | ||
continue; | ||
|
||
// Do not push the same instruction more than once. | ||
if (!Simplified.insert(UI).second) | ||
continue; | ||
|
||
SimpleIVUsers.push_back(std::make_pair(UI, Def)); | ||
unsigned Counter = | ||
L->contains(UI) | ||
? 0 // reset depth if we go back inside the loop. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we need to do that? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No. If we want to compute a overapproximation of the distance in the CFG |
||
: OutOfLoopChainCounter + (UI->getParent() != Def->getParent()); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why is this a limit on the depth of the blocks rather than instructions? You can have a very long chain of instructions within a single block, or a very short one across blocks. It seems like that would be the more relevant quantity. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. At first, I implemented a instruction-wise depth but I changed that as I visualised the algorithm at the CFG level, as accepting optimising basic blocks that are at given distance of the loop Also, I didn't like the idea to have partially optimised basic-blocks. |
||
|
||
if (!MaxDepthOutOfLoop || Counter < MaxDepthOutOfLoop) | ||
SimpleIVUsers.push_back(std::make_tuple(UI, Def, Counter)); | ||
} | ||
} | ||
|
||
|
@@ -911,17 +925,17 @@ void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) { | |
SmallPtrSet<Instruction*,16> Simplified; | ||
|
||
// Use-def pairs if IV users waiting to be processed for CurrIV. | ||
SmallVector<std::pair<Instruction*, Instruction*>, 8> SimpleIVUsers; | ||
SmallVector<std::tuple<Instruction *, Instruction *, unsigned>, 8> | ||
SimpleIVUsers; | ||
|
||
// Push users of the current LoopPhi. In rare cases, pushIVUsers may be | ||
// called multiple times for the same LoopPhi. This is the proper thing to | ||
// do for loop header phis that use each other. | ||
pushIVUsers(CurrIV, Simplified, SimpleIVUsers); | ||
pushIVUsers(CurrIV, Simplified, SimpleIVUsers, 0); | ||
|
||
while (!SimpleIVUsers.empty()) { | ||
std::pair<Instruction*, Instruction*> UseOper = | ||
SimpleIVUsers.pop_back_val(); | ||
Instruction *UseInst = UseOper.first; | ||
auto [UseInst, IVOperand, OutOfLoopChainCounter] = | ||
SimpleIVUsers.pop_back_val(); | ||
|
||
// If a user of the IndVar is trivially dead, we prefer just to mark it dead | ||
// rather than try to do some complex analysis or transformation (such as | ||
|
@@ -945,11 +959,11 @@ void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) { | |
if ((isa<PtrToIntInst>(UseInst)) || (isa<TruncInst>(UseInst))) | ||
for (Use &U : UseInst->uses()) { | ||
Instruction *User = cast<Instruction>(U.getUser()); | ||
if (replaceIVUserWithLoopInvariant(User)) | ||
if (DT->dominates(L->getHeader(), User->getParent()) && | ||
nikic marked this conversation as resolved.
Show resolved
Hide resolved
|
||
replaceIVUserWithLoopInvariant(User)) | ||
break; // done replacing | ||
} | ||
|
||
Instruction *IVOperand = UseOper.second; | ||
for (unsigned N = 0; IVOperand; ++N) { | ||
assert(N <= Simplified.size() && "runaway iteration"); | ||
(void) N; | ||
|
@@ -963,22 +977,23 @@ void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) { | |
continue; | ||
|
||
if (eliminateIVUser(UseInst, IVOperand)) { | ||
pushIVUsers(IVOperand, Simplified, SimpleIVUsers); | ||
pushIVUsers(IVOperand, Simplified, SimpleIVUsers, OutOfLoopChainCounter); | ||
continue; | ||
} | ||
|
||
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(UseInst)) { | ||
if (strengthenBinaryOp(BO, IVOperand)) { | ||
// re-queue uses of the now modified binary operator and fall | ||
// through to the checks that remain. | ||
pushIVUsers(IVOperand, Simplified, SimpleIVUsers); | ||
pushIVUsers(IVOperand, Simplified, SimpleIVUsers, | ||
OutOfLoopChainCounter); | ||
} | ||
} | ||
|
||
// Try to use integer induction for FPToSI of float induction directly. | ||
if (replaceFloatIVWithIntegerIV(UseInst)) { | ||
// Re-queue the potentially new direct uses of IVOperand. | ||
pushIVUsers(IVOperand, Simplified, SimpleIVUsers); | ||
pushIVUsers(IVOperand, Simplified, SimpleIVUsers, OutOfLoopChainCounter); | ||
continue; | ||
} | ||
|
||
|
@@ -988,7 +1003,7 @@ void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) { | |
continue; | ||
} | ||
if (isSimpleIVUser(UseInst, L, SE)) { | ||
pushIVUsers(UseInst, Simplified, SimpleIVUsers); | ||
pushIVUsers(UseInst, Simplified, SimpleIVUsers, OutOfLoopChainCounter); | ||
} | ||
} | ||
} | ||
|
@@ -1002,13 +1017,13 @@ void IVVisitor::anchor() { } | |
/// Returns a pair where the first entry indicates that the function makes | ||
/// changes and the second entry indicates that it introduced new opportunities | ||
/// for loop unswitching. | ||
std::pair<bool, bool> simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, | ||
DominatorTree *DT, LoopInfo *LI, | ||
const TargetTransformInfo *TTI, | ||
SmallVectorImpl<WeakTrackingVH> &Dead, | ||
SCEVExpander &Rewriter, IVVisitor *V) { | ||
std::pair<bool, bool> | ||
simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, DominatorTree *DT, | ||
LoopInfo *LI, const TargetTransformInfo *TTI, | ||
SmallVectorImpl<WeakTrackingVH> &Dead, SCEVExpander &Rewriter, | ||
unsigned MaxDepthOutOfLoop, IVVisitor *V) { | ||
SimplifyIndvar SIV(LI->getLoopFor(CurrIV->getParent()), SE, DT, LI, TTI, | ||
Rewriter, Dead); | ||
Rewriter, Dead, MaxDepthOutOfLoop); | ||
SIV.simplifyUsers(CurrIV, V); | ||
return {SIV.hasChanged(), SIV.runUnswitching()}; | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we need the MaxDepthOutOfLoop parameter? Can you move the cl::opt into SimplifyIndVar and directly use it instead?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Transforms/Utils/LoopUnroll.cpp
uses it viasimplifyLoopIVs
. I didn't want to add it here as it could create regressions in passes I'm not interested in.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Okay, that makes sense. But as implemented, aren't you still calling this with the default of 1 from LoopUnroll? Shouldn't the default be 0 instead?
Could you please rebase over 6e3725d so we at least don't have to pass through this parameter everywhere?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The question you raise is actually quite important as this code is disabled with this current default config. I disabled it as I wanted to make it optimisation level dependent but I needed some input to calibrate it according to the level.
It is set to
1
which means the set of BBs that will be affected aredist(Loop, BB) < 1 => BB in Loop
(the strict bound is on purpose). The0
value is a special value for an+infinity
bound.I don't know if this encoding is satisfying. Another one is to use
-1
but I don't know ifcl::opt<unsigned>
supports it.+1 for making
pushIVUsers
a method instead of a function.I think I'll put it to
0
now as it will show performance regressions and allow discussing about that. I'll add some tests before though.