Skip to content

Commit e360a16

Browse files
authored
[GlobalOpt] Cache whether CC is changeable (#71381)
The hasAddressTaken() call in hasOnlyColdCalls() has quadratic complexity if there are many cold calls to a function: we visit each call of the function, and then for each of them iterate over all the users of the function. We've recently encountered a case where GlobalOpt spends more than an hour in these hasAddressTaken() checks when full LTO is used. Avoid this by moving the hasAddressTaken() check into hasChangeableCC() and caching its result, so that it is only computed once per function.
1 parent ea9d44f commit e360a16

File tree

1 file changed

+22
-8
lines changed

1 file changed

+22
-8
lines changed

llvm/lib/Transforms/IPO/GlobalOpt.cpp

+22-8
Original file line numberDiff line numberDiff line change
@@ -1680,13 +1680,16 @@ static void RemoveAttribute(Function *F, Attribute::AttrKind A) {
16801680
/// idea here is that we don't want to mess with the convention if the user
16811681
/// explicitly requested something with performance implications like coldcc,
16821682
/// GHC, or anyregcc.
1683-
static bool hasChangeableCC(Function *F) {
1683+
static bool hasChangeableCCImpl(Function *F) {
16841684
CallingConv::ID CC = F->getCallingConv();
16851685

16861686
// FIXME: Is it worth transforming x86_stdcallcc and x86_fastcallcc?
16871687
if (CC != CallingConv::C && CC != CallingConv::X86_ThisCall)
16881688
return false;
16891689

1690+
if (F->isVarArg())
1691+
return false;
1692+
16901693
// FIXME: Change CC for the whole chain of musttail calls when possible.
16911694
//
16921695
// Can't change CC of the function that either has musttail calls, or is a
@@ -1706,7 +1709,16 @@ static bool hasChangeableCC(Function *F) {
17061709
if (BB.getTerminatingMustTailCall())
17071710
return false;
17081711

1709-
return true;
1712+
return !F->hasAddressTaken();
1713+
}
1714+
1715+
using ChangeableCCCacheTy = SmallDenseMap<Function *, bool, 8>;
1716+
static bool hasChangeableCC(Function *F,
1717+
ChangeableCCCacheTy &ChangeableCCCache) {
1718+
auto Res = ChangeableCCCache.try_emplace(F, false);
1719+
if (Res.second)
1720+
Res.first->second = hasChangeableCCImpl(F);
1721+
return Res.first->second;
17101722
}
17111723

17121724
/// Return true if the block containing the call site has a BlockFrequency of
@@ -1760,7 +1772,8 @@ static void changeCallSitesToColdCC(Function *F) {
17601772
// coldcc calling convention.
17611773
static bool
17621774
hasOnlyColdCalls(Function &F,
1763-
function_ref<BlockFrequencyInfo &(Function &)> GetBFI) {
1775+
function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
1776+
ChangeableCCCacheTy &ChangeableCCCache) {
17641777
for (BasicBlock &BB : F) {
17651778
for (Instruction &I : BB) {
17661779
if (CallInst *CI = dyn_cast<CallInst>(&I)) {
@@ -1779,8 +1792,7 @@ hasOnlyColdCalls(Function &F,
17791792
if (!CalledFn->hasLocalLinkage())
17801793
return false;
17811794
// Check if it's valid to use coldcc calling convention.
1782-
if (!hasChangeableCC(CalledFn) || CalledFn->isVarArg() ||
1783-
CalledFn->hasAddressTaken())
1795+
if (!hasChangeableCC(CalledFn, ChangeableCCCache))
17841796
return false;
17851797
BlockFrequencyInfo &CallerBFI = GetBFI(F);
17861798
if (!isColdCallSite(*CI, CallerBFI))
@@ -1905,9 +1917,10 @@ OptimizeFunctions(Module &M,
19051917

19061918
bool Changed = false;
19071919

1920+
ChangeableCCCacheTy ChangeableCCCache;
19081921
std::vector<Function *> AllCallsCold;
19091922
for (Function &F : llvm::make_early_inc_range(M))
1910-
if (hasOnlyColdCalls(F, GetBFI))
1923+
if (hasOnlyColdCalls(F, GetBFI, ChangeableCCCache))
19111924
AllCallsCold.push_back(&F);
19121925

19131926
// Optimize functions.
@@ -1969,7 +1982,7 @@ OptimizeFunctions(Module &M,
19691982
continue;
19701983
}
19711984

1972-
if (hasChangeableCC(&F) && !F.isVarArg() && !F.hasAddressTaken()) {
1985+
if (hasChangeableCC(&F, ChangeableCCCache)) {
19731986
NumInternalFunc++;
19741987
TargetTransformInfo &TTI = GetTTI(F);
19751988
// Change the calling convention to coldcc if either stress testing is
@@ -1979,14 +1992,15 @@ OptimizeFunctions(Module &M,
19791992
if (EnableColdCCStressTest ||
19801993
(TTI.useColdCCForColdCall(F) &&
19811994
isValidCandidateForColdCC(F, GetBFI, AllCallsCold))) {
1995+
ChangeableCCCache.erase(&F);
19821996
F.setCallingConv(CallingConv::Cold);
19831997
changeCallSitesToColdCC(&F);
19841998
Changed = true;
19851999
NumColdCC++;
19862000
}
19872001
}
19882002

1989-
if (hasChangeableCC(&F) && !F.isVarArg() && !F.hasAddressTaken()) {
2003+
if (hasChangeableCC(&F, ChangeableCCCache)) {
19902004
// If this function has a calling convention worth changing, is not a
19912005
// varargs function, and is only called directly, promote it to use the
19922006
// Fast calling convention.

0 commit comments

Comments
 (0)