@@ -112,7 +112,7 @@ bool InstCostVisitor::canEliminateSuccessor(BasicBlock *BB, BasicBlock *Succ,
 Cost InstCostVisitor::estimateBasicBlocks(
     SmallVectorImpl<BasicBlock *> &WorkList) {
   Cost CodeSize = 0;
-  // Accumulate the instruction cost of each basic block weighted by frequency.
+  // Accumulate the codesize savings of each basic block.
   while (!WorkList.empty()) {
     BasicBlock *BB = WorkList.pop_back_val();
@@ -154,37 +154,73 @@ static Constant *findConstantFor(Value *V, ConstMap &KnownConstants) {
   return KnownConstants.lookup(V);
 }
 
-Bonus InstCostVisitor::getBonusFromPendingPHIs() {
-  Bonus B;
+Cost InstCostVisitor::getCodeSizeSavingsFromPendingPHIs() {
+  Cost CodeSize;
   while (!PendingPHIs.empty()) {
     Instruction *Phi = PendingPHIs.pop_back_val();
     // The pending PHIs could have been proven dead by now.
     if (isBlockExecutable(Phi->getParent()))
-      B += getUserBonus(Phi);
+      CodeSize += getCodeSizeSavingsForUser(Phi);
   }
-  return B;
+  return CodeSize;
 }
 
-/// Compute a bonus for replacing argument \p A with constant \p C.
-Bonus InstCostVisitor::getSpecializationBonus(Argument *A, Constant *C) {
+/// Compute the codesize savings for replacing argument \p A with constant \p C.
+Cost InstCostVisitor::getCodeSizeSavingsForArg(Argument *A, Constant *C) {
   LLVM_DEBUG(dbgs() << "FnSpecialization: Analysing bonus for constant: "
                     << C->getNameOrAsOperand() << "\n");
-  Bonus B;
+  Cost CodeSize;
   for (auto *U : A->users())
     if (auto *UI = dyn_cast<Instruction>(U))
      if (isBlockExecutable(UI->getParent()))
-        B += getUserBonus(UI, A, C);
+        CodeSize += getCodeSizeSavingsForUser(UI, A, C);
 
   LLVM_DEBUG(dbgs() << "FnSpecialization: Accumulated bonus {CodeSize = "
-                    << B.CodeSize << ", Latency = " << B.Latency
-                    << "} for argument " << *A << "\n");
-  return B;
+                    << CodeSize << "} for argument " << *A << "\n");
+  return CodeSize;
 }
 
-Bonus InstCostVisitor::getUserBonus(Instruction *User, Value *Use, Constant *C) {
+/// Compute the latency savings from replacing all arguments with constants for
+/// a specialization candidate. As this function computes the latency savings
+/// for all Instructions in KnownConstants at once, it should be called only
+/// after every instruction has been visited, i.e. after:
+///
+/// * getCodeSizeSavingsForArg has been run for every constant argument of a
+///   specialization candidate
+///
+/// * getCodeSizeSavingsFromPendingPHIs has been run
+///
+/// to ensure that the latency savings are calculated for all Instructions we
+/// have visited and found to be constant.
+Cost InstCostVisitor::getLatencySavingsForKnownConstants() {
+  auto &BFI = GetBFI(*F);
+  Cost TotalLatency = 0;
+
+  for (auto Pair : KnownConstants) {
+    Instruction *I = dyn_cast<Instruction>(Pair.first);
+    if (!I)
+      continue;
+
+    uint64_t Weight = BFI.getBlockFreq(I->getParent()).getFrequency() /
+                      BFI.getEntryFreq().getFrequency();
+
+    Cost Latency =
+        Weight * TTI.getInstructionCost(I, TargetTransformInfo::TCK_Latency);
+
+    LLVM_DEBUG(dbgs() << "FnSpecialization: {Latency = " << Latency
+                      << "} for instruction " << *I << "\n");
+
+    TotalLatency += Latency;
+  }
+
+  return TotalLatency;
+}
+
+Cost InstCostVisitor::getCodeSizeSavingsForUser(Instruction *User, Value *Use,
+                                                Constant *C) {
   // We have already propagated a constant for this user.
   if (KnownConstants.contains(User))
-    return {0, 0};
+    return 0;
 
   // Cache the iterator before visiting.
   LastVisited = Use ? KnownConstants.insert({Use, C}).first
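Note on the weighting used in getLatencySavingsForKnownConstants above: each instruction's TCK_Latency cost is scaled by how often its block runs relative to the function entry, using integer division of the two BFI frequencies. Below is a standalone sketch of that arithmetic with made-up numbers; it uses plain C++ rather than LLVM types, and the frequencies and latency cost are hypothetical.

#include <cstdint>
#include <iostream>

int main() {
  uint64_t BlockFreq = 800, EntryFreq = 100; // hypothetical BFI frequencies
  uint64_t Weight = BlockFreq / EntryFreq;   // integer division, as in the pass
  int64_t InstLatency = 2;                   // hypothetical TCK_Latency cost
  int64_t Latency = Weight * InstLatency;    // this instruction's contribution
  std::cout << "Latency contribution: " << Latency << "\n"; // prints 16
  return 0;
}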
@@ -198,7 +234,7 @@ Bonus InstCostVisitor::getUserBonus(Instruction *User, Value *Use, Constant *C)
   } else {
     C = visit(*User);
     if (!C)
-      return {0, 0};
+      return 0;
   }
 
   // Even though it doesn't make sense to bind switch and branch instructions
@@ -208,23 +244,15 @@ Bonus InstCostVisitor::getUserBonus(Instruction *User, Value *Use, Constant *C)
 
   CodeSize += TTI.getInstructionCost(User, TargetTransformInfo::TCK_CodeSize);
 
-  uint64_t Weight = BFI.getBlockFreq(User->getParent()).getFrequency() /
-                    BFI.getEntryFreq().getFrequency();
-
-  Cost Latency = Weight *
-      TTI.getInstructionCost(User, TargetTransformInfo::TCK_Latency);
-
   LLVM_DEBUG(dbgs() << "FnSpecialization: {CodeSize = " << CodeSize
-                    << ", Latency = " << Latency << "} for user "
-                    << *User << "\n");
+                    << "} for user " << *User << "\n");
 
-  Bonus B(CodeSize, Latency);
   for (auto *U : User->users())
     if (auto *UI = dyn_cast<Instruction>(U))
       if (UI != User && isBlockExecutable(UI->getParent()))
-        B += getUserBonus(UI, User, C);
+        CodeSize += getCodeSizeSavingsForUser(UI, User, C);
 
-  return B;
+  return CodeSize;
 }
 
 Cost InstCostVisitor::estimateSwitchInst(SwitchInst &I) {
@@ -809,6 +837,18 @@ static Function *cloneCandidateFunction(Function *F, unsigned NSpecs) {
   return Clone;
 }
 
+/// Get the unsigned Value of given Cost object. Assumes the Cost is always
+/// non-negative, which is true for both TCK_CodeSize and TCK_Latency, and
+/// always Valid.
+static unsigned getCostValue(const Cost &C) {
+  int64_t Value = *C.getValue();
+
+  assert(Value >= 0 && "CodeSize and Latency cannot be negative");
+  // It is safe to down cast since we know the arguments cannot be negative and
+  // Cost is of type int64_t.
+  return static_cast<unsigned>(Value);
+}
+
 bool FunctionSpecializer::findSpecializations(Function *F, unsigned FuncSize,
                                               SmallVectorImpl<Spec> &AllSpecs,
                                               SpecMap &SM) {
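The getCostValue helper above narrows the wrapped int64_t to unsigned, which is safe only because TCK_CodeSize and TCK_Latency costs are never negative and the cost is assumed to be valid. A minimal stand-in for that pattern, assuming a plain std::optional<int64_t> in place of LLVM's cost type:

#include <cassert>
#include <cstdint>
#include <optional>

// Hypothetical helper mirroring the narrowing done by getCostValue.
static unsigned toUnsigned(std::optional<int64_t> MaybeValue) {
  int64_t Value = *MaybeValue; // assumes the cost is valid, as the pass does
  assert(Value >= 0 && "cost kinds used here are never negative");
  return static_cast<unsigned>(Value);
}

int main() { return toUnsigned(42) == 42u ? 0 : 1; }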
@@ -875,48 +915,67 @@ bool FunctionSpecializer::findSpecializations(Function *F, unsigned FuncSize,
       AllSpecs[Index].CallSites.push_back(&CS);
     } else {
       // Calculate the specialisation gain.
-      Bonus B;
+      Cost CodeSize;
       unsigned Score = 0;
       InstCostVisitor Visitor = getInstCostVisitorFor(F);
       for (ArgInfo &A : S.Args) {
-        B += Visitor.getSpecializationBonus(A.Formal, A.Actual);
+        CodeSize += Visitor.getCodeSizeSavingsForArg(A.Formal, A.Actual);
         Score += getInliningBonus(A.Formal, A.Actual);
       }
-      B += Visitor.getBonusFromPendingPHIs();
+      CodeSize += Visitor.getCodeSizeSavingsFromPendingPHIs();
 
-
-      LLVM_DEBUG(dbgs() << "FnSpecialization: Specialization bonus {CodeSize = "
-                        << B.CodeSize << ", Latency = " << B.Latency
-                        << ", Inlining = " << Score << "}\n");
-
-      FunctionGrowth[F] += FuncSize - B.CodeSize;
-
-      auto IsProfitable = [](Bonus &B, unsigned Score, unsigned FuncSize,
-                             unsigned FuncGrowth) -> bool {
+      auto IsProfitable = [&]() -> bool {
         // No check required.
         if (ForceSpecialization)
          return true;
+
+        unsigned CodeSizeSavings = getCostValue(CodeSize);
+        // TODO: We should only accumulate codesize increase of specializations
+        // that are actually created.
+        FunctionGrowth[F] += FuncSize - CodeSizeSavings;
+
+        LLVM_DEBUG(
+            dbgs() << "FnSpecialization: Specialization bonus {Inlining = "
+                   << Score << " (" << (Score * 100 / FuncSize) << "%)}\n");
+
        // Minimum inlining bonus.
        if (Score > MinInliningBonus * FuncSize / 100)
          return true;
+
+        LLVM_DEBUG(
+            dbgs() << "FnSpecialization: Specialization bonus {CodeSize = "
+                   << CodeSizeSavings << " ("
+                   << (CodeSizeSavings * 100 / FuncSize) << "%)}\n");
+
        // Minimum codesize savings.
-        if (B.CodeSize < MinCodeSizeSavings * FuncSize / 100)
+        if (CodeSizeSavings < MinCodeSizeSavings * FuncSize / 100)
          return false;
+
+        // Lazily compute the Latency, to avoid unnecessarily computing BFI.
+        unsigned LatencySavings =
+            getCostValue(Visitor.getLatencySavingsForKnownConstants());
+
+        LLVM_DEBUG(
+            dbgs() << "FnSpecialization: Specialization bonus {Latency = "
+                   << LatencySavings << " ("
+                   << (LatencySavings * 100 / FuncSize) << "%)}\n");
+
        // Minimum latency savings.
-        if (B.Latency < MinLatencySavings * FuncSize / 100)
+        if (LatencySavings < MinLatencySavings * FuncSize / 100)
          return false;
        // Maximum codesize growth.
-        if (FuncGrowth / FuncSize > MaxCodeSizeGrowth)
+        if (FunctionGrowth[F] / FuncSize > MaxCodeSizeGrowth)
          return false;
+
+        Score += std::max(CodeSizeSavings, LatencySavings);
        return true;
      };
 
      // Discard unprofitable specialisations.
-      if (!IsProfitable(B, Score, FuncSize, FunctionGrowth[F]))
+      if (!IsProfitable())
        continue;
 
      // Create a new specialisation entry.
-      Score += std::max(B.CodeSize, B.Latency);
      auto &Spec = AllSpecs.emplace_back(F, S, Score);
      if (CS.getFunction() != F)
        Spec.CallSites.push_back(&CS);
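The reworked IsProfitable lambda above orders its checks so that the inlining bonus can accept a candidate early, the codesize threshold can reject it before latency (and therefore BFI) is ever computed, and the larger of the two savings is folded into the score only for candidates that pass. Below is a standalone walk-through of that ordering with made-up sizes and thresholds; the values are illustrative, not the pass's defaults, and the FunctionGrowth cap is left out for brevity.

#include <cstdio>

int main() {
  unsigned FuncSize = 100, Score = 50;                  // hypothetical candidate
  unsigned CodeSizeSavings = 30, LatencySavings = 45;   // hypothetical savings
  unsigned MinInliningBonus = 300, MinCodeSizeSavings = 20,
           MinLatencySavings = 40;                      // illustrative percentages

  bool Profitable = false;
  if (Score > MinInliningBonus * FuncSize / 100)                  // 50 > 300: no early accept
    Profitable = true;
  else if (CodeSizeSavings < MinCodeSizeSavings * FuncSize / 100) // 30 < 20: not rejected
    Profitable = false;
  else if (LatencySavings < MinLatencySavings * FuncSize / 100)   // 45 < 40: not rejected
    Profitable = false;
  else
    Profitable = true;                                  // both thresholds met

  if (Profitable) // only successful candidates fold the bigger saving into the score
    Score += CodeSizeSavings > LatencySavings ? CodeSizeSavings : LatencySavings;
  std::printf("Profitable: %d, Score: %u\n", Profitable, Score); // prints 1, 95
  return 0;
}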