@@ -6828,9 +6828,6 @@ static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
6828
6828
6829
6829
// This transform can be done speculatively because it is so cheap - it
6830
6830
// results in a single rotate operation being inserted.
6831
- // FIXME: It's possible that optimizing a switch on powers of two might also
6832
- // be beneficial - flag values are often powers of two and we could use a CLZ
6833
- // as the key function.
6834
6831
6835
6832
// countTrailingZeros(0) returns 64. As Values is guaranteed to have more than
6836
6833
// one element and LLVM disallows duplicate cases, Shift is guaranteed to be
@@ -6875,6 +6872,80 @@ static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
6875
6872
return true ;
6876
6873
}
6877
6874
6875
+ // / Tries to transform switch of powers of two to reduce switch range.
6876
+ // / For example, switch like:
6877
+ // / switch (C) { case 1: case 2: case 64: case 128: }
6878
+ // / will be transformed to:
6879
+ // / switch (count_trailing_zeros(C)) { case 0: case 1: case 6: case 7: }
6880
+ // /
6881
+ // / This transformation allows better lowering and could allow transforming into
6882
+ // / a lookup table.
6883
+ static bool simplifySwitchOfPowersOfTwo (SwitchInst *SI, IRBuilder<> &Builder,
6884
+ const DataLayout &DL,
6885
+ const TargetTransformInfo &TTI) {
6886
+ Value *Condition = SI->getCondition ();
6887
+ LLVMContext &Context = SI->getContext ();
6888
+ auto *CondTy = cast<IntegerType>(Condition->getType ());
6889
+
6890
+ if (CondTy->getIntegerBitWidth () > 64 ||
6891
+ !DL.fitsInLegalInteger (CondTy->getIntegerBitWidth ()))
6892
+ return false ;
6893
+
6894
+ const auto CttzIntrinsicCost = TTI.getIntrinsicInstrCost (
6895
+ IntrinsicCostAttributes (Intrinsic::cttz, CondTy,
6896
+ {Condition, ConstantInt::getTrue (Context)}),
6897
+ TTI::TCK_SizeAndLatency);
6898
+
6899
+ if (CttzIntrinsicCost > TTI::TCC_Basic)
6900
+ // Inserting intrinsic is too expensive.
6901
+ return false ;
6902
+
6903
+ // Only bother with this optimization if there are more than 3 switch cases.
6904
+ // SDAG will only bother creating jump tables for 4 or more cases.
6905
+ if (SI->getNumCases () < 4 )
6906
+ return false ;
6907
+
6908
+ // We perform this optimization only for switches with
6909
+ // unreachable default case.
6910
+ // This assumtion will save us from checking if `Condition` is a power of two.
6911
+ if (!isa<UnreachableInst>(SI->getDefaultDest ()->getFirstNonPHIOrDbg ()))
6912
+ return false ;
6913
+
6914
+ // Check that switch cases are powers of two.
6915
+ SmallVector<uint64_t , 4 > Values;
6916
+ for (const auto &Case : SI->cases ()) {
6917
+ uint64_t CaseValue = Case.getCaseValue ()->getValue ().getZExtValue ();
6918
+ if (llvm::has_single_bit (CaseValue))
6919
+ Values.push_back (CaseValue);
6920
+ else
6921
+ return false ;
6922
+ }
6923
+
6924
+ // isSwichDense requires case values to be sorted.
6925
+ llvm::sort (Values);
6926
+ if (!isSwitchDense (Values.size (), llvm::countr_zero (Values.back ()) -
6927
+ llvm::countr_zero (Values.front ()) + 1 ))
6928
+ // Transform is unable to generate dense switch.
6929
+ return false ;
6930
+
6931
+ Builder.SetInsertPoint (SI);
6932
+
6933
+ // Replace each case with its trailing zeros number.
6934
+ for (auto &Case : SI->cases ()) {
6935
+ auto *OrigValue = Case.getCaseValue ();
6936
+ Case.setValue (ConstantInt::get (OrigValue->getType (),
6937
+ OrigValue->getValue ().countr_zero ()));
6938
+ }
6939
+
6940
+ // Replace condition with its trailing zeros number.
6941
+ auto *ConditionTrailingZeros = Builder.CreateIntrinsic (
6942
+ Intrinsic::cttz, {CondTy}, {Condition, ConstantInt::getTrue (Context)});
6943
+
6944
+ SI->setCondition (ConditionTrailingZeros);
6945
+
6946
+ return true ;
6947
+ }
6948
+
6878
6949
bool SimplifyCFGOpt::simplifySwitch (SwitchInst *SI, IRBuilder<> &Builder) {
6879
6950
BasicBlock *BB = SI->getParent ();
6880
6951
@@ -6922,6 +6993,9 @@ bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
6922
6993
SwitchToLookupTable (SI, Builder, DTU, DL, TTI))
6923
6994
return requestResimplify ();
6924
6995
6996
+ if (simplifySwitchOfPowersOfTwo (SI, Builder, DL, TTI))
6997
+ return requestResimplify ();
6998
+
6925
6999
if (ReduceSwitchRange (SI, Builder, DL, TTI))
6926
7000
return requestResimplify ();
6927
7001
0 commit comments