Skip to content

Commit 424c424

Browse files
authored
[SimplifyCFG] Add optimization for switches of powers of two (llvm#70977)
Optimization reduces the range for switches whose cases are positive powers of two by replacing each case with count_trailing_zero(case). Resolves llvm#70756
1 parent b8dface commit 424c424

File tree

3 files changed

+431
-4
lines changed

3 files changed

+431
-4
lines changed

llvm/lib/Transforms/Utils/SimplifyCFG.cpp

Lines changed: 77 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6828,9 +6828,6 @@ static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
68286828

68296829
// This transform can be done speculatively because it is so cheap - it
68306830
// results in a single rotate operation being inserted.
6831-
// FIXME: It's possible that optimizing a switch on powers of two might also
6832-
// be beneficial - flag values are often powers of two and we could use a CLZ
6833-
// as the key function.
68346831

68356832
// countTrailingZeros(0) returns 64. As Values is guaranteed to have more than
68366833
// one element and LLVM disallows duplicate cases, Shift is guaranteed to be
@@ -6875,6 +6872,80 @@ static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
68756872
return true;
68766873
}
68776874

6875+
/// Tries to transform switch of powers of two to reduce switch range.
6876+
/// For example, switch like:
6877+
/// switch (C) { case 1: case 2: case 64: case 128: }
6878+
/// will be transformed to:
6879+
/// switch (count_trailing_zeros(C)) { case 0: case 1: case 6: case 7: }
6880+
///
6881+
/// This transformation allows better lowering and could allow transforming into
6882+
/// a lookup table.
6883+
static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder,
6884+
const DataLayout &DL,
6885+
const TargetTransformInfo &TTI) {
6886+
Value *Condition = SI->getCondition();
6887+
LLVMContext &Context = SI->getContext();
6888+
auto *CondTy = cast<IntegerType>(Condition->getType());
6889+
6890+
if (CondTy->getIntegerBitWidth() > 64 ||
6891+
!DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
6892+
return false;
6893+
6894+
const auto CttzIntrinsicCost = TTI.getIntrinsicInstrCost(
6895+
IntrinsicCostAttributes(Intrinsic::cttz, CondTy,
6896+
{Condition, ConstantInt::getTrue(Context)}),
6897+
TTI::TCK_SizeAndLatency);
6898+
6899+
if (CttzIntrinsicCost > TTI::TCC_Basic)
6900+
// Inserting intrinsic is too expensive.
6901+
return false;
6902+
6903+
// Only bother with this optimization if there are more than 3 switch cases.
6904+
// SDAG will only bother creating jump tables for 4 or more cases.
6905+
if (SI->getNumCases() < 4)
6906+
return false;
6907+
6908+
// We perform this optimization only for switches with
6909+
// unreachable default case.
6910+
// This assumption will save us from checking if `Condition` is a power of two.
6911+
if (!isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg()))
6912+
return false;
6913+
6914+
// Check that switch cases are powers of two.
6915+
SmallVector<uint64_t, 4> Values;
6916+
for (const auto &Case : SI->cases()) {
6917+
uint64_t CaseValue = Case.getCaseValue()->getValue().getZExtValue();
6918+
if (llvm::has_single_bit(CaseValue))
6919+
Values.push_back(CaseValue);
6920+
else
6921+
return false;
6922+
}
6923+
6924+
// isSwitchDense requires case values to be sorted.
6925+
llvm::sort(Values);
6926+
if (!isSwitchDense(Values.size(), llvm::countr_zero(Values.back()) -
6927+
llvm::countr_zero(Values.front()) + 1))
6928+
// Transform is unable to generate dense switch.
6929+
return false;
6930+
6931+
Builder.SetInsertPoint(SI);
6932+
6933+
// Replace each case with its trailing zeros number.
6934+
for (auto &Case : SI->cases()) {
6935+
auto *OrigValue = Case.getCaseValue();
6936+
Case.setValue(ConstantInt::get(OrigValue->getType(),
6937+
OrigValue->getValue().countr_zero()));
6938+
}
6939+
6940+
// Replace condition with its trailing zeros number.
6941+
auto *ConditionTrailingZeros = Builder.CreateIntrinsic(
6942+
Intrinsic::cttz, {CondTy}, {Condition, ConstantInt::getTrue(Context)});
6943+
6944+
SI->setCondition(ConditionTrailingZeros);
6945+
6946+
return true;
6947+
}
6948+
68786949
bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
68796950
BasicBlock *BB = SI->getParent();
68806951

@@ -6922,6 +6993,9 @@ bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
69226993
SwitchToLookupTable(SI, Builder, DTU, DL, TTI))
69236994
return requestResimplify();
69246995

6996+
if (simplifySwitchOfPowersOfTwo(SI, Builder, DL, TTI))
6997+
return requestResimplify();
6998+
69256999
if (ReduceSwitchRange(SI, Builder, DL, TTI))
69267000
return requestResimplify();
69277001

llvm/test/CodeGen/AArch64/switch-unreachable-default.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ entry:
7171
i32 8, label %bb2
7272
i32 16, label %bb3
7373
i32 32, label %bb4
74-
i32 64, label %bb5
74+
i32 -64, label %bb5
7575
]
7676

7777
; The switch is lowered with a jump table for cases 1--32 and case -64 handled

0 commit comments

Comments
 (0)