diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 68b5b1a78a346..312adc0d8d6df 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -6792,9 +6792,6 @@ static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder, // This transform can be done speculatively because it is so cheap - it // results in a single rotate operation being inserted. - // FIXME: It's possible that optimizing a switch on powers of two might also - // be beneficial - flag values are often powers of two and we could use a CLZ - // as the key function. // countTrailingZeros(0) returns 64. As Values is guaranteed to have more than // one element and LLVM disallows duplicate cases, Shift is guaranteed to be @@ -6839,6 +6836,80 @@ static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder, return true; } +/// Tries to transform switch of powers of two to reduce switch range. +/// For example, switch like: +/// switch (C) { case 1: case 2: case 64: case 128: } +/// will be transformed to: +/// switch (count_trailing_zeros(C)) { case 0: case 1: case 6: case 7: } +/// +/// This transformation allows better lowering and could allow transforming into +/// a lookup table. Returns true if \p SI was rewritten, false otherwise. +static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder, + const DataLayout &DL, + const TargetTransformInfo &TTI) { + Value *Condition = SI->getCondition(); + LLVMContext &Context = SI->getContext(); + auto *CondTy = cast<IntegerType>(Condition->getType()); + + if (CondTy->getIntegerBitWidth() > 64 || + !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth())) + return false; + + const auto CttzIntrinsicCost = TTI.getIntrinsicInstrCost( + IntrinsicCostAttributes(Intrinsic::cttz, CondTy, + {Condition, ConstantInt::getTrue(Context)}), + TTI::TCK_SizeAndLatency); + + if (CttzIntrinsicCost > TTI::TCC_Basic) + // Inserting intrinsic is too expensive. 
+ return false; + + // Only bother with this optimization if there are more than 3 switch cases. + // SDAG will only bother creating jump tables for 4 or more cases. + if (SI->getNumCases() < 4) + return false; + + // We perform this optimization only for switches with + // unreachable default case. + // This assumption saves us from checking if `Condition` is a power of two. + if (!isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg())) + return false; + + // Check that switch cases are powers of two. + SmallVector<uint64_t, 4> Values; + for (const auto &Case : SI->cases()) { + uint64_t CaseValue = Case.getCaseValue()->getValue().getZExtValue(); + if (llvm::has_single_bit(CaseValue)) + Values.push_back(CaseValue); + else + return false; + } + + // isSwitchDense requires case values to be sorted. + llvm::sort(Values); + if (!isSwitchDense(Values.size(), llvm::countr_zero(Values.back()) - + llvm::countr_zero(Values.front()) + 1)) + // Transform is unable to generate dense switch. + return false; + + Builder.SetInsertPoint(SI); + + // Replace each case with its trailing zeros number. + for (auto &Case : SI->cases()) { + auto *OrigValue = Case.getCaseValue(); + Case.setValue(ConstantInt::get(OrigValue->getType(), + OrigValue->getValue().countr_zero())); + } + + // Replace condition with its trailing zeros number. 
+ auto *ConditionTrailingZeros = Builder.CreateIntrinsic( + Intrinsic::cttz, {CondTy}, {Condition, ConstantInt::getTrue(Context)}); + + SI->setCondition(ConditionTrailingZeros); + + return true; +} + bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { BasicBlock *BB = SI->getParent(); @@ -6886,6 +6957,9 @@ bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { SwitchToLookupTable(SI, Builder, DTU, DL, TTI)) return requestResimplify(); + if (simplifySwitchOfPowersOfTwo(SI, Builder, DL, TTI)) + return requestResimplify(); + if (ReduceSwitchRange(SI, Builder, DL, TTI)) return requestResimplify(); diff --git a/llvm/test/CodeGen/AArch64/switch-unreachable-default.ll b/llvm/test/CodeGen/AArch64/switch-unreachable-default.ll index 9acc5a150b630..c38daffb7ade1 100644 --- a/llvm/test/CodeGen/AArch64/switch-unreachable-default.ll +++ b/llvm/test/CodeGen/AArch64/switch-unreachable-default.ll @@ -71,7 +71,7 @@ entry: i32 8, label %bb2 i32 16, label %bb3 i32 32, label %bb4 - i32 64, label %bb5 + i32 -64, label %bb5 ] ; The switch is lowered with a jump table for cases 1--32 and case 64 handled diff --git a/llvm/test/Transforms/SimplifyCFG/RISCV/switch-of-powers-of-two.ll b/llvm/test/Transforms/SimplifyCFG/RISCV/switch-of-powers-of-two.ll new file mode 100644 index 0000000000000..893d1e0ff60b5 --- /dev/null +++ b/llvm/test/Transforms/SimplifyCFG/RISCV/switch-of-powers-of-two.ll @@ -0,0 +1,353 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -passes='simplifycfg<switch-to-lookup>' -simplifycfg-require-and-preserve-domtree=1 -S -mtriple=riscv64 < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK,RV64I +; RUN: opt -passes='simplifycfg<switch-to-lookup>' -simplifycfg-require-and-preserve-domtree=1 -S -mtriple=riscv64 -mattr=+zbb < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK,RV64ZBB + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +; Check that the range of switch of powers of two is reduced and switch itself 
is lowered to lookup-table. +define i32 @switch_of_powers(i32 %x) { +; RV64I-LABEL: @switch_of_powers( +; RV64I-NEXT: entry: +; RV64I-NEXT: switch i32 [[X:%.*]], label [[DEFAULT_CASE:%.*]] [ +; RV64I-NEXT: i32 1, label [[RETURN:%.*]] +; RV64I-NEXT: i32 8, label [[BB2:%.*]] +; RV64I-NEXT: i32 16, label [[BB3:%.*]] +; RV64I-NEXT: i32 32, label [[BB4:%.*]] +; RV64I-NEXT: i32 64, label [[BB5:%.*]] +; RV64I-NEXT: ] +; RV64I: default_case: +; RV64I-NEXT: unreachable +; RV64I: bb2: +; RV64I-NEXT: br label [[RETURN]] +; RV64I: bb3: +; RV64I-NEXT: br label [[RETURN]] +; RV64I: bb4: +; RV64I-NEXT: br label [[RETURN]] +; RV64I: bb5: +; RV64I-NEXT: br label [[RETURN]] +; RV64I: return: +; RV64I-NEXT: [[P:%.*]] = phi i32 [ 2, [[BB2]] ], [ 1, [[BB3]] ], [ 0, [[BB4]] ], [ 42, [[BB5]] ], [ 3, [[ENTRY:%.*]] ] +; RV64I-NEXT: ret i32 [[P]] +; +; RV64ZBB-LABEL: @switch_of_powers( +; RV64ZBB-NEXT: entry: +; RV64ZBB-NEXT: [[TMP0:%.*]] = call i32 @llvm.cttz.i32(i32 [[X:%.*]], i1 true) +; RV64ZBB-NEXT: [[SWITCH_MASKINDEX:%.*]] = trunc i32 [[TMP0]] to i8 +; RV64ZBB-NEXT: [[SWITCH_SHIFTED:%.*]] = lshr i8 121, [[SWITCH_MASKINDEX]] +; RV64ZBB-NEXT: [[SWITCH_LOBIT:%.*]] = trunc i8 [[SWITCH_SHIFTED]] to i1 +; RV64ZBB-NEXT: call void @llvm.assume(i1 [[SWITCH_LOBIT]]) +; RV64ZBB-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [7 x i32], ptr @switch.table.switch_of_powers, i32 0, i32 [[TMP0]] +; RV64ZBB-NEXT: [[SWITCH_LOAD:%.*]] = load i32, ptr [[SWITCH_GEP]], align 4 +; RV64ZBB-NEXT: ret i32 [[SWITCH_LOAD]] +; +entry: + switch i32 %x, label %default_case [ + i32 1, label %bb1 + i32 8, label %bb2 + i32 16, label %bb3 + i32 32, label %bb4 + i32 64, label %bb5 + ] + + +default_case: unreachable +bb1: br label %return +bb2: br label %return +bb3: br label %return +bb4: br label %return +bb5: br label %return + +return: + %p = phi i32 [ 3, %bb1 ], [ 2, %bb2 ], [ 1, %bb3 ], [ 0, %bb4 ], [ 42, %bb5 ] + ret i32 %p +} + +; Check that the range of a switch of powers of two is not reduced if the default case is 
reachable +define i32 @switch_of_powers_reachable_default(i32 %x) { +; CHECK-LABEL: @switch_of_powers_reachable_default( +; CHECK-NEXT: entry: +; CHECK-NEXT: switch i32 [[X:%.*]], label [[RETURN:%.*]] [ +; CHECK-NEXT: i32 1, label [[BB1:%.*]] +; CHECK-NEXT: i32 8, label [[BB2:%.*]] +; CHECK-NEXT: i32 16, label [[BB3:%.*]] +; CHECK-NEXT: i32 32, label [[BB4:%.*]] +; CHECK-NEXT: i32 64, label [[BB5:%.*]] +; CHECK-NEXT: ] +; CHECK: bb1: +; CHECK-NEXT: br label [[RETURN]] +; CHECK: bb2: +; CHECK-NEXT: br label [[RETURN]] +; CHECK: bb3: +; CHECK-NEXT: br label [[RETURN]] +; CHECK: bb4: +; CHECK-NEXT: br label [[RETURN]] +; CHECK: bb5: +; CHECK-NEXT: br label [[RETURN]] +; CHECK: return: +; CHECK-NEXT: [[P:%.*]] = phi i32 [ 3, [[BB1]] ], [ 2, [[BB2]] ], [ 1, [[BB3]] ], [ 0, [[BB4]] ], [ 42, [[BB5]] ], [ -1, [[ENTRY:%.*]] ] +; CHECK-NEXT: ret i32 [[P]] +; +entry: + switch i32 %x, label %default_case [ + i32 1, label %bb1 + i32 8, label %bb2 + i32 16, label %bb3 + i32 32, label %bb4 + i32 64, label %bb5 + ] + + +default_case: br label %return +bb1: br label %return +bb2: br label %return +bb3: br label %return +bb4: br label %return +bb5: br label %return + +return: + %p = phi i32 [ 3, %bb1 ], [ 2, %bb2 ], [ 1, %bb3 ], [ 0, %bb4 ], [ 42, %bb5 ], [-1, %default_case] + ret i32 %p +} + +; Check that a switch with a zero case is not treated as a switch of powers of two +define i32 @switch_of_non_powers(i32 %x) { +; CHECK-LABEL: @switch_of_non_powers( +; CHECK-NEXT: entry: +; CHECK-NEXT: switch i32 [[X:%.*]], label [[DEFAULT_CASE:%.*]] [ +; CHECK-NEXT: i32 0, label [[RETURN:%.*]] +; CHECK-NEXT: i32 1, label [[BB2:%.*]] +; CHECK-NEXT: i32 16, label [[BB3:%.*]] +; CHECK-NEXT: i32 32, label [[BB4:%.*]] +; CHECK-NEXT: i32 64, label [[BB5:%.*]] +; CHECK-NEXT: ] +; CHECK: default_case: +; CHECK-NEXT: unreachable +; CHECK: bb2: +; CHECK-NEXT: br label [[RETURN]] +; CHECK: bb3: +; CHECK-NEXT: br label [[RETURN]] +; CHECK: bb4: +; CHECK-NEXT: br label [[RETURN]] +; CHECK: bb5: +; 
CHECK-NEXT: br label [[RETURN]] +; CHECK: return: +; CHECK-NEXT: [[P:%.*]] = phi i32 [ 2, [[BB2]] ], [ 1, [[BB3]] ], [ 0, [[BB4]] ], [ 42, [[BB5]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: ret i32 [[P]] +; +entry: + switch i32 %x, label %default_case [ + i32 0, label %bb1 + i32 1, label %bb2 + i32 16, label %bb3 + i32 32, label %bb4 + i32 64, label %bb5 + ] + + +default_case: unreachable +bb1: br label %return +bb2: br label %return +bb3: br label %return +bb4: br label %return +bb5: br label %return + +return: + %p = phi i32 [ 3, %bb1 ], [ 2, %bb2 ], [ 1, %bb3 ], [ 0, %bb4 ], [ 42, %bb5 ] + ret i32 %p +} + +define i32 @unable_to_create_dense_switch(i32 %x) { +; CHECK-LABEL: @unable_to_create_dense_switch( +; CHECK-NEXT: entry: +; CHECK-NEXT: switch i32 [[X:%.*]], label [[DEFAULT_CASE:%.*]] [ +; CHECK-NEXT: i32 1, label [[RETURN:%.*]] +; CHECK-NEXT: i32 2, label [[BB3:%.*]] +; CHECK-NEXT: i32 4, label [[BB4:%.*]] +; CHECK-NEXT: i32 4096, label [[BB5:%.*]] +; CHECK-NEXT: ] +; CHECK: default_case: +; CHECK-NEXT: unreachable +; CHECK: bb3: +; CHECK-NEXT: br label [[RETURN]] +; CHECK: bb4: +; CHECK-NEXT: br label [[RETURN]] +; CHECK: bb5: +; CHECK-NEXT: br label [[RETURN]] +; CHECK: return: +; CHECK-NEXT: [[P:%.*]] = phi i32 [ 1, [[BB3]] ], [ 0, [[BB4]] ], [ 42, [[BB5]] ], [ 2, [[ENTRY:%.*]] ] +; CHECK-NEXT: ret i32 [[P]] +; +entry: + switch i32 %x, label %default_case [ + i32 1, label %bb2 + i32 2, label %bb3 + i32 4, label %bb4 + i32 4096, label %bb5 + ] + + +default_case: unreachable +bb1: br label %return +bb2: br label %return +bb3: br label %return +bb4: br label %return +bb5: br label %return + +return: + %p = phi i32 [ 3, %bb1 ], [ 2, %bb2 ], [ 1, %bb3 ], [ 0, %bb4 ], [ 42, %bb5 ] + ret i32 %p +} + +declare i32 @bar(i32) +define i32 @unable_to_generate_lookup_table(i32 %x, i32 %y) { +; RV64I-LABEL: @unable_to_generate_lookup_table( +; RV64I-NEXT: entry: +; RV64I-NEXT: switch i32 [[Y:%.*]], label [[DEFAULT_CASE:%.*]] [ +; RV64I-NEXT: i32 1, label [[BB2:%.*]] +; 
RV64I-NEXT: i32 2, label [[BB3:%.*]] +; RV64I-NEXT: i32 8, label [[BB4:%.*]] +; RV64I-NEXT: i32 64, label [[BB5:%.*]] +; RV64I-NEXT: ] +; RV64I: default_case: +; RV64I-NEXT: unreachable +; RV64I: bb2: +; RV64I-NEXT: [[XOR2:%.*]] = xor i32 [[X:%.*]], 48 +; RV64I-NEXT: [[CALL2:%.*]] = call i32 @bar(i32 [[XOR2]]) +; RV64I-NEXT: [[ADD2:%.*]] = sub i32 [[CALL2]], [[X]] +; RV64I-NEXT: br label [[RETURN:%.*]] +; RV64I: bb3: +; RV64I-NEXT: [[XOR3:%.*]] = xor i32 [[X]], 96 +; RV64I-NEXT: [[CALL3:%.*]] = call i32 @bar(i32 [[XOR3]]) +; RV64I-NEXT: [[ADD3:%.*]] = add i32 [[CALL3]], [[X]] +; RV64I-NEXT: br label [[RETURN]] +; RV64I: bb4: +; RV64I-NEXT: [[CALL4:%.*]] = call i32 @bar(i32 [[X]]) +; RV64I-NEXT: [[ADD4:%.*]] = add i32 [[CALL4]], [[X]] +; RV64I-NEXT: br label [[RETURN]] +; RV64I: bb5: +; RV64I-NEXT: [[XOR5:%.*]] = xor i32 [[X]], 9 +; RV64I-NEXT: [[CALL5:%.*]] = call i32 @bar(i32 [[XOR5]]) +; RV64I-NEXT: [[ADD5:%.*]] = add i32 [[CALL5]], [[X]] +; RV64I-NEXT: br label [[RETURN]] +; RV64I: return: +; RV64I-NEXT: [[P:%.*]] = phi i32 [ [[ADD2]], [[BB2]] ], [ [[ADD3]], [[BB3]] ], [ [[ADD4]], [[BB4]] ], [ [[ADD5]], [[BB5]] ] +; RV64I-NEXT: ret i32 [[P]] +; +; RV64ZBB-LABEL: @unable_to_generate_lookup_table( +; RV64ZBB-NEXT: entry: +; RV64ZBB-NEXT: [[TMP0:%.*]] = call i32 @llvm.cttz.i32(i32 [[Y:%.*]], i1 true) +; RV64ZBB-NEXT: switch i32 [[TMP0]], label [[DEFAULT_CASE:%.*]] [ +; RV64ZBB-NEXT: i32 0, label [[BB2:%.*]] +; RV64ZBB-NEXT: i32 1, label [[BB3:%.*]] +; RV64ZBB-NEXT: i32 3, label [[BB4:%.*]] +; RV64ZBB-NEXT: i32 6, label [[BB5:%.*]] +; RV64ZBB-NEXT: ] +; RV64ZBB: default_case: +; RV64ZBB-NEXT: unreachable +; RV64ZBB: bb2: +; RV64ZBB-NEXT: [[XOR2:%.*]] = xor i32 [[X:%.*]], 48 +; RV64ZBB-NEXT: [[CALL2:%.*]] = call i32 @bar(i32 [[XOR2]]) +; RV64ZBB-NEXT: [[ADD2:%.*]] = sub i32 [[CALL2]], [[X]] +; RV64ZBB-NEXT: br label [[RETURN:%.*]] +; RV64ZBB: bb3: +; RV64ZBB-NEXT: [[XOR3:%.*]] = xor i32 [[X]], 96 +; RV64ZBB-NEXT: [[CALL3:%.*]] = call i32 @bar(i32 [[XOR3]]) +; 
RV64ZBB-NEXT: [[ADD3:%.*]] = add i32 [[CALL3]], [[X]] +; RV64ZBB-NEXT: br label [[RETURN]] +; RV64ZBB: bb4: +; RV64ZBB-NEXT: [[CALL4:%.*]] = call i32 @bar(i32 [[X]]) +; RV64ZBB-NEXT: [[ADD4:%.*]] = add i32 [[CALL4]], [[X]] +; RV64ZBB-NEXT: br label [[RETURN]] +; RV64ZBB: bb5: +; RV64ZBB-NEXT: [[XOR5:%.*]] = xor i32 [[X]], 9 +; RV64ZBB-NEXT: [[CALL5:%.*]] = call i32 @bar(i32 [[XOR5]]) +; RV64ZBB-NEXT: [[ADD5:%.*]] = add i32 [[CALL5]], [[X]] +; RV64ZBB-NEXT: br label [[RETURN]] +; RV64ZBB: return: +; RV64ZBB-NEXT: [[P:%.*]] = phi i32 [ [[ADD2]], [[BB2]] ], [ [[ADD3]], [[BB3]] ], [ [[ADD4]], [[BB4]] ], [ [[ADD5]], [[BB5]] ] +; RV64ZBB-NEXT: ret i32 [[P]] +; +entry: + switch i32 %y, label %default_case [ + i32 1, label %bb2 + i32 2, label %bb3 + i32 8, label %bb4 + i32 64, label %bb5 + ] + + +default_case: unreachable +bb1: + %xor1 = xor i32 %x, 42 + %call1 = call i32 @bar(i32 %xor1) + %add1 = add i32 %call1, %x + br label %return +bb2: + %xor2 = xor i32 %x, 48 + %call2 = call i32 @bar(i32 %xor2) + %add2 = sub i32 %call2, %x + br label %return +bb3: + %xor3 = xor i32 %x, 96 + %call3 = call i32 @bar(i32 %xor3) + %add3 = add i32 %call3, %x + br label %return +bb4: + %call4 = call i32 @bar(i32 %x) + %add4 = add i32 %call4, %x + br label %return +bb5: + %xor5 = xor i32 %x, 9 + %call5 = call i32 @bar(i32 %xor5) + %add5 = add i32 %call5, %x + br label %return + +return: + %p = phi i32 [ %add1, %bb1 ], [ %add2, %bb2 ], [ %add3, %bb3 ], [ %add4, %bb4 ], [ %add5, %bb5 ] + + ret i32 %p +} + +define i128 @switch_with_long_condition(i128 %x) { +; CHECK-LABEL: @switch_with_long_condition( +; CHECK-NEXT: entry: +; CHECK-NEXT: switch i128 [[X:%.*]], label [[DEFAULT_CASE:%.*]] [ +; CHECK-NEXT: i128 1, label [[RETURN:%.*]] +; CHECK-NEXT: i128 2, label [[BB3:%.*]] +; CHECK-NEXT: i128 4, label [[BB4:%.*]] +; CHECK-NEXT: i128 32, label [[BB5:%.*]] +; CHECK-NEXT: ] +; CHECK: default_case: +; CHECK-NEXT: unreachable +; CHECK: bb3: +; CHECK-NEXT: br label [[RETURN]] +; CHECK: bb4: +; 
CHECK-NEXT: br label [[RETURN]] +; CHECK: bb5: +; CHECK-NEXT: br label [[RETURN]] +; CHECK: return: +; CHECK-NEXT: [[P:%.*]] = phi i128 [ 1, [[BB3]] ], [ 0, [[BB4]] ], [ 42, [[BB5]] ], [ 2, [[ENTRY:%.*]] ] +; CHECK-NEXT: ret i128 [[P]] +; +entry: + switch i128 %x, label %default_case [ + i128 1, label %bb2 + i128 2, label %bb3 + i128 4, label %bb4 + i128 32, label %bb5 + ] + + +default_case: unreachable +bb1: br label %return +bb2: br label %return +bb3: br label %return +bb4: br label %return +bb5: br label %return + +return: + %p = phi i128 [ 3, %bb1 ], [ 2, %bb2 ], [ 1, %bb3 ], [ 0, %bb4 ], [ 42, %bb5 ] + ret i128 %p +}