diff --git a/llvm/lib/Transforms/Utils/SCCPSolver.cpp b/llvm/lib/Transforms/Utils/SCCPSolver.cpp index 101d60525f416..0bd5f2e66d942 100644 --- a/llvm/lib/Transforms/Utils/SCCPSolver.cpp +++ b/llvm/lib/Transforms/Utils/SCCPSolver.cpp @@ -383,6 +383,78 @@ void SCCPSolver::inferArgAttributes() const { } } +// GuaranteedBoundsPropagator is a class that propagates +// guaranteed bounds for values. Typically, a ValueLatticeElement +// associated with a frequently visited Phi node is marked as "overdefined" to +// prevent excessive iterations. However, GuaranteedBoundsPropagator enhances +// this by propagating guaranteed bounds up to the Phi node, potentially +// improving precision. +// Consider a scenario where a variable 'x' is evaluated in a branch with the +// condition 'x < 10'. In this case, we can confidently assert that 'x' is less +// than 10. GuaranteedBoundsPropagator leverages this information by +// propagating such guaranteed bounds up to the relevant Phi node. If all +// incoming values to the Phi node have guaranteed bounds, the union of these +// bounds will represent the guaranteed bounds for the Phi node itself. Once +// these bounds are established for the Phi node, they can be propagated further +// to other values that depend on this Phi node. +// However, if not all incoming branches to the Phi node have been explored or +// are active, the bounds for the Phi node cannot be fully guaranteed. In such +// cases, the propagator may still apply the best available bounds to the Phi +// node instead of marking it as overdefined. These bounds remain valid unless +// new branches become active. If any active incoming branches lack guaranteed +// bounds, the Phi node's state needs to be adjusted to overdefined. 
+class GuaranteedBoundsPropagator { + DenseMap GuaranteedBounds; + +public: + std::optional getGuaranteedBounds(Value *V) { + if (!V->getType()->isIntegerTy()) + return {}; + if (Constant *C = dyn_cast(V)) + return C->toConstantRange(); + auto It = GuaranteedBounds.find(V); + if (It == GuaranteedBounds.end()) + return {}; + auto &Range = It->second; + if (Range.isFullSet()) + return {}; + return Range; + } + + void insertOrUpdate(Value *V, const ConstantRange &CR) { + if (CR.isFullSet()) + return; + auto It = GuaranteedBounds.find(V); + if (It == GuaranteedBounds.end()) { + GuaranteedBounds.insert({V, CR}); + } else { + It->second = CR; + } + } + + // If ImposedCR is not the full set, then we update guaranteed bounds + // of OutputValue. If in addition InputValue has guaranteed bounds, + // we update the guaranteed bounds of OutputValue to be the intersection + // of the two. + void processConditionalBranch(Value *OutputValue, Value *InputValue, + const ConstantRange &ImposedCR); + + // Updates the guaranteed bounds of \p I if both of its operands + // have guaranteed bounds; otherwise nothing is changed. + void processBinaryOp(Instruction *I); + + // If \p PN already has stored guaranteed bounds, those are returned as is. + // Otherwise the union of the guaranteed bounds of every value in + // \p IncomingValuesFromActiveBranches is returned. The union is stored for + // \p PN (via insertOrUpdate) only when that list is as long as PN's + // incoming-value count; no flag reports which of the two cases applied. + // If some listed value does not have guaranteed bounds (or we failed to + // calculate the union), the function returns std::nullopt. + std::optional processPhiNode( + PHINode *PN, + const SmallVector &IncomingValuesFromActiveBranches); +}; + /// Helper class for SCCPSolver. This implements the instruction visitor and /// holds all the state. 
class SCCPInstVisitor : public InstVisitor { @@ -450,6 +522,8 @@ class SCCPInstVisitor : public InstVisitor { DenseMap> AdditionalUsers; + GuaranteedBoundsPropagator BoundsPropagator; + LLVMContext &Ctx; private: @@ -1255,6 +1329,7 @@ void SCCPInstVisitor::visitPHINode(PHINode &PN) { return (void)markOverdefined(&PN); unsigned NumActiveIncoming = 0; + SmallVector IncomingValuesFromActiveBranches; // Look at all of the executable operands of the PHI node. If any of them // are overdefined, the PHI becomes overdefined as well. If they are all @@ -1265,7 +1340,7 @@ void SCCPInstVisitor::visitPHINode(PHINode &PN) { for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) { if (!isEdgeFeasible(PN.getIncomingBlock(i), PN.getParent())) continue; - + IncomingValuesFromActiveBranches.push_back(PN.getIncomingValue(i)); ValueLatticeElement IV = getValueState(PN.getIncomingValue(i)); PhiState.mergeIn(IV); NumActiveIncoming++; @@ -1273,6 +1348,27 @@ void SCCPInstVisitor::visitPHINode(PHINode &PN) { break; } + // If we have visited this PHI node too many times, we first check + // if there are known bounds we could use. If not, the state will + // be marked as overdefined. 
+ auto OptionalBestBounds = + BoundsPropagator.processPhiNode(&PN, IncomingValuesFromActiveBranches); + auto &OldState = getValueState(&PN); + if (OptionalBestBounds && PhiState.isConstantRange() && + OldState.isConstantRange()) { + ConstantRange OldStateRange = OldState.getConstantRange(); + ConstantRange NewStateRange = PhiState.getConstantRange(); + if (OldStateRange != NewStateRange && + OldState.getNumRangeExtensions() > NumActiveIncoming) { + PhiState = ValueLatticeElement::getRange(*OptionalBestBounds); + mergeInValue(&PN, PhiState); + ValueLatticeElement &PhiStateRef = getValueState(&PN); + PhiStateRef.setNumRangeExtensions( + std::max(NumActiveIncoming, PhiStateRef.getNumRangeExtensions())); + return; + } + } + // We allow up to 1 range extension per active incoming value and one // additional extension. Note that we manually adjust the number of range // extensions to match the number of active incoming values. This helps to @@ -1280,7 +1376,7 @@ void SCCPInstVisitor::visitPHINode(PHINode &PN) { // incoming values are equal. mergeInValue(&PN, PhiState, ValueLatticeElement::MergeOptions().setMaxWidenSteps( - NumActiveIncoming + 1)); + NumActiveIncoming + 2)); // NOTE(review): the comment above this call still says "one additional extension"; this now allows two - update that comment or justify the change. ValueLatticeElement &PhiStateRef = getValueState(&PN); PhiStateRef.setNumRangeExtensions( std::max(NumActiveIncoming, PhiStateRef.getNumRangeExtensions())); @@ -1585,6 +1681,8 @@ void SCCPInstVisitor::visitBinaryOperator(Instruction &I) { R = A.overflowingBinaryOp(BO->getOpcode(), B, OBO->getNoWrapKind()); else R = A.binaryOp(BO->getOpcode(), B); + + BoundsPropagator.processBinaryOp(&I); mergeInValue(&I, ValueLatticeElement::getRange(R)); // TODO: Currently we do not exploit special values that produce something @@ -1880,9 +1978,12 @@ void SCCPInstVisitor::handleCallResult(CallBase &CB) { ConstantRange::getFull(DL.getTypeSizeInBits(CopyOf->getType())); // Get the range imposed by the condition. 
- if (CondVal.isConstantRange()) + if (CondVal.isConstantRange()) { ImposedCR = ConstantRange::makeAllowedICmpRegion( Pred, CondVal.getConstantRange()); + if (BoundsPropagator.getGuaranteedBounds(OtherOp)) + BoundsPropagator.processConditionalBranch(&CB, CopyOf, ImposedCR); + } // Combine range info for the original value with the new range from the // condition. @@ -2252,3 +2353,54 @@ void SCCPSolver::markFunctionUnreachable(Function *F) { void SCCPSolver::visit(Instruction *I) { Visitor->visit(I); } void SCCPSolver::visitCall(CallInst &I) { Visitor->visitCall(I); } + +void GuaranteedBoundsPropagator::processConditionalBranch( + Value *OutputValue, Value *InputValue, const ConstantRange &ImposedCR) { + auto OptionalInputValueBounds = getGuaranteedBounds(InputValue); + if (OptionalInputValueBounds) + insertOrUpdate(OutputValue, + ImposedCR.intersectWith(*OptionalInputValueBounds)); + else + insertOrUpdate(OutputValue, ImposedCR); // No known bounds for InputValue: store the imposed range on its own. +} + +void GuaranteedBoundsPropagator::processBinaryOp(Instruction *I) { + auto *BO = cast(I); + assert(BO && "Expected binary op"); // NOTE(review): presumably redundant, since LLVM's cast<> asserts on a bad cast itself - confirm and drop. + auto OptionalLHSBounds = getGuaranteedBounds(BO->getOperand(0)); + auto OptionalRHSBounds = getGuaranteedBounds(BO->getOperand(1)); + if (!OptionalLHSBounds || !OptionalRHSBounds) + return; // Both operands need guaranteed bounds; otherwise nothing is stored for I. + ConstantRange R = + ConstantRange::getEmpty(I->getType()->getScalarSizeInBits()); + if (auto *OBO = dyn_cast(BO)) + R = OptionalLHSBounds->overflowingBinaryOp( + BO->getOpcode(), *OptionalRHSBounds, OBO->getNoWrapKind()); + else + R = OptionalLHSBounds->binaryOp(BO->getOpcode(), *OptionalRHSBounds); + insertOrUpdate(I, R); +} + +std::optional GuaranteedBoundsPropagator::processPhiNode( + PHINode *PN, + const SmallVector &IncomingValuesFromActiveBranches) { + auto OptionalExistingBounds = getGuaranteedBounds(PN); + if (OptionalExistingBounds) + return *OptionalExistingBounds; // Already-stored bounds are returned without re-examining the incoming values. + + ConstantRange R = + ConstantRange::getEmpty(PN->getType()->getScalarSizeInBits()); + for (Value *IncomingValue : 
IncomingValuesFromActiveBranches) { + auto OptionalIncomingBounds = getGuaranteedBounds(IncomingValue); + if (!OptionalIncomingBounds) + return {}; + // TODO: Handle disjoint ranges in the future, if needed. + auto OptionalUnion = R.exactUnionWith(*OptionalIncomingBounds); + if (!OptionalUnion) + return {}; + R = *OptionalUnion; + } + if (PN->getNumIncomingValues() == IncomingValuesFromActiveBranches.size()) + insertOrUpdate(PN, R); // Stored only when the active list is as long as PN's incoming-value count. + return R; +} diff --git a/llvm/test/Transforms/SCCP/loop-removal.ll b/llvm/test/Transforms/SCCP/loop-removal.ll new file mode 100644 index 0000000000000..05f7a096734ef --- /dev/null +++ b/llvm/test/Transforms/SCCP/loop-removal.ll @@ -0,0 +1,43 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt < %s -passes=ipsccp -S | FileCheck %s + +define i32 @foo() { +; CHECK-LABEL: @foo( +; CHECK-NEXT: init: +; CHECK-NEXT: br label %[[OUTER_LOOP_CONTROL:.*]] +; CHECK: outer.loop.control: +; CHECK-NEXT: [[X_0:%.*]] = phi i32 [ 0, [[INIT:%.*]] ], [ [[X_OUTER:%.*]], [[OUTER_LOOP_INC:%.*]] ] +; CHECK-NEXT: [[TMP0:%.*]] = icmp slt i32 [[X_0]], 10 +; CHECK-NEXT: br i1 [[TMP0]], label %[[INNER_LOOP_CONTROL:.*]], label %[[EXIT:.*]] +; CHECK: inner.loop.control: +; CHECK-NEXT: br label [[OUTER_LOOP_INC]] +; CHECK: outer.loop.inc: +; CHECK-NEXT: [[X_OUTER]] = add nsw i32 [[X_0]], 2 +; CHECK-NEXT: br label %[[OUTER_LOOP_CONTROL]] +; CHECK: exit: +; CHECK-NEXT: ret i32 [[X_0]] +; +init: + br label %outer.loop.control + +outer.loop.control: ; preds = %init, %outer.loop.inc + %x.0 = phi i32 [ 0, %init ], [ %x.outer, %outer.loop.inc ] + %0 = icmp slt i32 %x.0, 10 + br i1 %0, label %inner.loop.control, label %exit + +inner.loop.control: ; preds = %outer.loop.control, %inner.loop.body + %x.1 = phi i32 [ %x.0, %outer.loop.control ], [ %x.inner, %inner.loop.body ] + %1 = icmp sgt i32 %x.1, 20 + br i1 %1, label %inner.loop.body, label %outer.loop.inc + +inner.loop.body: ; preds = %inner.loop.control + 
%x.inner = sub nsw i32 %x.1, 1 + br label %inner.loop.control + +outer.loop.inc: ; preds = %inner.loop.control + %x.outer = add nsw i32 %x.1, 2 + br label %outer.loop.control + +exit: ; preds = %outer.loop.control + ret i32 %x.0 +} diff --git a/llvm/test/Transforms/SCCP/undef-resolve.ll b/llvm/test/Transforms/SCCP/undef-resolve.ll index 8bb2baa82dce3..7f8842f31f037 100644 --- a/llvm/test/Transforms/SCCP/undef-resolve.ll +++ b/llvm/test/Transforms/SCCP/undef-resolve.ll @@ -38,20 +38,13 @@ define i32 @test2() nounwind readnone ssp { ; CHECK-NEXT: br label [[CONTROL_US:%.*]] ; CHECK: bb3.us: ; CHECK-NEXT: br label [[CONTROL_OUTER_US]] -; CHECK: bb0.us: -; CHECK-NEXT: br label [[CONTROL_US]] ; CHECK: control.us: -; CHECK-NEXT: [[SWITCHCOND_0_US]] = phi i32 [ [[A_0_PH_US]], [[BB0_US:%.*]] ], [ [[SWITCHCOND_0_PH_US]], [[CONTROL_OUTER_US]] ] -; CHECK-NEXT: switch i32 [[SWITCHCOND_0_US]], label [[CONTROL_OUTER_LOOPEXIT_US_LCSSA_US:%.*]] [ -; CHECK-NEXT: i32 0, label [[BB0_US]] -; CHECK-NEXT: i32 1, label [[BB1_US_LCSSA_US:%.*]] -; CHECK-NEXT: i32 3, label [[BB3_US]] +; CHECK-NEXT: switch i32 [[SWITCHCOND_0_PH_US]], label [[CONTROL_OUTER_LOOPEXIT_US_LCSSA_US:%.*]] [ ; CHECK-NEXT: i32 4, label [[BB4_US_LCSSA_US:%.*]] +; CHECK-NEXT: i32 3, label [[BB3_US]] ; CHECK-NEXT: ] ; CHECK: control.outer.loopexit.us-lcssa.us: ; CHECK-NEXT: br label [[CONTROL_OUTER_LOOPEXIT]] -; CHECK: bb1.us-lcssa.us: -; CHECK-NEXT: br label [[BB1:%.*]] ; CHECK: bb4.us-lcssa.us: ; CHECK-NEXT: br label [[BB4:%.*]] ; CHECK: control.outer: @@ -79,7 +72,7 @@ define i32 @test2() nounwind readnone ssp { ; CHECK: bb0: ; CHECK-NEXT: br label [[CONTROL]] ; CHECK: bb1.us-lcssa: -; CHECK-NEXT: br label [[BB1]] +; CHECK-NEXT: br label [[BB1:%.*]] ; CHECK: bb1: ; CHECK-NEXT: ret i32 0 ;