diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 8195e0539305c..473e51bb39fc4 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -1809,8 +1809,8 @@ Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN) { // Check to see whether the instruction can be folded into each phi operand. // If there is one operand that does not fold, remember the BB it is in. SmallVector NewPhiValues; - BasicBlock *NonSimplifiedBB = nullptr; - Value *NonSimplifiedInVal = nullptr; + SmallVector OpsToMoveUseToIncomingBB; + bool SeenNonSimplifiedInVal = false; for (unsigned i = 0; i != NumPHIValues; ++i) { Value *InVal = PN->getIncomingValue(i); BasicBlock *InBB = PN->getIncomingBlock(i); @@ -1820,35 +1820,64 @@ Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN) { continue; } - if (NonSimplifiedBB) return nullptr; // More than one non-simplified value. + // If the only use of phi is comparing it with a constant then we can + // put this comparison in the incoming BB directly after a ucmp/scmp call + // because we know that it will simplify to a single icmp. + // NOTE: the single-use check here is not only to ensure that the + // optimization is profitable, but also to avoid creating a potentially + // invalid phi node when we have a multi-edge in the CFG. + const APInt *Ignored; + if (isa(InVal) && InVal->hasOneUse() && + match(&I, m_ICmp(m_Specific(PN), m_APInt(Ignored)))) { + OpsToMoveUseToIncomingBB.push_back(i); + NewPhiValues.push_back(nullptr); + continue; + } + + if (SeenNonSimplifiedInVal) + return nullptr; // More than one non-simplified value. + SeenNonSimplifiedInVal = true; + + // If there is exactly one non-simplified value, we can insert a copy of the + // operation in that block. However, if this is a critical edge, we would + // be inserting the computation on some other paths (e.g. inside a loop). + // Only do this if the pred block is unconditionally branching into the phi + // block. Also, make sure that the pred block is not dead code. + BranchInst *BI = dyn_cast(InBB->getTerminator()); + if (!BI || !BI->isUnconditional() || !DT.isReachableFromEntry(InBB)) + return nullptr; - NonSimplifiedBB = InBB; - NonSimplifiedInVal = InVal; NewPhiValues.push_back(nullptr); + OpsToMoveUseToIncomingBB.push_back(i); // If the InVal is an invoke at the end of the pred block, then we can't // insert a computation after it without breaking the edge. if (isa(InVal)) - if (cast(InVal)->getParent() == NonSimplifiedBB) + if (cast(InVal)->getParent() == InBB) return nullptr; // Do not push the operation across a loop backedge. This could result in // an infinite combine loop, and is generally non-profitable (especially // if the operation was originally outside the loop). - if (isBackEdge(NonSimplifiedBB, PN->getParent())) + if (isBackEdge(InBB, PN->getParent())) return nullptr; } - // If there is exactly one non-simplified value, we can insert a copy of the - // operation in that block. However, if this is a critical edge, we would be - // inserting the computation on some other paths (e.g. inside a loop). Only - // do this if the pred block is unconditionally branching into the phi block. - // Also, make sure that the pred block is not dead code. - if (NonSimplifiedBB != nullptr) { - BranchInst *BI = dyn_cast(NonSimplifiedBB->getTerminator()); - if (!BI || !BI->isUnconditional() || - !DT.isReachableFromEntry(NonSimplifiedBB)) - return nullptr; + // Clone the instruction that uses the phi node and move it into the incoming + // BB because we know that the next iteration of InstCombine will simplify it. + for (auto OpIndex : OpsToMoveUseToIncomingBB) { + Value *Op = PN->getIncomingValue(OpIndex); + BasicBlock *OpBB = PN->getIncomingBlock(OpIndex); + + Instruction *Clone = I.clone(); + for (Use &U : Clone->operands()) { + if (U == PN) + U = Op; + else + U = U->DoPHITranslation(PN->getParent(), OpBB); + } + Clone = InsertNewInstBefore(Clone, OpBB->getTerminator()->getIterator()); + NewPhiValues[OpIndex] = Clone; } // Okay, we can do the transformation: create the new PHI node. @@ -1857,30 +1886,13 @@ Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN) { NewPN->takeName(PN); NewPN->setDebugLoc(PN->getDebugLoc()); - // If we are going to have to insert a new computation, do so right before the - // predecessor's terminator. - Instruction *Clone = nullptr; - if (NonSimplifiedBB) { - Clone = I.clone(); - for (Use &U : Clone->operands()) { - if (U == PN) - U = NonSimplifiedInVal; - else - U = U->DoPHITranslation(PN->getParent(), NonSimplifiedBB); - } - InsertNewInstBefore(Clone, NonSimplifiedBB->getTerminator()->getIterator()); - } - - for (unsigned i = 0; i != NumPHIValues; ++i) { - if (NewPhiValues[i]) - NewPN->addIncoming(NewPhiValues[i], PN->getIncomingBlock(i)); - else - NewPN->addIncoming(Clone, PN->getIncomingBlock(i)); - } + for (unsigned i = 0; i != NumPHIValues; ++i) + NewPN->addIncoming(NewPhiValues[i], PN->getIncomingBlock(i)); for (User *U : make_early_inc_range(PN->users())) { Instruction *User = cast(U); - if (User == &I) continue; + if (User == &I) + continue; replaceInstUsesWith(*User, NewPN); eraseInstFromFunction(*User); } diff --git a/llvm/test/Transforms/InstCombine/phi-with-multiple-unsimplifiable-values.ll b/llvm/test/Transforms/InstCombine/phi-with-multiple-unsimplifiable-values.ll new file mode 100644 index 0000000000000..2b75d5c547511 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/phi-with-multiple-unsimplifiable-values.ll @@ -0,0 +1,135 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt < %s -passes=instcombine -S | FileCheck %s + +declare void @use(i8 %value); + +; Since we know that any comparison of ucmp/scmp with a constant will result in +; a comparison of ucmp/scmp's operands, we can propagate such a comparison +; through the phi node and let the next iteration of instcombine simplify it. +define i1 @icmp_of_phi_of_scmp_with_constant(i1 %c, i16 %x, i16 %y) +; CHECK-LABEL: define i1 @icmp_of_phi_of_scmp_with_constant( +; CHECK-SAME: i1 [[C:%.*]], i16 [[X:%.*]], i16 [[Y:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br i1 [[C]], label %[[TRUE:.*]], label %[[FALSE:.*]] +; CHECK: [[TRUE]]: +; CHECK-NEXT: [[TMP0:%.*]] = icmp slt i16 [[X]], [[Y]] +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[FALSE]]: +; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i16 [[Y]], [[X]] +; CHECK-NEXT: br label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[R:%.*]] = phi i1 [ [[TMP0]], %[[TRUE]] ], [ [[TMP1]], %[[FALSE]] ] +; CHECK-NEXT: ret i1 [[R]] +; +{ +entry: + br i1 %c, label %true, label %false +true: + %cmp1 = call i8 @llvm.scmp(i16 %x, i16 %y) + br label %exit +false: + %cmp2 = call i8 @llvm.scmp(i16 %y, i16 %x) + br label %exit +exit: + %phi = phi i8 [%cmp1, %true], [%cmp2, %false] + %r = icmp slt i8 %phi, 0 + ret i1 %r +} + +; When one of the incoming values is ucmp/scmp and the other is not we can still perform the transformation +define i1 @icmp_of_phi_of_one_scmp_with_constant(i1 %c, i16 %x, i16 %y, i8 %false_val) +; CHECK-LABEL: define i1 @icmp_of_phi_of_one_scmp_with_constant( +; CHECK-SAME: i1 [[C:%.*]], i16 [[X:%.*]], i16 [[Y:%.*]], i8 [[FALSE_VAL:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br i1 [[C]], label %[[TRUE:.*]], label %[[FALSE:.*]] +; CHECK: [[TRUE]]: +; CHECK-NEXT: [[TMP0:%.*]] = icmp slt i16 [[X]], [[Y]] +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[FALSE]]: +; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i8 [[FALSE_VAL]], 0 +; CHECK-NEXT: br label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[PHI:%.*]] = phi i1 [ [[TMP0]], %[[TRUE]] ], [ [[TMP1]], %[[FALSE]] ] +; CHECK-NEXT: ret i1 [[PHI]] +; +{ +entry: + br i1 %c, label %true, label %false +true: + %cmp1 = call i8 @llvm.scmp(i16 %x, i16 %y) + br label %exit +false: + br label %exit +exit: + %phi = phi i8 [%cmp1, %true], [%false_val, %false] + %r = icmp slt i8 %phi, 0 + ret i1 %r +} + +; Negative test: the RHS of comparison that uses the phi node is not constant +define i1 @icmp_of_phi_of_scmp_with_non_constant(i1 %c, i16 %x, i16 %y, i8 %cmp) +; CHECK-LABEL: define i1 @icmp_of_phi_of_scmp_with_non_constant( +; CHECK-SAME: i1 [[C:%.*]], i16 [[X:%.*]], i16 [[Y:%.*]], i8 [[CMP:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br i1 [[C]], label %[[TRUE:.*]], label %[[FALSE:.*]] +; CHECK: [[TRUE]]: +; CHECK-NEXT: [[CMP1:%.*]] = call i8 @llvm.scmp.i8.i16(i16 [[X]], i16 [[Y]]) +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[FALSE]]: +; CHECK-NEXT: [[CMP2:%.*]] = call i8 @llvm.scmp.i8.i16(i16 [[Y]], i16 [[X]]) +; CHECK-NEXT: br label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[PHI:%.*]] = phi i8 [ [[CMP1]], %[[TRUE]] ], [ [[CMP2]], %[[FALSE]] ] +; CHECK-NEXT: [[R:%.*]] = icmp slt i8 [[PHI]], [[CMP]] +; CHECK-NEXT: ret i1 [[R]] +; +{ +entry: + br i1 %c, label %true, label %false +true: + %cmp1 = call i8 @llvm.scmp(i16 %x, i16 %y) + br label %exit +false: + %cmp2 = call i8 @llvm.scmp(i16 %y, i16 %x) + br label %exit +exit: + %phi = phi i8 [%cmp1, %true], [%cmp2, %false] + %r = icmp slt i8 %phi, %cmp + ret i1 %r +} + +; Negative test: more than one incoming value of the phi node is not one-use +define i1 @icmp_of_phi_of_scmp_with_constant_not_one_use(i1 %c, i16 %x, i16 %y) +; CHECK-LABEL: define i1 @icmp_of_phi_of_scmp_with_constant_not_one_use( +; CHECK-SAME: i1 [[C:%.*]], i16 [[X:%.*]], i16 [[Y:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br i1 [[C]], label %[[TRUE:.*]], label %[[FALSE:.*]] +; CHECK: [[TRUE]]: +; CHECK-NEXT: [[CMP1:%.*]] = call i8 @llvm.scmp.i8.i16(i16 [[X]], i16 [[Y]]) +; CHECK-NEXT: call void @use(i8 [[CMP1]]) +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[FALSE]]: +; CHECK-NEXT: [[CMP2:%.*]] = call i8 @llvm.scmp.i8.i16(i16 [[Y]], i16 [[X]]) +; CHECK-NEXT: call void @use(i8 [[CMP2]]) +; CHECK-NEXT: br label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[PHI:%.*]] = phi i8 [ [[CMP1]], %[[TRUE]] ], [ [[CMP2]], %[[FALSE]] ] +; CHECK-NEXT: [[R:%.*]] = icmp slt i8 [[PHI]], 0 +; CHECK-NEXT: ret i1 [[R]] +; +{ +entry: + br i1 %c, label %true, label %false +true: + %cmp1 = call i8 @llvm.scmp(i16 %x, i16 %y) + call void @use(i8 %cmp1) + br label %exit +false: + %cmp2 = call i8 @llvm.scmp(i16 %y, i16 %x) + call void @use(i8 %cmp2) + br label %exit +exit: + %phi = phi i8 [%cmp1, %true], [%cmp2, %false] + %r = icmp slt i8 %phi, 0 + ret i1 %r +}