[InstCombine] Fold `(x == y) ? 0 : (x > y ? 1 : -1)` into `ucmp/scmp(x,y)` #107314

Poseydon42 · 2024-09-04T21:22:56Z

Proofs: https://alive2.llvm.org/ce/z/Et9nrB

This also handles commuted cases of the same fold, with either the condition or the true/false values of the inner select being swapped.

llvmbot · 2024-09-04T21:23:28Z

@llvm/pr-subscribers-llvm-transforms

Author: Volodymyr Vasylkun (Poseydon42)

Changes

Proofs: https://alive2.llvm.org/ce/z/Et9nrB

This also handles commuted cases of the same fold, with either the condition or the true/false values of the inner select being swapped.

Full diff: https://github.com/llvm/llvm-project/pull/107314.diff

6 Files Affected:

(modified) llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp (+28-3)
(modified) llvm/test/Transforms/InstCombine/compare-3way.ll (+10-29)
(modified) llvm/test/Transforms/InstCombine/scmp.ll (+104)
(modified) llvm/test/Transforms/InstCombine/select-select.ll (+7-23)
(modified) llvm/test/Transforms/InstCombine/sink_to_unreachable.ll (+3-8)
(modified) llvm/test/Transforms/InstCombine/ucmp.ll (+14)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index fcd11126073bf1..70b44ff0ae8687 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -3586,9 +3586,7 @@ Instruction *InstCombinerImpl::foldSelectToCmp(SelectInst &SI) {
     Pred = ICmpInst::getSwappedPredicate(Pred);
     std::swap(LHS, RHS);
   }
-
-  Intrinsic::ID IID =
-      ICmpInst::isSigned(Pred) ? Intrinsic::scmp : Intrinsic::ucmp;
+  bool IsSigned = ICmpInst::isSigned(Pred);
 
   bool Replace = false;
   ICmpInst::Predicate ExtendedCmpPredicate;
@@ -3610,6 +3608,33 @@ Instruction *InstCombinerImpl::foldSelectToCmp(SelectInst &SI) {
        ICmpInst::getSwappedPredicate(ExtendedCmpPredicate) == Pred))
     Replace = true;
 
+  // (x == y) ? 0 : (x > y ? 1 : -1)
+  ICmpInst::Predicate FalseBranchSelectPredicate;
+  ConstantInt *InnerTV, *InnerFV;
+  if (Pred == ICmpInst::ICMP_EQ && match(TV, m_Zero()) &&
+      match(FV, m_Select(m_c_ICmp(FalseBranchSelectPredicate, m_Specific(LHS),
+                                  m_Specific(RHS)),
+                         m_ConstantInt(InnerTV), m_ConstantInt(InnerFV)))) {
+    if (!ICmpInst::isGT(FalseBranchSelectPredicate)) {
+      FalseBranchSelectPredicate =
+          ICmpInst::getSwappedPredicate(FalseBranchSelectPredicate);
+      std::swap(LHS, RHS);
+    }
+
+    if (!InnerTV->isOne()) {
+      std::swap(InnerTV, InnerFV);
+      std::swap(LHS, RHS);
+    }
+
+    if (ICmpInst::isGT(FalseBranchSelectPredicate) && InnerTV->isOne() &&
+        InnerFV->isAllOnesValue()) {
+      IsSigned = ICmpInst::isSigned(FalseBranchSelectPredicate);
+      Replace = true;
+    }
+  }
+
+  Intrinsic::ID IID =
+      IsSigned ? Intrinsic::scmp : Intrinsic::ucmp;
   if (Replace)
     return replaceInstUsesWith(
         SI, Builder.CreateIntrinsic(SI.getType(), IID, {LHS, RHS}));
diff --git a/llvm/test/Transforms/InstCombine/compare-3way.ll b/llvm/test/Transforms/InstCombine/compare-3way.ll
index e2067368fb4c3e..5d443cd45238c7 100644
--- a/llvm/test/Transforms/InstCombine/compare-3way.ll
+++ b/llvm/test/Transforms/InstCombine/compare-3way.ll
@@ -15,8 +15,7 @@ define void @test_low_sgt(i64 %a, i64 %b) {
 ; CHECK:       normal:
 ; CHECK-NEXT:    ret void
 ; CHECK:       unreached:
-; CHECK-NEXT:    [[EQ:%.*]] = icmp ne i64 [[A]], [[B]]
-; CHECK-NEXT:    [[RESULT:%.*]] = zext i1 [[EQ]] to i32
+; CHECK-NEXT:    [[RESULT:%.*]] = call i32 @llvm.scmp.i32.i64(i64 [[A]], i64 [[B]])
 ; CHECK-NEXT:    call void @use(i32 [[RESULT]])
 ; CHECK-NEXT:    ret void
 ;
@@ -62,10 +61,7 @@ define void @test_low_sge(i64 %a, i64 %b) {
 ; CHECK:       normal:
 ; CHECK-NEXT:    ret void
 ; CHECK:       unreached:
-; CHECK-NEXT:    [[EQ:%.*]] = icmp eq i64 [[A]], [[B]]
-; CHECK-NEXT:    [[SLT:%.*]] = icmp slt i64 [[A]], [[B]]
-; CHECK-NEXT:    [[DOT:%.*]] = select i1 [[SLT]], i32 -1, i32 1
-; CHECK-NEXT:    [[RESULT:%.*]] = select i1 [[EQ]], i32 0, i32 [[DOT]]
+; CHECK-NEXT:    [[RESULT:%.*]] = call i32 @llvm.scmp.i32.i64(i64 [[A]], i64 [[B]])
 ; CHECK-NEXT:    call void @use(i32 [[RESULT]])
 ; CHECK-NEXT:    ret void
 ;
@@ -114,8 +110,7 @@ define void @test_low_ne(i64 %a, i64 %b) {
 ; CHECK:       normal:
 ; CHECK-NEXT:    ret void
 ; CHECK:       unreached:
-; CHECK-NEXT:    [[EQ:%.*]] = icmp ne i64 [[A]], [[B]]
-; CHECK-NEXT:    [[RESULT:%.*]] = zext i1 [[EQ]] to i32
+; CHECK-NEXT:    [[RESULT:%.*]] = call i32 @llvm.scmp.i32.i64(i64 [[A]], i64 [[B]])
 ; CHECK-NEXT:    call void @use(i32 [[RESULT]])
 ; CHECK-NEXT:    ret void
 ;
@@ -212,8 +207,7 @@ define void @test_mid_sge(i64 %a, i64 %b) {
 ; CHECK:       normal:
 ; CHECK-NEXT:    ret void
 ; CHECK:       unreached:
-; CHECK-NEXT:    [[EQ:%.*]] = icmp ne i64 [[A]], [[B]]
-; CHECK-NEXT:    [[RESULT:%.*]] = zext i1 [[EQ]] to i32
+; CHECK-NEXT:    [[RESULT:%.*]] = call i32 @llvm.scmp.i32.i64(i64 [[A]], i64 [[B]])
 ; CHECK-NEXT:    call void @use(i32 [[RESULT]])
 ; CHECK-NEXT:    ret void
 ;
@@ -238,10 +232,7 @@ define void @test_mid_sle(i64 %a, i64 %b) {
 ; CHECK:       normal:
 ; CHECK-NEXT:    ret void
 ; CHECK:       unreached:
-; CHECK-NEXT:    [[EQ:%.*]] = icmp eq i64 [[A]], [[B]]
-; CHECK-NEXT:    [[SLT:%.*]] = icmp slt i64 [[A]], [[B]]
-; CHECK-NEXT:    [[DOT:%.*]] = select i1 [[SLT]], i32 -1, i32 1
-; CHECK-NEXT:    [[RESULT:%.*]] = select i1 [[EQ]], i32 0, i32 [[DOT]]
+; CHECK-NEXT:    [[RESULT:%.*]] = call i32 @llvm.scmp.i32.i64(i64 [[A]], i64 [[B]])
 ; CHECK-NEXT:    call void @use(i32 [[RESULT]])
 ; CHECK-NEXT:    ret void
 ;
@@ -266,9 +257,8 @@ define void @test_mid_ne(i64 %a, i64 %b) {
 ; CHECK:       normal:
 ; CHECK-NEXT:    ret void
 ; CHECK:       unreached:
-; CHECK-NEXT:    [[SLT:%.*]] = icmp slt i64 [[A]], [[B]]
-; CHECK-NEXT:    [[DOT:%.*]] = select i1 [[SLT]], i32 -1, i32 1
-; CHECK-NEXT:    call void @use(i32 [[DOT]])
+; CHECK-NEXT:    [[RESULT:%.*]] = call i32 @llvm.scmp.i32.i64(i64 [[A]], i64 [[B]])
+; CHECK-NEXT:    call void @use(i32 [[RESULT]])
 ; CHECK-NEXT:    ret void
 ;
   %eq = icmp eq i64 %a, %b
@@ -338,10 +328,7 @@ define void @test_high_slt(i64 %a, i64 %b) {
 ; CHECK:       normal:
 ; CHECK-NEXT:    ret void
 ; CHECK:       unreached:
-; CHECK-NEXT:    [[EQ:%.*]] = icmp eq i64 [[A]], [[B]]
-; CHECK-NEXT:    [[SLT:%.*]] = icmp slt i64 [[A]], [[B]]
-; CHECK-NEXT:    [[DOT:%.*]] = select i1 [[SLT]], i32 -1, i32 1
-; CHECK-NEXT:    [[RESULT:%.*]] = select i1 [[EQ]], i32 0, i32 [[DOT]]
+; CHECK-NEXT:    [[RESULT:%.*]] = call i32 @llvm.scmp.i32.i64(i64 [[A]], i64 [[B]])
 ; CHECK-NEXT:    call void @use(i32 [[RESULT]])
 ; CHECK-NEXT:    ret void
 ;
@@ -389,10 +376,7 @@ define void @test_high_sle(i64 %a, i64 %b) {
 ; CHECK:       normal:
 ; CHECK-NEXT:    ret void
 ; CHECK:       unreached:
-; CHECK-NEXT:    [[EQ:%.*]] = icmp eq i64 [[A]], [[B]]
-; CHECK-NEXT:    [[SLT:%.*]] = icmp slt i64 [[A]], [[B]]
-; CHECK-NEXT:    [[DOT:%.*]] = select i1 [[SLT]], i32 -1, i32 1
-; CHECK-NEXT:    [[RESULT:%.*]] = select i1 [[EQ]], i32 0, i32 [[DOT]]
+; CHECK-NEXT:    [[RESULT:%.*]] = call i32 @llvm.scmp.i32.i64(i64 [[A]], i64 [[B]])
 ; CHECK-NEXT:    call void @use(i32 [[RESULT]])
 ; CHECK-NEXT:    ret void
 ;
@@ -417,10 +401,7 @@ define void @test_high_ne(i64 %a, i64 %b) {
 ; CHECK:       normal:
 ; CHECK-NEXT:    ret void
 ; CHECK:       unreached:
-; CHECK-NEXT:    [[EQ:%.*]] = icmp eq i64 [[A]], [[B]]
-; CHECK-NEXT:    [[SLT:%.*]] = icmp slt i64 [[A]], [[B]]
-; CHECK-NEXT:    [[DOT:%.*]] = select i1 [[SLT]], i32 -1, i32 1
-; CHECK-NEXT:    [[RESULT:%.*]] = select i1 [[EQ]], i32 0, i32 [[DOT]]
+; CHECK-NEXT:    [[RESULT:%.*]] = call i32 @llvm.scmp.i32.i64(i64 [[A]], i64 [[B]])
 ; CHECK-NEXT:    call void @use(i32 [[RESULT]])
 ; CHECK-NEXT:    ret void
 ;
diff --git a/llvm/test/Transforms/InstCombine/scmp.ll b/llvm/test/Transforms/InstCombine/scmp.ll
index 123bc647462337..397621b8c878ec 100644
--- a/llvm/test/Transforms/InstCombine/scmp.ll
+++ b/llvm/test/Transforms/InstCombine/scmp.ll
@@ -343,3 +343,107 @@ define i8 @scmp_from_select_gt_and_lt(i32 %x, i32 %y) {
   %r = select i1 %gt, i8 1, i8 %lt
   ret i8 %r
 }
+
+; (x == y) ? 0 : (x s> y ? 1 : -1) into scmp(x, y)
+define i8 @scmp_from_select_eq_and_gt(i32 %x, i32 %y) {
+; CHECK-LABEL: define i8 @scmp_from_select_eq_and_gt(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = call i8 @llvm.scmp.i8.i32(i32 [[X]], i32 [[Y]])
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %eq = icmp eq i32 %x, %y
+  %gt = icmp sgt i32 %x, %y
+  %sel1 = select i1 %gt, i8 1, i8 -1
+  %r = select i1 %eq, i8 0, i8 %sel1
+  ret i8 %r
+}
+
+define i8 @scmp_from_select_eq_and_gt_commuted1(i32 %x, i32 %y) {
+; CHECK-LABEL: define i8 @scmp_from_select_eq_and_gt_commuted1(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = call i8 @llvm.scmp.i8.i32(i32 [[Y]], i32 [[X]])
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %eq = icmp eq i32 %x, %y
+  %gt = icmp slt i32 %x, %y
+  %sel1 = select i1 %gt, i8 1, i8 -1
+  %r = select i1 %eq, i8 0, i8 %sel1
+  ret i8 %r
+}
+
+define i8 @scmp_from_select_eq_and_gt_commuted2(i32 %x, i32 %y) {
+; CHECK-LABEL: define i8 @scmp_from_select_eq_and_gt_commuted2(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = call i8 @llvm.scmp.i8.i32(i32 [[Y]], i32 [[X]])
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %eq = icmp eq i32 %x, %y
+  %gt = icmp sgt i32 %x, %y
+  %sel1 = select i1 %gt, i8 -1, i8 1
+  %r = select i1 %eq, i8 0, i8 %sel1
+  ret i8 %r
+}
+
+define i8 @scmp_from_select_eq_and_gt_commuted3(i32 %x, i32 %y) {
+; CHECK-LABEL: define i8 @scmp_from_select_eq_and_gt_commuted3(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = call i8 @llvm.scmp.i8.i32(i32 [[X]], i32 [[Y]])
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %eq = icmp eq i32 %x, %y
+  %gt = icmp slt i32 %x, %y
+  %sel1 = select i1 %gt, i8 -1, i8 1
+  %r = select i1 %eq, i8 0, i8 %sel1
+  ret i8 %r
+}
+
+; Negative test: true value of outer select is not zero
+define i8 @scmp_from_select_eq_and_gt_neg1(i32 %x, i32 %y) {
+; CHECK-LABEL: define i8 @scmp_from_select_eq_and_gt_neg1(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT:    [[EQ:%.*]] = icmp eq i32 [[X]], [[Y]]
+; CHECK-NEXT:    [[GT:%.*]] = icmp sgt i32 [[X]], [[Y]]
+; CHECK-NEXT:    [[SEL1:%.*]] = select i1 [[GT]], i8 1, i8 -1
+; CHECK-NEXT:    [[R:%.*]] = select i1 [[EQ]], i8 5, i8 [[SEL1]]
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %eq = icmp eq i32 %x, %y
+  %gt = icmp sgt i32 %x, %y
+  %sel1 = select i1 %gt, i8 1, i8 -1
+  %r = select i1 %eq, i8 5, i8 %sel1
+  ret i8 %r
+}
+
+; Negative test: true value of inner select is not 1 or -1
+define i8 @scmp_from_select_eq_and_gt_neg2(i32 %x, i32 %y) {
+; CHECK-LABEL: define i8 @scmp_from_select_eq_and_gt_neg2(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT:    [[EQ:%.*]] = icmp eq i32 [[X]], [[Y]]
+; CHECK-NEXT:    [[GT:%.*]] = icmp sgt i32 [[X]], [[Y]]
+; CHECK-NEXT:    [[SEL1:%.*]] = select i1 [[GT]], i8 2, i8 -1
+; CHECK-NEXT:    [[R:%.*]] = select i1 [[EQ]], i8 0, i8 [[SEL1]]
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %eq = icmp eq i32 %x, %y
+  %gt = icmp sgt i32 %x, %y
+  %sel1 = select i1 %gt, i8 2, i8 -1
+  %r = select i1 %eq, i8 0, i8 %sel1
+  ret i8 %r
+}
+
+; Negative test: false value of inner select is not 1 or -1
+define i8 @scmp_from_select_eq_and_gt_neg3(i32 %x, i32 %y) {
+; CHECK-LABEL: define i8 @scmp_from_select_eq_and_gt_neg3(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT:    [[EQ:%.*]] = icmp eq i32 [[X]], [[Y]]
+; CHECK-NEXT:    [[GT:%.*]] = icmp sgt i32 [[X]], [[Y]]
+; CHECK-NEXT:    [[SEL1:%.*]] = select i1 [[GT]], i8 1, i8 22
+; CHECK-NEXT:    [[R:%.*]] = select i1 [[EQ]], i8 0, i8 [[SEL1]]
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %eq = icmp eq i32 %x, %y
+  %gt = icmp sgt i32 %x, %y
+  %sel1 = select i1 %gt, i8 1, i8 22
+  %r = select i1 %eq, i8 0, i8 %sel1
+  ret i8 %r
+}
diff --git a/llvm/test/Transforms/InstCombine/select-select.ll b/llvm/test/Transforms/InstCombine/select-select.ll
index 1feae5ab504dcf..94e88c2f6cbe6c 100644
--- a/llvm/test/Transforms/InstCombine/select-select.ll
+++ b/llvm/test/Transforms/InstCombine/select-select.ll
@@ -177,10 +177,7 @@ define <2 x i8> @sel_shuf_narrowing_commute2(<4 x i8> %x, <4 x i8> %y, <2 x i8>
 
 define i8 @strong_order_cmp_slt_eq(i32 %a, i32 %b) {
 ; CHECK-LABEL: @strong_order_cmp_slt_eq(
-; CHECK-NEXT:    [[CMP_LT:%.*]] = icmp slt i32 [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[SEL_LT:%.*]] = select i1 [[CMP_LT]], i8 -1, i8 1
-; CHECK-NEXT:    [[CMP_EQ:%.*]] = icmp eq i32 [[A]], [[B]]
-; CHECK-NEXT:    [[SEL_EQ:%.*]] = select i1 [[CMP_EQ]], i8 0, i8 [[SEL_LT]]
+; CHECK-NEXT:    [[SEL_EQ:%.*]] = call i8 @llvm.scmp.i8.i32(i32 [[A:%.*]], i32 [[B:%.*]])
 ; CHECK-NEXT:    ret i8 [[SEL_EQ]]
 ;
   %cmp.lt = icmp slt i32 %a, %b
@@ -192,10 +189,7 @@ define i8 @strong_order_cmp_slt_eq(i32 %a, i32 %b) {
 
 define i8 @strong_order_cmp_ult_eq(i32 %a, i32 %b) {
 ; CHECK-LABEL: @strong_order_cmp_ult_eq(
-; CHECK-NEXT:    [[CMP_LT:%.*]] = icmp ult i32 [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[SEL_LT:%.*]] = select i1 [[CMP_LT]], i8 -1, i8 1
-; CHECK-NEXT:    [[CMP_EQ:%.*]] = icmp eq i32 [[A]], [[B]]
-; CHECK-NEXT:    [[SEL_EQ:%.*]] = select i1 [[CMP_EQ]], i8 0, i8 [[SEL_LT]]
+; CHECK-NEXT:    [[SEL_EQ:%.*]] = call i8 @llvm.ucmp.i8.i32(i32 [[A:%.*]], i32 [[B:%.*]])
 ; CHECK-NEXT:    ret i8 [[SEL_EQ]]
 ;
   %cmp.lt = icmp ult i32 %a, %b
@@ -252,10 +246,7 @@ define i8 @strong_order_cmp_slt_ult_wrong_pred(i32 %a, i32 %b) {
 
 define i8 @strong_order_cmp_sgt_eq(i32 %a, i32 %b) {
 ; CHECK-LABEL: @strong_order_cmp_sgt_eq(
-; CHECK-NEXT:    [[CMP_GT:%.*]] = icmp sgt i32 [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[SEL_GT:%.*]] = select i1 [[CMP_GT]], i8 1, i8 -1
-; CHECK-NEXT:    [[CMP_EQ:%.*]] = icmp eq i32 [[A]], [[B]]
-; CHECK-NEXT:    [[SEL_EQ:%.*]] = select i1 [[CMP_EQ]], i8 0, i8 [[SEL_GT]]
+; CHECK-NEXT:    [[SEL_EQ:%.*]] = call i8 @llvm.scmp.i8.i32(i32 [[A:%.*]], i32 [[B:%.*]])
 ; CHECK-NEXT:    ret i8 [[SEL_EQ]]
 ;
   %cmp.gt = icmp sgt i32 %a, %b
@@ -267,10 +258,7 @@ define i8 @strong_order_cmp_sgt_eq(i32 %a, i32 %b) {
 
 define i8 @strong_order_cmp_ugt_eq(i32 %a, i32 %b) {
 ; CHECK-LABEL: @strong_order_cmp_ugt_eq(
-; CHECK-NEXT:    [[CMP_GT:%.*]] = icmp ugt i32 [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[SEL_GT:%.*]] = select i1 [[CMP_GT]], i8 1, i8 -1
-; CHECK-NEXT:    [[CMP_EQ:%.*]] = icmp eq i32 [[A]], [[B]]
-; CHECK-NEXT:    [[SEL_EQ:%.*]] = select i1 [[CMP_EQ]], i8 0, i8 [[SEL_GT]]
+; CHECK-NEXT:    [[SEL_EQ:%.*]] = call i8 @llvm.ucmp.i8.i32(i32 [[A:%.*]], i32 [[B:%.*]])
 ; CHECK-NEXT:    ret i8 [[SEL_EQ]]
 ;
   %cmp.gt = icmp ugt i32 %a, %b
@@ -395,9 +383,7 @@ define i8 @strong_order_cmp_slt_eq_slt_not_oneuse(i32 %a, i32 %b) {
 ; CHECK-LABEL: @strong_order_cmp_slt_eq_slt_not_oneuse(
 ; CHECK-NEXT:    [[CMP_LT:%.*]] = icmp slt i32 [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT:    call void @use1(i1 [[CMP_LT]])
-; CHECK-NEXT:    [[SEL_LT:%.*]] = select i1 [[CMP_LT]], i8 -1, i8 1
-; CHECK-NEXT:    [[CMP_EQ:%.*]] = icmp eq i32 [[A]], [[B]]
-; CHECK-NEXT:    [[SEL_EQ:%.*]] = select i1 [[CMP_EQ]], i8 0, i8 [[SEL_LT]]
+; CHECK-NEXT:    [[SEL_EQ:%.*]] = call i8 @llvm.scmp.i8.i32(i32 [[A]], i32 [[B]])
 ; CHECK-NEXT:    ret i8 [[SEL_EQ]]
 ;
   %cmp.lt = icmp slt i32 %a, %b
@@ -410,11 +396,9 @@ define i8 @strong_order_cmp_slt_eq_slt_not_oneuse(i32 %a, i32 %b) {
 
 define i8 @strong_order_cmp_sgt_eq_eq_not_oneuse(i32 %a, i32 %b) {
 ; CHECK-LABEL: @strong_order_cmp_sgt_eq_eq_not_oneuse(
-; CHECK-NEXT:    [[CMP_GT:%.*]] = icmp sgt i32 [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[SEL_GT:%.*]] = select i1 [[CMP_GT]], i8 1, i8 -1
-; CHECK-NEXT:    [[CMP_EQ:%.*]] = icmp eq i32 [[A]], [[B]]
+; CHECK-NEXT:    [[CMP_EQ:%.*]] = icmp eq i32 [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT:    call void @use1(i1 [[CMP_EQ]])
-; CHECK-NEXT:    [[SEL_EQ:%.*]] = select i1 [[CMP_EQ]], i8 0, i8 [[SEL_GT]]
+; CHECK-NEXT:    [[SEL_EQ:%.*]] = call i8 @llvm.scmp.i8.i32(i32 [[A]], i32 [[B]])
 ; CHECK-NEXT:    ret i8 [[SEL_EQ]]
 ;
   %cmp.gt = icmp sgt i32 %a, %b
diff --git a/llvm/test/Transforms/InstCombine/sink_to_unreachable.ll b/llvm/test/Transforms/InstCombine/sink_to_unreachable.ll
index 02ed22217854ea..72aa6dc80df34c 100644
--- a/llvm/test/Transforms/InstCombine/sink_to_unreachable.ll
+++ b/llvm/test/Transforms/InstCombine/sink_to_unreachable.ll
@@ -10,8 +10,7 @@ define void @test_01(i32 %x, i32 %y) {
 ; CHECK-NEXT:    [[C2:%.*]] = icmp slt i32 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    br i1 [[C2]], label [[EXIT:%.*]], label [[UNREACHED:%.*]]
 ; CHECK:       unreached:
-; CHECK-NEXT:    [[C1:%.*]] = icmp ne i32 [[X]], [[Y]]
-; CHECK-NEXT:    [[COMPARATOR:%.*]] = zext i1 [[C1]] to i32
+; CHECK-NEXT:    [[COMPARATOR:%.*]] = call i32 @llvm.scmp.i32.i32(i32 [[X]], i32 [[Y]])
 ; CHECK-NEXT:    call void @use(i32 [[COMPARATOR]])
 ; CHECK-NEXT:    unreachable
 ; CHECK:       exit:
@@ -42,9 +41,7 @@ define void @test_02(i32 %x, i32 %y) {
 ; CHECK-NEXT:    [[C3:%.*]] = icmp sgt i32 [[X]], [[Y]]
 ; CHECK-NEXT:    br i1 [[C3]], label [[EXIT]], label [[UNREACHED:%.*]]
 ; CHECK:       unreached:
-; CHECK-NEXT:    [[C1:%.*]] = icmp eq i32 [[X]], [[Y]]
-; CHECK-NEXT:    [[SIGNED:%.*]] = select i1 [[C2]], i32 -1, i32 1
-; CHECK-NEXT:    [[COMPARATOR:%.*]] = select i1 [[C1]], i32 0, i32 [[SIGNED]]
+; CHECK-NEXT:    [[COMPARATOR:%.*]] = call i32 @llvm.scmp.i32.i32(i32 [[X]], i32 [[Y]])
 ; CHECK-NEXT:    call void @use(i32 [[COMPARATOR]])
 ; CHECK-NEXT:    unreachable
 ; CHECK:       exit:
@@ -78,9 +75,7 @@ define i32 @test_03(i32 %x, i32 %y) {
 ; CHECK-NEXT:    [[C3:%.*]] = icmp sgt i32 [[X]], [[Y]]
 ; CHECK-NEXT:    br i1 [[C3]], label [[EXIT]], label [[UNREACHED:%.*]]
 ; CHECK:       unreached:
-; CHECK-NEXT:    [[C1:%.*]] = icmp eq i32 [[X]], [[Y]]
-; CHECK-NEXT:    [[SIGNED:%.*]] = select i1 [[C2]], i32 -1, i32 1
-; CHECK-NEXT:    [[COMPARATOR:%.*]] = select i1 [[C1]], i32 0, i32 [[SIGNED]]
+; CHECK-NEXT:    [[COMPARATOR:%.*]] = call i32 @llvm.scmp.i32.i32(i32 [[X]], i32 [[Y]])
 ; CHECK-NEXT:    ret i32 [[COMPARATOR]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    ret i32 0
diff --git a/llvm/test/Transforms/InstCombine/ucmp.ll b/llvm/test/Transforms/InstCombine/ucmp.ll
index 13755f13bb0a11..2d5036019740cd 100644
--- a/llvm/test/Transforms/InstCombine/ucmp.ll
+++ b/llvm/test/Transforms/InstCombine/ucmp.ll
@@ -541,3 +541,17 @@ define i8 @ucmp_from_select_gt_and_lt(i32 %x, i32 %y) {
   %r = select i1 %gt, i8 1, i8 %lt
   ret i8 %r
 }
+
+; (x == y) ? 0 : (x u> y ? 1 : -1) into ucmp(x, y)
+define i8 @scmp_from_select_eq_and_gt(i32 %x, i32 %y) {
+; CHECK-LABEL: define i8 @scmp_from_select_eq_and_gt(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = call i8 @llvm.ucmp.i8.i32(i32 [[X]], i32 [[Y]])
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %eq = icmp eq i32 %x, %y
+  %gt = icmp ugt i32 %x, %y
+  %sel1 = select i1 %gt, i8 1, i8 -1
+  %r = select i1 %eq, i8 0, i8 %sel1
+  ret i8 %r
+}

github-actions · 2024-09-04T22:17:48Z

✅ With the latest revision this PR passed the C/C++ code formatter.

goldsteinn · 2024-09-04T23:11:38Z

By inverse fold for you mean x !- y ? x > y ? 1 : -1 : 0 or x > y ? 1 : x == y ? 0 : -1?

nikic · 2024-09-05T10:13:37Z

By inverse fold for you mean x !- y ? x > y ? 1 : -1 : 0 or x > y ? 1 : x == y ? 0 : -1?

It looks like this gets canonicalized: https://llvm.godbolt.org/z/7xTGM85G6 Probably good to add tests for that variant, but shouldn't need code changes.

llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp

nikic · 2024-09-05T10:16:48Z

llvm/test/Transforms/InstCombine/compare-3way.ll

@@ -15,8 +15,7 @@ define void @test_low_sgt(i64 %a, i64 %b) {
 ; CHECK:       normal:
 ; CHECK-NEXT:    ret void
 ; CHECK:       unreached:
-; CHECK-NEXT:    [[EQ:%.*]] = icmp ne i64 [[A]], [[B]]
-; CHECK-NEXT:    [[RESULT:%.*]] = zext i1 [[EQ]] to i32
+; CHECK-NEXT:    [[RESULT:%.*]] = call i32 @llvm.scmp.i32.i64(i64 [[A]], i64 [[B]])


This is a bit iffy, but also not directly related to this PR. It's not clear whether we should expand scmp/ucmp back to icmp+select if we can exclude one of the cases.

I think for this particular case we can improve the way ucmp/scmp is handled in ConstraintElimination. The branch implies that within the unreached block a >= b, so we can replace the call with icmp + zext/sext. I don't think it's worth adding extra checks in InstCombine for this.

Poseydon42 · 2024-09-05T11:13:23Z

By inverse fold for you mean x !- y ? x > y ? 1 : -1 : 0 or x > y ? 1 : x == y ? 0 : -1?

The former; it gets canonicalized into x == y ? 0 : (x > y ? 1 : -1), which is then handled by this patch.

goldsteinn · 2024-09-05T15:58:48Z

By inverse fold for you mean x !- y ? x > y ? 1 : -1 : 0 or x > y ? 1 : x == y ? 0 : -1?

The former; it gets canonicalized into x == y ? 0 : (x > y ? 1 : -1), which is then handled by this patch.

Any value in handling the latter?

Poseydon42 · 2024-09-05T17:05:36Z

By inverse fold for you mean x !- y ? x > y ? 1 : -1 : 0 or x > y ? 1 : x == y ? 0 : -1?

The former; it gets canonicalized into x == y ? 0 : (x > y ? 1 : -1), which is then handled by this patch.

Any value in handling the latter?

It's already being folded as well if I'm not mistaken.

nikic

LGTM

@dtcxzyw Would be great if you could test this, in case it surfaces more optimization regressions...

PR Link: llvm/llvm-project#107314

dtcxzyw · 2024-09-06T06:11:17Z

LGTM

@dtcxzyw Would be great if you could test this, in case it surfaces more optimization regressions...

In most cases, regressions occur when the result of [u|s]cmp is passes to a phi node, then the phi node is compared with a constant:

bb1:
%cmp = call i32 @llvm.ucmp(...)
br label %exit

exit:
%phi = i32 [%cmp, %bb1], [-1, %bb2]
%cmp2 = icmp slt i32 %phi, 0

Another missed optimization: if ssat(X) is only used to compare with zero, we can use X instead.
Alive2: https://alive2.llvm.org/ce/z/RZhWXF

Poseydon42 · 2024-09-13T19:50:46Z

Rebased onto #107769. @dtcxzyw, can you please rerun the benchmark to see if the regression is fully gone. If that's the case I reckon we can go ahead and merge this and fix the other regression later as it seems to be less common.

PR Link: llvm/llvm-project#107314

dtcxzyw · 2024-09-14T03:16:09Z

See https://github.com/dtcxzyw/llvm-opt-benchmark/pull/1307/files#r1759649980. Can we remove the one-use check? Or just use InVal->hasOneUser() instead.

dtcxzyw

LGTM.

Poseydon42 requested a review from dtcxzyw September 4, 2024 21:22

Poseydon42 requested a review from nikic as a code owner September 4, 2024 21:22

llvmbot added the llvm:transforms label Sep 4, 2024

Poseydon42 removed the request for review from nikic September 4, 2024 21:23

Poseydon42 force-pushed the fold-to-uscmp-2 branch from b32e7ab to fd7dd97 Compare September 4, 2024 22:14

dtcxzyw requested review from nikic and goldsteinn September 5, 2024 02:04

dtcxzyw mentioned this pull request Sep 5, 2024

Grep PR107314 dtcxzyw/llvm-opt-benchmark#1277

Closed

nikic reviewed Sep 5, 2024

View reviewed changes

nikic approved these changes Sep 5, 2024

View reviewed changes

dtcxzyw added a commit to dtcxzyw/llvm-opt-benchmark that referenced this pull request Sep 6, 2024

pre-commit: test PR107314

7cde2fc

PR Link: llvm/llvm-project#107314

dtcxzyw mentioned this pull request Sep 6, 2024

pre-commit: test PR107314 dtcxzyw/llvm-opt-benchmark#1280

Closed

Poseydon42 mentioned this pull request Sep 8, 2024

[InstCombine] Replace an integer comparison of a phi node with multiple ucmp/scmp operands and a constant with phi of individual comparisons of original intrinsic's arguments #107769

Merged

Poseydon42 added 6 commits September 13, 2024 19:51

Precommit tests

481e053

Implement the fold

839adc7

Update existing tests

4f388a4

Fix formatting

1a85093

Handle vector types

b7ee463

Add test for the inverse case

b841fc6

Poseydon42 force-pushed the fold-to-uscmp-2 branch from df1a722 to b841fc6 Compare September 13, 2024 19:49

dtcxzyw added a commit to dtcxzyw/llvm-opt-benchmark that referenced this pull request Sep 14, 2024

pre-commit: test PR107314

e2abadf

PR Link: llvm/llvm-project#107314

dtcxzyw mentioned this pull request Sep 14, 2024

pre-commit: test PR107314 (New) dtcxzyw/llvm-opt-benchmark#1307

Closed

nikic mentioned this pull request Sep 19, 2024

Add GSoC 2024 3-way comparison page llvm/llvm-blog-www#56

Merged

dtcxzyw mentioned this pull request Sep 23, 2024

[InstCombine] Relax the conditons of fold of ucmp/scmp into phi by allowing the phi node to use the result of ucmp/scmp more than once #109593

Merged

dtcxzyw approved these changes Sep 23, 2024

View reviewed changes

vfdff mentioned this pull request Sep 23, 2024

[InstCombine] Transform sub(select, select) into select when they have same conditions #106511

Closed

Poseydon42 merged commit d479849 into llvm:main Sep 23, 2024
6 of 8 checks passed

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[InstCombine] Fold `(x == y) ? 0 : (x > y ? 1 : -1)` into `ucmp/scmp(x,y)` #107314

[InstCombine] Fold `(x == y) ? 0 : (x > y ? 1 : -1)` into `ucmp/scmp(x,y)` #107314

Uh oh!

Poseydon42 commented Sep 4, 2024

Uh oh!

llvmbot commented Sep 4, 2024

Uh oh!

github-actions bot commented Sep 4, 2024 •

edited

Loading

Uh oh!

goldsteinn commented Sep 4, 2024

Uh oh!

nikic commented Sep 5, 2024

Uh oh!

Uh oh!

nikic Sep 5, 2024

Uh oh!

Poseydon42 Sep 5, 2024

Uh oh!

Poseydon42 commented Sep 5, 2024

Uh oh!

goldsteinn commented Sep 5, 2024

Uh oh!

Poseydon42 commented Sep 5, 2024

Uh oh!

nikic left a comment

Uh oh!

dtcxzyw commented Sep 6, 2024

Uh oh!

Poseydon42 commented Sep 13, 2024

Uh oh!

dtcxzyw commented Sep 14, 2024

Uh oh!

dtcxzyw left a comment

Uh oh!

Uh oh!

Uh oh!

[InstCombine] Fold (x == y) ? 0 : (x > y ? 1 : -1) into ucmp/scmp(x,y) #107314

[InstCombine] Fold (x == y) ? 0 : (x > y ? 1 : -1) into ucmp/scmp(x,y) #107314

Uh oh!

Conversation

Poseydon42 commented Sep 4, 2024

Uh oh!

llvmbot commented Sep 4, 2024

Uh oh!

github-actions bot commented Sep 4, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

goldsteinn commented Sep 4, 2024

Uh oh!

nikic commented Sep 5, 2024

Uh oh!

Uh oh!

nikic Sep 5, 2024

Choose a reason for hiding this comment

Uh oh!

Poseydon42 Sep 5, 2024

Choose a reason for hiding this comment

Uh oh!

Poseydon42 commented Sep 5, 2024

Uh oh!

goldsteinn commented Sep 5, 2024

Uh oh!

Poseydon42 commented Sep 5, 2024

Uh oh!

nikic left a comment

Choose a reason for hiding this comment

Uh oh!

dtcxzyw commented Sep 6, 2024

Uh oh!

Poseydon42 commented Sep 13, 2024

Uh oh!

dtcxzyw commented Sep 14, 2024

Uh oh!

dtcxzyw left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!

[InstCombine] Fold `(x == y) ? 0 : (x > y ? 1 : -1)` into `ucmp/scmp(x,y)` #107314

[InstCombine] Fold `(x == y) ? 0 : (x > y ? 1 : -1)` into `ucmp/scmp(x,y)` #107314

github-actions bot commented Sep 4, 2024 •

edited

Loading