-
Notifications
You must be signed in to change notification settings - Fork 13.4k
[GlobalIsel] Post-review combine ADDO #85961
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-globalisel @llvm/pr-subscribers-backend-amdgpu Author: Thorsten Schütt (tschuett). Changes — Full diff: https://github.com/llvm/llvm-project/pull/85961.diff (5 Files Affected):
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index d3f86af1e2908e..2a521b6b068af7 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -6945,10 +6945,6 @@ bool CombinerHelper::matchAddOverflow(MachineInstr &MI, BuildFnTy &MatchInfo) {
LLT DstTy = MRI.getType(Dst);
LLT CarryTy = MRI.getType(Carry);
- // We want do fold the [u|s]addo.
- if (!MRI.hasOneNonDBGUse(Dst))
- return false;
-
// Fold addo, if the carry is dead -> add, undef.
if (MRI.use_nodbg_empty(Carry) &&
isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, {DstTy}})) {
@@ -6959,10 +6955,6 @@ bool CombinerHelper::matchAddOverflow(MachineInstr &MI, BuildFnTy &MatchInfo) {
return true;
}
- // We want do fold the [u|s]addo.
- if (!MRI.hasOneNonDBGUse(Carry))
- return false;
-
// Canonicalize constant to RHS.
if (isConstantOrConstantVectorI(LHS) && !isConstantOrConstantVectorI(RHS)) {
if (IsSigned) {
@@ -6994,7 +6986,7 @@ bool CombinerHelper::matchAddOverflow(MachineInstr &MI, BuildFnTy &MatchInfo) {
return true;
}
- // Fold (addo x, 0) -> x, no borrow
+ // Fold (addo x, 0) -> x, no carry
if (MaybeRHS && *MaybeRHS == 0 && isConstantLegalOrBeforeLegalizer(CarryTy)) {
MatchInfo = [=](MachineIRBuilder &B) {
B.buildCopy(Dst, LHS);
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-overflow.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-overflow.mir
index 6fced31a622d9d..ec66892b98fc7d 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-overflow.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-overflow.mir
@@ -92,3 +92,87 @@ body: |
$w1 = COPY %o_wide
RET_ReallyLR implicit $w0
...
+---
+name: add_multiuse
+body: |
+ bb.0:
+ liveins: $w0, $w1
+ ; CHECK-LABEL: name: add_multiuse
+ ; CHECK: liveins: $w0, $w1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK-NEXT: %const:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: $w0 = COPY [[COPY]](s32)
+ ; CHECK-NEXT: $w1 = COPY [[COPY]](s32)
+ ; CHECK-NEXT: $w2 = COPY %const(s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %0:_(s32) = COPY $w0
+ %const:_(s32) = G_CONSTANT i32 0
+ %add:_(s32), %o:_(s1) = G_SADDO %0, %const
+ %o_wide:_(s32) = G_ZEXT %o(s1)
+ $w0 = COPY %add(s32)
+ $w1 = COPY %add(s32)
+ $w2 = COPY %o_wide
+ RET_ReallyLR implicit $w0
+...
+---
+name: add_vector
+body: |
+ bb.0:
+ liveins: $w0, $w1
+ ; CHECK-LABEL: name: add_vector
+ ; CHECK: liveins: $w0, $w1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $w3
+ ; CHECK-NEXT: %bv0:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY]](s32), [[COPY1]](s32)
+ ; CHECK-NEXT: %bv1:_(<4 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32), [[COPY2]](s32), [[COPY3]](s32)
+ ; CHECK-NEXT: %add:_(<4 x s32>), %o:_(<4 x s1>) = G_UADDO %bv0, %bv1
+ ; CHECK-NEXT: %o_wide:_(<4 x s32>) = G_ZEXT %o(<4 x s1>)
+ ; CHECK-NEXT: $q0 = COPY %add(<4 x s32>)
+ ; CHECK-NEXT: $q1 = COPY %o_wide(<4 x s32>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %0:_(s32) = COPY $w0
+ %1:_(s32) = COPY $w1
+ %2:_(s32) = COPY $w2
+ %3:_(s32) = COPY $w3
+ %bv0:_(<4 x s32>) = G_BUILD_VECTOR %0:_(s32), %1:_(s32), %0:_(s32), %1:_(s32)
+ %bv1:_(<4 x s32>) = G_BUILD_VECTOR %2:_(s32), %3:_(s32), %2:_(s32), %3:_(s32)
+ %add:_(<4 x s32>), %o:_(<4 x s1>) = G_UADDO %bv0, %bv1
+ %o_wide:_(<4 x s32>) = G_ZEXT %o(<4 x s1>)
+ $q0 = COPY %add(<4 x s32>)
+ $q1 = COPY %o_wide
+ RET_ReallyLR implicit $w0
+...
+---
+name: add_splat_vector
+body: |
+ bb.0:
+ liveins: $w0, $w1
+ ; CHECK-LABEL: name: add_splat_vector
+ ; CHECK: liveins: $w0, $w1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+ ; CHECK-NEXT: %bv0:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY]](s32), [[COPY1]](s32)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 false
+ ; CHECK-NEXT: %o:_(<4 x s1>) = G_BUILD_VECTOR [[C]](s1), [[C]](s1), [[C]](s1), [[C]](s1)
+ ; CHECK-NEXT: %o_wide:_(<4 x s32>) = G_ZEXT %o(<4 x s1>)
+ ; CHECK-NEXT: $q0 = COPY %bv0(<4 x s32>)
+ ; CHECK-NEXT: $q1 = COPY %o_wide(<4 x s32>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %0:_(s32) = COPY $w0
+ %1:_(s32) = COPY $w1
+ %2:_(s32) = COPY $w2
+ %3:_(s32) = COPY $w3
+ %const:_(s32) = G_CONSTANT i32 0
+ %bv0:_(<4 x s32>) = G_BUILD_VECTOR %0:_(s32), %1:_(s32), %0:_(s32), %1:_(s32)
+ %bv1:_(<4 x s32>) = G_BUILD_VECTOR %const:_(s32), %const:_(s32), %const:_(s32), %const:_(s32)
+ %add:_(<4 x s32>), %o:_(<4 x s1>) = G_SADDO %bv0, %bv1
+ %o_wide:_(<4 x s32>) = G_ZEXT %o(<4 x s1>)
+ $q0 = COPY %add(<4 x s32>)
+ $q1 = COPY %o_wide
+ RET_ReallyLR implicit $w0
+...
diff --git a/llvm/test/CodeGen/AArch64/arm64-xaluo.ll b/llvm/test/CodeGen/AArch64/arm64-xaluo.ll
index 77c70668b65a01..0ec2d763685e91 100644
--- a/llvm/test/CodeGen/AArch64/arm64-xaluo.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-xaluo.ll
@@ -2643,8 +2643,7 @@ define i8 @pr60530() {
;
; GISEL-LABEL: pr60530:
; GISEL: // %bb.0:
-; GISEL-NEXT: mov w8, #1 // =0x1
-; GISEL-NEXT: sbfx w0, w8, #0, #1
+; GISEL-NEXT: mov w0, #255 // =0xff
; GISEL-NEXT: ret
%1 = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 0, i8 1)
%2 = extractvalue { i8, i1 } %1, 1
diff --git a/llvm/test/CodeGen/AArch64/overflow.ll b/llvm/test/CodeGen/AArch64/overflow.ll
index 1fd60c03097906..977141f2b84f4f 100644
--- a/llvm/test/CodeGen/AArch64/overflow.ll
+++ b/llvm/test/CodeGen/AArch64/overflow.ll
@@ -64,21 +64,10 @@ entry:
}
define i32 @saddo.select.i64(i32 %v1, i32 %v2, i1 %v3, i64 %v4, i64 %v5) {
-; SDAG-LABEL: saddo.select.i64:
-; SDAG: // %bb.0: // %entry
-; SDAG-NEXT: mov w0, w1
-; SDAG-NEXT: ret
-;
-; GISEL-LABEL: saddo.select.i64:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: mov w8, #13 // =0xd
-; GISEL-NEXT: and x9, x3, #0xc
-; GISEL-NEXT: and x8, x4, x8
-; GISEL-NEXT: cmn x9, x8
-; GISEL-NEXT: cset w8, vs
-; GISEL-NEXT: tst w8, #0x1
-; GISEL-NEXT: csel w0, w0, w1, ne
-; GISEL-NEXT: ret
+; CHECK-LABEL: saddo.select.i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w0, w1
+; CHECK-NEXT: ret
entry:
%lhs = and i64 %v4, 12
%rhs = and i64 %v5, 13
@@ -89,22 +78,10 @@ entry:
}
define i32 @uaddo.select.i64(i32 %v1, i32 %v2, i1 %v3, i64 %v4, i64 %v5) {
-; SDAG-LABEL: uaddo.select.i64:
-; SDAG: // %bb.0: // %entry
-; SDAG-NEXT: mov w0, w1
-; SDAG-NEXT: ret
-;
-; GISEL-LABEL: uaddo.select.i64:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: mov w8, #9 // =0x9
-; GISEL-NEXT: mov w9, #10 // =0xa
-; GISEL-NEXT: and x8, x3, x8
-; GISEL-NEXT: and x9, x4, x9
-; GISEL-NEXT: cmn x8, x9
-; GISEL-NEXT: cset w8, hs
-; GISEL-NEXT: tst w8, #0x1
-; GISEL-NEXT: csel w0, w0, w1, ne
-; GISEL-NEXT: ret
+; CHECK-LABEL: uaddo.select.i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w0, w1
+; CHECK-NEXT: ret
entry:
%lhs = and i64 %v4, 9
%rhs = and i64 %v5, 10
diff --git a/llvm/test/CodeGen/AMDGPU/fptoi.i128.ll b/llvm/test/CodeGen/AMDGPU/fptoi.i128.ll
index b2311a87059c31..a69418d4364191 100644
--- a/llvm/test/CodeGen/AMDGPU/fptoi.i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/fptoi.i128.ll
@@ -238,7 +238,7 @@ define i128 @fptosi_f64_to_i128(double %x) {
; GISEL-NEXT: s_xor_b64 s[16:17], exec, s[6:7]
; GISEL-NEXT: s_cbranch_execz .LBB0_4
; GISEL-NEXT: ; %bb.3: ; %fp-to-i-if-else
-; GISEL-NEXT: v_add_co_u32_e32 v6, vcc, 0xfffffbcd, v6
+; GISEL-NEXT: v_add_u32_e32 v6, 0xfffffbcd, v6
; GISEL-NEXT: v_lshlrev_b64 v[0:1], v6, v[4:5]
; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v6
; GISEL-NEXT: v_cndmask_b32_e32 v11, 0, v0, vcc
@@ -612,7 +612,7 @@ define i128 @fptoui_f64_to_i128(double %x) {
; GISEL-NEXT: s_xor_b64 s[16:17], exec, s[6:7]
; GISEL-NEXT: s_cbranch_execz .LBB1_4
; GISEL-NEXT: ; %bb.3: ; %fp-to-i-if-else
-; GISEL-NEXT: v_add_co_u32_e32 v6, vcc, 0xfffffbcd, v6
+; GISEL-NEXT: v_add_u32_e32 v6, 0xfffffbcd, v6
; GISEL-NEXT: v_lshlrev_b64 v[0:1], v6, v[4:5]
; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v6
; GISEL-NEXT: v_cndmask_b32_e32 v11, 0, v0, vcc
@@ -978,7 +978,7 @@ define i128 @fptosi_f32_to_i128(float %x) {
; GISEL-NEXT: s_xor_b64 s[16:17], exec, s[6:7]
; GISEL-NEXT: s_cbranch_execz .LBB2_4
; GISEL-NEXT: ; %bb.3: ; %fp-to-i-if-else
-; GISEL-NEXT: v_add_co_u32_e32 v6, vcc, 0xffffff6a, v6
+; GISEL-NEXT: v_add_u32_e32 v6, 0xffffff6a, v6
; GISEL-NEXT: v_lshlrev_b64 v[0:1], v6, v[4:5]
; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v6
; GISEL-NEXT: v_cndmask_b32_e32 v11, 0, v0, vcc
@@ -1338,7 +1338,7 @@ define i128 @fptoui_f32_to_i128(float %x) {
; GISEL-NEXT: s_xor_b64 s[16:17], exec, s[6:7]
; GISEL-NEXT: s_cbranch_execz .LBB3_4
; GISEL-NEXT: ; %bb.3: ; %fp-to-i-if-else
-; GISEL-NEXT: v_add_co_u32_e32 v6, vcc, 0xffffff6a, v6
+; GISEL-NEXT: v_add_u32_e32 v6, 0xffffff6a, v6
; GISEL-NEXT: v_lshlrev_b64 v[0:1], v6, v[4:5]
; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v6
; GISEL-NEXT: v_cndmask_b32_e32 v11, 0, v0, vcc
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks! Nice to see that this brings some real codegen improvements.
#82927