-
Notifications
You must be signed in to change notification settings - Fork 13.4k
[GlobalIsel] Post-review combine ADDO #85961
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-globalisel @llvm/pr-subscribers-backend-amdgpu Author: Thorsten Schütt (tschuett). Changes — Full diff: https://github.com/llvm/llvm-project/pull/85961.diff (5 Files Affected):
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index d3f86af1e2908e..2a521b6b068af7 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -6945,10 +6945,6 @@ bool CombinerHelper::matchAddOverflow(MachineInstr &MI, BuildFnTy &MatchInfo) {
LLT DstTy = MRI.getType(Dst);
LLT CarryTy = MRI.getType(Carry);
- // We want do fold the [u|s]addo.
- if (!MRI.hasOneNonDBGUse(Dst))
- return false;
-
// Fold addo, if the carry is dead -> add, undef.
if (MRI.use_nodbg_empty(Carry) &&
isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, {DstTy}})) {
@@ -6959,10 +6955,6 @@ bool CombinerHelper::matchAddOverflow(MachineInstr &MI, BuildFnTy &MatchInfo) {
return true;
}
- // We want do fold the [u|s]addo.
- if (!MRI.hasOneNonDBGUse(Carry))
- return false;
-
// Canonicalize constant to RHS.
if (isConstantOrConstantVectorI(LHS) && !isConstantOrConstantVectorI(RHS)) {
if (IsSigned) {
@@ -6994,7 +6986,7 @@ bool CombinerHelper::matchAddOverflow(MachineInstr &MI, BuildFnTy &MatchInfo) {
return true;
}
- // Fold (addo x, 0) -> x, no borrow
+ // Fold (addo x, 0) -> x, no carry
if (MaybeRHS && *MaybeRHS == 0 && isConstantLegalOrBeforeLegalizer(CarryTy)) {
MatchInfo = [=](MachineIRBuilder &B) {
B.buildCopy(Dst, LHS);
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-overflow.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-overflow.mir
index 6fced31a622d9d..ec66892b98fc7d 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-overflow.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-overflow.mir
@@ -92,3 +92,87 @@ body: |
$w1 = COPY %o_wide
RET_ReallyLR implicit $w0
...
+---
+name: add_multiuse
+body: |
+ bb.0:
+ liveins: $w0, $w1
+ ; CHECK-LABEL: name: add_multiuse
+ ; CHECK: liveins: $w0, $w1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK-NEXT: %const:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: $w0 = COPY [[COPY]](s32)
+ ; CHECK-NEXT: $w1 = COPY [[COPY]](s32)
+ ; CHECK-NEXT: $w2 = COPY %const(s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %0:_(s32) = COPY $w0
+ %const:_(s32) = G_CONSTANT i32 0
+ %add:_(s32), %o:_(s1) = G_SADDO %0, %const
+ %o_wide:_(s32) = G_ZEXT %o(s1)
+ $w0 = COPY %add(s32)
+ $w1 = COPY %add(s32)
+ $w2 = COPY %o_wide
+ RET_ReallyLR implicit $w0
+...
+---
+name: add_vector
+body: |
+ bb.0:
+ liveins: $w0, $w1
+ ; CHECK-LABEL: name: add_vector
+ ; CHECK: liveins: $w0, $w1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $w3
+ ; CHECK-NEXT: %bv0:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY]](s32), [[COPY1]](s32)
+ ; CHECK-NEXT: %bv1:_(<4 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32), [[COPY2]](s32), [[COPY3]](s32)
+ ; CHECK-NEXT: %add:_(<4 x s32>), %o:_(<4 x s1>) = G_UADDO %bv0, %bv1
+ ; CHECK-NEXT: %o_wide:_(<4 x s32>) = G_ZEXT %o(<4 x s1>)
+ ; CHECK-NEXT: $q0 = COPY %add(<4 x s32>)
+ ; CHECK-NEXT: $q1 = COPY %o_wide(<4 x s32>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %0:_(s32) = COPY $w0
+ %1:_(s32) = COPY $w1
+ %2:_(s32) = COPY $w2
+ %3:_(s32) = COPY $w3
+ %bv0:_(<4 x s32>) = G_BUILD_VECTOR %0:_(s32), %1:_(s32), %0:_(s32), %1:_(s32)
+ %bv1:_(<4 x s32>) = G_BUILD_VECTOR %2:_(s32), %3:_(s32), %2:_(s32), %3:_(s32)
+ %add:_(<4 x s32>), %o:_(<4 x s1>) = G_UADDO %bv0, %bv1
+ %o_wide:_(<4 x s32>) = G_ZEXT %o(<4 x s1>)
+ $q0 = COPY %add(<4 x s32>)
+ $q1 = COPY %o_wide
+ RET_ReallyLR implicit $w0
+...
+---
+name: add_splat_vector
+body: |
+ bb.0:
+ liveins: $w0, $w1
+ ; CHECK-LABEL: name: add_splat_vector
+ ; CHECK: liveins: $w0, $w1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+ ; CHECK-NEXT: %bv0:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY]](s32), [[COPY1]](s32)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 false
+ ; CHECK-NEXT: %o:_(<4 x s1>) = G_BUILD_VECTOR [[C]](s1), [[C]](s1), [[C]](s1), [[C]](s1)
+ ; CHECK-NEXT: %o_wide:_(<4 x s32>) = G_ZEXT %o(<4 x s1>)
+ ; CHECK-NEXT: $q0 = COPY %bv0(<4 x s32>)
+ ; CHECK-NEXT: $q1 = COPY %o_wide(<4 x s32>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %0:_(s32) = COPY $w0
+ %1:_(s32) = COPY $w1
+ %2:_(s32) = COPY $w2
+ %3:_(s32) = COPY $w3
+ %const:_(s32) = G_CONSTANT i32 0
+ %bv0:_(<4 x s32>) = G_BUILD_VECTOR %0:_(s32), %1:_(s32), %0:_(s32), %1:_(s32)
+ %bv1:_(<4 x s32>) = G_BUILD_VECTOR %const:_(s32), %const:_(s32), %const:_(s32), %const:_(s32)
+ %add:_(<4 x s32>), %o:_(<4 x s1>) = G_SADDO %bv0, %bv1
+ %o_wide:_(<4 x s32>) = G_ZEXT %o(<4 x s1>)
+ $q0 = COPY %add(<4 x s32>)
+ $q1 = COPY %o_wide
+ RET_ReallyLR implicit $w0
+...
diff --git a/llvm/test/CodeGen/AArch64/arm64-xaluo.ll b/llvm/test/CodeGen/AArch64/arm64-xaluo.ll
index 77c70668b65a01..0ec2d763685e91 100644
--- a/llvm/test/CodeGen/AArch64/arm64-xaluo.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-xaluo.ll
@@ -2643,8 +2643,7 @@ define i8 @pr60530() {
;
; GISEL-LABEL: pr60530:
; GISEL: // %bb.0:
-; GISEL-NEXT: mov w8, #1 // =0x1
-; GISEL-NEXT: sbfx w0, w8, #0, #1
+; GISEL-NEXT: mov w0, #255 // =0xff
; GISEL-NEXT: ret
%1 = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 0, i8 1)
%2 = extractvalue { i8, i1 } %1, 1
diff --git a/llvm/test/CodeGen/AArch64/overflow.ll b/llvm/test/CodeGen/AArch64/overflow.ll
index 1fd60c03097906..977141f2b84f4f 100644
--- a/llvm/test/CodeGen/AArch64/overflow.ll
+++ b/llvm/test/CodeGen/AArch64/overflow.ll
@@ -64,21 +64,10 @@ entry:
}
define i32 @saddo.select.i64(i32 %v1, i32 %v2, i1 %v3, i64 %v4, i64 %v5) {
-; SDAG-LABEL: saddo.select.i64:
-; SDAG: // %bb.0: // %entry
-; SDAG-NEXT: mov w0, w1
-; SDAG-NEXT: ret
-;
-; GISEL-LABEL: saddo.select.i64:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: mov w8, #13 // =0xd
-; GISEL-NEXT: and x9, x3, #0xc
-; GISEL-NEXT: and x8, x4, x8
-; GISEL-NEXT: cmn x9, x8
-; GISEL-NEXT: cset w8, vs
-; GISEL-NEXT: tst w8, #0x1
-; GISEL-NEXT: csel w0, w0, w1, ne
-; GISEL-NEXT: ret
+; CHECK-LABEL: saddo.select.i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w0, w1
+; CHECK-NEXT: ret
entry:
%lhs = and i64 %v4, 12
%rhs = and i64 %v5, 13
@@ -89,22 +78,10 @@ entry:
}
define i32 @uaddo.select.i64(i32 %v1, i32 %v2, i1 %v3, i64 %v4, i64 %v5) {
-; SDAG-LABEL: uaddo.select.i64:
-; SDAG: // %bb.0: // %entry
-; SDAG-NEXT: mov w0, w1
-; SDAG-NEXT: ret
-;
-; GISEL-LABEL: uaddo.select.i64:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: mov w8, #9 // =0x9
-; GISEL-NEXT: mov w9, #10 // =0xa
-; GISEL-NEXT: and x8, x3, x8
-; GISEL-NEXT: and x9, x4, x9
-; GISEL-NEXT: cmn x8, x9
-; GISEL-NEXT: cset w8, hs
-; GISEL-NEXT: tst w8, #0x1
-; GISEL-NEXT: csel w0, w0, w1, ne
-; GISEL-NEXT: ret
+; CHECK-LABEL: uaddo.select.i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w0, w1
+; CHECK-NEXT: ret
entry:
%lhs = and i64 %v4, 9
%rhs = and i64 %v5, 10
diff --git a/llvm/test/CodeGen/AMDGPU/fptoi.i128.ll b/llvm/test/CodeGen/AMDGPU/fptoi.i128.ll
index b2311a87059c31..a69418d4364191 100644
--- a/llvm/test/CodeGen/AMDGPU/fptoi.i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/fptoi.i128.ll
@@ -238,7 +238,7 @@ define i128 @fptosi_f64_to_i128(double %x) {
; GISEL-NEXT: s_xor_b64 s[16:17], exec, s[6:7]
; GISEL-NEXT: s_cbranch_execz .LBB0_4
; GISEL-NEXT: ; %bb.3: ; %fp-to-i-if-else
-; GISEL-NEXT: v_add_co_u32_e32 v6, vcc, 0xfffffbcd, v6
+; GISEL-NEXT: v_add_u32_e32 v6, 0xfffffbcd, v6
; GISEL-NEXT: v_lshlrev_b64 v[0:1], v6, v[4:5]
; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v6
; GISEL-NEXT: v_cndmask_b32_e32 v11, 0, v0, vcc
@@ -612,7 +612,7 @@ define i128 @fptoui_f64_to_i128(double %x) {
; GISEL-NEXT: s_xor_b64 s[16:17], exec, s[6:7]
; GISEL-NEXT: s_cbranch_execz .LBB1_4
; GISEL-NEXT: ; %bb.3: ; %fp-to-i-if-else
-; GISEL-NEXT: v_add_co_u32_e32 v6, vcc, 0xfffffbcd, v6
+; GISEL-NEXT: v_add_u32_e32 v6, 0xfffffbcd, v6
; GISEL-NEXT: v_lshlrev_b64 v[0:1], v6, v[4:5]
; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v6
; GISEL-NEXT: v_cndmask_b32_e32 v11, 0, v0, vcc
@@ -978,7 +978,7 @@ define i128 @fptosi_f32_to_i128(float %x) {
; GISEL-NEXT: s_xor_b64 s[16:17], exec, s[6:7]
; GISEL-NEXT: s_cbranch_execz .LBB2_4
; GISEL-NEXT: ; %bb.3: ; %fp-to-i-if-else
-; GISEL-NEXT: v_add_co_u32_e32 v6, vcc, 0xffffff6a, v6
+; GISEL-NEXT: v_add_u32_e32 v6, 0xffffff6a, v6
; GISEL-NEXT: v_lshlrev_b64 v[0:1], v6, v[4:5]
; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v6
; GISEL-NEXT: v_cndmask_b32_e32 v11, 0, v0, vcc
@@ -1338,7 +1338,7 @@ define i128 @fptoui_f32_to_i128(float %x) {
; GISEL-NEXT: s_xor_b64 s[16:17], exec, s[6:7]
; GISEL-NEXT: s_cbranch_execz .LBB3_4
; GISEL-NEXT: ; %bb.3: ; %fp-to-i-if-else
-; GISEL-NEXT: v_add_co_u32_e32 v6, vcc, 0xffffff6a, v6
+; GISEL-NEXT: v_add_u32_e32 v6, 0xffffff6a, v6
; GISEL-NEXT: v_lshlrev_b64 v[0:1], v6, v[4:5]
; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v6
; GISEL-NEXT: v_cndmask_b32_e32 v11, 0, v0, vcc
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks! Nice to see that this brings some real codegen improvements.
#82927