-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[GlobalISel][AArch64] Fix fptoi.sat lowering. #127901
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
The SDAG version uses fminnum/fmaxnum; when this was converted to fcmp+select, it appears the order of the operands was chosen badly. I've switched the conditions used, keeping the constant on the RHS.
@llvm/pr-subscribers-backend-aarch64 Author: David Green (davemgreen) Changes: The SDAG version uses fminnum/fmaxnum; when this was converted to fcmp+select, it appears the order of the operands was chosen badly. I've switched the conditions used, keeping the constant on the RHS. Patch is 34.38 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/127901.diff 5 Files Affected:
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 319c4ac28c167..fb8187bf053d3 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -7816,13 +7816,13 @@ LegalizerHelper::lowerFPTOINT_SAT(MachineInstr &MI) {
if (AreExactFloatBounds) {
// Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
auto MaxC = MIRBuilder.buildFConstant(SrcTy, MinFloat);
- auto MaxP = MIRBuilder.buildFCmp(CmpInst::FCMP_ULT,
+ auto MaxP = MIRBuilder.buildFCmp(CmpInst::FCMP_UGT,
SrcTy.changeElementSize(1), Src, MaxC);
auto Max = MIRBuilder.buildSelect(SrcTy, MaxP, Src, MaxC);
// Clamp by MaxFloat from above. NaN cannot occur.
auto MinC = MIRBuilder.buildFConstant(SrcTy, MaxFloat);
auto MinP =
- MIRBuilder.buildFCmp(CmpInst::FCMP_OGT, SrcTy.changeElementSize(1), Max,
+ MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, SrcTy.changeElementSize(1), Max,
MinC, MachineInstr::FmNoNans);
auto Min =
MIRBuilder.buildSelect(SrcTy, MinP, Max, MinC, MachineInstr::FmNoNans);
diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll
index bfb5c67801e6c..85ad380f6e707 100644
--- a/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll
+++ b/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll
@@ -987,25 +987,25 @@ define i32 @test_signed_f128_i32(fp128 %f) {
; CHECK-GI-NEXT: adrp x8, .LCPI30_1
; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI30_1]
-; CHECK-GI-NEXT: bl __getf2
+; CHECK-GI-NEXT: bl __letf2
; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-GI-NEXT: cmp w0, #0
; CHECK-GI-NEXT: mov x9, #-4603241769126068224 // =0xc01e000000000000
; CHECK-GI-NEXT: fmov x8, d0
-; CHECK-GI-NEXT: csel x19, x8, xzr, lt
+; CHECK-GI-NEXT: csel x19, x8, xzr, gt
; CHECK-GI-NEXT: mov x8, v0.d[1]
; CHECK-GI-NEXT: mov v0.d[0], x19
-; CHECK-GI-NEXT: csel x20, x8, x9, lt
+; CHECK-GI-NEXT: csel x20, x8, x9, gt
; CHECK-GI-NEXT: adrp x8, .LCPI30_0
; CHECK-GI-NEXT: mov v0.d[1], x20
; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI30_0]
-; CHECK-GI-NEXT: bl __gttf2
+; CHECK-GI-NEXT: bl __lttf2
; CHECK-GI-NEXT: cmp w0, #0
-; CHECK-GI-NEXT: csel x8, x19, xzr, gt
+; CHECK-GI-NEXT: csel x8, x19, xzr, lt
; CHECK-GI-NEXT: mov v0.d[0], x8
; CHECK-GI-NEXT: mov x8, #281474976448512 // =0xfffffffc0000
; CHECK-GI-NEXT: movk x8, #16413, lsl #48
-; CHECK-GI-NEXT: csel x8, x20, x8, gt
+; CHECK-GI-NEXT: csel x8, x20, x8, lt
; CHECK-GI-NEXT: mov v0.d[1], x8
; CHECK-GI-NEXT: bl __fixtfsi
; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload
diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
index b2b3430f4d85e..71b264eb0022f 100644
--- a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
@@ -535,25 +535,25 @@ define <1 x i32> @test_signed_v1f128_v1i32(<1 x fp128> %f) {
; CHECK-GI-NEXT: adrp x8, .LCPI14_1
; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI14_1]
-; CHECK-GI-NEXT: bl __getf2
+; CHECK-GI-NEXT: bl __letf2
; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-GI-NEXT: cmp w0, #0
; CHECK-GI-NEXT: mov x9, #-4603241769126068224 // =0xc01e000000000000
; CHECK-GI-NEXT: fmov x8, d0
-; CHECK-GI-NEXT: csel x19, x8, xzr, lt
+; CHECK-GI-NEXT: csel x19, x8, xzr, gt
; CHECK-GI-NEXT: mov x8, v0.d[1]
; CHECK-GI-NEXT: mov v0.d[0], x19
-; CHECK-GI-NEXT: csel x20, x8, x9, lt
+; CHECK-GI-NEXT: csel x20, x8, x9, gt
; CHECK-GI-NEXT: adrp x8, .LCPI14_0
; CHECK-GI-NEXT: mov v0.d[1], x20
; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI14_0]
-; CHECK-GI-NEXT: bl __gttf2
+; CHECK-GI-NEXT: bl __lttf2
; CHECK-GI-NEXT: cmp w0, #0
-; CHECK-GI-NEXT: csel x8, x19, xzr, gt
+; CHECK-GI-NEXT: csel x8, x19, xzr, lt
; CHECK-GI-NEXT: mov v0.d[0], x8
; CHECK-GI-NEXT: mov x8, #281474976448512 // =0xfffffffc0000
; CHECK-GI-NEXT: movk x8, #16413, lsl #48
-; CHECK-GI-NEXT: csel x8, x20, x8, gt
+; CHECK-GI-NEXT: csel x8, x20, x8, lt
; CHECK-GI-NEXT: mov v0.d[1], x8
; CHECK-GI-NEXT: bl __fixtfsi
; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload
@@ -656,26 +656,26 @@ define <2 x i32> @test_signed_v2f128_v2i32(<2 x fp128> %f) {
; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
; CHECK-GI-NEXT: mov v1.16b, v2.16b
; CHECK-GI-NEXT: str q2, [sp, #16] // 16-byte Folded Spill
-; CHECK-GI-NEXT: bl __getf2
+; CHECK-GI-NEXT: bl __letf2
; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
; CHECK-GI-NEXT: cmp w0, #0
; CHECK-GI-NEXT: mov x20, #-4603241769126068224 // =0xc01e000000000000
; CHECK-GI-NEXT: fmov x8, d0
-; CHECK-GI-NEXT: csel x19, x8, xzr, lt
+; CHECK-GI-NEXT: csel x19, x8, xzr, gt
; CHECK-GI-NEXT: mov x8, v0.d[1]
; CHECK-GI-NEXT: mov v0.d[0], x19
-; CHECK-GI-NEXT: csel x21, x8, x20, lt
+; CHECK-GI-NEXT: csel x21, x8, x20, gt
; CHECK-GI-NEXT: adrp x8, .LCPI15_0
; CHECK-GI-NEXT: mov v0.d[1], x21
; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI15_0]
; CHECK-GI-NEXT: str q1, [sp] // 16-byte Folded Spill
-; CHECK-GI-NEXT: bl __gttf2
+; CHECK-GI-NEXT: bl __lttf2
; CHECK-GI-NEXT: cmp w0, #0
; CHECK-GI-NEXT: mov x22, #281474976448512 // =0xfffffffc0000
-; CHECK-GI-NEXT: csel x8, x19, xzr, gt
+; CHECK-GI-NEXT: csel x8, x19, xzr, lt
; CHECK-GI-NEXT: movk x22, #16413, lsl #48
; CHECK-GI-NEXT: mov v0.d[0], x8
-; CHECK-GI-NEXT: csel x8, x21, x22, gt
+; CHECK-GI-NEXT: csel x8, x21, x22, lt
; CHECK-GI-NEXT: mov v0.d[1], x8
; CHECK-GI-NEXT: bl __fixtfsi
; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
@@ -686,21 +686,21 @@ define <2 x i32> @test_signed_v2f128_v2i32(<2 x fp128> %f) {
; CHECK-GI-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
; CHECK-GI-NEXT: cmp w0, #0
; CHECK-GI-NEXT: csel w21, wzr, w19, ne
-; CHECK-GI-NEXT: bl __getf2
+; CHECK-GI-NEXT: bl __letf2
; CHECK-GI-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
; CHECK-GI-NEXT: cmp w0, #0
; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Folded Reload
; CHECK-GI-NEXT: fmov x8, d0
-; CHECK-GI-NEXT: csel x19, x8, xzr, lt
+; CHECK-GI-NEXT: csel x19, x8, xzr, gt
; CHECK-GI-NEXT: mov x8, v0.d[1]
; CHECK-GI-NEXT: mov v0.d[0], x19
-; CHECK-GI-NEXT: csel x20, x8, x20, lt
+; CHECK-GI-NEXT: csel x20, x8, x20, gt
; CHECK-GI-NEXT: mov v0.d[1], x20
-; CHECK-GI-NEXT: bl __gttf2
+; CHECK-GI-NEXT: bl __lttf2
; CHECK-GI-NEXT: cmp w0, #0
-; CHECK-GI-NEXT: csel x8, x19, xzr, gt
+; CHECK-GI-NEXT: csel x8, x19, xzr, lt
; CHECK-GI-NEXT: mov v0.d[0], x8
-; CHECK-GI-NEXT: csel x8, x20, x22, gt
+; CHECK-GI-NEXT: csel x8, x20, x22, lt
; CHECK-GI-NEXT: mov v0.d[1], x8
; CHECK-GI-NEXT: bl __fixtfsi
; CHECK-GI-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
@@ -827,26 +827,26 @@ define <3 x i32> @test_signed_v3f128_v3i32(<3 x fp128> %f) {
; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-GI-NEXT: str q2, [sp, #64] // 16-byte Folded Spill
; CHECK-GI-NEXT: str q1, [sp, #32] // 16-byte Folded Spill
-; CHECK-GI-NEXT: bl __getf2
+; CHECK-GI-NEXT: bl __letf2
; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-GI-NEXT: cmp w0, #0
; CHECK-GI-NEXT: mov x20, #-4603241769126068224 // =0xc01e000000000000
; CHECK-GI-NEXT: fmov x8, d0
-; CHECK-GI-NEXT: csel x19, x8, xzr, lt
+; CHECK-GI-NEXT: csel x19, x8, xzr, gt
; CHECK-GI-NEXT: mov x8, v0.d[1]
; CHECK-GI-NEXT: mov v0.d[0], x19
-; CHECK-GI-NEXT: csel x21, x8, x20, lt
+; CHECK-GI-NEXT: csel x21, x8, x20, gt
; CHECK-GI-NEXT: adrp x8, .LCPI16_0
; CHECK-GI-NEXT: mov v0.d[1], x21
; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI16_0]
; CHECK-GI-NEXT: str q1, [sp, #16] // 16-byte Folded Spill
-; CHECK-GI-NEXT: bl __gttf2
+; CHECK-GI-NEXT: bl __lttf2
; CHECK-GI-NEXT: cmp w0, #0
; CHECK-GI-NEXT: mov x22, #281474976448512 // =0xfffffffc0000
-; CHECK-GI-NEXT: csel x8, x19, xzr, gt
+; CHECK-GI-NEXT: csel x8, x19, xzr, lt
; CHECK-GI-NEXT: movk x22, #16413, lsl #48
; CHECK-GI-NEXT: mov v0.d[0], x8
-; CHECK-GI-NEXT: csel x8, x21, x22, gt
+; CHECK-GI-NEXT: csel x8, x21, x22, lt
; CHECK-GI-NEXT: mov v0.d[1], x8
; CHECK-GI-NEXT: bl __fixtfsi
; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload
@@ -856,21 +856,21 @@ define <3 x i32> @test_signed_v3f128_v3i32(<3 x fp128> %f) {
; CHECK-GI-NEXT: ldp q1, q0, [sp, #32] // 32-byte Folded Reload
; CHECK-GI-NEXT: cmp w0, #0
; CHECK-GI-NEXT: csel w21, wzr, w19, ne
-; CHECK-GI-NEXT: bl __getf2
+; CHECK-GI-NEXT: bl __letf2
; CHECK-GI-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
; CHECK-GI-NEXT: cmp w0, #0
; CHECK-GI-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
; CHECK-GI-NEXT: fmov x8, d0
-; CHECK-GI-NEXT: csel x19, x8, xzr, lt
+; CHECK-GI-NEXT: csel x19, x8, xzr, gt
; CHECK-GI-NEXT: mov x8, v0.d[1]
; CHECK-GI-NEXT: mov v0.d[0], x19
-; CHECK-GI-NEXT: csel x23, x8, x20, lt
+; CHECK-GI-NEXT: csel x23, x8, x20, gt
; CHECK-GI-NEXT: mov v0.d[1], x23
-; CHECK-GI-NEXT: bl __gttf2
+; CHECK-GI-NEXT: bl __lttf2
; CHECK-GI-NEXT: cmp w0, #0
-; CHECK-GI-NEXT: csel x8, x19, xzr, gt
+; CHECK-GI-NEXT: csel x8, x19, xzr, lt
; CHECK-GI-NEXT: mov v0.d[0], x8
-; CHECK-GI-NEXT: csel x8, x23, x22, gt
+; CHECK-GI-NEXT: csel x8, x23, x22, lt
; CHECK-GI-NEXT: mov v0.d[1], x8
; CHECK-GI-NEXT: bl __fixtfsi
; CHECK-GI-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
@@ -881,21 +881,21 @@ define <3 x i32> @test_signed_v3f128_v3i32(<3 x fp128> %f) {
; CHECK-GI-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload
; CHECK-GI-NEXT: cmp w0, #0
; CHECK-GI-NEXT: csel w23, wzr, w19, ne
-; CHECK-GI-NEXT: bl __getf2
+; CHECK-GI-NEXT: bl __letf2
; CHECK-GI-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload
; CHECK-GI-NEXT: cmp w0, #0
; CHECK-GI-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
; CHECK-GI-NEXT: fmov x8, d0
-; CHECK-GI-NEXT: csel x19, x8, xzr, lt
+; CHECK-GI-NEXT: csel x19, x8, xzr, gt
; CHECK-GI-NEXT: mov x8, v0.d[1]
; CHECK-GI-NEXT: mov v0.d[0], x19
-; CHECK-GI-NEXT: csel x20, x8, x20, lt
+; CHECK-GI-NEXT: csel x20, x8, x20, gt
; CHECK-GI-NEXT: mov v0.d[1], x20
-; CHECK-GI-NEXT: bl __gttf2
+; CHECK-GI-NEXT: bl __lttf2
; CHECK-GI-NEXT: cmp w0, #0
-; CHECK-GI-NEXT: csel x8, x19, xzr, gt
+; CHECK-GI-NEXT: csel x8, x19, xzr, lt
; CHECK-GI-NEXT: mov v0.d[0], x8
-; CHECK-GI-NEXT: csel x8, x20, x22, gt
+; CHECK-GI-NEXT: csel x8, x20, x22, lt
; CHECK-GI-NEXT: mov v0.d[1], x8
; CHECK-GI-NEXT: bl __fixtfsi
; CHECK-GI-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload
@@ -1043,26 +1043,26 @@ define <4 x i32> @test_signed_v4f128_v4i32(<4 x fp128> %f) {
; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI17_1]
; CHECK-GI-NEXT: str q2, [sp, #32] // 16-byte Folded Spill
; CHECK-GI-NEXT: stp q1, q3, [sp, #64] // 32-byte Folded Spill
-; CHECK-GI-NEXT: bl __getf2
+; CHECK-GI-NEXT: bl __letf2
; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-GI-NEXT: cmp w0, #0
; CHECK-GI-NEXT: mov x20, #-4603241769126068224 // =0xc01e000000000000
; CHECK-GI-NEXT: fmov x8, d0
-; CHECK-GI-NEXT: csel x19, x8, xzr, lt
+; CHECK-GI-NEXT: csel x19, x8, xzr, gt
; CHECK-GI-NEXT: mov x8, v0.d[1]
; CHECK-GI-NEXT: mov v0.d[0], x19
-; CHECK-GI-NEXT: csel x21, x8, x20, lt
+; CHECK-GI-NEXT: csel x21, x8, x20, gt
; CHECK-GI-NEXT: adrp x8, .LCPI17_0
; CHECK-GI-NEXT: mov v0.d[1], x21
; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI17_0]
; CHECK-GI-NEXT: str q1, [sp, #48] // 16-byte Folded Spill
-; CHECK-GI-NEXT: bl __gttf2
+; CHECK-GI-NEXT: bl __lttf2
; CHECK-GI-NEXT: cmp w0, #0
; CHECK-GI-NEXT: mov x22, #281474976448512 // =0xfffffffc0000
-; CHECK-GI-NEXT: csel x8, x19, xzr, gt
+; CHECK-GI-NEXT: csel x8, x19, xzr, lt
; CHECK-GI-NEXT: movk x22, #16413, lsl #48
; CHECK-GI-NEXT: mov v0.d[0], x8
-; CHECK-GI-NEXT: csel x8, x21, x22, gt
+; CHECK-GI-NEXT: csel x8, x21, x22, lt
; CHECK-GI-NEXT: mov v0.d[1], x8
; CHECK-GI-NEXT: bl __fixtfsi
; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload
@@ -1073,21 +1073,21 @@ define <4 x i32> @test_signed_v4f128_v4i32(<4 x fp128> %f) {
; CHECK-GI-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload
; CHECK-GI-NEXT: cmp w0, #0
; CHECK-GI-NEXT: csel w21, wzr, w19, ne
-; CHECK-GI-NEXT: bl __getf2
+; CHECK-GI-NEXT: bl __letf2
; CHECK-GI-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
; CHECK-GI-NEXT: cmp w0, #0
; CHECK-GI-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload
; CHECK-GI-NEXT: fmov x8, d0
-; CHECK-GI-NEXT: csel x19, x8, xzr, lt
+; CHECK-GI-NEXT: csel x19, x8, xzr, gt
; CHECK-GI-NEXT: mov x8, v0.d[1]
; CHECK-GI-NEXT: mov v0.d[0], x19
-; CHECK-GI-NEXT: csel x23, x8, x20, lt
+; CHECK-GI-NEXT: csel x23, x8, x20, gt
; CHECK-GI-NEXT: mov v0.d[1], x23
-; CHECK-GI-NEXT: bl __gttf2
+; CHECK-GI-NEXT: bl __lttf2
; CHECK-GI-NEXT: cmp w0, #0
-; CHECK-GI-NEXT: csel x8, x19, xzr, gt
+; CHECK-GI-NEXT: csel x8, x19, xzr, lt
; CHECK-GI-NEXT: mov v0.d[0], x8
-; CHECK-GI-NEXT: csel x8, x23, x22, gt
+; CHECK-GI-NEXT: csel x8, x23, x22, lt
; CHECK-GI-NEXT: mov v0.d[1], x8
; CHECK-GI-NEXT: bl __fixtfsi
; CHECK-GI-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
@@ -1098,20 +1098,20 @@ define <4 x i32> @test_signed_v4f128_v4i32(<4 x fp128> %f) {
; CHECK-GI-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload
; CHECK-GI-NEXT: cmp w0, #0
; CHECK-GI-NEXT: csel w23, wzr, w19, ne
-; CHECK-GI-NEXT: bl __getf2
+; CHECK-GI-NEXT: bl __letf2
; CHECK-GI-NEXT: ldp q0, q1, [sp, #32] // 32-byte Folded Reload
; CHECK-GI-NEXT: cmp w0, #0
; CHECK-GI-NEXT: fmov x8, d0
-; CHECK-GI-NEXT: csel x19, x8, xzr, lt
+; CHECK-GI-NEXT: csel x19, x8, xzr, gt
; CHECK-GI-NEXT: mov x8, v0.d[1]
; CHECK-GI-NEXT: mov v0.d[0], x19
-; CHECK-GI-NEXT: csel x24, x8, x20, lt
+; CHECK-GI-NEXT: csel x24, x8, x20, gt
; CHECK-GI-NEXT: mov v0.d[1], x24
-; CHECK-GI-NEXT: bl __gttf2
+; CHECK-GI-NEXT: bl __lttf2
; CHECK-GI-NEXT: cmp w0, #0
-; CHECK-GI-NEXT: csel x8, x19, xzr, gt
+; CHECK-GI-NEXT: csel x8, x19, xzr, lt
; CHECK-GI-NEXT: mov v0.d[0], x8
-; CHECK-GI-NEXT: csel x8, x24, x22, gt
+; CHECK-GI-NEXT: csel x8, x24, x22, lt
; CHECK-GI-NEXT: mov v0.d[1], x8
; CHECK-GI-NEXT: bl __fixtfsi
; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
@@ -1121,21 +1121,21 @@ define <4 x i32> @test_signed_v4f128_v4i32(<4 x fp128> %f) {
; CHECK-GI-NEXT: ldp q1, q0, [sp, #64] // 32-byte Folded Reload
; CHECK-GI-NEXT: cmp w0, #0
; CHECK-GI-NEXT: csel w24, wzr, w19, ne
-; CHECK-GI-NEXT: bl __getf2
+; CHECK-GI-NEXT: bl __letf2
; CHECK-GI-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload
; CHECK-GI-NEXT: cmp w0, #0
; CHECK-GI-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload
; CHECK-GI-NEXT: fmov x8, d0
-; CHECK-GI-NEXT: csel x19, x8, xzr, lt
+; CHECK-GI-NEXT: csel x19, x8, xzr, gt
; CHECK-GI-NEXT: mov x8, v0.d[1]
; CHECK-GI-NEXT: mov v0.d[0], x19
-; CHECK-GI-NEXT: csel x20, x8, x20, lt
+; CHECK-GI-NEXT: csel x20, x8, x20, gt
; CHECK-GI-NEXT: mov v0.d[1], x20
-; CHECK-GI-NEXT: bl __gttf2
+; CHECK-GI-NEXT: bl __lttf2
; CHECK-GI-NEXT: cmp w0, #0
-; CHECK-GI-NEXT: csel x8, x19, xzr, gt
+; CHECK-GI-NEXT: csel x8, x19, xzr, lt
; CHECK-GI-NEXT: mov v0.d[0], x8
-; CHECK-GI-NEXT: csel x8, x20, x22, gt
+; CHECK-GI-NEXT: csel x8, x20, x22, lt
; CHECK-GI-NEXT: mov v0.d[1], x8
; CHECK-GI-NEXT: bl __fixtfsi
; CHECK-GI-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload
@@ -5633,26 +5633,26 @@ define <2 x i64> @test_signed_v2f128_v2i64(<2 x fp128> %f) {
; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
; CHECK-GI-NEXT: mov v1.16b, v2.16b
; CHECK-GI-NEXT: str q2, [sp, #16] // 16-byte Folded Spill
-; CHECK-GI-NEXT: bl __getf2
+; CHECK-GI-NEXT: bl __letf2
; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
; CHECK-GI-NEXT: cmp w0, #0
; CHECK-GI-NEXT: mov x20, #-4594234569871327232 // =0xc03e000000000000
; CHECK-GI-NEXT: fmov x8, d0
-; CHECK-GI-NEXT: csel x19, x8, xzr, lt
+; CHECK-GI-NEXT: csel x19, x8, xzr, gt
; CHECK-GI-NEXT: mov x8, v0.d[1]
; CHECK-GI-NEXT: mov v0.d[0], x19
-; CHECK-GI-NEXT: csel x21, x8, x20, lt
+; CHECK-GI-NEXT: csel x21, x8, x20, gt
; CHECK-GI-NEXT: adrp x8, .LCPI86_0
; CHECK-GI-NEXT: mov v0.d[1], x21
; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI86_0]
; CHECK-GI-NEXT: str q1, [sp] // 16-byte Folded Spill
-; CHECK-GI-NEXT: bl __gttf2
+; CHECK-GI-NEXT: bl __lttf2
; CHECK-GI-NEXT: mov x22, #-1125899906842624 // =0xfffc000000000000
; CHECK-GI-NEXT: cmp w0, #0
; CHECK-GI-NEXT: mov x23, #4629137466983448575 // =0x403dffffffffffff
-; CHECK-GI-NEXT: csel x8, x19, x22, gt
+; CHECK-GI-NEXT: csel x8, x19, x22, lt
; CHECK-GI-NEXT: mov v0.d[0], x8
-; CHECK-GI-NEXT: csel x8, x21, x23, gt
+; CHECK-GI-NEXT: csel x8, x21, x23, lt
; CHECK-GI-NEXT: mov v0.d[1], x8
; CHECK-GI-NEXT: bl __fixtfdi
; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
@@ -5663,21 +5663,21 @@ define <2 x i64> @test_signed_v2f128_v2i64(<2 x fp128> %f) {
; CHECK-GI-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
; CHECK-GI-NEXT: cmp w0, #0
; CHECK-GI-NEXT: csel x21, xzr, x19, ne
-; CHECK-GI-NEXT: bl __getf2
+; CHECK-GI-NEXT: bl __letf2
; CHECK-GI-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
; CHECK-GI-NEXT: cmp w0, #0
; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Folded Reload
; CHECK-GI-NEXT: fmov x8, d0
-; CHECK-GI-NEXT: csel x19, x8, xzr, lt
+; CHECK-GI-NEXT: csel x19, x8, xzr, gt
; CHECK-GI-NEXT: mov x8, v0.d[1]
; CHECK-GI-NEXT: mov v0.d[0], x19
-; CHECK-GI-NEXT: csel x20, x8, x20, lt
+; CHECK-GI-NEXT: csel x20, x8, x20, gt
; CHECK-GI-NEXT: mov v0.d[1], x20
-; CHECK-GI-NEXT: bl __gttf2
+; CHECK-GI-NEXT: bl __lttf2
; CHECK-GI-NEXT: cmp w0, #0
-; CHECK-GI-NEXT: csel x8, x19, x22, gt
+; CHECK-GI-NEXT: csel x8, x19, x22, lt
; CHECK-GI-NEXT: mov v0.d[0], x8
-; CHECK-GI-NEXT: csel x8, x20, x23, gt
+; CHECK-GI-NEXT: csel x8, x20, x23, lt
; CHECK-GI-NEXT: mov v0.d[1], x8
; CHECK-GI-NEXT: bl __fixtfdi
; CHECK-GI-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
diff --git a/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll
index 0dea7be5052d0..c3f352ec92181 100644
--- a/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll
+++ b/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll
@@ -797,25 +797,25 @@ define i32 @test_unsigned_f128_i32(fp128 %f) {
; CHECK-GI-NEXT: adrp x8, .LCPI30_1
; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI30_1]
-; CHECK-GI-NEXT: bl __getf2
+; CHECK-GI-NEXT: bl __letf2
; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-GI-NEXT: cmp w0, #0
; CHECK-GI-NEXT: fmov x8, d0
-; CHECK-GI-NEXT: csel x19, x8, xzr, lt
+; CHECK-GI-NEXT: csel x19, x8, xzr, gt
; CHECK-GI-NEXT: mov x8, v0.d[1]
; CHECK-GI-NEXT: mov v0.d[0], x19
-; CHECK-GI-NEXT: csel x20, x8, xzr, lt
+; CHECK-GI-NEXT: csel x20, x8, xzr, gt
; CHECK-GI-NEXT: adrp x8, .LCPI30_0
; CHECK-GI-NEXT: mov v0.d[1], x20
; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI30_0]
-; CHECK-GI-NEXT: bl __gttf2
+; CHECK-GI-NEXT: bl __lttf2
; CHECK-GI-NEXT: cmp w0, #0
; CHECK-GI-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
-; CHECK-GI-NEXT: csel x8, x19, xzr, gt
+; CHECK-GI-NEXT: csel x8, x19, xzr, lt
; CHECK-GI-NEXT: mov v0.d[0], x8
; CHECK-GI-NEXT: mov x8, #281474976579584 // =0xfffffffe0000
; CHECK-GI-NEXT: movk x8, #16414, lsl #48
-; CHECK-GI-NEXT: csel x8, x20, x8, gt
+; C...
[truncated]
|
@llvm/pr-subscribers-llvm-globalisel Author: David Green (davemgreen) Changes: The SDAG version uses fminnum/fmaxnum; when this was converted to fcmp+select, it appears the order of the operands was chosen badly. I've switched the conditions used, keeping the constant on the RHS. Patch is 34.38 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/127901.diff 5 Files Affected:
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 319c4ac28c167..fb8187bf053d3 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -7816,13 +7816,13 @@ LegalizerHelper::lowerFPTOINT_SAT(MachineInstr &MI) {
if (AreExactFloatBounds) {
// Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
auto MaxC = MIRBuilder.buildFConstant(SrcTy, MinFloat);
- auto MaxP = MIRBuilder.buildFCmp(CmpInst::FCMP_ULT,
+ auto MaxP = MIRBuilder.buildFCmp(CmpInst::FCMP_UGT,
SrcTy.changeElementSize(1), Src, MaxC);
auto Max = MIRBuilder.buildSelect(SrcTy, MaxP, Src, MaxC);
// Clamp by MaxFloat from above. NaN cannot occur.
auto MinC = MIRBuilder.buildFConstant(SrcTy, MaxFloat);
auto MinP =
- MIRBuilder.buildFCmp(CmpInst::FCMP_OGT, SrcTy.changeElementSize(1), Max,
+ MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, SrcTy.changeElementSize(1), Max,
MinC, MachineInstr::FmNoNans);
auto Min =
MIRBuilder.buildSelect(SrcTy, MinP, Max, MinC, MachineInstr::FmNoNans);
diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll
index bfb5c67801e6c..85ad380f6e707 100644
--- a/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll
+++ b/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll
@@ -987,25 +987,25 @@ define i32 @test_signed_f128_i32(fp128 %f) {
; CHECK-GI-NEXT: adrp x8, .LCPI30_1
; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI30_1]
-; CHECK-GI-NEXT: bl __getf2
+; CHECK-GI-NEXT: bl __letf2
; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-GI-NEXT: cmp w0, #0
; CHECK-GI-NEXT: mov x9, #-4603241769126068224 // =0xc01e000000000000
; CHECK-GI-NEXT: fmov x8, d0
-; CHECK-GI-NEXT: csel x19, x8, xzr, lt
+; CHECK-GI-NEXT: csel x19, x8, xzr, gt
; CHECK-GI-NEXT: mov x8, v0.d[1]
; CHECK-GI-NEXT: mov v0.d[0], x19
-; CHECK-GI-NEXT: csel x20, x8, x9, lt
+; CHECK-GI-NEXT: csel x20, x8, x9, gt
; CHECK-GI-NEXT: adrp x8, .LCPI30_0
; CHECK-GI-NEXT: mov v0.d[1], x20
; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI30_0]
-; CHECK-GI-NEXT: bl __gttf2
+; CHECK-GI-NEXT: bl __lttf2
; CHECK-GI-NEXT: cmp w0, #0
-; CHECK-GI-NEXT: csel x8, x19, xzr, gt
+; CHECK-GI-NEXT: csel x8, x19, xzr, lt
; CHECK-GI-NEXT: mov v0.d[0], x8
; CHECK-GI-NEXT: mov x8, #281474976448512 // =0xfffffffc0000
; CHECK-GI-NEXT: movk x8, #16413, lsl #48
-; CHECK-GI-NEXT: csel x8, x20, x8, gt
+; CHECK-GI-NEXT: csel x8, x20, x8, lt
; CHECK-GI-NEXT: mov v0.d[1], x8
; CHECK-GI-NEXT: bl __fixtfsi
; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload
diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
index b2b3430f4d85e..71b264eb0022f 100644
--- a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
@@ -535,25 +535,25 @@ define <1 x i32> @test_signed_v1f128_v1i32(<1 x fp128> %f) {
; CHECK-GI-NEXT: adrp x8, .LCPI14_1
; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI14_1]
-; CHECK-GI-NEXT: bl __getf2
+; CHECK-GI-NEXT: bl __letf2
; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-GI-NEXT: cmp w0, #0
; CHECK-GI-NEXT: mov x9, #-4603241769126068224 // =0xc01e000000000000
; CHECK-GI-NEXT: fmov x8, d0
-; CHECK-GI-NEXT: csel x19, x8, xzr, lt
+; CHECK-GI-NEXT: csel x19, x8, xzr, gt
; CHECK-GI-NEXT: mov x8, v0.d[1]
; CHECK-GI-NEXT: mov v0.d[0], x19
-; CHECK-GI-NEXT: csel x20, x8, x9, lt
+; CHECK-GI-NEXT: csel x20, x8, x9, gt
; CHECK-GI-NEXT: adrp x8, .LCPI14_0
; CHECK-GI-NEXT: mov v0.d[1], x20
; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI14_0]
-; CHECK-GI-NEXT: bl __gttf2
+; CHECK-GI-NEXT: bl __lttf2
; CHECK-GI-NEXT: cmp w0, #0
-; CHECK-GI-NEXT: csel x8, x19, xzr, gt
+; CHECK-GI-NEXT: csel x8, x19, xzr, lt
; CHECK-GI-NEXT: mov v0.d[0], x8
; CHECK-GI-NEXT: mov x8, #281474976448512 // =0xfffffffc0000
; CHECK-GI-NEXT: movk x8, #16413, lsl #48
-; CHECK-GI-NEXT: csel x8, x20, x8, gt
+; CHECK-GI-NEXT: csel x8, x20, x8, lt
; CHECK-GI-NEXT: mov v0.d[1], x8
; CHECK-GI-NEXT: bl __fixtfsi
; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload
@@ -656,26 +656,26 @@ define <2 x i32> @test_signed_v2f128_v2i32(<2 x fp128> %f) {
; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
; CHECK-GI-NEXT: mov v1.16b, v2.16b
; CHECK-GI-NEXT: str q2, [sp, #16] // 16-byte Folded Spill
-; CHECK-GI-NEXT: bl __getf2
+; CHECK-GI-NEXT: bl __letf2
; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
; CHECK-GI-NEXT: cmp w0, #0
; CHECK-GI-NEXT: mov x20, #-4603241769126068224 // =0xc01e000000000000
; CHECK-GI-NEXT: fmov x8, d0
-; CHECK-GI-NEXT: csel x19, x8, xzr, lt
+; CHECK-GI-NEXT: csel x19, x8, xzr, gt
; CHECK-GI-NEXT: mov x8, v0.d[1]
; CHECK-GI-NEXT: mov v0.d[0], x19
-; CHECK-GI-NEXT: csel x21, x8, x20, lt
+; CHECK-GI-NEXT: csel x21, x8, x20, gt
; CHECK-GI-NEXT: adrp x8, .LCPI15_0
; CHECK-GI-NEXT: mov v0.d[1], x21
; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI15_0]
; CHECK-GI-NEXT: str q1, [sp] // 16-byte Folded Spill
-; CHECK-GI-NEXT: bl __gttf2
+; CHECK-GI-NEXT: bl __lttf2
; CHECK-GI-NEXT: cmp w0, #0
; CHECK-GI-NEXT: mov x22, #281474976448512 // =0xfffffffc0000
-; CHECK-GI-NEXT: csel x8, x19, xzr, gt
+; CHECK-GI-NEXT: csel x8, x19, xzr, lt
; CHECK-GI-NEXT: movk x22, #16413, lsl #48
; CHECK-GI-NEXT: mov v0.d[0], x8
-; CHECK-GI-NEXT: csel x8, x21, x22, gt
+; CHECK-GI-NEXT: csel x8, x21, x22, lt
; CHECK-GI-NEXT: mov v0.d[1], x8
; CHECK-GI-NEXT: bl __fixtfsi
; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
@@ -686,21 +686,21 @@ define <2 x i32> @test_signed_v2f128_v2i32(<2 x fp128> %f) {
; CHECK-GI-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
; CHECK-GI-NEXT: cmp w0, #0
; CHECK-GI-NEXT: csel w21, wzr, w19, ne
-; CHECK-GI-NEXT: bl __getf2
+; CHECK-GI-NEXT: bl __letf2
; CHECK-GI-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
; CHECK-GI-NEXT: cmp w0, #0
; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Folded Reload
; CHECK-GI-NEXT: fmov x8, d0
-; CHECK-GI-NEXT: csel x19, x8, xzr, lt
+; CHECK-GI-NEXT: csel x19, x8, xzr, gt
; CHECK-GI-NEXT: mov x8, v0.d[1]
; CHECK-GI-NEXT: mov v0.d[0], x19
-; CHECK-GI-NEXT: csel x20, x8, x20, lt
+; CHECK-GI-NEXT: csel x20, x8, x20, gt
; CHECK-GI-NEXT: mov v0.d[1], x20
-; CHECK-GI-NEXT: bl __gttf2
+; CHECK-GI-NEXT: bl __lttf2
; CHECK-GI-NEXT: cmp w0, #0
-; CHECK-GI-NEXT: csel x8, x19, xzr, gt
+; CHECK-GI-NEXT: csel x8, x19, xzr, lt
; CHECK-GI-NEXT: mov v0.d[0], x8
-; CHECK-GI-NEXT: csel x8, x20, x22, gt
+; CHECK-GI-NEXT: csel x8, x20, x22, lt
; CHECK-GI-NEXT: mov v0.d[1], x8
; CHECK-GI-NEXT: bl __fixtfsi
; CHECK-GI-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
@@ -827,26 +827,26 @@ define <3 x i32> @test_signed_v3f128_v3i32(<3 x fp128> %f) {
; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-GI-NEXT: str q2, [sp, #64] // 16-byte Folded Spill
; CHECK-GI-NEXT: str q1, [sp, #32] // 16-byte Folded Spill
-; CHECK-GI-NEXT: bl __getf2
+; CHECK-GI-NEXT: bl __letf2
; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-GI-NEXT: cmp w0, #0
; CHECK-GI-NEXT: mov x20, #-4603241769126068224 // =0xc01e000000000000
; CHECK-GI-NEXT: fmov x8, d0
-; CHECK-GI-NEXT: csel x19, x8, xzr, lt
+; CHECK-GI-NEXT: csel x19, x8, xzr, gt
; CHECK-GI-NEXT: mov x8, v0.d[1]
; CHECK-GI-NEXT: mov v0.d[0], x19
-; CHECK-GI-NEXT: csel x21, x8, x20, lt
+; CHECK-GI-NEXT: csel x21, x8, x20, gt
; CHECK-GI-NEXT: adrp x8, .LCPI16_0
; CHECK-GI-NEXT: mov v0.d[1], x21
; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI16_0]
; CHECK-GI-NEXT: str q1, [sp, #16] // 16-byte Folded Spill
-; CHECK-GI-NEXT: bl __gttf2
+; CHECK-GI-NEXT: bl __lttf2
; CHECK-GI-NEXT: cmp w0, #0
; CHECK-GI-NEXT: mov x22, #281474976448512 // =0xfffffffc0000
-; CHECK-GI-NEXT: csel x8, x19, xzr, gt
+; CHECK-GI-NEXT: csel x8, x19, xzr, lt
; CHECK-GI-NEXT: movk x22, #16413, lsl #48
; CHECK-GI-NEXT: mov v0.d[0], x8
-; CHECK-GI-NEXT: csel x8, x21, x22, gt
+; CHECK-GI-NEXT: csel x8, x21, x22, lt
; CHECK-GI-NEXT: mov v0.d[1], x8
; CHECK-GI-NEXT: bl __fixtfsi
; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload
@@ -856,21 +856,21 @@ define <3 x i32> @test_signed_v3f128_v3i32(<3 x fp128> %f) {
; CHECK-GI-NEXT: ldp q1, q0, [sp, #32] // 32-byte Folded Reload
; CHECK-GI-NEXT: cmp w0, #0
; CHECK-GI-NEXT: csel w21, wzr, w19, ne
-; CHECK-GI-NEXT: bl __getf2
+; CHECK-GI-NEXT: bl __letf2
; CHECK-GI-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
; CHECK-GI-NEXT: cmp w0, #0
; CHECK-GI-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
; CHECK-GI-NEXT: fmov x8, d0
-; CHECK-GI-NEXT: csel x19, x8, xzr, lt
+; CHECK-GI-NEXT: csel x19, x8, xzr, gt
; CHECK-GI-NEXT: mov x8, v0.d[1]
; CHECK-GI-NEXT: mov v0.d[0], x19
-; CHECK-GI-NEXT: csel x23, x8, x20, lt
+; CHECK-GI-NEXT: csel x23, x8, x20, gt
; CHECK-GI-NEXT: mov v0.d[1], x23
-; CHECK-GI-NEXT: bl __gttf2
+; CHECK-GI-NEXT: bl __lttf2
; CHECK-GI-NEXT: cmp w0, #0
-; CHECK-GI-NEXT: csel x8, x19, xzr, gt
+; CHECK-GI-NEXT: csel x8, x19, xzr, lt
; CHECK-GI-NEXT: mov v0.d[0], x8
-; CHECK-GI-NEXT: csel x8, x23, x22, gt
+; CHECK-GI-NEXT: csel x8, x23, x22, lt
; CHECK-GI-NEXT: mov v0.d[1], x8
; CHECK-GI-NEXT: bl __fixtfsi
; CHECK-GI-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
@@ -881,21 +881,21 @@ define <3 x i32> @test_signed_v3f128_v3i32(<3 x fp128> %f) {
; CHECK-GI-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload
; CHECK-GI-NEXT: cmp w0, #0
; CHECK-GI-NEXT: csel w23, wzr, w19, ne
-; CHECK-GI-NEXT: bl __getf2
+; CHECK-GI-NEXT: bl __letf2
; CHECK-GI-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload
; CHECK-GI-NEXT: cmp w0, #0
; CHECK-GI-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
; CHECK-GI-NEXT: fmov x8, d0
-; CHECK-GI-NEXT: csel x19, x8, xzr, lt
+; CHECK-GI-NEXT: csel x19, x8, xzr, gt
; CHECK-GI-NEXT: mov x8, v0.d[1]
; CHECK-GI-NEXT: mov v0.d[0], x19
-; CHECK-GI-NEXT: csel x20, x8, x20, lt
+; CHECK-GI-NEXT: csel x20, x8, x20, gt
; CHECK-GI-NEXT: mov v0.d[1], x20
-; CHECK-GI-NEXT: bl __gttf2
+; CHECK-GI-NEXT: bl __lttf2
; CHECK-GI-NEXT: cmp w0, #0
-; CHECK-GI-NEXT: csel x8, x19, xzr, gt
+; CHECK-GI-NEXT: csel x8, x19, xzr, lt
; CHECK-GI-NEXT: mov v0.d[0], x8
-; CHECK-GI-NEXT: csel x8, x20, x22, gt
+; CHECK-GI-NEXT: csel x8, x20, x22, lt
; CHECK-GI-NEXT: mov v0.d[1], x8
; CHECK-GI-NEXT: bl __fixtfsi
; CHECK-GI-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload
@@ -1043,26 +1043,26 @@ define <4 x i32> @test_signed_v4f128_v4i32(<4 x fp128> %f) {
; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI17_1]
; CHECK-GI-NEXT: str q2, [sp, #32] // 16-byte Folded Spill
; CHECK-GI-NEXT: stp q1, q3, [sp, #64] // 32-byte Folded Spill
-; CHECK-GI-NEXT: bl __getf2
+; CHECK-GI-NEXT: bl __letf2
; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-GI-NEXT: cmp w0, #0
; CHECK-GI-NEXT: mov x20, #-4603241769126068224 // =0xc01e000000000000
; CHECK-GI-NEXT: fmov x8, d0
-; CHECK-GI-NEXT: csel x19, x8, xzr, lt
+; CHECK-GI-NEXT: csel x19, x8, xzr, gt
; CHECK-GI-NEXT: mov x8, v0.d[1]
; CHECK-GI-NEXT: mov v0.d[0], x19
-; CHECK-GI-NEXT: csel x21, x8, x20, lt
+; CHECK-GI-NEXT: csel x21, x8, x20, gt
; CHECK-GI-NEXT: adrp x8, .LCPI17_0
; CHECK-GI-NEXT: mov v0.d[1], x21
; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI17_0]
; CHECK-GI-NEXT: str q1, [sp, #48] // 16-byte Folded Spill
-; CHECK-GI-NEXT: bl __gttf2
+; CHECK-GI-NEXT: bl __lttf2
; CHECK-GI-NEXT: cmp w0, #0
; CHECK-GI-NEXT: mov x22, #281474976448512 // =0xfffffffc0000
-; CHECK-GI-NEXT: csel x8, x19, xzr, gt
+; CHECK-GI-NEXT: csel x8, x19, xzr, lt
; CHECK-GI-NEXT: movk x22, #16413, lsl #48
; CHECK-GI-NEXT: mov v0.d[0], x8
-; CHECK-GI-NEXT: csel x8, x21, x22, gt
+; CHECK-GI-NEXT: csel x8, x21, x22, lt
; CHECK-GI-NEXT: mov v0.d[1], x8
; CHECK-GI-NEXT: bl __fixtfsi
; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload
@@ -1073,21 +1073,21 @@ define <4 x i32> @test_signed_v4f128_v4i32(<4 x fp128> %f) {
; CHECK-GI-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload
; CHECK-GI-NEXT: cmp w0, #0
; CHECK-GI-NEXT: csel w21, wzr, w19, ne
-; CHECK-GI-NEXT: bl __getf2
+; CHECK-GI-NEXT: bl __letf2
; CHECK-GI-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
; CHECK-GI-NEXT: cmp w0, #0
; CHECK-GI-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload
; CHECK-GI-NEXT: fmov x8, d0
-; CHECK-GI-NEXT: csel x19, x8, xzr, lt
+; CHECK-GI-NEXT: csel x19, x8, xzr, gt
; CHECK-GI-NEXT: mov x8, v0.d[1]
; CHECK-GI-NEXT: mov v0.d[0], x19
-; CHECK-GI-NEXT: csel x23, x8, x20, lt
+; CHECK-GI-NEXT: csel x23, x8, x20, gt
; CHECK-GI-NEXT: mov v0.d[1], x23
-; CHECK-GI-NEXT: bl __gttf2
+; CHECK-GI-NEXT: bl __lttf2
; CHECK-GI-NEXT: cmp w0, #0
-; CHECK-GI-NEXT: csel x8, x19, xzr, gt
+; CHECK-GI-NEXT: csel x8, x19, xzr, lt
; CHECK-GI-NEXT: mov v0.d[0], x8
-; CHECK-GI-NEXT: csel x8, x23, x22, gt
+; CHECK-GI-NEXT: csel x8, x23, x22, lt
; CHECK-GI-NEXT: mov v0.d[1], x8
; CHECK-GI-NEXT: bl __fixtfsi
; CHECK-GI-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
@@ -1098,20 +1098,20 @@ define <4 x i32> @test_signed_v4f128_v4i32(<4 x fp128> %f) {
; CHECK-GI-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload
; CHECK-GI-NEXT: cmp w0, #0
; CHECK-GI-NEXT: csel w23, wzr, w19, ne
-; CHECK-GI-NEXT: bl __getf2
+; CHECK-GI-NEXT: bl __letf2
; CHECK-GI-NEXT: ldp q0, q1, [sp, #32] // 32-byte Folded Reload
; CHECK-GI-NEXT: cmp w0, #0
; CHECK-GI-NEXT: fmov x8, d0
-; CHECK-GI-NEXT: csel x19, x8, xzr, lt
+; CHECK-GI-NEXT: csel x19, x8, xzr, gt
; CHECK-GI-NEXT: mov x8, v0.d[1]
; CHECK-GI-NEXT: mov v0.d[0], x19
-; CHECK-GI-NEXT: csel x24, x8, x20, lt
+; CHECK-GI-NEXT: csel x24, x8, x20, gt
; CHECK-GI-NEXT: mov v0.d[1], x24
-; CHECK-GI-NEXT: bl __gttf2
+; CHECK-GI-NEXT: bl __lttf2
; CHECK-GI-NEXT: cmp w0, #0
-; CHECK-GI-NEXT: csel x8, x19, xzr, gt
+; CHECK-GI-NEXT: csel x8, x19, xzr, lt
; CHECK-GI-NEXT: mov v0.d[0], x8
-; CHECK-GI-NEXT: csel x8, x24, x22, gt
+; CHECK-GI-NEXT: csel x8, x24, x22, lt
; CHECK-GI-NEXT: mov v0.d[1], x8
; CHECK-GI-NEXT: bl __fixtfsi
; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
@@ -1121,21 +1121,21 @@ define <4 x i32> @test_signed_v4f128_v4i32(<4 x fp128> %f) {
; CHECK-GI-NEXT: ldp q1, q0, [sp, #64] // 32-byte Folded Reload
; CHECK-GI-NEXT: cmp w0, #0
; CHECK-GI-NEXT: csel w24, wzr, w19, ne
-; CHECK-GI-NEXT: bl __getf2
+; CHECK-GI-NEXT: bl __letf2
; CHECK-GI-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload
; CHECK-GI-NEXT: cmp w0, #0
; CHECK-GI-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload
; CHECK-GI-NEXT: fmov x8, d0
-; CHECK-GI-NEXT: csel x19, x8, xzr, lt
+; CHECK-GI-NEXT: csel x19, x8, xzr, gt
; CHECK-GI-NEXT: mov x8, v0.d[1]
; CHECK-GI-NEXT: mov v0.d[0], x19
-; CHECK-GI-NEXT: csel x20, x8, x20, lt
+; CHECK-GI-NEXT: csel x20, x8, x20, gt
; CHECK-GI-NEXT: mov v0.d[1], x20
-; CHECK-GI-NEXT: bl __gttf2
+; CHECK-GI-NEXT: bl __lttf2
; CHECK-GI-NEXT: cmp w0, #0
-; CHECK-GI-NEXT: csel x8, x19, xzr, gt
+; CHECK-GI-NEXT: csel x8, x19, xzr, lt
; CHECK-GI-NEXT: mov v0.d[0], x8
-; CHECK-GI-NEXT: csel x8, x20, x22, gt
+; CHECK-GI-NEXT: csel x8, x20, x22, lt
; CHECK-GI-NEXT: mov v0.d[1], x8
; CHECK-GI-NEXT: bl __fixtfsi
; CHECK-GI-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload
@@ -5633,26 +5633,26 @@ define <2 x i64> @test_signed_v2f128_v2i64(<2 x fp128> %f) {
; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
; CHECK-GI-NEXT: mov v1.16b, v2.16b
; CHECK-GI-NEXT: str q2, [sp, #16] // 16-byte Folded Spill
-; CHECK-GI-NEXT: bl __getf2
+; CHECK-GI-NEXT: bl __letf2
; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
; CHECK-GI-NEXT: cmp w0, #0
; CHECK-GI-NEXT: mov x20, #-4594234569871327232 // =0xc03e000000000000
; CHECK-GI-NEXT: fmov x8, d0
-; CHECK-GI-NEXT: csel x19, x8, xzr, lt
+; CHECK-GI-NEXT: csel x19, x8, xzr, gt
; CHECK-GI-NEXT: mov x8, v0.d[1]
; CHECK-GI-NEXT: mov v0.d[0], x19
-; CHECK-GI-NEXT: csel x21, x8, x20, lt
+; CHECK-GI-NEXT: csel x21, x8, x20, gt
; CHECK-GI-NEXT: adrp x8, .LCPI86_0
; CHECK-GI-NEXT: mov v0.d[1], x21
; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI86_0]
; CHECK-GI-NEXT: str q1, [sp] // 16-byte Folded Spill
-; CHECK-GI-NEXT: bl __gttf2
+; CHECK-GI-NEXT: bl __lttf2
; CHECK-GI-NEXT: mov x22, #-1125899906842624 // =0xfffc000000000000
; CHECK-GI-NEXT: cmp w0, #0
; CHECK-GI-NEXT: mov x23, #4629137466983448575 // =0x403dffffffffffff
-; CHECK-GI-NEXT: csel x8, x19, x22, gt
+; CHECK-GI-NEXT: csel x8, x19, x22, lt
; CHECK-GI-NEXT: mov v0.d[0], x8
-; CHECK-GI-NEXT: csel x8, x21, x23, gt
+; CHECK-GI-NEXT: csel x8, x21, x23, lt
; CHECK-GI-NEXT: mov v0.d[1], x8
; CHECK-GI-NEXT: bl __fixtfdi
; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
@@ -5663,21 +5663,21 @@ define <2 x i64> @test_signed_v2f128_v2i64(<2 x fp128> %f) {
; CHECK-GI-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
; CHECK-GI-NEXT: cmp w0, #0
; CHECK-GI-NEXT: csel x21, xzr, x19, ne
-; CHECK-GI-NEXT: bl __getf2
+; CHECK-GI-NEXT: bl __letf2
; CHECK-GI-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
; CHECK-GI-NEXT: cmp w0, #0
; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Folded Reload
; CHECK-GI-NEXT: fmov x8, d0
-; CHECK-GI-NEXT: csel x19, x8, xzr, lt
+; CHECK-GI-NEXT: csel x19, x8, xzr, gt
; CHECK-GI-NEXT: mov x8, v0.d[1]
; CHECK-GI-NEXT: mov v0.d[0], x19
-; CHECK-GI-NEXT: csel x20, x8, x20, lt
+; CHECK-GI-NEXT: csel x20, x8, x20, gt
; CHECK-GI-NEXT: mov v0.d[1], x20
-; CHECK-GI-NEXT: bl __gttf2
+; CHECK-GI-NEXT: bl __lttf2
; CHECK-GI-NEXT: cmp w0, #0
-; CHECK-GI-NEXT: csel x8, x19, x22, gt
+; CHECK-GI-NEXT: csel x8, x19, x22, lt
; CHECK-GI-NEXT: mov v0.d[0], x8
-; CHECK-GI-NEXT: csel x8, x20, x23, gt
+; CHECK-GI-NEXT: csel x8, x20, x23, lt
; CHECK-GI-NEXT: mov v0.d[1], x8
; CHECK-GI-NEXT: bl __fixtfdi
; CHECK-GI-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
diff --git a/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll
index 0dea7be5052d0..c3f352ec92181 100644
--- a/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll
+++ b/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll
@@ -797,25 +797,25 @@ define i32 @test_unsigned_f128_i32(fp128 %f) {
; CHECK-GI-NEXT: adrp x8, .LCPI30_1
; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI30_1]
-; CHECK-GI-NEXT: bl __getf2
+; CHECK-GI-NEXT: bl __letf2
; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-GI-NEXT: cmp w0, #0
; CHECK-GI-NEXT: fmov x8, d0
-; CHECK-GI-NEXT: csel x19, x8, xzr, lt
+; CHECK-GI-NEXT: csel x19, x8, xzr, gt
; CHECK-GI-NEXT: mov x8, v0.d[1]
; CHECK-GI-NEXT: mov v0.d[0], x19
-; CHECK-GI-NEXT: csel x20, x8, xzr, lt
+; CHECK-GI-NEXT: csel x20, x8, xzr, gt
; CHECK-GI-NEXT: adrp x8, .LCPI30_0
; CHECK-GI-NEXT: mov v0.d[1], x20
; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI30_0]
-; CHECK-GI-NEXT: bl __gttf2
+; CHECK-GI-NEXT: bl __lttf2
; CHECK-GI-NEXT: cmp w0, #0
; CHECK-GI-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
-; CHECK-GI-NEXT: csel x8, x19, xzr, gt
+; CHECK-GI-NEXT: csel x8, x19, xzr, lt
; CHECK-GI-NEXT: mov v0.d[0], x8
; CHECK-GI-NEXT: mov x8, #281474976579584 // =0xfffffffe0000
; CHECK-GI-NEXT: movk x8, #16414, lsl #48
-; CHECK-GI-NEXT: csel x8, x20, x8, gt
+; C...
[truncated]
|
@@ -7816,13 +7816,13 @@ LegalizerHelper::lowerFPTOINT_SAT(MachineInstr &MI) {
   if (AreExactFloatBounds) {
     // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
     auto MaxC = MIRBuilder.buildFConstant(SrcTy, MinFloat);
-    auto MaxP = MIRBuilder.buildFCmp(CmpInst::FCMP_ULT,
+    auto MaxP = MIRBuilder.buildFCmp(CmpInst::FCMP_UGT,
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should this be OGT? Otherwise we'll select Src if Src is NaN, while we want to select zero.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
The SDAG version uses fminnum/fmaxnum; in converting it to fcmp+select, it appears the order of the operands was chosen badly. This switches the conditions used so that the constant stays on the RHS. (cherry picked from commit 70ed381)
The SDAG version uses fminnum/fmaxnum; in converting it to fcmp+select, it appears the order of the operands was chosen badly. I've switched the conditions used so that the constant stays on the RHS.