Skip to content

Commit a2724e8

Browse files
nikic authored and rorth committed
Revert "[SelectionDAG] Avoid one comparison when legalizing fmaximum (llvm#142732)"
This reverts commit 54da543. I made a logic error here with the assumption that both values are known to be +/-0.0.
1 parent 85f6afd commit a2724e8

File tree

9 files changed

+958
-773
lines changed

9 files changed

+958
-773
lines changed

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8610,16 +8610,19 @@ SDValue TargetLowering::expandFMINIMUM_FMAXIMUM(SDNode *N,
86108610
// fminimum/fmaximum requires -0.0 less than +0.0
86118611
if (!MinMaxMustRespectOrderedZero && !N->getFlags().hasNoSignedZeros() &&
86128612
!DAG.isKnownNeverZeroFloat(RHS) && !DAG.isKnownNeverZeroFloat(LHS)) {
8613+
auto IsSpecificZero = [&](SDValue F) {
8614+
FloatSignAsInt State;
8615+
DAG.getSignAsIntValue(State, DL, F);
8616+
return DAG.getSetCC(DL, CCVT, State.IntValue,
8617+
DAG.getConstant(0, DL, State.IntValue.getValueType()),
8618+
IsMax ? ISD::SETEQ : ISD::SETNE);
8619+
};
86138620
SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax,
86148621
DAG.getConstantFP(0.0, DL, VT), ISD::SETOEQ);
8615-
FloatSignAsInt State;
8616-
DAG.getSignAsIntValue(State, DL, LHS);
8617-
SDValue IsSpecificZero =
8618-
DAG.getSetCC(DL, CCVT, State.IntValue,
8619-
DAG.getConstant(0, DL, State.IntValue.getValueType()),
8620-
IsMax ? ISD::SETEQ : ISD::SETNE);
8621-
SDValue Sel = DAG.getSelect(DL, VT, IsSpecificZero, LHS, RHS, Flags);
8622-
MinMax = DAG.getSelect(DL, VT, IsZero, Sel, MinMax, Flags);
8622+
SDValue LCmp =
8623+
DAG.getSelect(DL, VT, IsSpecificZero(LHS), LHS, MinMax, Flags);
8624+
SDValue RCmp = DAG.getSelect(DL, VT, IsSpecificZero(RHS), RHS, LCmp, Flags);
8625+
MinMax = DAG.getSelect(DL, VT, IsZero, RCmp, MinMax, Flags);
86238626
}
86248627

86258628
return MinMax;

llvm/test/CodeGen/AArch64/fmaximum-legalization.ll

Lines changed: 32 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -46,46 +46,51 @@ define fp128 @maximum_fp128(fp128 %x, fp128 %y) nounwind {
4646
; CHECK-LABEL: maximum_fp128:
4747
; CHECK: // %bb.0:
4848
; CHECK-NEXT: sub sp, sp, #96
49-
; CHECK-NEXT: str q0, [sp, #64]
50-
; CHECK-NEXT: mov v2.16b, v1.16b
51-
; CHECK-NEXT: ldrb w8, [sp, #79]
5249
; CHECK-NEXT: str x30, [sp, #80] // 8-byte Folded Spill
53-
; CHECK-NEXT: cmp w8, #0
54-
; CHECK-NEXT: b.ne .LBB1_2
50+
; CHECK-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill
51+
; CHECK-NEXT: stp q1, q0, [sp, #48]
52+
; CHECK-NEXT: bl __gttf2
53+
; CHECK-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload
54+
; CHECK-NEXT: cmp w0, #0
55+
; CHECK-NEXT: b.le .LBB1_2
5556
; CHECK-NEXT: // %bb.1:
56-
; CHECK-NEXT: mov v2.16b, v0.16b
57+
; CHECK-NEXT: mov v1.16b, v0.16b
5758
; CHECK-NEXT: .LBB1_2:
5859
; CHECK-NEXT: str q1, [sp, #32] // 16-byte Folded Spill
59-
; CHECK-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload
60-
; CHECK-NEXT: stp q2, q0, [sp] // 32-byte Folded Spill
61-
; CHECK-NEXT: bl __gttf2
62-
; CHECK-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload
63-
; CHECK-NEXT: cmp w0, #0
64-
; CHECK-NEXT: mov v2.16b, v1.16b
65-
; CHECK-NEXT: b.le .LBB1_4
66-
; CHECK-NEXT: // %bb.3:
67-
; CHECK-NEXT: mov v2.16b, v0.16b
68-
; CHECK-NEXT: .LBB1_4:
69-
; CHECK-NEXT: str q2, [sp, #48] // 16-byte Folded Spill
60+
; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
7061
; CHECK-NEXT: bl __unordtf2
71-
; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
62+
; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
7263
; CHECK-NEXT: cmp w0, #0
73-
; CHECK-NEXT: b.eq .LBB1_6
74-
; CHECK-NEXT: // %bb.5:
64+
; CHECK-NEXT: b.eq .LBB1_4
65+
; CHECK-NEXT: // %bb.3:
7566
; CHECK-NEXT: adrp x8, .LCPI1_0
7667
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI1_0]
68+
; CHECK-NEXT: .LBB1_4:
69+
; CHECK-NEXT: ldrb w8, [sp, #79]
70+
; CHECK-NEXT: mov v1.16b, v0.16b
71+
; CHECK-NEXT: cmp w8, #0
72+
; CHECK-NEXT: b.ne .LBB1_6
73+
; CHECK-NEXT: // %bb.5:
74+
; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload
7775
; CHECK-NEXT: .LBB1_6:
78-
; CHECK-NEXT: str q0, [sp, #48] // 16-byte Folded Spill
76+
; CHECK-NEXT: ldrb w8, [sp, #63]
77+
; CHECK-NEXT: cmp w8, #0
78+
; CHECK-NEXT: b.ne .LBB1_8
79+
; CHECK-NEXT: // %bb.7:
80+
; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
81+
; CHECK-NEXT: .LBB1_8:
7982
; CHECK-NEXT: adrp x8, .LCPI1_1
83+
; CHECK-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
84+
; CHECK-NEXT: str q1, [sp, #16] // 16-byte Folded Spill
8085
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI1_1]
81-
; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
86+
; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
8287
; CHECK-NEXT: bl __eqtf2
83-
; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
88+
; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
8489
; CHECK-NEXT: cmp w0, #0
85-
; CHECK-NEXT: b.ne .LBB1_8
86-
; CHECK-NEXT: // %bb.7:
87-
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
88-
; CHECK-NEXT: .LBB1_8:
90+
; CHECK-NEXT: b.ne .LBB1_10
91+
; CHECK-NEXT: // %bb.9:
92+
; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
93+
; CHECK-NEXT: .LBB1_10:
8994
; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
9095
; CHECK-NEXT: add sp, sp, #96
9196
; CHECK-NEXT: ret

llvm/test/CodeGen/ARM/fp-maximum-legalization.ll

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,34 +4,40 @@
44
define double @maximum_double(double %x, double %y) nounwind {
55
; CHECK-LABEL: maximum_double:
66
; CHECK: @ %bb.0:
7-
; CHECK-NEXT: sub sp, sp, #8
7+
; CHECK-NEXT: sub sp, sp, #16
88
; CHECK-NEXT: vmov d17, r2, r3
99
; CHECK-NEXT: mov r2, #0
1010
; CHECK-NEXT: vmov d16, r0, r1
1111
; CHECK-NEXT: mov r3, #0
1212
; CHECK-NEXT: vcmp.f64 d16, d17
1313
; CHECK-NEXT: mov r0, #0
1414
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
15-
; CHECK-NEXT: vldr d18, .LCPI0_0
15+
; CHECK-NEXT: vstr d16, [sp, #8]
16+
; CHECK-NEXT: vstr d17, [sp]
17+
; CHECK-NEXT: ldrb r1, [sp, #15]
1618
; CHECK-NEXT: vmov.f64 d19, d17
17-
; CHECK-NEXT: vstr d16, [sp]
18-
; CHECK-NEXT: ldrb r1, [sp, #7]
1919
; CHECK-NEXT: clz r1, r1
20+
; CHECK-NEXT: vldr d18, .LCPI0_0
2021
; CHECK-NEXT: movwvs r2, #1
2122
; CHECK-NEXT: movwgt r3, #1
2223
; CHECK-NEXT: cmp r3, #0
2324
; CHECK-NEXT: vmovne.f64 d19, d16
2425
; CHECK-NEXT: cmp r2, #0
26+
; CHECK-NEXT: ldrb r2, [sp, #7]
2527
; CHECK-NEXT: vmovne.f64 d19, d18
2628
; CHECK-NEXT: lsrs r1, r1, #5
29+
; CHECK-NEXT: clz r1, r2
2730
; CHECK-NEXT: vcmp.f64 d19, #0
28-
; CHECK-NEXT: vmovne.f64 d17, d16
31+
; CHECK-NEXT: vmov.f64 d18, d19
32+
; CHECK-NEXT: vmovne.f64 d18, d16
33+
; CHECK-NEXT: lsrs r1, r1, #5
34+
; CHECK-NEXT: vmovne.f64 d18, d17
2935
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
3036
; CHECK-NEXT: movweq r0, #1
3137
; CHECK-NEXT: cmp r0, #0
32-
; CHECK-NEXT: vmovne.f64 d19, d17
38+
; CHECK-NEXT: vmovne.f64 d19, d18
3339
; CHECK-NEXT: vmov r0, r1, d19
34-
; CHECK-NEXT: add sp, sp, #8
40+
; CHECK-NEXT: add sp, sp, #16
3541
; CHECK-NEXT: bx lr
3642
; CHECK-NEXT: .p2align 3
3743
; CHECK-NEXT: @ %bb.1:

llvm/test/CodeGen/NVPTX/bf16-instructions.ll

Lines changed: 38 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1351,28 +1351,30 @@ define bfloat @test_roundeven(bfloat %a) {
13511351
define bfloat @test_maximum(bfloat %a, bfloat %b) {
13521352
; SM70-LABEL: test_maximum(
13531353
; SM70: {
1354-
; SM70-NEXT: .reg .pred %p<5>;
1355-
; SM70-NEXT: .reg .b16 %rs<7>;
1354+
; SM70-NEXT: .reg .pred %p<6>;
1355+
; SM70-NEXT: .reg .b16 %rs<8>;
13561356
; SM70-NEXT: .reg .b32 %r<7>;
13571357
; SM70-EMPTY:
13581358
; SM70-NEXT: // %bb.0:
13591359
; SM70-NEXT: ld.param.b16 %rs1, [test_maximum_param_0];
1360-
; SM70-NEXT: setp.eq.s16 %p1, %rs1, 0;
13611360
; SM70-NEXT: ld.param.b16 %rs2, [test_maximum_param_1];
1362-
; SM70-NEXT: selp.b16 %rs3, %rs1, %rs2, %p1;
13631361
; SM70-NEXT: cvt.u32.u16 %r1, %rs2;
13641362
; SM70-NEXT: shl.b32 %r2, %r1, 16;
13651363
; SM70-NEXT: cvt.u32.u16 %r3, %rs1;
13661364
; SM70-NEXT: shl.b32 %r4, %r3, 16;
1367-
; SM70-NEXT: setp.gt.f32 %p2, %r4, %r2;
1368-
; SM70-NEXT: selp.b16 %rs4, %rs1, %rs2, %p2;
1369-
; SM70-NEXT: setp.nan.f32 %p3, %r4, %r2;
1370-
; SM70-NEXT: selp.b16 %rs5, 0x7FC0, %rs4, %p3;
1371-
; SM70-NEXT: cvt.u32.u16 %r5, %rs5;
1365+
; SM70-NEXT: setp.gt.f32 %p1, %r4, %r2;
1366+
; SM70-NEXT: selp.b16 %rs3, %rs1, %rs2, %p1;
1367+
; SM70-NEXT: setp.nan.f32 %p2, %r4, %r2;
1368+
; SM70-NEXT: selp.b16 %rs4, 0x7FC0, %rs3, %p2;
1369+
; SM70-NEXT: setp.eq.s16 %p3, %rs1, 0;
1370+
; SM70-NEXT: selp.b16 %rs5, %rs1, %rs4, %p3;
1371+
; SM70-NEXT: setp.eq.s16 %p4, %rs2, 0;
1372+
; SM70-NEXT: selp.b16 %rs6, %rs2, %rs5, %p4;
1373+
; SM70-NEXT: cvt.u32.u16 %r5, %rs4;
13721374
; SM70-NEXT: shl.b32 %r6, %r5, 16;
1373-
; SM70-NEXT: setp.eq.f32 %p4, %r6, 0f00000000;
1374-
; SM70-NEXT: selp.b16 %rs6, %rs3, %rs5, %p4;
1375-
; SM70-NEXT: st.param.b16 [func_retval0], %rs6;
1375+
; SM70-NEXT: setp.eq.f32 %p5, %r6, 0f00000000;
1376+
; SM70-NEXT: selp.b16 %rs7, %rs6, %rs4, %p5;
1377+
; SM70-NEXT: st.param.b16 [func_retval0], %rs7;
13761378
; SM70-NEXT: ret;
13771379
;
13781380
; SM80-LABEL: test_maximum(
@@ -1473,44 +1475,48 @@ define bfloat @test_maxnum(bfloat %a, bfloat %b) {
14731475
define <2 x bfloat> @test_maximum_v2(<2 x bfloat> %a, <2 x bfloat> %b) {
14741476
; SM70-LABEL: test_maximum_v2(
14751477
; SM70: {
1476-
; SM70-NEXT: .reg .pred %p<9>;
1477-
; SM70-NEXT: .reg .b16 %rs<15>;
1478+
; SM70-NEXT: .reg .pred %p<11>;
1479+
; SM70-NEXT: .reg .b16 %rs<19>;
14781480
; SM70-NEXT: .reg .b32 %r<16>;
14791481
; SM70-EMPTY:
14801482
; SM70-NEXT: // %bb.0:
14811483
; SM70-NEXT: ld.param.b32 %r1, [test_maximum_v2_param_0];
14821484
; SM70-NEXT: ld.param.b32 %r2, [test_maximum_v2_param_1];
14831485
; SM70-NEXT: mov.b32 {%rs1, %rs2}, %r2;
1484-
; SM70-NEXT: mov.b32 {%rs3, %rs4}, %r1;
1485-
; SM70-NEXT: setp.eq.s16 %p1, %rs4, 0;
1486-
; SM70-NEXT: selp.b16 %rs7, %rs4, %rs2, %p1;
14871486
; SM70-NEXT: cvt.u32.u16 %r3, %rs2;
14881487
; SM70-NEXT: shl.b32 %r4, %r3, 16;
1488+
; SM70-NEXT: mov.b32 {%rs3, %rs4}, %r1;
14891489
; SM70-NEXT: cvt.u32.u16 %r5, %rs4;
14901490
; SM70-NEXT: shl.b32 %r6, %r5, 16;
1491-
; SM70-NEXT: setp.gt.f32 %p2, %r6, %r4;
1492-
; SM70-NEXT: selp.b16 %rs8, %rs4, %rs2, %p2;
1493-
; SM70-NEXT: setp.nan.f32 %p3, %r6, %r4;
1494-
; SM70-NEXT: selp.b16 %rs9, 0x7FC0, %rs8, %p3;
1495-
; SM70-NEXT: cvt.u32.u16 %r7, %rs9;
1491+
; SM70-NEXT: setp.gt.f32 %p1, %r6, %r4;
1492+
; SM70-NEXT: selp.b16 %rs5, %rs4, %rs2, %p1;
1493+
; SM70-NEXT: setp.nan.f32 %p2, %r6, %r4;
1494+
; SM70-NEXT: selp.b16 %rs6, 0x7FC0, %rs5, %p2;
1495+
; SM70-NEXT: setp.eq.s16 %p3, %rs4, 0;
1496+
; SM70-NEXT: selp.b16 %rs9, %rs4, %rs6, %p3;
1497+
; SM70-NEXT: setp.eq.s16 %p4, %rs2, 0;
1498+
; SM70-NEXT: selp.b16 %rs12, %rs2, %rs9, %p4;
1499+
; SM70-NEXT: cvt.u32.u16 %r7, %rs6;
14961500
; SM70-NEXT: shl.b32 %r8, %r7, 16;
1497-
; SM70-NEXT: setp.eq.f32 %p4, %r8, 0f00000000;
1498-
; SM70-NEXT: selp.b16 %rs10, %rs7, %rs9, %p4;
1499-
; SM70-NEXT: setp.eq.s16 %p5, %rs3, 0;
1500-
; SM70-NEXT: selp.b16 %rs11, %rs3, %rs1, %p5;
1501+
; SM70-NEXT: setp.eq.f32 %p5, %r8, 0f00000000;
1502+
; SM70-NEXT: selp.b16 %rs13, %rs12, %rs6, %p5;
15011503
; SM70-NEXT: cvt.u32.u16 %r9, %rs1;
15021504
; SM70-NEXT: shl.b32 %r10, %r9, 16;
15031505
; SM70-NEXT: cvt.u32.u16 %r11, %rs3;
15041506
; SM70-NEXT: shl.b32 %r12, %r11, 16;
15051507
; SM70-NEXT: setp.gt.f32 %p6, %r12, %r10;
1506-
; SM70-NEXT: selp.b16 %rs12, %rs3, %rs1, %p6;
1508+
; SM70-NEXT: selp.b16 %rs14, %rs3, %rs1, %p6;
15071509
; SM70-NEXT: setp.nan.f32 %p7, %r12, %r10;
1508-
; SM70-NEXT: selp.b16 %rs13, 0x7FC0, %rs12, %p7;
1509-
; SM70-NEXT: cvt.u32.u16 %r13, %rs13;
1510+
; SM70-NEXT: selp.b16 %rs15, 0x7FC0, %rs14, %p7;
1511+
; SM70-NEXT: setp.eq.s16 %p8, %rs3, 0;
1512+
; SM70-NEXT: selp.b16 %rs16, %rs3, %rs15, %p8;
1513+
; SM70-NEXT: setp.eq.s16 %p9, %rs1, 0;
1514+
; SM70-NEXT: selp.b16 %rs17, %rs1, %rs16, %p9;
1515+
; SM70-NEXT: cvt.u32.u16 %r13, %rs15;
15101516
; SM70-NEXT: shl.b32 %r14, %r13, 16;
1511-
; SM70-NEXT: setp.eq.f32 %p8, %r14, 0f00000000;
1512-
; SM70-NEXT: selp.b16 %rs14, %rs11, %rs13, %p8;
1513-
; SM70-NEXT: mov.b32 %r15, {%rs14, %rs10};
1517+
; SM70-NEXT: setp.eq.f32 %p10, %r14, 0f00000000;
1518+
; SM70-NEXT: selp.b16 %rs18, %rs17, %rs15, %p10;
1519+
; SM70-NEXT: mov.b32 %r15, {%rs18, %rs13};
15141520
; SM70-NEXT: st.param.b32 [func_retval0], %r15;
15151521
; SM70-NEXT: ret;
15161522
;

0 commit comments

Comments
 (0)