Skip to content

[SelectionDAG] Avoid one comparison when legalizing fmaximum #142732

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
1 commit merged on Jun 4, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 8 additions & 11 deletions llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8610,19 +8610,16 @@ SDValue TargetLowering::expandFMINIMUM_FMAXIMUM(SDNode *N,
// fminimum/fmaximum requires -0.0 less than +0.0
if (!MinMaxMustRespectOrderedZero && !N->getFlags().hasNoSignedZeros() &&
!DAG.isKnownNeverZeroFloat(RHS) && !DAG.isKnownNeverZeroFloat(LHS)) {
auto IsSpecificZero = [&](SDValue F) {
FloatSignAsInt State;
DAG.getSignAsIntValue(State, DL, F);
return DAG.getSetCC(DL, CCVT, State.IntValue,
DAG.getConstant(0, DL, State.IntValue.getValueType()),
IsMax ? ISD::SETEQ : ISD::SETNE);
};
SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax,
DAG.getConstantFP(0.0, DL, VT), ISD::SETOEQ);
SDValue LCmp =
DAG.getSelect(DL, VT, IsSpecificZero(LHS), LHS, MinMax, Flags);
SDValue RCmp = DAG.getSelect(DL, VT, IsSpecificZero(RHS), RHS, LCmp, Flags);
MinMax = DAG.getSelect(DL, VT, IsZero, RCmp, MinMax, Flags);
FloatSignAsInt State;
DAG.getSignAsIntValue(State, DL, LHS);
SDValue IsSpecificZero =
DAG.getSetCC(DL, CCVT, State.IntValue,
DAG.getConstant(0, DL, State.IntValue.getValueType()),
IsMax ? ISD::SETEQ : ISD::SETNE);
SDValue Sel = DAG.getSelect(DL, VT, IsSpecificZero, LHS, RHS, Flags);
MinMax = DAG.getSelect(DL, VT, IsZero, Sel, MinMax, Flags);
}

return MinMax;
Expand Down
59 changes: 27 additions & 32 deletions llvm/test/CodeGen/AArch64/fmaximum-legalization.ll
Original file line number Diff line number Diff line change
Expand Up @@ -46,51 +46,46 @@ define fp128 @maximum_fp128(fp128 %x, fp128 %y) nounwind {
; CHECK-LABEL: maximum_fp128:
; CHECK: // %bb.0:
; CHECK-NEXT: sub sp, sp, #96
; CHECK-NEXT: str q0, [sp, #64]
; CHECK-NEXT: mov v2.16b, v1.16b
; CHECK-NEXT: ldrb w8, [sp, #79]
; CHECK-NEXT: str x30, [sp, #80] // 8-byte Folded Spill
; CHECK-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill
; CHECK-NEXT: stp q1, q0, [sp, #48]
; CHECK-NEXT: bl __gttf2
; CHECK-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload
; CHECK-NEXT: cmp w0, #0
; CHECK-NEXT: b.le .LBB1_2
; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: b.ne .LBB1_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: mov v1.16b, v0.16b
; CHECK-NEXT: mov v2.16b, v0.16b
; CHECK-NEXT: .LBB1_2:
; CHECK-NEXT: str q1, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: bl __unordtf2
; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: stp q2, q0, [sp] // 32-byte Folded Spill
; CHECK-NEXT: bl __gttf2
; CHECK-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload
; CHECK-NEXT: cmp w0, #0
; CHECK-NEXT: b.eq .LBB1_4
; CHECK-NEXT: mov v2.16b, v1.16b
; CHECK-NEXT: b.le .LBB1_4
; CHECK-NEXT: // %bb.3:
; CHECK-NEXT: adrp x8, .LCPI1_0
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI1_0]
; CHECK-NEXT: mov v2.16b, v0.16b
; CHECK-NEXT: .LBB1_4:
; CHECK-NEXT: ldrb w8, [sp, #79]
; CHECK-NEXT: mov v1.16b, v0.16b
; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: b.ne .LBB1_6
; CHECK-NEXT: str q2, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: bl __unordtf2
; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: cmp w0, #0
; CHECK-NEXT: b.eq .LBB1_6
; CHECK-NEXT: // %bb.5:
; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload
; CHECK-NEXT: adrp x8, .LCPI1_0
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI1_0]
; CHECK-NEXT: .LBB1_6:
; CHECK-NEXT: ldrb w8, [sp, #63]
; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: b.ne .LBB1_8
; CHECK-NEXT: // %bb.7:
; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: .LBB1_8:
; CHECK-NEXT: str q0, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: adrp x8, .LCPI1_1
; CHECK-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: str q1, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI1_1]
; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: bl __eqtf2
; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: cmp w0, #0
; CHECK-NEXT: b.ne .LBB1_10
; CHECK-NEXT: // %bb.9:
; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: .LBB1_10:
; CHECK-NEXT: b.ne .LBB1_8
; CHECK-NEXT: // %bb.7:
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: .LBB1_8:
; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
; CHECK-NEXT: add sp, sp, #96
; CHECK-NEXT: ret
Expand Down
20 changes: 7 additions & 13 deletions llvm/test/CodeGen/ARM/fp-maximum-legalization.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4,40 +4,34 @@
define double @maximum_double(double %x, double %y) nounwind {
; CHECK-LABEL: maximum_double:
; CHECK: @ %bb.0:
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: sub sp, sp, #8
; CHECK-NEXT: vmov d17, r2, r3
; CHECK-NEXT: mov r2, #0
; CHECK-NEXT: vmov d16, r0, r1
; CHECK-NEXT: mov r3, #0
; CHECK-NEXT: vcmp.f64 d16, d17
; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vstr d16, [sp, #8]
; CHECK-NEXT: vstr d17, [sp]
; CHECK-NEXT: ldrb r1, [sp, #15]
; CHECK-NEXT: vldr d18, .LCPI0_0
; CHECK-NEXT: vmov.f64 d19, d17
; CHECK-NEXT: vstr d16, [sp]
; CHECK-NEXT: ldrb r1, [sp, #7]
; CHECK-NEXT: clz r1, r1
; CHECK-NEXT: vldr d18, .LCPI0_0
; CHECK-NEXT: movwvs r2, #1
; CHECK-NEXT: movwgt r3, #1
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: vmovne.f64 d19, d16
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: ldrb r2, [sp, #7]
; CHECK-NEXT: vmovne.f64 d19, d18
; CHECK-NEXT: lsrs r1, r1, #5
; CHECK-NEXT: clz r1, r2
; CHECK-NEXT: vcmp.f64 d19, #0
; CHECK-NEXT: vmov.f64 d18, d19
; CHECK-NEXT: vmovne.f64 d18, d16
; CHECK-NEXT: lsrs r1, r1, #5
; CHECK-NEXT: vmovne.f64 d18, d17
; CHECK-NEXT: vmovne.f64 d17, d16
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: movweq r0, #1
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: vmovne.f64 d19, d18
; CHECK-NEXT: vmovne.f64 d19, d17
; CHECK-NEXT: vmov r0, r1, d19
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: add sp, sp, #8
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 3
; CHECK-NEXT: @ %bb.1:
Expand Down
70 changes: 32 additions & 38 deletions llvm/test/CodeGen/NVPTX/bf16-instructions.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1351,30 +1351,28 @@ define bfloat @test_roundeven(bfloat %a) {
define bfloat @test_maximum(bfloat %a, bfloat %b) {
; SM70-LABEL: test_maximum(
; SM70: {
; SM70-NEXT: .reg .pred %p<6>;
; SM70-NEXT: .reg .b16 %rs<8>;
; SM70-NEXT: .reg .pred %p<5>;
; SM70-NEXT: .reg .b16 %rs<7>;
; SM70-NEXT: .reg .b32 %r<7>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.b16 %rs1, [test_maximum_param_0];
; SM70-NEXT: setp.eq.s16 %p1, %rs1, 0;
; SM70-NEXT: ld.param.b16 %rs2, [test_maximum_param_1];
; SM70-NEXT: selp.b16 %rs3, %rs1, %rs2, %p1;
; SM70-NEXT: cvt.u32.u16 %r1, %rs2;
; SM70-NEXT: shl.b32 %r2, %r1, 16;
; SM70-NEXT: cvt.u32.u16 %r3, %rs1;
; SM70-NEXT: shl.b32 %r4, %r3, 16;
; SM70-NEXT: setp.gt.f32 %p1, %r4, %r2;
; SM70-NEXT: selp.b16 %rs3, %rs1, %rs2, %p1;
; SM70-NEXT: setp.nan.f32 %p2, %r4, %r2;
; SM70-NEXT: selp.b16 %rs4, 0x7FC0, %rs3, %p2;
; SM70-NEXT: setp.eq.s16 %p3, %rs1, 0;
; SM70-NEXT: selp.b16 %rs5, %rs1, %rs4, %p3;
; SM70-NEXT: setp.eq.s16 %p4, %rs2, 0;
; SM70-NEXT: selp.b16 %rs6, %rs2, %rs5, %p4;
; SM70-NEXT: cvt.u32.u16 %r5, %rs4;
; SM70-NEXT: setp.gt.f32 %p2, %r4, %r2;
; SM70-NEXT: selp.b16 %rs4, %rs1, %rs2, %p2;
; SM70-NEXT: setp.nan.f32 %p3, %r4, %r2;
; SM70-NEXT: selp.b16 %rs5, 0x7FC0, %rs4, %p3;
; SM70-NEXT: cvt.u32.u16 %r5, %rs5;
; SM70-NEXT: shl.b32 %r6, %r5, 16;
; SM70-NEXT: setp.eq.f32 %p5, %r6, 0f00000000;
; SM70-NEXT: selp.b16 %rs7, %rs6, %rs4, %p5;
; SM70-NEXT: st.param.b16 [func_retval0], %rs7;
; SM70-NEXT: setp.eq.f32 %p4, %r6, 0f00000000;
; SM70-NEXT: selp.b16 %rs6, %rs3, %rs5, %p4;
; SM70-NEXT: st.param.b16 [func_retval0], %rs6;
; SM70-NEXT: ret;
;
; SM80-LABEL: test_maximum(
Expand Down Expand Up @@ -1475,48 +1473,44 @@ define bfloat @test_maxnum(bfloat %a, bfloat %b) {
define <2 x bfloat> @test_maximum_v2(<2 x bfloat> %a, <2 x bfloat> %b) {
; SM70-LABEL: test_maximum_v2(
; SM70: {
; SM70-NEXT: .reg .pred %p<11>;
; SM70-NEXT: .reg .b16 %rs<19>;
; SM70-NEXT: .reg .pred %p<9>;
; SM70-NEXT: .reg .b16 %rs<15>;
; SM70-NEXT: .reg .b32 %r<16>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.b32 %r1, [test_maximum_v2_param_0];
; SM70-NEXT: ld.param.b32 %r2, [test_maximum_v2_param_1];
; SM70-NEXT: mov.b32 {%rs1, %rs2}, %r2;
; SM70-NEXT: mov.b32 {%rs3, %rs4}, %r1;
; SM70-NEXT: setp.eq.s16 %p1, %rs4, 0;
; SM70-NEXT: selp.b16 %rs7, %rs4, %rs2, %p1;
; SM70-NEXT: cvt.u32.u16 %r3, %rs2;
; SM70-NEXT: shl.b32 %r4, %r3, 16;
; SM70-NEXT: mov.b32 {%rs3, %rs4}, %r1;
; SM70-NEXT: cvt.u32.u16 %r5, %rs4;
; SM70-NEXT: shl.b32 %r6, %r5, 16;
; SM70-NEXT: setp.gt.f32 %p1, %r6, %r4;
; SM70-NEXT: selp.b16 %rs5, %rs4, %rs2, %p1;
; SM70-NEXT: setp.nan.f32 %p2, %r6, %r4;
; SM70-NEXT: selp.b16 %rs6, 0x7FC0, %rs5, %p2;
; SM70-NEXT: setp.eq.s16 %p3, %rs4, 0;
; SM70-NEXT: selp.b16 %rs9, %rs4, %rs6, %p3;
; SM70-NEXT: setp.eq.s16 %p4, %rs2, 0;
; SM70-NEXT: selp.b16 %rs12, %rs2, %rs9, %p4;
; SM70-NEXT: cvt.u32.u16 %r7, %rs6;
; SM70-NEXT: setp.gt.f32 %p2, %r6, %r4;
; SM70-NEXT: selp.b16 %rs8, %rs4, %rs2, %p2;
; SM70-NEXT: setp.nan.f32 %p3, %r6, %r4;
; SM70-NEXT: selp.b16 %rs9, 0x7FC0, %rs8, %p3;
; SM70-NEXT: cvt.u32.u16 %r7, %rs9;
; SM70-NEXT: shl.b32 %r8, %r7, 16;
; SM70-NEXT: setp.eq.f32 %p5, %r8, 0f00000000;
; SM70-NEXT: selp.b16 %rs13, %rs12, %rs6, %p5;
; SM70-NEXT: setp.eq.f32 %p4, %r8, 0f00000000;
; SM70-NEXT: selp.b16 %rs10, %rs7, %rs9, %p4;
; SM70-NEXT: setp.eq.s16 %p5, %rs3, 0;
; SM70-NEXT: selp.b16 %rs11, %rs3, %rs1, %p5;
; SM70-NEXT: cvt.u32.u16 %r9, %rs1;
; SM70-NEXT: shl.b32 %r10, %r9, 16;
; SM70-NEXT: cvt.u32.u16 %r11, %rs3;
; SM70-NEXT: shl.b32 %r12, %r11, 16;
; SM70-NEXT: setp.gt.f32 %p6, %r12, %r10;
; SM70-NEXT: selp.b16 %rs14, %rs3, %rs1, %p6;
; SM70-NEXT: selp.b16 %rs12, %rs3, %rs1, %p6;
; SM70-NEXT: setp.nan.f32 %p7, %r12, %r10;
; SM70-NEXT: selp.b16 %rs15, 0x7FC0, %rs14, %p7;
; SM70-NEXT: setp.eq.s16 %p8, %rs3, 0;
; SM70-NEXT: selp.b16 %rs16, %rs3, %rs15, %p8;
; SM70-NEXT: setp.eq.s16 %p9, %rs1, 0;
; SM70-NEXT: selp.b16 %rs17, %rs1, %rs16, %p9;
; SM70-NEXT: cvt.u32.u16 %r13, %rs15;
; SM70-NEXT: selp.b16 %rs13, 0x7FC0, %rs12, %p7;
; SM70-NEXT: cvt.u32.u16 %r13, %rs13;
; SM70-NEXT: shl.b32 %r14, %r13, 16;
; SM70-NEXT: setp.eq.f32 %p10, %r14, 0f00000000;
; SM70-NEXT: selp.b16 %rs18, %rs17, %rs15, %p10;
; SM70-NEXT: mov.b32 %r15, {%rs18, %rs13};
; SM70-NEXT: setp.eq.f32 %p8, %r14, 0f00000000;
; SM70-NEXT: selp.b16 %rs14, %rs11, %rs13, %p8;
; SM70-NEXT: mov.b32 %r15, {%rs14, %rs10};
; SM70-NEXT: st.param.b32 [func_retval0], %r15;
; SM70-NEXT: ret;
;
Expand Down
Loading
Loading