diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index c8fe8971e593c..f34bf0ca7ede0 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -8610,19 +8610,16 @@ SDValue TargetLowering::expandFMINIMUM_FMAXIMUM(SDNode *N, // fminimum/fmaximum requires -0.0 less than +0.0 if (!MinMaxMustRespectOrderedZero && !N->getFlags().hasNoSignedZeros() && !DAG.isKnownNeverZeroFloat(RHS) && !DAG.isKnownNeverZeroFloat(LHS)) { - auto IsSpecificZero = [&](SDValue F) { - FloatSignAsInt State; - DAG.getSignAsIntValue(State, DL, F); - return DAG.getSetCC(DL, CCVT, State.IntValue, - DAG.getConstant(0, DL, State.IntValue.getValueType()), - IsMax ? ISD::SETEQ : ISD::SETNE); - }; SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax, DAG.getConstantFP(0.0, DL, VT), ISD::SETOEQ); - SDValue LCmp = - DAG.getSelect(DL, VT, IsSpecificZero(LHS), LHS, MinMax, Flags); - SDValue RCmp = DAG.getSelect(DL, VT, IsSpecificZero(RHS), RHS, LCmp, Flags); - MinMax = DAG.getSelect(DL, VT, IsZero, RCmp, MinMax, Flags); + FloatSignAsInt State; + DAG.getSignAsIntValue(State, DL, LHS); + SDValue IsSpecificZero = + DAG.getSetCC(DL, CCVT, State.IntValue, + DAG.getConstant(0, DL, State.IntValue.getValueType()), + IsMax ? ISD::SETEQ : ISD::SETNE); + SDValue Sel = DAG.getSelect(DL, VT, IsSpecificZero, LHS, RHS, Flags); + MinMax = DAG.getSelect(DL, VT, IsZero, Sel, MinMax, Flags); } return MinMax; diff --git a/llvm/test/CodeGen/AArch64/fmaximum-legalization.ll b/llvm/test/CodeGen/AArch64/fmaximum-legalization.ll index 9f542abcb80f7..b47470fb78234 100644 --- a/llvm/test/CodeGen/AArch64/fmaximum-legalization.ll +++ b/llvm/test/CodeGen/AArch64/fmaximum-legalization.ll @@ -46,51 +46,46 @@ define fp128 @maximum_fp128(fp128 %x, fp128 %y) nounwind { ; CHECK-LABEL: maximum_fp128: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #96 +; CHECK-NEXT: str q0, [sp, #64] +; CHECK-NEXT: mov v2.16b, v1.16b +; CHECK-NEXT: ldrb w8, [sp, #79] ; CHECK-NEXT: str x30, [sp, #80] // 8-byte Folded Spill -; CHECK-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill -; CHECK-NEXT: stp q1, q0, [sp, #48] -; CHECK-NEXT: bl __gttf2 -; CHECK-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload -; CHECK-NEXT: cmp w0, #0 -; CHECK-NEXT: b.le .LBB1_2 +; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: b.ne .LBB1_2 ; CHECK-NEXT: // %bb.1: -; CHECK-NEXT: mov v1.16b, v0.16b +; CHECK-NEXT: mov v2.16b, v0.16b ; CHECK-NEXT: .LBB1_2: ; CHECK-NEXT: str q1, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: bl __unordtf2 -; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: stp q2, q0, [sp] // 32-byte Folded Spill +; CHECK-NEXT: bl __gttf2 +; CHECK-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload ; CHECK-NEXT: cmp w0, #0 -; CHECK-NEXT: b.eq .LBB1_4 +; CHECK-NEXT: mov v2.16b, v1.16b +; CHECK-NEXT: b.le .LBB1_4 ; CHECK-NEXT: // %bb.3: -; CHECK-NEXT: adrp x8, .LCPI1_0 -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI1_0] +; CHECK-NEXT: mov v2.16b, v0.16b ; CHECK-NEXT: .LBB1_4: -; CHECK-NEXT: ldrb w8, [sp, #79] -; CHECK-NEXT: mov v1.16b, v0.16b -; CHECK-NEXT: cmp w8, #0 -; CHECK-NEXT: b.ne .LBB1_6 +; CHECK-NEXT: str q2, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: bl __unordtf2 +; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: b.eq .LBB1_6 ; CHECK-NEXT: // %bb.5: -; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NEXT: adrp x8, .LCPI1_0 +; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI1_0] ; CHECK-NEXT: .LBB1_6: -; CHECK-NEXT: ldrb w8, [sp, #63] -; CHECK-NEXT: cmp w8, #0 -; CHECK-NEXT: b.ne .LBB1_8 -; CHECK-NEXT: // %bb.7: -; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: .LBB1_8: +; CHECK-NEXT: str q0, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: adrp x8, .LCPI1_1 -; CHECK-NEXT: str q0, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: str q1, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI1_1] -; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: bl __eqtf2 -; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: cmp w0, #0 -; CHECK-NEXT: b.ne .LBB1_10 -; CHECK-NEXT: // %bb.9: -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: .LBB1_10: +; CHECK-NEXT: b.ne .LBB1_8 +; CHECK-NEXT: // %bb.7: +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: .LBB1_8: ; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload ; CHECK-NEXT: add sp, sp, #96 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/ARM/fp-maximum-legalization.ll b/llvm/test/CodeGen/ARM/fp-maximum-legalization.ll index a3ab144356e16..f3e3e17a22eaf 100644 --- a/llvm/test/CodeGen/ARM/fp-maximum-legalization.ll +++ b/llvm/test/CodeGen/ARM/fp-maximum-legalization.ll @@ -4,7 +4,7 @@ define double @maximum_double(double %x, double %y) nounwind { ; CHECK-LABEL: maximum_double: ; CHECK: @ %bb.0: -; CHECK-NEXT: sub sp, sp, #16 +; CHECK-NEXT: sub sp, sp, #8 ; CHECK-NEXT: vmov d17, r2, r3 ; CHECK-NEXT: mov r2, #0 ; CHECK-NEXT: vmov d16, r0, r1 @@ -12,32 +12,26 @@ define double @maximum_double(double %x, double %y) nounwind { ; CHECK-NEXT: vcmp.f64 d16, d17 ; CHECK-NEXT: mov r0, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vstr d16, [sp, #8] -; CHECK-NEXT: vstr d17, [sp] -; CHECK-NEXT: ldrb r1, [sp, #15] +; CHECK-NEXT: vldr d18, .LCPI0_0 ; CHECK-NEXT: vmov.f64 d19, d17 +; CHECK-NEXT: vstr d16, [sp] +; CHECK-NEXT: ldrb r1, [sp, #7] ; CHECK-NEXT: clz r1, r1 -; CHECK-NEXT: vldr d18, .LCPI0_0 ; CHECK-NEXT: movwvs r2, #1 ; CHECK-NEXT: movwgt r3, #1 ; CHECK-NEXT: cmp r3, #0 ; CHECK-NEXT: vmovne.f64 d19, d16 ; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: ldrb r2, [sp, #7] ; CHECK-NEXT: vmovne.f64 d19, d18 ; CHECK-NEXT: lsrs r1, r1, #5 -; CHECK-NEXT: clz r1, r2 ; CHECK-NEXT: vcmp.f64 d19, #0 -; CHECK-NEXT: vmov.f64 d18, d19 -; CHECK-NEXT: vmovne.f64 d18, d16 -; CHECK-NEXT: lsrs r1, r1, #5 -; CHECK-NEXT: vmovne.f64 d18, d17 +; CHECK-NEXT: vmovne.f64 d17, d16 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: movweq r0, #1 ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: vmovne.f64 d19, d18 +; CHECK-NEXT: vmovne.f64 d19, d17 ; CHECK-NEXT: vmov r0, r1, d19 -; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: add sp, sp, #8 ; CHECK-NEXT: bx lr ; CHECK-NEXT: .p2align 3 ; CHECK-NEXT: @ %bb.1: diff --git a/llvm/test/CodeGen/NVPTX/bf16-instructions.ll b/llvm/test/CodeGen/NVPTX/bf16-instructions.ll index 096649e5bde43..e16ddb778099c 100644 --- a/llvm/test/CodeGen/NVPTX/bf16-instructions.ll +++ b/llvm/test/CodeGen/NVPTX/bf16-instructions.ll @@ -1351,30 +1351,28 @@ define bfloat @test_roundeven(bfloat %a) { define bfloat @test_maximum(bfloat %a, bfloat %b) { ; SM70-LABEL: test_maximum( ; SM70: { -; SM70-NEXT: .reg .pred %p<6>; -; SM70-NEXT: .reg .b16 %rs<8>; +; SM70-NEXT: .reg .pred %p<5>; +; SM70-NEXT: .reg .b16 %rs<7>; ; SM70-NEXT: .reg .b32 %r<7>; ; SM70-EMPTY: ; SM70-NEXT: // %bb.0: ; SM70-NEXT: ld.param.b16 %rs1, [test_maximum_param_0]; +; SM70-NEXT: setp.eq.s16 %p1, %rs1, 0; ; SM70-NEXT: ld.param.b16 %rs2, [test_maximum_param_1]; +; SM70-NEXT: selp.b16 %rs3, %rs1, %rs2, %p1; ; SM70-NEXT: cvt.u32.u16 %r1, %rs2; ; SM70-NEXT: shl.b32 %r2, %r1, 16; ; SM70-NEXT: cvt.u32.u16 %r3, %rs1; ; SM70-NEXT: shl.b32 %r4, %r3, 16; -; SM70-NEXT: setp.gt.f32 %p1, %r4, %r2; -; SM70-NEXT: selp.b16 %rs3, %rs1, %rs2, %p1; -; SM70-NEXT: setp.nan.f32 %p2, %r4, %r2; -; SM70-NEXT: selp.b16 %rs4, 0x7FC0, %rs3, %p2; -; SM70-NEXT: setp.eq.s16 %p3, %rs1, 0; -; SM70-NEXT: selp.b16 %rs5, %rs1, %rs4, %p3; -; SM70-NEXT: setp.eq.s16 %p4, %rs2, 0; -; SM70-NEXT: selp.b16 %rs6, %rs2, %rs5, %p4; -; SM70-NEXT: cvt.u32.u16 %r5, %rs4; +; SM70-NEXT: setp.gt.f32 %p2, %r4, %r2; +; SM70-NEXT: selp.b16 %rs4, %rs1, %rs2, %p2; +; SM70-NEXT: setp.nan.f32 %p3, %r4, %r2; +; SM70-NEXT: selp.b16 %rs5, 0x7FC0, %rs4, %p3; +; SM70-NEXT: cvt.u32.u16 %r5, %rs5; ; SM70-NEXT: shl.b32 %r6, %r5, 16; -; SM70-NEXT: setp.eq.f32 %p5, %r6, 0f00000000; -; SM70-NEXT: selp.b16 %rs7, %rs6, %rs4, %p5; -; SM70-NEXT: st.param.b16 [func_retval0], %rs7; +; SM70-NEXT: setp.eq.f32 %p4, %r6, 0f00000000; +; SM70-NEXT: selp.b16 %rs6, %rs3, %rs5, %p4; +; SM70-NEXT: st.param.b16 [func_retval0], %rs6; ; SM70-NEXT: ret; ; ; SM80-LABEL: test_maximum( @@ -1475,48 +1473,44 @@ define bfloat @test_maxnum(bfloat %a, bfloat %b) { define <2 x bfloat> @test_maximum_v2(<2 x bfloat> %a, <2 x bfloat> %b) { ; SM70-LABEL: test_maximum_v2( ; SM70: { -; SM70-NEXT: .reg .pred %p<11>; -; SM70-NEXT: .reg .b16 %rs<19>; +; SM70-NEXT: .reg .pred %p<9>; +; SM70-NEXT: .reg .b16 %rs<15>; ; SM70-NEXT: .reg .b32 %r<16>; ; SM70-EMPTY: ; SM70-NEXT: // %bb.0: ; SM70-NEXT: ld.param.b32 %r1, [test_maximum_v2_param_0]; ; SM70-NEXT: ld.param.b32 %r2, [test_maximum_v2_param_1]; ; SM70-NEXT: mov.b32 {%rs1, %rs2}, %r2; +; SM70-NEXT: mov.b32 {%rs3, %rs4}, %r1; +; SM70-NEXT: setp.eq.s16 %p1, %rs4, 0; +; SM70-NEXT: selp.b16 %rs7, %rs4, %rs2, %p1; ; SM70-NEXT: cvt.u32.u16 %r3, %rs2; ; SM70-NEXT: shl.b32 %r4, %r3, 16; -; SM70-NEXT: mov.b32 {%rs3, %rs4}, %r1; ; SM70-NEXT: cvt.u32.u16 %r5, %rs4; ; SM70-NEXT: shl.b32 %r6, %r5, 16; -; SM70-NEXT: setp.gt.f32 %p1, %r6, %r4; -; SM70-NEXT: selp.b16 %rs5, %rs4, %rs2, %p1; -; SM70-NEXT: setp.nan.f32 %p2, %r6, %r4; -; SM70-NEXT: selp.b16 %rs6, 0x7FC0, %rs5, %p2; -; SM70-NEXT: setp.eq.s16 %p3, %rs4, 0; -; SM70-NEXT: selp.b16 %rs9, %rs4, %rs6, %p3; -; SM70-NEXT: setp.eq.s16 %p4, %rs2, 0; -; SM70-NEXT: selp.b16 %rs12, %rs2, %rs9, %p4; -; SM70-NEXT: cvt.u32.u16 %r7, %rs6; +; SM70-NEXT: setp.gt.f32 %p2, %r6, %r4; +; SM70-NEXT: selp.b16 %rs8, %rs4, %rs2, %p2; +; SM70-NEXT: setp.nan.f32 %p3, %r6, %r4; +; SM70-NEXT: selp.b16 %rs9, 0x7FC0, %rs8, %p3; +; SM70-NEXT: cvt.u32.u16 %r7, %rs9; ; SM70-NEXT: shl.b32 %r8, %r7, 16; -; SM70-NEXT: setp.eq.f32 %p5, %r8, 0f00000000; -; SM70-NEXT: selp.b16 %rs13, %rs12, %rs6, %p5; +; SM70-NEXT: setp.eq.f32 %p4, %r8, 0f00000000; +; SM70-NEXT: selp.b16 %rs10, %rs7, %rs9, %p4; +; SM70-NEXT: setp.eq.s16 %p5, %rs3, 0; +; SM70-NEXT: selp.b16 %rs11, %rs3, %rs1, %p5; ; SM70-NEXT: cvt.u32.u16 %r9, %rs1; ; SM70-NEXT: shl.b32 %r10, %r9, 16; ; SM70-NEXT: cvt.u32.u16 %r11, %rs3; ; SM70-NEXT: shl.b32 %r12, %r11, 16; ; SM70-NEXT: setp.gt.f32 %p6, %r12, %r10; -; SM70-NEXT: selp.b16 %rs14, %rs3, %rs1, %p6; +; SM70-NEXT: selp.b16 %rs12, %rs3, %rs1, %p6; ; SM70-NEXT: setp.nan.f32 %p7, %r12, %r10; -; SM70-NEXT: selp.b16 %rs15, 0x7FC0, %rs14, %p7; -; SM70-NEXT: setp.eq.s16 %p8, %rs3, 0; -; SM70-NEXT: selp.b16 %rs16, %rs3, %rs15, %p8; -; SM70-NEXT: setp.eq.s16 %p9, %rs1, 0; -; SM70-NEXT: selp.b16 %rs17, %rs1, %rs16, %p9; -; SM70-NEXT: cvt.u32.u16 %r13, %rs15; +; SM70-NEXT: selp.b16 %rs13, 0x7FC0, %rs12, %p7; +; SM70-NEXT: cvt.u32.u16 %r13, %rs13; ; SM70-NEXT: shl.b32 %r14, %r13, 16; -; SM70-NEXT: setp.eq.f32 %p10, %r14, 0f00000000; -; SM70-NEXT: selp.b16 %rs18, %rs17, %rs15, %p10; -; SM70-NEXT: mov.b32 %r15, {%rs18, %rs13}; +; SM70-NEXT: setp.eq.f32 %p8, %r14, 0f00000000; +; SM70-NEXT: selp.b16 %rs14, %rs11, %rs13, %p8; +; SM70-NEXT: mov.b32 %r15, {%rs14, %rs10}; ; SM70-NEXT: st.param.b32 [func_retval0], %r15; ; SM70-NEXT: ret; ; diff --git a/llvm/test/CodeGen/NVPTX/math-intrins.ll b/llvm/test/CodeGen/NVPTX/math-intrins.ll index 441fdec7ce5c0..d84d725bf72cf 100644 --- a/llvm/test/CodeGen/NVPTX/math-intrins.ll +++ b/llvm/test/CodeGen/NVPTX/math-intrins.ll @@ -612,27 +612,25 @@ define <2 x half> @minnum_v2half(<2 x half> %a, <2 x half> %b) { define half @minimum_half(half %a, half %b) { ; CHECK-NOF16-LABEL: minimum_half( ; CHECK-NOF16: { -; CHECK-NOF16-NEXT: .reg .pred %p<6>; -; CHECK-NOF16-NEXT: .reg .b16 %rs<8>; +; CHECK-NOF16-NEXT: .reg .pred %p<5>; +; CHECK-NOF16-NEXT: .reg .b16 %rs<7>; ; CHECK-NOF16-NEXT: .reg .b32 %r<4>; ; CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: ; CHECK-NOF16-NEXT: ld.param.b16 %rs1, [minimum_half_param_0]; +; CHECK-NOF16-NEXT: setp.ne.s16 %p1, %rs1, 0; ; CHECK-NOF16-NEXT: ld.param.b16 %rs2, [minimum_half_param_1]; +; CHECK-NOF16-NEXT: selp.b16 %rs3, %rs1, %rs2, %p1; ; CHECK-NOF16-NEXT: cvt.f32.f16 %r1, %rs2; ; CHECK-NOF16-NEXT: cvt.f32.f16 %r2, %rs1; -; CHECK-NOF16-NEXT: setp.lt.f32 %p1, %r2, %r1; -; CHECK-NOF16-NEXT: selp.b16 %rs3, %rs1, %rs2, %p1; -; CHECK-NOF16-NEXT: setp.nan.f32 %p2, %r2, %r1; -; CHECK-NOF16-NEXT: selp.b16 %rs4, 0x7E00, %rs3, %p2; -; CHECK-NOF16-NEXT: setp.ne.s16 %p3, %rs1, 0; -; CHECK-NOF16-NEXT: selp.b16 %rs5, %rs1, %rs4, %p3; -; CHECK-NOF16-NEXT: setp.ne.s16 %p4, %rs2, 0; -; CHECK-NOF16-NEXT: selp.b16 %rs6, %rs2, %rs5, %p4; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs4; -; CHECK-NOF16-NEXT: setp.eq.f32 %p5, %r3, 0f00000000; -; CHECK-NOF16-NEXT: selp.b16 %rs7, %rs6, %rs4, %p5; -; CHECK-NOF16-NEXT: st.param.b16 [func_retval0], %rs7; +; CHECK-NOF16-NEXT: setp.lt.f32 %p2, %r2, %r1; +; CHECK-NOF16-NEXT: selp.b16 %rs4, %rs1, %rs2, %p2; +; CHECK-NOF16-NEXT: setp.nan.f32 %p3, %r2, %r1; +; CHECK-NOF16-NEXT: selp.b16 %rs5, 0x7E00, %rs4, %p3; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs5; +; CHECK-NOF16-NEXT: setp.eq.f32 %p4, %r3, 0f00000000; +; CHECK-NOF16-NEXT: selp.b16 %rs6, %rs3, %rs5, %p4; +; CHECK-NOF16-NEXT: st.param.b16 [func_retval0], %rs6; ; CHECK-NOF16-NEXT: ret; ; ; CHECK-F16-LABEL: minimum_half( @@ -648,27 +646,25 @@ define half @minimum_half(half %a, half %b) { ; ; CHECK-SM80-NOF16-LABEL: minimum_half( ; CHECK-SM80-NOF16: { -; CHECK-SM80-NOF16-NEXT: .reg .pred %p<6>; -; CHECK-SM80-NOF16-NEXT: .reg .b16 %rs<8>; +; CHECK-SM80-NOF16-NEXT: .reg .pred %p<5>; +; CHECK-SM80-NOF16-NEXT: .reg .b16 %rs<7>; ; CHECK-SM80-NOF16-NEXT: .reg .b32 %r<4>; ; CHECK-SM80-NOF16-EMPTY: ; CHECK-SM80-NOF16-NEXT: // %bb.0: ; CHECK-SM80-NOF16-NEXT: ld.param.b16 %rs1, [minimum_half_param_0]; +; CHECK-SM80-NOF16-NEXT: setp.ne.s16 %p1, %rs1, 0; ; CHECK-SM80-NOF16-NEXT: ld.param.b16 %rs2, [minimum_half_param_1]; +; CHECK-SM80-NOF16-NEXT: selp.b16 %rs3, %rs1, %rs2, %p1; ; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r1, %rs2; ; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r2, %rs1; -; CHECK-SM80-NOF16-NEXT: setp.lt.f32 %p1, %r2, %r1; -; CHECK-SM80-NOF16-NEXT: selp.b16 %rs3, %rs1, %rs2, %p1; -; CHECK-SM80-NOF16-NEXT: setp.nan.f32 %p2, %r2, %r1; -; CHECK-SM80-NOF16-NEXT: selp.b16 %rs4, 0x7E00, %rs3, %p2; -; CHECK-SM80-NOF16-NEXT: setp.ne.s16 %p3, %rs1, 0; -; CHECK-SM80-NOF16-NEXT: selp.b16 %rs5, %rs1, %rs4, %p3; -; CHECK-SM80-NOF16-NEXT: setp.ne.s16 %p4, %rs2, 0; -; CHECK-SM80-NOF16-NEXT: selp.b16 %rs6, %rs2, %rs5, %p4; -; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r3, %rs4; -; CHECK-SM80-NOF16-NEXT: setp.eq.f32 %p5, %r3, 0f00000000; -; CHECK-SM80-NOF16-NEXT: selp.b16 %rs7, %rs6, %rs4, %p5; -; CHECK-SM80-NOF16-NEXT: st.param.b16 [func_retval0], %rs7; +; CHECK-SM80-NOF16-NEXT: setp.lt.f32 %p2, %r2, %r1; +; CHECK-SM80-NOF16-NEXT: selp.b16 %rs4, %rs1, %rs2, %p2; +; CHECK-SM80-NOF16-NEXT: setp.nan.f32 %p3, %r2, %r1; +; CHECK-SM80-NOF16-NEXT: selp.b16 %rs5, 0x7E00, %rs4, %p3; +; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r3, %rs5; +; CHECK-SM80-NOF16-NEXT: setp.eq.f32 %p4, %r3, 0f00000000; +; CHECK-SM80-NOF16-NEXT: selp.b16 %rs6, %rs3, %rs5, %p4; +; CHECK-SM80-NOF16-NEXT: st.param.b16 [func_retval0], %rs6; ; CHECK-SM80-NOF16-NEXT: ret; %x = call half @llvm.minimum.f16(half %a, half %b) ret half %x @@ -677,22 +673,20 @@ define half @minimum_half(half %a, half %b) { define float @minimum_float(float %a, float %b) { ; CHECK-NOF16-LABEL: minimum_float( ; CHECK-NOF16: { -; CHECK-NOF16-NEXT: .reg .pred %p<5>; -; CHECK-NOF16-NEXT: .reg .b32 %r<8>; +; CHECK-NOF16-NEXT: .reg .pred %p<4>; +; CHECK-NOF16-NEXT: .reg .b32 %r<7>; ; CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: ; CHECK-NOF16-NEXT: ld.param.b32 %r1, [minimum_float_param_0]; +; CHECK-NOF16-NEXT: setp.ne.s32 %p1, %r1, 0; ; CHECK-NOF16-NEXT: ld.param.b32 %r2, [minimum_float_param_1]; -; CHECK-NOF16-NEXT: setp.nan.f32 %p1, %r1, %r2; -; CHECK-NOF16-NEXT: min.f32 %r3, %r1, %r2; -; CHECK-NOF16-NEXT: selp.f32 %r4, 0f7FC00000, %r3, %p1; -; CHECK-NOF16-NEXT: setp.ne.s32 %p2, %r1, 0; -; CHECK-NOF16-NEXT: selp.f32 %r5, %r1, %r4, %p2; -; CHECK-NOF16-NEXT: setp.ne.s32 %p3, %r2, 0; -; CHECK-NOF16-NEXT: selp.f32 %r6, %r2, %r5, %p3; -; CHECK-NOF16-NEXT: setp.eq.f32 %p4, %r4, 0f00000000; -; CHECK-NOF16-NEXT: selp.f32 %r7, %r6, %r4, %p4; -; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r7; +; CHECK-NOF16-NEXT: selp.f32 %r3, %r1, %r2, %p1; +; CHECK-NOF16-NEXT: setp.nan.f32 %p2, %r1, %r2; +; CHECK-NOF16-NEXT: min.f32 %r4, %r1, %r2; +; CHECK-NOF16-NEXT: selp.f32 %r5, 0f7FC00000, %r4, %p2; +; CHECK-NOF16-NEXT: setp.eq.f32 %p3, %r5, 0f00000000; +; CHECK-NOF16-NEXT: selp.f32 %r6, %r3, %r5, %p3; +; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r6; ; CHECK-NOF16-NEXT: ret; ; ; CHECK-F16-LABEL: minimum_float( @@ -728,13 +722,13 @@ define float @minimum_imm1(float %a) { ; CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: ; CHECK-NOF16-NEXT: ld.param.b32 %r1, [minimum_imm1_param_0]; -; CHECK-NOF16-NEXT: setp.nan.f32 %p1, %r1, %r1; -; CHECK-NOF16-NEXT: min.f32 %r2, %r1, 0f00000000; -; CHECK-NOF16-NEXT: selp.f32 %r3, 0f7FC00000, %r2, %p1; -; CHECK-NOF16-NEXT: setp.ne.s32 %p2, %r1, 0; -; CHECK-NOF16-NEXT: selp.f32 %r4, %r1, %r3, %p2; -; CHECK-NOF16-NEXT: setp.eq.f32 %p3, %r3, 0f00000000; -; CHECK-NOF16-NEXT: selp.f32 %r5, %r4, %r3, %p3; +; CHECK-NOF16-NEXT: setp.ne.s32 %p1, %r1, 0; +; CHECK-NOF16-NEXT: selp.f32 %r2, %r1, 0f00000000, %p1; +; CHECK-NOF16-NEXT: setp.nan.f32 %p2, %r1, %r1; +; CHECK-NOF16-NEXT: min.f32 %r3, %r1, 0f00000000; +; CHECK-NOF16-NEXT: selp.f32 %r4, 0f7FC00000, %r3, %p2; +; CHECK-NOF16-NEXT: setp.eq.f32 %p3, %r4, 0f00000000; +; CHECK-NOF16-NEXT: selp.f32 %r5, %r2, %r4, %p3; ; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r5; ; CHECK-NOF16-NEXT: ret; ; @@ -769,13 +763,13 @@ define float @minimum_imm2(float %a) { ; CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: ; CHECK-NOF16-NEXT: ld.param.b32 %r1, [minimum_imm2_param_0]; -; CHECK-NOF16-NEXT: setp.nan.f32 %p1, %r1, %r1; -; CHECK-NOF16-NEXT: min.f32 %r2, %r1, 0f00000000; -; CHECK-NOF16-NEXT: selp.f32 %r3, 0f7FC00000, %r2, %p1; -; CHECK-NOF16-NEXT: setp.ne.s32 %p2, %r1, 0; -; CHECK-NOF16-NEXT: selp.f32 %r4, %r1, %r3, %p2; -; CHECK-NOF16-NEXT: setp.eq.f32 %p3, %r3, 0f00000000; -; CHECK-NOF16-NEXT: selp.f32 %r5, %r4, %r3, %p3; +; CHECK-NOF16-NEXT: setp.ne.s32 %p1, %r1, 0; +; CHECK-NOF16-NEXT: selp.f32 %r2, %r1, 0f00000000, %p1; +; CHECK-NOF16-NEXT: setp.nan.f32 %p2, %r1, %r1; +; CHECK-NOF16-NEXT: min.f32 %r3, %r1, 0f00000000; +; CHECK-NOF16-NEXT: selp.f32 %r4, 0f7FC00000, %r3, %p2; +; CHECK-NOF16-NEXT: setp.eq.f32 %p3, %r4, 0f00000000; +; CHECK-NOF16-NEXT: selp.f32 %r5, %r2, %r4, %p3; ; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r5; ; CHECK-NOF16-NEXT: ret; ; @@ -805,22 +799,20 @@ define float @minimum_imm2(float %a) { define float @minimum_float_ftz(float %a, float %b) #1 { ; CHECK-NOF16-LABEL: minimum_float_ftz( ; CHECK-NOF16: { -; CHECK-NOF16-NEXT: .reg .pred %p<5>; -; CHECK-NOF16-NEXT: .reg .b32 %r<8>; +; CHECK-NOF16-NEXT: .reg .pred %p<4>; +; CHECK-NOF16-NEXT: .reg .b32 %r<7>; ; CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: ; CHECK-NOF16-NEXT: ld.param.b32 %r1, [minimum_float_ftz_param_0]; +; CHECK-NOF16-NEXT: setp.ne.s32 %p1, %r1, 0; ; CHECK-NOF16-NEXT: ld.param.b32 %r2, [minimum_float_ftz_param_1]; -; CHECK-NOF16-NEXT: setp.nan.ftz.f32 %p1, %r1, %r2; -; CHECK-NOF16-NEXT: min.ftz.f32 %r3, %r1, %r2; -; CHECK-NOF16-NEXT: selp.f32 %r4, 0f7FC00000, %r3, %p1; -; CHECK-NOF16-NEXT: setp.ne.s32 %p2, %r1, 0; -; CHECK-NOF16-NEXT: selp.f32 %r5, %r1, %r4, %p2; -; CHECK-NOF16-NEXT: setp.ne.s32 %p3, %r2, 0; -; CHECK-NOF16-NEXT: selp.f32 %r6, %r2, %r5, %p3; -; CHECK-NOF16-NEXT: setp.eq.ftz.f32 %p4, %r4, 0f00000000; -; CHECK-NOF16-NEXT: selp.f32 %r7, %r6, %r4, %p4; -; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r7; +; CHECK-NOF16-NEXT: selp.f32 %r3, %r1, %r2, %p1; +; CHECK-NOF16-NEXT: setp.nan.ftz.f32 %p2, %r1, %r2; +; CHECK-NOF16-NEXT: min.ftz.f32 %r4, %r1, %r2; +; CHECK-NOF16-NEXT: selp.f32 %r5, 0f7FC00000, %r4, %p2; +; CHECK-NOF16-NEXT: setp.eq.ftz.f32 %p3, %r5, 0f00000000; +; CHECK-NOF16-NEXT: selp.f32 %r6, %r3, %r5, %p3; +; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r6; ; CHECK-NOF16-NEXT: ret; ; ; CHECK-F16-LABEL: minimum_float_ftz( @@ -851,22 +843,20 @@ define float @minimum_float_ftz(float %a, float %b) #1 { define double @minimum_double(double %a, double %b) { ; CHECK-LABEL: minimum_double( ; CHECK: { -; CHECK-NEXT: .reg .pred %p<5>; -; CHECK-NEXT: .reg .b64 %rd<8>; +; CHECK-NEXT: .reg .pred %p<4>; +; CHECK-NEXT: .reg .b64 %rd<7>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [minimum_double_param_0]; +; CHECK-NEXT: setp.ne.s64 %p1, %rd1, 0; ; CHECK-NEXT: ld.param.b64 %rd2, [minimum_double_param_1]; -; CHECK-NEXT: setp.nan.f64 %p1, %rd1, %rd2; -; CHECK-NEXT: min.f64 %rd3, %rd1, %rd2; -; CHECK-NEXT: selp.f64 %rd4, 0d7FF8000000000000, %rd3, %p1; -; CHECK-NEXT: setp.ne.s64 %p2, %rd1, 0; -; CHECK-NEXT: selp.f64 %rd5, %rd1, %rd4, %p2; -; CHECK-NEXT: setp.ne.s64 %p3, %rd2, 0; -; CHECK-NEXT: selp.f64 %rd6, %rd2, %rd5, %p3; -; CHECK-NEXT: setp.eq.f64 %p4, %rd4, 0d0000000000000000; -; CHECK-NEXT: selp.f64 %rd7, %rd6, %rd4, %p4; -; CHECK-NEXT: st.param.b64 [func_retval0], %rd7; +; CHECK-NEXT: selp.f64 %rd3, %rd1, %rd2, %p1; +; CHECK-NEXT: setp.nan.f64 %p2, %rd1, %rd2; +; CHECK-NEXT: min.f64 %rd4, %rd1, %rd2; +; CHECK-NEXT: selp.f64 %rd5, 0d7FF8000000000000, %rd4, %p2; +; CHECK-NEXT: setp.eq.f64 %p3, %rd5, 0d0000000000000000; +; CHECK-NEXT: selp.f64 %rd6, %rd3, %rd5, %p3; +; CHECK-NEXT: st.param.b64 [func_retval0], %rd6; ; CHECK-NEXT: ret; %x = call double @llvm.minimum.f64(double %a, double %b) ret double %x @@ -875,42 +865,38 @@ define double @minimum_double(double %a, double %b) { define <2 x half> @minimum_v2half(<2 x half> %a, <2 x half> %b) { ; CHECK-NOF16-LABEL: minimum_v2half( ; CHECK-NOF16: { -; CHECK-NOF16-NEXT: .reg .pred %p<11>; -; CHECK-NOF16-NEXT: .reg .b16 %rs<19>; +; CHECK-NOF16-NEXT: .reg .pred %p<9>; +; CHECK-NOF16-NEXT: .reg .b16 %rs<15>; ; CHECK-NOF16-NEXT: .reg .b32 %r<10>; ; CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: ; CHECK-NOF16-NEXT: ld.param.b32 %r1, [minimum_v2half_param_0]; ; CHECK-NOF16-NEXT: ld.param.b32 %r2, [minimum_v2half_param_1]; ; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r2; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs2; ; CHECK-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r1; +; CHECK-NOF16-NEXT: setp.ne.s16 %p1, %rs4, 0; +; CHECK-NOF16-NEXT: selp.b16 %rs7, %rs4, %rs2, %p1; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs2; ; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs4; -; CHECK-NOF16-NEXT: setp.lt.f32 %p1, %r4, %r3; -; CHECK-NOF16-NEXT: selp.b16 %rs5, %rs4, %rs2, %p1; -; CHECK-NOF16-NEXT: setp.nan.f32 %p2, %r4, %r3; -; CHECK-NOF16-NEXT: selp.b16 %rs6, 0x7E00, %rs5, %p2; -; CHECK-NOF16-NEXT: setp.ne.s16 %p3, %rs4, 0; -; CHECK-NOF16-NEXT: selp.b16 %rs9, %rs4, %rs6, %p3; -; CHECK-NOF16-NEXT: setp.ne.s16 %p4, %rs2, 0; -; CHECK-NOF16-NEXT: selp.b16 %rs12, %rs2, %rs9, %p4; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs6; -; CHECK-NOF16-NEXT: setp.eq.f32 %p5, %r5, 0f00000000; -; CHECK-NOF16-NEXT: selp.b16 %rs13, %rs12, %rs6, %p5; +; CHECK-NOF16-NEXT: setp.lt.f32 %p2, %r4, %r3; +; CHECK-NOF16-NEXT: selp.b16 %rs8, %rs4, %rs2, %p2; +; CHECK-NOF16-NEXT: setp.nan.f32 %p3, %r4, %r3; +; CHECK-NOF16-NEXT: selp.b16 %rs9, 0x7E00, %rs8, %p3; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs9; +; CHECK-NOF16-NEXT: setp.eq.f32 %p4, %r5, 0f00000000; +; CHECK-NOF16-NEXT: selp.b16 %rs10, %rs7, %rs9, %p4; +; CHECK-NOF16-NEXT: setp.ne.s16 %p5, %rs3, 0; +; CHECK-NOF16-NEXT: selp.b16 %rs11, %rs3, %rs1, %p5; ; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs1; ; CHECK-NOF16-NEXT: cvt.f32.f16 %r7, %rs3; ; CHECK-NOF16-NEXT: setp.lt.f32 %p6, %r7, %r6; -; CHECK-NOF16-NEXT: selp.b16 %rs14, %rs3, %rs1, %p6; +; CHECK-NOF16-NEXT: selp.b16 %rs12, %rs3, %rs1, %p6; ; CHECK-NOF16-NEXT: setp.nan.f32 %p7, %r7, %r6; -; CHECK-NOF16-NEXT: selp.b16 %rs15, 0x7E00, %rs14, %p7; -; CHECK-NOF16-NEXT: setp.ne.s16 %p8, %rs3, 0; -; CHECK-NOF16-NEXT: selp.b16 %rs16, %rs3, %rs15, %p8; -; CHECK-NOF16-NEXT: setp.ne.s16 %p9, %rs1, 0; -; CHECK-NOF16-NEXT: selp.b16 %rs17, %rs1, %rs16, %p9; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r8, %rs15; -; CHECK-NOF16-NEXT: setp.eq.f32 %p10, %r8, 0f00000000; -; CHECK-NOF16-NEXT: selp.b16 %rs18, %rs17, %rs15, %p10; -; CHECK-NOF16-NEXT: mov.b32 %r9, {%rs18, %rs13}; +; CHECK-NOF16-NEXT: selp.b16 %rs13, 0x7E00, %rs12, %p7; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r8, %rs13; +; CHECK-NOF16-NEXT: setp.eq.f32 %p8, %r8, 0f00000000; +; CHECK-NOF16-NEXT: selp.b16 %rs14, %rs11, %rs13, %p8; +; CHECK-NOF16-NEXT: mov.b32 %r9, {%rs14, %rs10}; ; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r9; ; CHECK-NOF16-NEXT: ret; ; @@ -927,42 +913,38 @@ define <2 x half> @minimum_v2half(<2 x half> %a, <2 x half> %b) { ; ; CHECK-SM80-NOF16-LABEL: minimum_v2half( ; CHECK-SM80-NOF16: { -; CHECK-SM80-NOF16-NEXT: .reg .pred %p<11>; -; CHECK-SM80-NOF16-NEXT: .reg .b16 %rs<19>; +; CHECK-SM80-NOF16-NEXT: .reg .pred %p<9>; +; CHECK-SM80-NOF16-NEXT: .reg .b16 %rs<15>; ; CHECK-SM80-NOF16-NEXT: .reg .b32 %r<10>; ; CHECK-SM80-NOF16-EMPTY: ; CHECK-SM80-NOF16-NEXT: // %bb.0: ; CHECK-SM80-NOF16-NEXT: ld.param.b32 %r1, [minimum_v2half_param_0]; ; CHECK-SM80-NOF16-NEXT: ld.param.b32 %r2, [minimum_v2half_param_1]; ; CHECK-SM80-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r2; -; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r3, %rs2; ; CHECK-SM80-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r1; +; CHECK-SM80-NOF16-NEXT: setp.ne.s16 %p1, %rs4, 0; +; CHECK-SM80-NOF16-NEXT: selp.b16 %rs7, %rs4, %rs2, %p1; +; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r3, %rs2; ; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r4, %rs4; -; CHECK-SM80-NOF16-NEXT: setp.lt.f32 %p1, %r4, %r3; -; CHECK-SM80-NOF16-NEXT: selp.b16 %rs5, %rs4, %rs2, %p1; -; CHECK-SM80-NOF16-NEXT: setp.nan.f32 %p2, %r4, %r3; -; CHECK-SM80-NOF16-NEXT: selp.b16 %rs6, 0x7E00, %rs5, %p2; -; CHECK-SM80-NOF16-NEXT: setp.ne.s16 %p3, %rs4, 0; -; CHECK-SM80-NOF16-NEXT: selp.b16 %rs9, %rs4, %rs6, %p3; -; CHECK-SM80-NOF16-NEXT: setp.ne.s16 %p4, %rs2, 0; -; CHECK-SM80-NOF16-NEXT: selp.b16 %rs12, %rs2, %rs9, %p4; -; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r5, %rs6; -; CHECK-SM80-NOF16-NEXT: setp.eq.f32 %p5, %r5, 0f00000000; -; CHECK-SM80-NOF16-NEXT: selp.b16 %rs13, %rs12, %rs6, %p5; +; CHECK-SM80-NOF16-NEXT: setp.lt.f32 %p2, %r4, %r3; +; CHECK-SM80-NOF16-NEXT: selp.b16 %rs8, %rs4, %rs2, %p2; +; CHECK-SM80-NOF16-NEXT: setp.nan.f32 %p3, %r4, %r3; +; CHECK-SM80-NOF16-NEXT: selp.b16 %rs9, 0x7E00, %rs8, %p3; +; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r5, %rs9; +; CHECK-SM80-NOF16-NEXT: setp.eq.f32 %p4, %r5, 0f00000000; +; CHECK-SM80-NOF16-NEXT: selp.b16 %rs10, %rs7, %rs9, %p4; +; CHECK-SM80-NOF16-NEXT: setp.ne.s16 %p5, %rs3, 0; +; CHECK-SM80-NOF16-NEXT: selp.b16 %rs11, %rs3, %rs1, %p5; ; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r6, %rs1; ; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r7, %rs3; ; CHECK-SM80-NOF16-NEXT: setp.lt.f32 %p6, %r7, %r6; -; CHECK-SM80-NOF16-NEXT: selp.b16 %rs14, %rs3, %rs1, %p6; +; CHECK-SM80-NOF16-NEXT: selp.b16 %rs12, %rs3, %rs1, %p6; ; CHECK-SM80-NOF16-NEXT: setp.nan.f32 %p7, %r7, %r6; -; CHECK-SM80-NOF16-NEXT: selp.b16 %rs15, 0x7E00, %rs14, %p7; -; CHECK-SM80-NOF16-NEXT: setp.ne.s16 %p8, %rs3, 0; -; CHECK-SM80-NOF16-NEXT: selp.b16 %rs16, %rs3, %rs15, %p8; -; CHECK-SM80-NOF16-NEXT: setp.ne.s16 %p9, %rs1, 0; -; CHECK-SM80-NOF16-NEXT: selp.b16 %rs17, %rs1, %rs16, %p9; -; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r8, %rs15; -; CHECK-SM80-NOF16-NEXT: setp.eq.f32 %p10, %r8, 0f00000000; -; CHECK-SM80-NOF16-NEXT: selp.b16 %rs18, %rs17, %rs15, %p10; -; CHECK-SM80-NOF16-NEXT: mov.b32 %r9, {%rs18, %rs13}; +; CHECK-SM80-NOF16-NEXT: selp.b16 %rs13, 0x7E00, %rs12, %p7; +; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r8, %rs13; +; CHECK-SM80-NOF16-NEXT: setp.eq.f32 %p8, %r8, 0f00000000; +; CHECK-SM80-NOF16-NEXT: selp.b16 %rs14, %rs11, %rs13, %p8; +; CHECK-SM80-NOF16-NEXT: mov.b32 %r9, {%rs14, %rs10}; ; CHECK-SM80-NOF16-NEXT: st.param.b32 [func_retval0], %r9; ; CHECK-SM80-NOF16-NEXT: ret; %x = call <2 x half> @llvm.minimum.v2f16(<2 x half> %a, <2 x half> %b) @@ -1153,27 +1135,25 @@ define <2 x half> @maxnum_v2half(<2 x half> %a, <2 x half> %b) { define half @maximum_half(half %a, half %b) { ; CHECK-NOF16-LABEL: maximum_half( ; CHECK-NOF16: { -; CHECK-NOF16-NEXT: .reg .pred %p<6>; -; CHECK-NOF16-NEXT: .reg .b16 %rs<8>; +; CHECK-NOF16-NEXT: .reg .pred %p<5>; +; CHECK-NOF16-NEXT: .reg .b16 %rs<7>; ; CHECK-NOF16-NEXT: .reg .b32 %r<4>; ; CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: ; CHECK-NOF16-NEXT: ld.param.b16 %rs1, [maximum_half_param_0]; +; CHECK-NOF16-NEXT: setp.eq.s16 %p1, %rs1, 0; ; CHECK-NOF16-NEXT: ld.param.b16 %rs2, [maximum_half_param_1]; +; CHECK-NOF16-NEXT: selp.b16 %rs3, %rs1, %rs2, %p1; ; CHECK-NOF16-NEXT: cvt.f32.f16 %r1, %rs2; ; CHECK-NOF16-NEXT: cvt.f32.f16 %r2, %rs1; -; CHECK-NOF16-NEXT: setp.gt.f32 %p1, %r2, %r1; -; CHECK-NOF16-NEXT: selp.b16 %rs3, %rs1, %rs2, %p1; -; CHECK-NOF16-NEXT: setp.nan.f32 %p2, %r2, %r1; -; CHECK-NOF16-NEXT: selp.b16 %rs4, 0x7E00, %rs3, %p2; -; CHECK-NOF16-NEXT: setp.eq.s16 %p3, %rs1, 0; -; CHECK-NOF16-NEXT: selp.b16 %rs5, %rs1, %rs4, %p3; -; CHECK-NOF16-NEXT: setp.eq.s16 %p4, %rs2, 0; -; CHECK-NOF16-NEXT: selp.b16 %rs6, %rs2, %rs5, %p4; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs4; -; CHECK-NOF16-NEXT: setp.eq.f32 %p5, %r3, 0f00000000; -; CHECK-NOF16-NEXT: selp.b16 %rs7, %rs6, %rs4, %p5; -; CHECK-NOF16-NEXT: st.param.b16 [func_retval0], %rs7; +; CHECK-NOF16-NEXT: setp.gt.f32 %p2, %r2, %r1; +; CHECK-NOF16-NEXT: selp.b16 %rs4, %rs1, %rs2, %p2; +; CHECK-NOF16-NEXT: setp.nan.f32 %p3, %r2, %r1; +; CHECK-NOF16-NEXT: selp.b16 %rs5, 0x7E00, %rs4, %p3; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs5; +; CHECK-NOF16-NEXT: setp.eq.f32 %p4, %r3, 0f00000000; +; CHECK-NOF16-NEXT: selp.b16 %rs6, %rs3, %rs5, %p4; +; CHECK-NOF16-NEXT: st.param.b16 [func_retval0], %rs6; ; CHECK-NOF16-NEXT: ret; ; ; CHECK-F16-LABEL: maximum_half( @@ -1189,27 +1169,25 @@ define half @maximum_half(half %a, half %b) { ; ; CHECK-SM80-NOF16-LABEL: maximum_half( ; CHECK-SM80-NOF16: { -; CHECK-SM80-NOF16-NEXT: .reg .pred %p<6>; -; CHECK-SM80-NOF16-NEXT: .reg .b16 %rs<8>; +; CHECK-SM80-NOF16-NEXT: .reg .pred %p<5>; +; CHECK-SM80-NOF16-NEXT: .reg .b16 %rs<7>; ; CHECK-SM80-NOF16-NEXT: .reg .b32 %r<4>; ; CHECK-SM80-NOF16-EMPTY: ; CHECK-SM80-NOF16-NEXT: // %bb.0: ; CHECK-SM80-NOF16-NEXT: ld.param.b16 %rs1, [maximum_half_param_0]; +; CHECK-SM80-NOF16-NEXT: setp.eq.s16 %p1, %rs1, 0; ; CHECK-SM80-NOF16-NEXT: ld.param.b16 %rs2, [maximum_half_param_1]; +; CHECK-SM80-NOF16-NEXT: selp.b16 %rs3, %rs1, %rs2, %p1; ; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r1, %rs2; ; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r2, %rs1; -; CHECK-SM80-NOF16-NEXT: setp.gt.f32 %p1, %r2, %r1; -; CHECK-SM80-NOF16-NEXT: selp.b16 %rs3, %rs1, %rs2, %p1; -; CHECK-SM80-NOF16-NEXT: setp.nan.f32 %p2, %r2, %r1; -; CHECK-SM80-NOF16-NEXT: selp.b16 %rs4, 0x7E00, %rs3, %p2; -; CHECK-SM80-NOF16-NEXT: setp.eq.s16 %p3, %rs1, 0; -; CHECK-SM80-NOF16-NEXT: selp.b16 %rs5, %rs1, %rs4, %p3; -; CHECK-SM80-NOF16-NEXT: setp.eq.s16 %p4, %rs2, 0; -; CHECK-SM80-NOF16-NEXT: selp.b16 %rs6, %rs2, %rs5, %p4; -; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r3, %rs4; -; CHECK-SM80-NOF16-NEXT: setp.eq.f32 %p5, %r3, 0f00000000; -; CHECK-SM80-NOF16-NEXT: selp.b16 %rs7, %rs6, %rs4, %p5; -; CHECK-SM80-NOF16-NEXT: st.param.b16 [func_retval0], %rs7; +; CHECK-SM80-NOF16-NEXT: setp.gt.f32 %p2, %r2, %r1; +; CHECK-SM80-NOF16-NEXT: selp.b16 %rs4, %rs1, %rs2, %p2; +; CHECK-SM80-NOF16-NEXT: setp.nan.f32 %p3, %r2, %r1; +; CHECK-SM80-NOF16-NEXT: selp.b16 %rs5, 0x7E00, %rs4, %p3; +; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r3, %rs5; +; CHECK-SM80-NOF16-NEXT: setp.eq.f32 %p4, %r3, 0f00000000; +; CHECK-SM80-NOF16-NEXT: selp.b16 %rs6, %rs3, %rs5, %p4; +; CHECK-SM80-NOF16-NEXT: st.param.b16 [func_retval0], %rs6; ; CHECK-SM80-NOF16-NEXT: ret; %x = call half @llvm.maximum.f16(half %a, half %b) ret half %x @@ -1218,17 +1196,19 @@ define half @maximum_half(half %a, half %b) { define float @maximum_imm1(float %a) { ; CHECK-NOF16-LABEL: maximum_imm1( ; CHECK-NOF16: { -; CHECK-NOF16-NEXT: .reg .pred %p<3>; -; CHECK-NOF16-NEXT: .reg .b32 %r<5>; +; CHECK-NOF16-NEXT: .reg .pred %p<4>; +; CHECK-NOF16-NEXT: .reg .b32 %r<6>; ; CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: ; CHECK-NOF16-NEXT: ld.param.b32 %r1, [maximum_imm1_param_0]; -; CHECK-NOF16-NEXT: setp.nan.f32 %p1, %r1, %r1; -; CHECK-NOF16-NEXT: max.f32 %r2, %r1, 0f00000000; -; CHECK-NOF16-NEXT: selp.f32 %r3, 0f7FC00000, %r2, %p1; -; CHECK-NOF16-NEXT: setp.eq.f32 %p2, %r3, 0f00000000; -; CHECK-NOF16-NEXT: selp.f32 %r4, 0f00000000, %r3, %p2; -; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r4; +; CHECK-NOF16-NEXT: setp.eq.s32 %p1, %r1, 0; +; CHECK-NOF16-NEXT: selp.f32 %r2, %r1, 0f00000000, %p1; +; CHECK-NOF16-NEXT: setp.nan.f32 %p2, %r1, %r1; +; CHECK-NOF16-NEXT: max.f32 %r3, %r1, 0f00000000; +; CHECK-NOF16-NEXT: selp.f32 %r4, 0f7FC00000, %r3, %p2; +; CHECK-NOF16-NEXT: setp.eq.f32 %p3, %r4, 0f00000000; +; CHECK-NOF16-NEXT: selp.f32 %r5, %r2, %r4, %p3; +; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r5; ; CHECK-NOF16-NEXT: ret; ; ; CHECK-F16-LABEL: maximum_imm1( @@ -1257,17 +1237,19 @@ define float @maximum_imm1(float %a) { define float @maximum_imm2(float %a) { ; CHECK-NOF16-LABEL: maximum_imm2( ; CHECK-NOF16: { -; CHECK-NOF16-NEXT: .reg .pred %p<3>; -; CHECK-NOF16-NEXT: .reg .b32 %r<5>; +; CHECK-NOF16-NEXT: .reg .pred %p<4>; +; CHECK-NOF16-NEXT: .reg .b32 %r<6>; ; CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: ; CHECK-NOF16-NEXT: ld.param.b32 %r1, [maximum_imm2_param_0]; -; CHECK-NOF16-NEXT: setp.nan.f32 %p1, %r1, %r1; -; CHECK-NOF16-NEXT: max.f32 %r2, %r1, 0f00000000; -; CHECK-NOF16-NEXT: selp.f32 %r3, 0f7FC00000, %r2, %p1; -; CHECK-NOF16-NEXT: setp.eq.f32 %p2, %r3, 0f00000000; -; CHECK-NOF16-NEXT: selp.f32 %r4, 0f00000000, %r3, %p2; -; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r4; +; CHECK-NOF16-NEXT: setp.eq.s32 %p1, %r1, 0; +; CHECK-NOF16-NEXT: selp.f32 %r2, %r1, 0f00000000, %p1; +; CHECK-NOF16-NEXT: setp.nan.f32 %p2, %r1, %r1; +; CHECK-NOF16-NEXT: max.f32 %r3, %r1, 0f00000000; +; CHECK-NOF16-NEXT: selp.f32 %r4, 0f7FC00000, %r3, %p2; +; CHECK-NOF16-NEXT: setp.eq.f32 %p3, %r4, 0f00000000; +; CHECK-NOF16-NEXT: selp.f32 %r5, %r2, %r4, %p3; +; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r5; ; CHECK-NOF16-NEXT: ret; ; ; CHECK-F16-LABEL: maximum_imm2( @@ -1296,22 +1278,20 @@ define float @maximum_imm2(float %a) { define float @maximum_float(float %a, float %b) { ; CHECK-NOF16-LABEL: maximum_float( ; CHECK-NOF16: { -; CHECK-NOF16-NEXT: .reg .pred %p<5>; -; CHECK-NOF16-NEXT: .reg .b32 %r<8>; +; CHECK-NOF16-NEXT: .reg .pred %p<4>; +; CHECK-NOF16-NEXT: .reg .b32 %r<7>; ; CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: ; CHECK-NOF16-NEXT: ld.param.b32 %r1, [maximum_float_param_0]; +; CHECK-NOF16-NEXT: setp.eq.s32 %p1, %r1, 0; ; CHECK-NOF16-NEXT: ld.param.b32 %r2, [maximum_float_param_1]; -; CHECK-NOF16-NEXT: setp.nan.f32 %p1, %r1, %r2; -; CHECK-NOF16-NEXT: max.f32 %r3, %r1, %r2; -; CHECK-NOF16-NEXT: selp.f32 %r4, 0f7FC00000, %r3, %p1; -; CHECK-NOF16-NEXT: setp.eq.s32 %p2, %r1, 0; -; CHECK-NOF16-NEXT: selp.f32 %r5, %r1, %r4, %p2; -; CHECK-NOF16-NEXT: setp.eq.s32 %p3, %r2, 0; -; CHECK-NOF16-NEXT: selp.f32 %r6, %r2, %r5, %p3; -; CHECK-NOF16-NEXT: setp.eq.f32 %p4, %r4, 0f00000000; -; CHECK-NOF16-NEXT: selp.f32 %r7, %r6, %r4, %p4; -; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r7; +; CHECK-NOF16-NEXT: selp.f32 %r3, %r1, %r2, %p1; +; CHECK-NOF16-NEXT: setp.nan.f32 %p2, %r1, %r2; +; CHECK-NOF16-NEXT: max.f32 %r4, %r1, %r2; +; CHECK-NOF16-NEXT: selp.f32 %r5, 0f7FC00000, %r4, %p2; +; CHECK-NOF16-NEXT: setp.eq.f32 %p3, %r5, 0f00000000; +; CHECK-NOF16-NEXT: selp.f32 %r6, %r3, %r5, %p3; +; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r6; ; CHECK-NOF16-NEXT: ret; ; ; CHECK-F16-LABEL: maximum_float( @@ -1342,22 +1322,20 @@ define float @maximum_float(float %a, float %b) { define float @maximum_float_ftz(float %a, float %b) #1 { ; CHECK-NOF16-LABEL: maximum_float_ftz( ; CHECK-NOF16: { -; CHECK-NOF16-NEXT: .reg .pred %p<5>; -; CHECK-NOF16-NEXT: .reg .b32 %r<8>; +; CHECK-NOF16-NEXT: .reg .pred %p<4>; +; CHECK-NOF16-NEXT: .reg .b32 %r<7>; ; CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: ; CHECK-NOF16-NEXT: ld.param.b32 %r1, [maximum_float_ftz_param_0]; +; CHECK-NOF16-NEXT: setp.eq.s32 %p1, %r1, 0; ; CHECK-NOF16-NEXT: ld.param.b32 %r2, [maximum_float_ftz_param_1]; -; CHECK-NOF16-NEXT: setp.nan.ftz.f32 %p1, %r1, %r2; -; CHECK-NOF16-NEXT: max.ftz.f32 %r3, %r1, %r2; -; CHECK-NOF16-NEXT: selp.f32 %r4, 0f7FC00000, %r3, %p1; -; CHECK-NOF16-NEXT: setp.eq.s32 %p2, %r1, 0; -; CHECK-NOF16-NEXT: selp.f32 %r5, %r1, %r4, %p2; -; CHECK-NOF16-NEXT: setp.eq.s32 %p3, %r2, 0; -; CHECK-NOF16-NEXT: selp.f32 %r6, %r2, %r5, %p3; -; CHECK-NOF16-NEXT: setp.eq.ftz.f32 %p4, %r4, 0f00000000; -; CHECK-NOF16-NEXT: selp.f32 %r7, %r6, %r4, %p4; -; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r7; +; CHECK-NOF16-NEXT: selp.f32 %r3, %r1, %r2, %p1; +; CHECK-NOF16-NEXT: setp.nan.ftz.f32 %p2, %r1, %r2; +; CHECK-NOF16-NEXT: max.ftz.f32 %r4, %r1, %r2; +; CHECK-NOF16-NEXT: selp.f32 %r5, 0f7FC00000, %r4, %p2; +; CHECK-NOF16-NEXT: setp.eq.ftz.f32 %p3, %r5, 0f00000000; +; CHECK-NOF16-NEXT: selp.f32 %r6, %r3, %r5, %p3; +; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r6; ; CHECK-NOF16-NEXT: ret; ; ; CHECK-F16-LABEL: maximum_float_ftz( @@ -1388,22 +1366,20 @@ define float @maximum_float_ftz(float %a, float %b) #1 { define double @maximum_double(double %a, double %b) { ; CHECK-LABEL: maximum_double( ; CHECK: { -; CHECK-NEXT: .reg .pred %p<5>; -; CHECK-NEXT: .reg .b64 %rd<8>; +; CHECK-NEXT: .reg .pred %p<4>; +; CHECK-NEXT: .reg .b64 %rd<7>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [maximum_double_param_0]; +; CHECK-NEXT: setp.eq.s64 %p1, %rd1, 0; ; CHECK-NEXT: ld.param.b64 %rd2, [maximum_double_param_1]; -; CHECK-NEXT: setp.nan.f64 %p1, %rd1, %rd2; -; CHECK-NEXT: max.f64 %rd3, %rd1, %rd2; -; CHECK-NEXT: selp.f64 %rd4, 0d7FF8000000000000, %rd3, %p1; -; CHECK-NEXT: setp.eq.s64 %p2, %rd1, 0; -; CHECK-NEXT: selp.f64 %rd5, %rd1, %rd4, %p2; -; CHECK-NEXT: setp.eq.s64 %p3, %rd2, 0; -; CHECK-NEXT: selp.f64 %rd6, %rd2, %rd5, %p3; -; CHECK-NEXT: setp.eq.f64 %p4, %rd4, 0d0000000000000000; -; CHECK-NEXT: selp.f64 %rd7, %rd6, %rd4, %p4; -; CHECK-NEXT: st.param.b64 [func_retval0], %rd7; +; CHECK-NEXT: selp.f64 %rd3, %rd1, %rd2, %p1; +; CHECK-NEXT: setp.nan.f64 %p2, %rd1, %rd2; +; CHECK-NEXT: max.f64 %rd4, %rd1, %rd2; +; CHECK-NEXT: selp.f64 %rd5, 0d7FF8000000000000, %rd4, %p2; +; CHECK-NEXT: setp.eq.f64 %p3, %rd5, 0d0000000000000000; +; CHECK-NEXT: selp.f64 %rd6, %rd3, %rd5, %p3; +; CHECK-NEXT: st.param.b64 [func_retval0], %rd6; ; CHECK-NEXT: ret; %x = call double @llvm.maximum.f64(double %a, double %b) ret double %x @@ -1412,42 +1388,38 @@ define double @maximum_double(double %a, double %b) { define <2 x half> @maximum_v2half(<2 x half> %a, <2 x half> %b) { ; CHECK-NOF16-LABEL: maximum_v2half( ; CHECK-NOF16: { -; CHECK-NOF16-NEXT: .reg .pred %p<11>; -; CHECK-NOF16-NEXT: .reg .b16 %rs<19>; +; CHECK-NOF16-NEXT: .reg .pred %p<9>; +; CHECK-NOF16-NEXT: .reg .b16 %rs<15>; ; CHECK-NOF16-NEXT: .reg .b32 %r<10>; ; CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: ; CHECK-NOF16-NEXT: ld.param.b32 %r1, [maximum_v2half_param_0]; ; CHECK-NOF16-NEXT: ld.param.b32 %r2, [maximum_v2half_param_1]; ; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r2; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs2; ; CHECK-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r1; +; CHECK-NOF16-NEXT: setp.eq.s16 %p1, %rs4, 0; +; CHECK-NOF16-NEXT: selp.b16 %rs7, %rs4, %rs2, %p1; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs2; ; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs4; -; CHECK-NOF16-NEXT: setp.gt.f32 %p1, %r4, %r3; -; CHECK-NOF16-NEXT: selp.b16 %rs5, %rs4, %rs2, %p1; -; CHECK-NOF16-NEXT: setp.nan.f32 %p2, %r4, %r3; -; CHECK-NOF16-NEXT: selp.b16 %rs6, 0x7E00, %rs5, %p2; -; CHECK-NOF16-NEXT: setp.eq.s16 %p3, %rs4, 0; -; CHECK-NOF16-NEXT: selp.b16 %rs9, %rs4, %rs6, %p3; -; CHECK-NOF16-NEXT: setp.eq.s16 %p4, %rs2, 0; -; CHECK-NOF16-NEXT: selp.b16 %rs12, %rs2, %rs9, %p4; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs6; -; CHECK-NOF16-NEXT: setp.eq.f32 %p5, %r5, 0f00000000; -; CHECK-NOF16-NEXT: selp.b16 %rs13, %rs12, %rs6, %p5; +; CHECK-NOF16-NEXT: setp.gt.f32 %p2, %r4, %r3; +; CHECK-NOF16-NEXT: selp.b16 %rs8, %rs4, %rs2, %p2; +; CHECK-NOF16-NEXT: setp.nan.f32 %p3, %r4, %r3; +; CHECK-NOF16-NEXT: selp.b16 %rs9, 0x7E00, %rs8, %p3; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs9; +; CHECK-NOF16-NEXT: setp.eq.f32 %p4, %r5, 0f00000000; +; CHECK-NOF16-NEXT: selp.b16 %rs10, %rs7, %rs9, %p4; +; CHECK-NOF16-NEXT: setp.eq.s16 %p5, %rs3, 0; +; CHECK-NOF16-NEXT: selp.b16 %rs11, %rs3, %rs1, %p5; ; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs1; ; CHECK-NOF16-NEXT: cvt.f32.f16 %r7, %rs3; ; CHECK-NOF16-NEXT: setp.gt.f32 %p6, %r7, %r6; -; CHECK-NOF16-NEXT: selp.b16 %rs14, %rs3, %rs1, %p6; +; CHECK-NOF16-NEXT: selp.b16 %rs12, %rs3, %rs1, %p6; ; CHECK-NOF16-NEXT: setp.nan.f32 %p7, %r7, %r6; -; CHECK-NOF16-NEXT: selp.b16 %rs15, 0x7E00, %rs14, %p7; -; CHECK-NOF16-NEXT: setp.eq.s16 %p8, %rs3, 0; -; CHECK-NOF16-NEXT: selp.b16 %rs16, %rs3, %rs15, %p8; -; CHECK-NOF16-NEXT: setp.eq.s16 %p9, %rs1, 0; -; CHECK-NOF16-NEXT: selp.b16 %rs17, %rs1, %rs16, %p9; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r8, %rs15; -; CHECK-NOF16-NEXT: setp.eq.f32 %p10, %r8, 0f00000000; -; CHECK-NOF16-NEXT: selp.b16 %rs18, %rs17, %rs15, %p10; -; CHECK-NOF16-NEXT: mov.b32 %r9, {%rs18, %rs13}; +; CHECK-NOF16-NEXT: selp.b16 %rs13, 0x7E00, %rs12, %p7; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r8, %rs13; +; CHECK-NOF16-NEXT: setp.eq.f32 %p8, %r8, 0f00000000; +; CHECK-NOF16-NEXT: selp.b16 %rs14, %rs11, %rs13, %p8; +; CHECK-NOF16-NEXT: mov.b32 %r9, {%rs14, %rs10}; ; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r9; ; CHECK-NOF16-NEXT: ret; ; @@ -1464,42 +1436,38 @@ define <2 x half> @maximum_v2half(<2 x half> %a, <2 x half> %b) { ; ; CHECK-SM80-NOF16-LABEL: maximum_v2half( ; CHECK-SM80-NOF16: { -; CHECK-SM80-NOF16-NEXT: .reg .pred %p<11>; -; CHECK-SM80-NOF16-NEXT: .reg .b16 %rs<19>; +; CHECK-SM80-NOF16-NEXT: .reg .pred %p<9>; +; CHECK-SM80-NOF16-NEXT: .reg .b16 %rs<15>; ; CHECK-SM80-NOF16-NEXT: .reg .b32 %r<10>; ; CHECK-SM80-NOF16-EMPTY: ; CHECK-SM80-NOF16-NEXT: // %bb.0: ; CHECK-SM80-NOF16-NEXT: ld.param.b32 %r1, [maximum_v2half_param_0]; ; CHECK-SM80-NOF16-NEXT: ld.param.b32 %r2, [maximum_v2half_param_1]; ; CHECK-SM80-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r2; -; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r3, %rs2; ; CHECK-SM80-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r1; +; CHECK-SM80-NOF16-NEXT: setp.eq.s16 %p1, %rs4, 0; +; CHECK-SM80-NOF16-NEXT: selp.b16 %rs7, %rs4, %rs2, %p1; +; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r3, %rs2; ; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r4, %rs4; -; CHECK-SM80-NOF16-NEXT: setp.gt.f32 %p1, %r4, %r3; -; CHECK-SM80-NOF16-NEXT: selp.b16 %rs5, %rs4, %rs2, %p1; -; CHECK-SM80-NOF16-NEXT: setp.nan.f32 %p2, %r4, %r3; -; CHECK-SM80-NOF16-NEXT: selp.b16 %rs6, 0x7E00, %rs5, %p2; -; CHECK-SM80-NOF16-NEXT: setp.eq.s16 %p3, %rs4, 0; -; CHECK-SM80-NOF16-NEXT: selp.b16 %rs9, %rs4, %rs6, %p3; -; CHECK-SM80-NOF16-NEXT: setp.eq.s16 %p4, %rs2, 0; -; CHECK-SM80-NOF16-NEXT: selp.b16 %rs12, %rs2, %rs9, %p4; -; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r5, %rs6; -; CHECK-SM80-NOF16-NEXT: setp.eq.f32 %p5, %r5, 0f00000000; -; CHECK-SM80-NOF16-NEXT: selp.b16 %rs13, %rs12, %rs6, %p5; +; CHECK-SM80-NOF16-NEXT: setp.gt.f32 %p2, %r4, %r3; +; CHECK-SM80-NOF16-NEXT: selp.b16 %rs8, %rs4, %rs2, %p2; +; CHECK-SM80-NOF16-NEXT: setp.nan.f32 %p3, %r4, %r3; +; CHECK-SM80-NOF16-NEXT: selp.b16 %rs9, 0x7E00, %rs8, %p3; +; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r5, %rs9; +; CHECK-SM80-NOF16-NEXT: setp.eq.f32 %p4, %r5, 0f00000000; +; CHECK-SM80-NOF16-NEXT: selp.b16 %rs10, %rs7, %rs9, %p4; +; CHECK-SM80-NOF16-NEXT: setp.eq.s16 %p5, %rs3, 0; +; CHECK-SM80-NOF16-NEXT: selp.b16 %rs11, %rs3, %rs1, %p5; ; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r6, %rs1; ; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r7, %rs3; ; CHECK-SM80-NOF16-NEXT: setp.gt.f32 %p6, %r7, %r6; -; CHECK-SM80-NOF16-NEXT: selp.b16 %rs14, %rs3, %rs1, %p6; +; CHECK-SM80-NOF16-NEXT: selp.b16 %rs12, %rs3, %rs1, %p6; ; CHECK-SM80-NOF16-NEXT: setp.nan.f32 %p7, %r7, %r6; -; CHECK-SM80-NOF16-NEXT: selp.b16 %rs15, 0x7E00, %rs14, %p7; -; CHECK-SM80-NOF16-NEXT: setp.eq.s16 %p8, %rs3, 0; -; CHECK-SM80-NOF16-NEXT: selp.b16 %rs16, %rs3, %rs15, %p8; -; CHECK-SM80-NOF16-NEXT: setp.eq.s16 %p9, %rs1, 0; -; CHECK-SM80-NOF16-NEXT: selp.b16 %rs17, %rs1, %rs16, %p9; -; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r8, %rs15; -; CHECK-SM80-NOF16-NEXT: setp.eq.f32 %p10, %r8, 0f00000000; -; CHECK-SM80-NOF16-NEXT: selp.b16 %rs18, %rs17, %rs15, %p10; -; CHECK-SM80-NOF16-NEXT: mov.b32 %r9, {%rs18, %rs13}; +; CHECK-SM80-NOF16-NEXT: selp.b16 %rs13, 0x7E00, %rs12, %p7; +; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r8, %rs13; +; CHECK-SM80-NOF16-NEXT: setp.eq.f32 %p8, %r8, 0f00000000; +; CHECK-SM80-NOF16-NEXT: selp.b16 %rs14, %rs11, %rs13, %p8; +; CHECK-SM80-NOF16-NEXT: mov.b32 %r9, {%rs14, %rs10}; ; CHECK-SM80-NOF16-NEXT: st.param.b32 [func_retval0], %r9; ; CHECK-SM80-NOF16-NEXT: ret; %x = call <2 x half> @llvm.maximum.v2f16(<2 x half> %a, <2 x half> %b) diff --git a/llvm/test/CodeGen/PowerPC/fminimum-fmaximum-f128.ll b/llvm/test/CodeGen/PowerPC/fminimum-fmaximum-f128.ll index 48107c8f63727..33a3155ab62bb 100644 --- a/llvm/test/CodeGen/PowerPC/fminimum-fmaximum-f128.ll +++ b/llvm/test/CodeGen/PowerPC/fminimum-fmaximum-f128.ll @@ -5,41 +5,38 @@ define fp128 @f128_minimum(fp128 %a, fp128 %b) { ; CHECK-LABEL: f128_minimum: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: stxv 34, -16(1) -; CHECK-NEXT: stxv 35, -32(1) -; CHECK-NEXT: xscmpuqp 0, 2, 3 -; CHECK-NEXT: vmr 4, 2 -; CHECK-NEXT: blt 0, .LBB0_2 +; CHECK-NEXT: xxlor 0, 34, 34 +; CHECK-NEXT: lbz 3, -1(1) +; CHECK-NEXT: cmplwi 3, 0 +; CHECK-NEXT: beq 0, .LBB0_7 ; CHECK-NEXT: # %bb.1: # %entry -; CHECK-NEXT: vmr 4, 3 +; CHECK-NEXT: xscmpuqp 0, 2, 3 +; CHECK-NEXT: bge 0, .LBB0_8 ; CHECK-NEXT: .LBB0_2: # %entry ; CHECK-NEXT: bnu 0, .LBB0_4 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: .LBB0_3: ; CHECK-NEXT: addis 3, 2, .LCPI0_0@toc@ha ; CHECK-NEXT: addi 3, 3, .LCPI0_0@toc@l -; CHECK-NEXT: lxv 36, 0(3) +; CHECK-NEXT: lxv 34, 0(3) ; CHECK-NEXT: .LBB0_4: # %entry -; CHECK-NEXT: lbz 3, -1(1) -; CHECK-NEXT: cmplwi 3, 0 -; CHECK-NEXT: bne 0, .LBB0_6 +; CHECK-NEXT: addis 3, 2, .LCPI0_1@toc@ha +; CHECK-NEXT: addi 3, 3, .LCPI0_1@toc@l +; CHECK-NEXT: lxv 35, 0(3) +; CHECK-NEXT: xscmpuqp 0, 2, 3 +; CHECK-NEXT: beq 0, .LBB0_6 ; CHECK-NEXT: # %bb.5: # %entry -; CHECK-NEXT: vmr 2, 4 +; CHECK-NEXT: xxlor 0, 34, 34 ; CHECK-NEXT: .LBB0_6: # %entry -; CHECK-NEXT: lbz 3, -17(1) -; CHECK-NEXT: cmplwi 3, 0 -; CHECK-NEXT: bne 0, .LBB0_8 -; CHECK-NEXT: # %bb.7: # %entry -; CHECK-NEXT: vmr 3, 2 +; CHECK-NEXT: xxlor 34, 0, 0 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB0_7: # %entry +; CHECK-NEXT: xxlor 0, 35, 35 +; CHECK-NEXT: xscmpuqp 0, 2, 3 +; CHECK-NEXT: blt 0, .LBB0_2 ; CHECK-NEXT: .LBB0_8: # %entry -; CHECK-NEXT: addis 3, 2, .LCPI0_1@toc@ha -; CHECK-NEXT: addi 3, 3, .LCPI0_1@toc@l -; CHECK-NEXT: lxv 34, 0(3) -; CHECK-NEXT: xscmpuqp 0, 4, 2 -; CHECK-NEXT: beq 0, .LBB0_10 -; CHECK-NEXT: # %bb.9: # %entry -; CHECK-NEXT: vmr 3, 4 -; CHECK-NEXT: .LBB0_10: # %entry ; CHECK-NEXT: vmr 2, 3 -; CHECK-NEXT: blr +; CHECK-NEXT: bun 0, .LBB0_3 +; CHECK-NEXT: b .LBB0_4 entry: %m = call fp128 @llvm.minimum.f128(fp128 %a, fp128 %b) ret fp128 %m @@ -49,41 +46,38 @@ define fp128 @f128_maximum(fp128 %a, fp128 %b) { ; CHECK-LABEL: f128_maximum: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: stxv 34, -16(1) -; CHECK-NEXT: stxv 35, -32(1) -; CHECK-NEXT: xscmpuqp 0, 2, 3 -; CHECK-NEXT: vmr 4, 2 -; CHECK-NEXT: bgt 0, .LBB1_2 +; CHECK-NEXT: xxlor 0, 34, 34 +; CHECK-NEXT: lbz 3, -1(1) +; CHECK-NEXT: cmplwi 3, 0 +; CHECK-NEXT: bne 0, .LBB1_7 ; CHECK-NEXT: # %bb.1: # %entry -; CHECK-NEXT: vmr 4, 3 +; CHECK-NEXT: xscmpuqp 0, 2, 3 +; CHECK-NEXT: ble 0, .LBB1_8 ; CHECK-NEXT: .LBB1_2: # %entry ; CHECK-NEXT: bnu 0, .LBB1_4 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: .LBB1_3: ; CHECK-NEXT: addis 3, 2, .LCPI1_0@toc@ha ; CHECK-NEXT: addi 3, 3, .LCPI1_0@toc@l -; CHECK-NEXT: lxv 36, 0(3) +; CHECK-NEXT: lxv 34, 0(3) ; CHECK-NEXT: .LBB1_4: # %entry -; CHECK-NEXT: lbz 3, -1(1) -; CHECK-NEXT: cmplwi 3, 0 +; CHECK-NEXT: addis 3, 2, .LCPI1_1@toc@ha +; CHECK-NEXT: addi 3, 3, .LCPI1_1@toc@l +; CHECK-NEXT: lxv 35, 0(3) +; CHECK-NEXT: xscmpuqp 0, 2, 3 ; CHECK-NEXT: beq 0, .LBB1_6 ; CHECK-NEXT: # %bb.5: # %entry -; CHECK-NEXT: vmr 2, 4 +; CHECK-NEXT: xxlor 0, 34, 34 ; CHECK-NEXT: .LBB1_6: # %entry -; CHECK-NEXT: lbz 3, -17(1) -; CHECK-NEXT: cmplwi 3, 0 -; CHECK-NEXT: beq 0, .LBB1_8 -; CHECK-NEXT: # %bb.7: # %entry -; CHECK-NEXT: vmr 3, 2 +; CHECK-NEXT: xxlor 34, 0, 0 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB1_7: # %entry +; CHECK-NEXT: xxlor 0, 35, 35 +; CHECK-NEXT: xscmpuqp 0, 2, 3 +; CHECK-NEXT: bgt 0, .LBB1_2 ; CHECK-NEXT: .LBB1_8: # %entry -; CHECK-NEXT: addis 3, 2, .LCPI1_1@toc@ha -; CHECK-NEXT: addi 3, 3, .LCPI1_1@toc@l -; CHECK-NEXT: lxv 34, 0(3) -; CHECK-NEXT: xscmpuqp 0, 4, 2 -; CHECK-NEXT: beq 0, .LBB1_10 -; CHECK-NEXT: # %bb.9: # %entry -; CHECK-NEXT: vmr 3, 4 -; CHECK-NEXT: .LBB1_10: # %entry ; CHECK-NEXT: vmr 2, 3 -; CHECK-NEXT: blr +; CHECK-NEXT: bun 0, .LBB1_3 +; CHECK-NEXT: b .LBB1_4 entry: %m = call fp128 @llvm.maximum.f128(fp128 %a, fp128 %b) ret fp128 %m diff --git a/llvm/test/CodeGen/PowerPC/fminimum-fmaximum.ll b/llvm/test/CodeGen/PowerPC/fminimum-fmaximum.ll index e199a1eab49d5..6bf84a4893b5f 100644 --- a/llvm/test/CodeGen/PowerPC/fminimum-fmaximum.ll +++ b/llvm/test/CodeGen/PowerPC/fminimum-fmaximum.ll @@ -6,41 +6,37 @@ define float @f32_minimum(float %a, float %b) { ; NOVSX-LABEL: f32_minimum: ; NOVSX: # %bb.0: # %entry -; NOVSX-NEXT: fcmpu 0, 1, 2 -; NOVSX-NEXT: fmr 3, 1 -; NOVSX-NEXT: stfs 1, -8(1) -; NOVSX-NEXT: stfs 2, -4(1) -; NOVSX-NEXT: bc 12, 0, .LBB0_2 +; NOVSX-NEXT: stfs 1, -4(1) +; NOVSX-NEXT: fmr 0, 2 +; NOVSX-NEXT: lwz 3, -4(1) +; NOVSX-NEXT: cmpwi 3, 0 +; NOVSX-NEXT: bc 4, 2, .LBB0_7 ; NOVSX-NEXT: # %bb.1: # %entry -; NOVSX-NEXT: fmr 3, 2 +; NOVSX-NEXT: fcmpu 0, 1, 2 +; NOVSX-NEXT: bc 4, 0, .LBB0_8 ; NOVSX-NEXT: .LBB0_2: # %entry ; NOVSX-NEXT: bc 4, 3, .LBB0_4 -; NOVSX-NEXT: # %bb.3: +; NOVSX-NEXT: .LBB0_3: ; NOVSX-NEXT: addis 3, 2, .LCPI0_0@toc@ha -; NOVSX-NEXT: lfs 3, .LCPI0_0@toc@l(3) +; NOVSX-NEXT: lfs 1, .LCPI0_0@toc@l(3) ; NOVSX-NEXT: .LBB0_4: # %entry -; NOVSX-NEXT: lwz 3, -8(1) -; NOVSX-NEXT: fmr 0, 3 -; NOVSX-NEXT: cmpwi 3, 0 +; NOVSX-NEXT: addis 3, 2, .LCPI0_1@toc@ha +; NOVSX-NEXT: lfs 2, .LCPI0_1@toc@l(3) +; NOVSX-NEXT: fcmpu 0, 1, 2 ; NOVSX-NEXT: bc 12, 2, .LBB0_6 ; NOVSX-NEXT: # %bb.5: # %entry ; NOVSX-NEXT: fmr 0, 1 ; NOVSX-NEXT: .LBB0_6: # %entry -; NOVSX-NEXT: lwz 3, -4(1) -; NOVSX-NEXT: cmpwi 3, 0 -; NOVSX-NEXT: bc 12, 2, .LBB0_8 -; NOVSX-NEXT: # %bb.7: # %entry -; NOVSX-NEXT: fmr 0, 2 -; NOVSX-NEXT: .LBB0_8: # %entry -; NOVSX-NEXT: addis 3, 2, .LCPI0_1@toc@ha -; NOVSX-NEXT: lfs 1, .LCPI0_1@toc@l(3) -; NOVSX-NEXT: fcmpu 0, 3, 1 -; NOVSX-NEXT: bc 12, 2, .LBB0_10 -; NOVSX-NEXT: # %bb.9: # %entry -; NOVSX-NEXT: fmr 0, 3 -; NOVSX-NEXT: .LBB0_10: # %entry ; NOVSX-NEXT: fmr 1, 0 ; NOVSX-NEXT: blr +; NOVSX-NEXT: .LBB0_7: # %entry +; NOVSX-NEXT: fmr 0, 1 +; NOVSX-NEXT: fcmpu 0, 1, 2 +; NOVSX-NEXT: bc 12, 0, .LBB0_2 +; NOVSX-NEXT: .LBB0_8: # %entry +; NOVSX-NEXT: fmr 1, 2 +; NOVSX-NEXT: bc 12, 3, .LBB0_3 +; NOVSX-NEXT: b .LBB0_4 ; ; VSX-LABEL: f32_minimum: ; VSX: # %bb.0: # %entry @@ -73,40 +69,37 @@ entry: define float @f32_maximum(float %a, float %b) { ; NOVSX-LABEL: f32_maximum: ; NOVSX: # %bb.0: # %entry -; NOVSX-NEXT: fcmpu 0, 1, 2 +; NOVSX-NEXT: stfs 1, -4(1) ; NOVSX-NEXT: fmr 0, 1 -; NOVSX-NEXT: stfs 1, -8(1) -; NOVSX-NEXT: stfs 2, -4(1) -; NOVSX-NEXT: bc 12, 1, .LBB1_2 +; NOVSX-NEXT: lwz 3, -4(1) +; NOVSX-NEXT: cmpwi 3, 0 +; NOVSX-NEXT: bc 4, 2, .LBB1_7 ; NOVSX-NEXT: # %bb.1: # %entry -; NOVSX-NEXT: fmr 0, 2 +; NOVSX-NEXT: fcmpu 0, 1, 2 +; NOVSX-NEXT: bc 4, 1, .LBB1_8 ; NOVSX-NEXT: .LBB1_2: # %entry ; NOVSX-NEXT: bc 4, 3, .LBB1_4 -; NOVSX-NEXT: # %bb.3: +; NOVSX-NEXT: .LBB1_3: ; NOVSX-NEXT: addis 3, 2, .LCPI1_0@toc@ha -; NOVSX-NEXT: lfs 0, .LCPI1_0@toc@l(3) +; NOVSX-NEXT: lfs 1, .LCPI1_0@toc@l(3) ; NOVSX-NEXT: .LBB1_4: # %entry -; NOVSX-NEXT: lwz 3, -8(1) -; NOVSX-NEXT: cmpwi 3, 0 +; NOVSX-NEXT: addis 3, 2, .LCPI1_1@toc@ha +; NOVSX-NEXT: lfs 2, .LCPI1_1@toc@l(3) +; NOVSX-NEXT: fcmpu 0, 1, 2 ; NOVSX-NEXT: bc 12, 2, .LBB1_6 ; NOVSX-NEXT: # %bb.5: # %entry -; NOVSX-NEXT: fmr 1, 0 +; NOVSX-NEXT: fmr 0, 1 ; NOVSX-NEXT: .LBB1_6: # %entry -; NOVSX-NEXT: lwz 3, -4(1) -; NOVSX-NEXT: cmpwi 3, 0 -; NOVSX-NEXT: bc 12, 2, .LBB1_8 -; NOVSX-NEXT: # %bb.7: # %entry -; NOVSX-NEXT: fmr 2, 1 +; NOVSX-NEXT: fmr 1, 0 +; NOVSX-NEXT: blr +; NOVSX-NEXT: .LBB1_7: # %entry +; NOVSX-NEXT: fmr 0, 2 +; NOVSX-NEXT: fcmpu 0, 1, 2 +; NOVSX-NEXT: bc 12, 1, .LBB1_2 ; NOVSX-NEXT: .LBB1_8: # %entry -; NOVSX-NEXT: addis 3, 2, .LCPI1_1@toc@ha -; NOVSX-NEXT: lfs 1, .LCPI1_1@toc@l(3) -; NOVSX-NEXT: fcmpu 0, 0, 1 -; NOVSX-NEXT: bc 12, 2, .LBB1_10 -; NOVSX-NEXT: # %bb.9: # %entry -; NOVSX-NEXT: fmr 2, 0 -; NOVSX-NEXT: .LBB1_10: # %entry ; NOVSX-NEXT: fmr 1, 2 -; NOVSX-NEXT: blr +; NOVSX-NEXT: bc 12, 3, .LBB1_3 +; NOVSX-NEXT: b .LBB1_4 ; ; VSX-LABEL: f32_maximum: ; VSX: # %bb.0: # %entry @@ -139,41 +132,37 @@ entry: define double @f64_minimum(double %a, double %b) { ; NOVSX-LABEL: f64_minimum: ; NOVSX: # %bb.0: # %entry -; NOVSX-NEXT: fcmpu 0, 1, 2 -; NOVSX-NEXT: fmr 3, 1 -; NOVSX-NEXT: stfd 1, -16(1) -; NOVSX-NEXT: stfd 2, -8(1) -; NOVSX-NEXT: bc 12, 0, .LBB2_2 +; NOVSX-NEXT: stfd 1, -8(1) +; NOVSX-NEXT: fmr 0, 2 +; NOVSX-NEXT: ld 3, -8(1) +; NOVSX-NEXT: cmpdi 3, 0 +; NOVSX-NEXT: bc 4, 2, .LBB2_7 ; NOVSX-NEXT: # %bb.1: # %entry -; NOVSX-NEXT: fmr 3, 2 +; NOVSX-NEXT: fcmpu 0, 1, 2 +; NOVSX-NEXT: bc 4, 0, .LBB2_8 ; NOVSX-NEXT: .LBB2_2: # %entry ; NOVSX-NEXT: bc 4, 3, .LBB2_4 -; NOVSX-NEXT: # %bb.3: +; NOVSX-NEXT: .LBB2_3: ; NOVSX-NEXT: addis 3, 2, .LCPI2_0@toc@ha -; NOVSX-NEXT: lfs 3, .LCPI2_0@toc@l(3) +; NOVSX-NEXT: lfs 1, .LCPI2_0@toc@l(3) ; NOVSX-NEXT: .LBB2_4: # %entry -; NOVSX-NEXT: ld 3, -16(1) -; NOVSX-NEXT: fmr 0, 3 -; NOVSX-NEXT: cmpdi 3, 0 +; NOVSX-NEXT: addis 3, 2, .LCPI2_1@toc@ha +; NOVSX-NEXT: lfs 2, .LCPI2_1@toc@l(3) +; NOVSX-NEXT: fcmpu 0, 1, 2 ; NOVSX-NEXT: bc 12, 2, .LBB2_6 ; NOVSX-NEXT: # %bb.5: # %entry ; NOVSX-NEXT: fmr 0, 1 ; NOVSX-NEXT: .LBB2_6: # %entry -; NOVSX-NEXT: ld 3, -8(1) -; NOVSX-NEXT: cmpdi 3, 0 -; NOVSX-NEXT: bc 12, 2, .LBB2_8 -; NOVSX-NEXT: # %bb.7: # %entry -; NOVSX-NEXT: fmr 0, 2 -; NOVSX-NEXT: .LBB2_8: # %entry -; NOVSX-NEXT: addis 3, 2, .LCPI2_1@toc@ha -; NOVSX-NEXT: lfs 1, .LCPI2_1@toc@l(3) -; NOVSX-NEXT: fcmpu 0, 3, 1 -; NOVSX-NEXT: bc 12, 2, .LBB2_10 -; NOVSX-NEXT: # %bb.9: # %entry -; NOVSX-NEXT: fmr 0, 3 -; NOVSX-NEXT: .LBB2_10: # %entry ; NOVSX-NEXT: fmr 1, 0 ; NOVSX-NEXT: blr +; NOVSX-NEXT: .LBB2_7: # %entry +; NOVSX-NEXT: fmr 0, 1 +; NOVSX-NEXT: fcmpu 0, 1, 2 +; NOVSX-NEXT: bc 12, 0, .LBB2_2 +; NOVSX-NEXT: .LBB2_8: # %entry +; NOVSX-NEXT: fmr 1, 2 +; NOVSX-NEXT: bc 12, 3, .LBB2_3 +; NOVSX-NEXT: b .LBB2_4 ; ; VSX-LABEL: f64_minimum: ; VSX: # %bb.0: # %entry @@ -206,40 +195,37 @@ entry: define double @f64_maximum(double %a, double %b) { ; NOVSX-LABEL: f64_maximum: ; NOVSX: # %bb.0: # %entry -; NOVSX-NEXT: fcmpu 0, 1, 2 +; NOVSX-NEXT: stfd 1, -8(1) ; NOVSX-NEXT: fmr 0, 1 -; NOVSX-NEXT: stfd 1, -16(1) -; NOVSX-NEXT: stfd 2, -8(1) -; NOVSX-NEXT: bc 12, 1, .LBB3_2 +; NOVSX-NEXT: ld 3, -8(1) +; NOVSX-NEXT: cmpdi 3, 0 +; NOVSX-NEXT: bc 4, 2, .LBB3_7 ; NOVSX-NEXT: # %bb.1: # %entry -; NOVSX-NEXT: fmr 0, 2 +; NOVSX-NEXT: fcmpu 0, 1, 2 +; NOVSX-NEXT: bc 4, 1, .LBB3_8 ; NOVSX-NEXT: .LBB3_2: # %entry ; NOVSX-NEXT: bc 4, 3, .LBB3_4 -; NOVSX-NEXT: # %bb.3: +; NOVSX-NEXT: .LBB3_3: ; NOVSX-NEXT: addis 3, 2, .LCPI3_0@toc@ha -; NOVSX-NEXT: lfs 0, .LCPI3_0@toc@l(3) +; NOVSX-NEXT: lfs 1, .LCPI3_0@toc@l(3) ; NOVSX-NEXT: .LBB3_4: # %entry -; NOVSX-NEXT: ld 3, -16(1) -; NOVSX-NEXT: cmpdi 3, 0 +; NOVSX-NEXT: addis 3, 2, .LCPI3_1@toc@ha +; NOVSX-NEXT: lfs 2, .LCPI3_1@toc@l(3) +; NOVSX-NEXT: fcmpu 0, 1, 2 ; NOVSX-NEXT: bc 12, 2, .LBB3_6 ; NOVSX-NEXT: # %bb.5: # %entry -; NOVSX-NEXT: fmr 1, 0 +; NOVSX-NEXT: fmr 0, 1 ; NOVSX-NEXT: .LBB3_6: # %entry -; NOVSX-NEXT: ld 3, -8(1) -; NOVSX-NEXT: cmpdi 3, 0 -; NOVSX-NEXT: bc 12, 2, .LBB3_8 -; NOVSX-NEXT: # %bb.7: # %entry -; NOVSX-NEXT: fmr 2, 1 +; NOVSX-NEXT: fmr 1, 0 +; NOVSX-NEXT: blr +; NOVSX-NEXT: .LBB3_7: # %entry +; NOVSX-NEXT: fmr 0, 2 +; NOVSX-NEXT: fcmpu 0, 1, 2 +; NOVSX-NEXT: bc 12, 1, .LBB3_2 ; NOVSX-NEXT: .LBB3_8: # %entry -; NOVSX-NEXT: addis 3, 2, .LCPI3_1@toc@ha -; NOVSX-NEXT: lfs 1, .LCPI3_1@toc@l(3) -; NOVSX-NEXT: fcmpu 0, 0, 1 -; NOVSX-NEXT: bc 12, 2, .LBB3_10 -; NOVSX-NEXT: # %bb.9: # %entry -; NOVSX-NEXT: fmr 2, 0 -; NOVSX-NEXT: .LBB3_10: # %entry ; NOVSX-NEXT: fmr 1, 2 -; NOVSX-NEXT: blr +; NOVSX-NEXT: bc 12, 3, .LBB3_3 +; NOVSX-NEXT: b .LBB3_4 ; ; VSX-LABEL: f64_maximum: ; VSX: # %bb.0: # %entry @@ -286,10 +272,7 @@ define <4 x float> @v4f32_minimum(<4 x float> %a, <4 x float> %b) { ; NOVSX-NEXT: vxor 5, 5, 5 ; NOVSX-NEXT: vcmpequw 0, 2, 5 ; NOVSX-NEXT: vnot 0, 0 -; NOVSX-NEXT: vsel 2, 4, 2, 0 -; NOVSX-NEXT: vcmpequw 0, 3, 5 -; NOVSX-NEXT: vnot 0, 0 -; NOVSX-NEXT: vsel 2, 2, 3, 0 +; NOVSX-NEXT: vsel 2, 3, 2, 0 ; NOVSX-NEXT: vcmpeqfp 3, 4, 5 ; NOVSX-NEXT: vsel 2, 4, 2, 3 ; NOVSX-NEXT: blr @@ -309,11 +292,8 @@ define <4 x float> @v4f32_minimum(<4 x float> %a, <4 x float> %b) { ; VSX-NEXT: lxvd2x 2, 0, 3 ; VSX-NEXT: xxsel 0, 0, 2, 1 ; VSX-NEXT: xxlnor 1, 37, 37 -; VSX-NEXT: xxsel 1, 0, 34, 1 -; VSX-NEXT: vcmpequw 2, 3, 4 -; VSX-NEXT: xxlnor 2, 34, 34 -; VSX-NEXT: xxsel 1, 1, 35, 2 ; VSX-NEXT: xvcmpeqsp 2, 0, 36 +; VSX-NEXT: xxsel 1, 35, 34, 1 ; VSX-NEXT: xxsel 34, 0, 1, 2 ; VSX-NEXT: blr ; @@ -331,11 +311,8 @@ define <4 x float> @v4f32_minimum(<4 x float> %a, <4 x float> %b) { ; AIX-NEXT: lxvw4x 2, 0, 3 ; AIX-NEXT: xxsel 0, 0, 2, 1 ; AIX-NEXT: xxlnor 1, 37, 37 -; AIX-NEXT: xxsel 1, 0, 34, 1 -; AIX-NEXT: vcmpequw 2, 3, 4 -; AIX-NEXT: xxlnor 2, 34, 34 -; AIX-NEXT: xxsel 1, 1, 35, 2 ; AIX-NEXT: xvcmpeqsp 2, 0, 36 +; AIX-NEXT: xxsel 1, 35, 34, 1 ; AIX-NEXT: xxsel 34, 0, 1, 2 ; AIX-NEXT: blr entry: @@ -359,9 +336,7 @@ define <4 x float> @v4f32_maximum(<4 x float> %a, <4 x float> %b) { ; NOVSX-NEXT: vsel 4, 4, 0, 5 ; NOVSX-NEXT: vxor 5, 5, 5 ; NOVSX-NEXT: vcmpequw 0, 2, 5 -; NOVSX-NEXT: vsel 2, 4, 2, 0 -; NOVSX-NEXT: vcmpequw 0, 3, 5 -; NOVSX-NEXT: vsel 2, 2, 3, 0 +; NOVSX-NEXT: vsel 2, 3, 2, 0 ; NOVSX-NEXT: vcmpeqfp 3, 4, 5 ; NOVSX-NEXT: vsel 2, 4, 2, 3 ; NOVSX-NEXT: blr @@ -380,10 +355,8 @@ define <4 x float> @v4f32_maximum(<4 x float> %a, <4 x float> %b) { ; VSX-NEXT: xxlor 1, 2, 1 ; VSX-NEXT: lxvd2x 2, 0, 3 ; VSX-NEXT: xxsel 0, 0, 2, 1 +; VSX-NEXT: xxsel 1, 35, 34, 37 ; VSX-NEXT: xvcmpeqsp 2, 0, 36 -; VSX-NEXT: xxsel 1, 0, 34, 37 -; VSX-NEXT: vcmpequw 2, 3, 4 -; VSX-NEXT: xxsel 1, 1, 35, 34 ; VSX-NEXT: xxsel 34, 0, 1, 2 ; VSX-NEXT: blr ; @@ -400,10 +373,8 @@ define <4 x float> @v4f32_maximum(<4 x float> %a, <4 x float> %b) { ; AIX-NEXT: xxlor 1, 2, 1 ; AIX-NEXT: lxvw4x 2, 0, 3 ; AIX-NEXT: xxsel 0, 0, 2, 1 +; AIX-NEXT: xxsel 1, 35, 34, 37 ; AIX-NEXT: xvcmpeqsp 2, 0, 36 -; AIX-NEXT: xxsel 1, 0, 34, 37 -; AIX-NEXT: vcmpequw 2, 3, 4 -; AIX-NEXT: xxsel 1, 1, 35, 34 ; AIX-NEXT: xxsel 34, 0, 1, 2 ; AIX-NEXT: blr entry: @@ -414,75 +385,63 @@ entry: define <2 x double> @v2f64_minimum(<2 x double> %a, <2 x double> %b) { ; NOVSX-LABEL: v2f64_minimum: ; NOVSX: # %bb.0: # %entry -; NOVSX-NEXT: fcmpu 0, 1, 3 +; NOVSX-NEXT: stfd 1, -8(1) ; NOVSX-NEXT: fmr 0, 1 -; NOVSX-NEXT: stfd 1, -16(1) -; NOVSX-NEXT: stfd 3, -8(1) -; NOVSX-NEXT: stfd 2, -32(1) -; NOVSX-NEXT: stfd 4, -24(1) -; NOVSX-NEXT: bc 12, 0, .LBB6_2 -; NOVSX-NEXT: # %bb.1: # %entry ; NOVSX-NEXT: fmr 1, 3 +; NOVSX-NEXT: stfd 2, -16(1) +; NOVSX-NEXT: ld 3, -8(1) +; NOVSX-NEXT: cmpdi 3, 0 +; NOVSX-NEXT: bc 12, 2, .LBB6_2 +; NOVSX-NEXT: # %bb.1: # %entry +; NOVSX-NEXT: fmr 1, 0 ; NOVSX-NEXT: .LBB6_2: # %entry -; NOVSX-NEXT: addis 3, 2, .LCPI6_0@toc@ha -; NOVSX-NEXT: lfs 5, .LCPI6_0@toc@l(3) -; NOVSX-NEXT: fmr 6, 5 -; NOVSX-NEXT: bc 12, 3, .LBB6_4 +; NOVSX-NEXT: fcmpu 0, 0, 3 +; NOVSX-NEXT: bc 12, 0, .LBB6_4 ; NOVSX-NEXT: # %bb.3: # %entry -; NOVSX-NEXT: fmr 6, 1 +; NOVSX-NEXT: fmr 0, 3 ; NOVSX-NEXT: .LBB6_4: # %entry -; NOVSX-NEXT: ld 3, -16(1) -; NOVSX-NEXT: fmr 1, 6 -; NOVSX-NEXT: cmpdi 3, 0 -; NOVSX-NEXT: bc 12, 2, .LBB6_6 +; NOVSX-NEXT: addis 3, 2, .LCPI6_0@toc@ha +; NOVSX-NEXT: lfs 3, .LCPI6_0@toc@l(3) +; NOVSX-NEXT: fmr 6, 3 +; NOVSX-NEXT: bc 12, 3, .LBB6_6 ; NOVSX-NEXT: # %bb.5: # %entry -; NOVSX-NEXT: fmr 1, 0 +; NOVSX-NEXT: fmr 6, 0 ; NOVSX-NEXT: .LBB6_6: # %entry -; NOVSX-NEXT: ld 3, -8(1) -; NOVSX-NEXT: cmpdi 3, 0 +; NOVSX-NEXT: addis 3, 2, .LCPI6_1@toc@ha +; NOVSX-NEXT: lfs 5, .LCPI6_1@toc@l(3) +; NOVSX-NEXT: fcmpu 0, 6, 5 ; NOVSX-NEXT: bc 12, 2, .LBB6_8 ; NOVSX-NEXT: # %bb.7: # %entry -; NOVSX-NEXT: fmr 1, 3 +; NOVSX-NEXT: fmr 1, 6 ; NOVSX-NEXT: .LBB6_8: # %entry -; NOVSX-NEXT: addis 3, 2, .LCPI6_1@toc@ha -; NOVSX-NEXT: lfs 3, .LCPI6_1@toc@l(3) -; NOVSX-NEXT: fcmpu 0, 6, 3 -; NOVSX-NEXT: bc 12, 2, .LBB6_10 +; NOVSX-NEXT: ld 3, -16(1) +; NOVSX-NEXT: fmr 0, 4 +; NOVSX-NEXT: cmpdi 3, 0 +; NOVSX-NEXT: bc 4, 2, .LBB6_13 ; NOVSX-NEXT: # %bb.9: # %entry -; NOVSX-NEXT: fmr 1, 6 -; NOVSX-NEXT: .LBB6_10: # %entry ; NOVSX-NEXT: fcmpu 0, 2, 4 -; NOVSX-NEXT: fmr 0, 2 -; NOVSX-NEXT: bc 12, 0, .LBB6_12 -; NOVSX-NEXT: # %bb.11: # %entry -; NOVSX-NEXT: fmr 0, 4 +; NOVSX-NEXT: bc 4, 0, .LBB6_14 +; NOVSX-NEXT: .LBB6_10: # %entry +; NOVSX-NEXT: bc 4, 3, .LBB6_15 +; NOVSX-NEXT: .LBB6_11: # %entry +; NOVSX-NEXT: fcmpu 0, 3, 5 +; NOVSX-NEXT: bc 4, 2, .LBB6_16 ; NOVSX-NEXT: .LBB6_12: # %entry -; NOVSX-NEXT: bc 12, 3, .LBB6_14 -; NOVSX-NEXT: # %bb.13: # %entry -; NOVSX-NEXT: fmr 5, 0 -; NOVSX-NEXT: .LBB6_14: # %entry -; NOVSX-NEXT: ld 3, -32(1) -; NOVSX-NEXT: fmr 0, 5 -; NOVSX-NEXT: cmpdi 3, 0 -; NOVSX-NEXT: bc 12, 2, .LBB6_16 -; NOVSX-NEXT: # %bb.15: # %entry -; NOVSX-NEXT: fmr 0, 2 -; NOVSX-NEXT: .LBB6_16: # %entry -; NOVSX-NEXT: ld 3, -24(1) -; NOVSX-NEXT: cmpdi 3, 0 -; NOVSX-NEXT: bc 4, 2, .LBB6_19 -; NOVSX-NEXT: # %bb.17: # %entry -; NOVSX-NEXT: fcmpu 0, 5, 3 -; NOVSX-NEXT: bc 4, 2, .LBB6_20 -; NOVSX-NEXT: .LBB6_18: # %entry ; NOVSX-NEXT: fmr 2, 0 ; NOVSX-NEXT: blr -; NOVSX-NEXT: .LBB6_19: # %entry -; NOVSX-NEXT: fmr 0, 4 -; NOVSX-NEXT: fcmpu 0, 5, 3 -; NOVSX-NEXT: bc 12, 2, .LBB6_18 -; NOVSX-NEXT: .LBB6_20: # %entry -; NOVSX-NEXT: fmr 0, 5 +; NOVSX-NEXT: .LBB6_13: # %entry +; NOVSX-NEXT: fmr 0, 2 +; NOVSX-NEXT: fcmpu 0, 2, 4 +; NOVSX-NEXT: bc 12, 0, .LBB6_10 +; NOVSX-NEXT: .LBB6_14: # %entry +; NOVSX-NEXT: fmr 2, 4 +; NOVSX-NEXT: bc 12, 3, .LBB6_11 +; NOVSX-NEXT: .LBB6_15: # %entry +; NOVSX-NEXT: fmr 3, 2 +; NOVSX-NEXT: fcmpu 0, 3, 5 +; NOVSX-NEXT: bc 12, 2, .LBB6_12 +; NOVSX-NEXT: .LBB6_16: # %entry +; NOVSX-NEXT: fmr 0, 3 ; NOVSX-NEXT: fmr 2, 0 ; NOVSX-NEXT: blr ; @@ -501,10 +460,7 @@ define <2 x double> @v2f64_minimum(<2 x double> %a, <2 x double> %b) { ; VSX-NEXT: vcmpequd 5, 2, 4 ; VSX-NEXT: xxlnor 37, 37, 37 ; VSX-NEXT: xxsel 0, 0, 2, 1 -; VSX-NEXT: xxsel 1, 0, 34, 37 -; VSX-NEXT: vcmpequd 2, 3, 4 -; VSX-NEXT: xxlnor 34, 34, 34 -; VSX-NEXT: xxsel 1, 1, 35, 34 +; VSX-NEXT: xxsel 1, 35, 34, 37 ; VSX-NEXT: xvcmpeqdp 34, 0, 36 ; VSX-NEXT: xxsel 34, 0, 1, 34 ; VSX-NEXT: blr @@ -523,10 +479,7 @@ define <2 x double> @v2f64_minimum(<2 x double> %a, <2 x double> %b) { ; AIX-NEXT: vcmpequd 5, 2, 4 ; AIX-NEXT: xxlnor 37, 37, 37 ; AIX-NEXT: xxsel 0, 0, 2, 1 -; AIX-NEXT: xxsel 1, 0, 34, 37 -; AIX-NEXT: vcmpequd 2, 3, 4 -; AIX-NEXT: xxlnor 34, 34, 34 -; AIX-NEXT: xxsel 1, 1, 35, 34 +; AIX-NEXT: xxsel 1, 35, 34, 37 ; AIX-NEXT: xvcmpeqdp 34, 0, 36 ; AIX-NEXT: xxsel 34, 0, 1, 34 ; AIX-NEXT: blr @@ -538,71 +491,63 @@ entry: define <2 x double> @v2f64_maximum(<2 x double> %a, <2 x double> %b) { ; NOVSX-LABEL: v2f64_maximum: ; NOVSX: # %bb.0: # %entry -; NOVSX-NEXT: fcmpu 0, 1, 3 -; NOVSX-NEXT: fmr 6, 1 -; NOVSX-NEXT: stfd 1, -16(1) -; NOVSX-NEXT: stfd 3, -8(1) -; NOVSX-NEXT: stfd 2, -32(1) -; NOVSX-NEXT: stfd 4, -24(1) -; NOVSX-NEXT: bc 12, 1, .LBB7_2 +; NOVSX-NEXT: stfd 1, -8(1) +; NOVSX-NEXT: fmr 0, 1 +; NOVSX-NEXT: stfd 2, -16(1) +; NOVSX-NEXT: ld 3, -8(1) +; NOVSX-NEXT: cmpdi 3, 0 +; NOVSX-NEXT: bc 12, 2, .LBB7_2 ; NOVSX-NEXT: # %bb.1: # %entry -; NOVSX-NEXT: fmr 6, 3 +; NOVSX-NEXT: fmr 1, 3 ; NOVSX-NEXT: .LBB7_2: # %entry -; NOVSX-NEXT: addis 3, 2, .LCPI7_0@toc@ha -; NOVSX-NEXT: lfs 0, .LCPI7_0@toc@l(3) -; NOVSX-NEXT: fmr 5, 0 -; NOVSX-NEXT: bc 12, 3, .LBB7_4 +; NOVSX-NEXT: fcmpu 0, 0, 3 +; NOVSX-NEXT: bc 12, 1, .LBB7_4 ; NOVSX-NEXT: # %bb.3: # %entry -; NOVSX-NEXT: fmr 5, 6 +; NOVSX-NEXT: fmr 0, 3 ; NOVSX-NEXT: .LBB7_4: # %entry -; NOVSX-NEXT: ld 3, -16(1) -; NOVSX-NEXT: cmpdi 3, 0 -; NOVSX-NEXT: bc 12, 2, .LBB7_6 +; NOVSX-NEXT: addis 3, 2, .LCPI7_0@toc@ha +; NOVSX-NEXT: lfs 3, .LCPI7_0@toc@l(3) +; NOVSX-NEXT: fmr 6, 3 +; NOVSX-NEXT: bc 12, 3, .LBB7_6 ; NOVSX-NEXT: # %bb.5: # %entry -; NOVSX-NEXT: fmr 1, 5 +; NOVSX-NEXT: fmr 6, 0 ; NOVSX-NEXT: .LBB7_6: # %entry -; NOVSX-NEXT: ld 3, -8(1) -; NOVSX-NEXT: cmpdi 3, 0 +; NOVSX-NEXT: addis 3, 2, .LCPI7_1@toc@ha +; NOVSX-NEXT: lfs 5, .LCPI7_1@toc@l(3) +; NOVSX-NEXT: fcmpu 0, 6, 5 ; NOVSX-NEXT: bc 12, 2, .LBB7_8 ; NOVSX-NEXT: # %bb.7: # %entry -; NOVSX-NEXT: fmr 3, 1 +; NOVSX-NEXT: fmr 1, 6 ; NOVSX-NEXT: .LBB7_8: # %entry -; NOVSX-NEXT: addis 3, 2, .LCPI7_1@toc@ha -; NOVSX-NEXT: lfs 1, .LCPI7_1@toc@l(3) -; NOVSX-NEXT: fcmpu 0, 5, 1 -; NOVSX-NEXT: bc 12, 2, .LBB7_10 +; NOVSX-NEXT: ld 3, -16(1) +; NOVSX-NEXT: fmr 0, 2 +; NOVSX-NEXT: cmpdi 3, 0 +; NOVSX-NEXT: bc 4, 2, .LBB7_13 ; NOVSX-NEXT: # %bb.9: # %entry -; NOVSX-NEXT: fmr 3, 5 -; NOVSX-NEXT: .LBB7_10: # %entry ; NOVSX-NEXT: fcmpu 0, 2, 4 -; NOVSX-NEXT: fmr 5, 2 -; NOVSX-NEXT: bc 12, 1, .LBB7_12 -; NOVSX-NEXT: # %bb.11: # %entry -; NOVSX-NEXT: fmr 5, 4 +; NOVSX-NEXT: bc 4, 1, .LBB7_14 +; NOVSX-NEXT: .LBB7_10: # %entry +; NOVSX-NEXT: bc 4, 3, .LBB7_15 +; NOVSX-NEXT: .LBB7_11: # %entry +; NOVSX-NEXT: fcmpu 0, 3, 5 +; NOVSX-NEXT: bc 4, 2, .LBB7_16 ; NOVSX-NEXT: .LBB7_12: # %entry -; NOVSX-NEXT: bc 12, 3, .LBB7_14 -; NOVSX-NEXT: # %bb.13: # %entry -; NOVSX-NEXT: fmr 0, 5 -; NOVSX-NEXT: .LBB7_14: # %entry -; NOVSX-NEXT: ld 3, -32(1) -; NOVSX-NEXT: cmpdi 3, 0 -; NOVSX-NEXT: bc 12, 2, .LBB7_16 -; NOVSX-NEXT: # %bb.15: # %entry ; NOVSX-NEXT: fmr 2, 0 -; NOVSX-NEXT: .LBB7_16: # %entry -; NOVSX-NEXT: ld 3, -24(1) -; NOVSX-NEXT: cmpdi 3, 0 -; NOVSX-NEXT: bc 12, 2, .LBB7_18 -; NOVSX-NEXT: # %bb.17: # %entry -; NOVSX-NEXT: fmr 4, 2 -; NOVSX-NEXT: .LBB7_18: # %entry -; NOVSX-NEXT: fcmpu 0, 0, 1 -; NOVSX-NEXT: bc 12, 2, .LBB7_20 -; NOVSX-NEXT: # %bb.19: # %entry -; NOVSX-NEXT: fmr 4, 0 -; NOVSX-NEXT: .LBB7_20: # %entry -; NOVSX-NEXT: fmr 1, 3 +; NOVSX-NEXT: blr +; NOVSX-NEXT: .LBB7_13: # %entry +; NOVSX-NEXT: fmr 0, 4 +; NOVSX-NEXT: fcmpu 0, 2, 4 +; NOVSX-NEXT: bc 12, 1, .LBB7_10 +; NOVSX-NEXT: .LBB7_14: # %entry ; NOVSX-NEXT: fmr 2, 4 +; NOVSX-NEXT: bc 12, 3, .LBB7_11 +; NOVSX-NEXT: .LBB7_15: # %entry +; NOVSX-NEXT: fmr 3, 2 +; NOVSX-NEXT: fcmpu 0, 3, 5 +; NOVSX-NEXT: bc 12, 2, .LBB7_12 +; NOVSX-NEXT: .LBB7_16: # %entry +; NOVSX-NEXT: fmr 0, 3 +; NOVSX-NEXT: fmr 2, 0 ; NOVSX-NEXT: blr ; ; VSX-LABEL: v2f64_maximum: @@ -619,9 +564,7 @@ define <2 x double> @v2f64_maximum(<2 x double> %a, <2 x double> %b) { ; VSX-NEXT: xxlxor 36, 36, 36 ; VSX-NEXT: vcmpequd 5, 2, 4 ; VSX-NEXT: xxsel 0, 0, 2, 1 -; VSX-NEXT: xxsel 1, 0, 34, 37 -; VSX-NEXT: vcmpequd 2, 3, 4 -; VSX-NEXT: xxsel 1, 1, 35, 34 +; VSX-NEXT: xxsel 1, 35, 34, 37 ; VSX-NEXT: xvcmpeqdp 34, 0, 36 ; VSX-NEXT: xxsel 34, 0, 1, 34 ; VSX-NEXT: blr @@ -639,9 +582,7 @@ define <2 x double> @v2f64_maximum(<2 x double> %a, <2 x double> %b) { ; AIX-NEXT: xxlxor 36, 36, 36 ; AIX-NEXT: vcmpequd 5, 2, 4 ; AIX-NEXT: xxsel 0, 0, 2, 1 -; AIX-NEXT: xxsel 1, 0, 34, 37 -; AIX-NEXT: vcmpequd 2, 3, 4 -; AIX-NEXT: xxsel 1, 1, 35, 34 +; AIX-NEXT: xxsel 1, 35, 34, 37 ; AIX-NEXT: xvcmpeqdp 34, 0, 36 ; AIX-NEXT: xxsel 34, 0, 1, 34 ; AIX-NEXT: blr diff --git a/llvm/test/CodeGen/X86/fminimum-fmaximum-i686.ll b/llvm/test/CodeGen/X86/fminimum-fmaximum-i686.ll index d3f85ce51edea..fd76bd56bce43 100644 --- a/llvm/test/CodeGen/X86/fminimum-fmaximum-i686.ll +++ b/llvm/test/CodeGen/X86/fminimum-fmaximum-i686.ll @@ -15,63 +15,52 @@ define half @maximum_half(half %x, half %y) nounwind { ; CHECK-NEXT: fstps {{[0-9]+}}(%esp) ; CHECK-NEXT: calll __extendhfsf2 ; CHECK-NEXT: flds {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; CHECK-NEXT: fxch %st(1) -; CHECK-NEXT: fsts {{[0-9]+}}(%esp) -; CHECK-NEXT: fxch %st(1) -; CHECK-NEXT: fucom %st(1) -; CHECK-NEXT: fnstsw %ax -; CHECK-NEXT: # kill: def $ah killed $ah killed $ax -; CHECK-NEXT: sahf +; CHECK-NEXT: cmpl $0, {{[0-9]+}}(%esp) ; CHECK-NEXT: fld %st(0) -; CHECK-NEXT: ja .LBB0_2 +; CHECK-NEXT: je .LBB0_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: fstp %st(0) ; CHECK-NEXT: fld %st(1) ; CHECK-NEXT: .LBB0_2: -; CHECK-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} -; CHECK-NEXT: jp .LBB0_4 +; CHECK-NEXT: fxch %st(1) +; CHECK-NEXT: fucom %st(2) +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: ja .LBB0_4 ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: fstp %st(0) ; CHECK-NEXT: fldz -; CHECK-NEXT: fxch %st(1) +; CHECK-NEXT: fxch %st(2) ; CHECK-NEXT: .LBB0_4: -; CHECK-NEXT: fstp %st(1) -; CHECK-NEXT: cmpl $0, {{[0-9]+}}(%esp) -; CHECK-NEXT: je .LBB0_6 +; CHECK-NEXT: fstp %st(2) +; CHECK-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} +; CHECK-NEXT: jp .LBB0_6 ; CHECK-NEXT: # %bb.5: -; CHECK-NEXT: fstp %st(1) -; CHECK-NEXT: fld %st(0) -; CHECK-NEXT: fxch %st(1) +; CHECK-NEXT: fstp %st(0) +; CHECK-NEXT: fldz +; CHECK-NEXT: fxch %st(2) ; CHECK-NEXT: .LBB0_6: -; CHECK-NEXT: cmpl $0, {{[0-9]+}}(%esp) -; CHECK-NEXT: je .LBB0_8 -; CHECK-NEXT: # %bb.7: ; CHECK-NEXT: fstp %st(2) ; CHECK-NEXT: fldz -; CHECK-NEXT: fxch %st(1) ; CHECK-NEXT: fxch %st(2) -; CHECK-NEXT: .LBB0_8: -; CHECK-NEXT: fstp %st(1) -; CHECK-NEXT: fldz -; CHECK-NEXT: fxch %st(1) -; CHECK-NEXT: fucom %st(1) -; CHECK-NEXT: fstp %st(1) +; CHECK-NEXT: fucom %st(2) +; CHECK-NEXT: fstp %st(2) ; CHECK-NEXT: fnstsw %ax ; CHECK-NEXT: # kill: def $ah killed $ah killed $ax ; CHECK-NEXT: sahf -; CHECK-NEXT: jne .LBB0_9 -; CHECK-NEXT: # %bb.10: -; CHECK-NEXT: jp .LBB0_13 -; CHECK-NEXT: # %bb.11: -; CHECK-NEXT: fstp %st(0) -; CHECK-NEXT: jmp .LBB0_12 -; CHECK-NEXT: .LBB0_9: +; CHECK-NEXT: jne .LBB0_7 +; CHECK-NEXT: # %bb.8: +; CHECK-NEXT: jp .LBB0_11 +; CHECK-NEXT: # %bb.9: ; CHECK-NEXT: fstp %st(1) -; CHECK-NEXT: .LBB0_12: +; CHECK-NEXT: jmp .LBB0_10 +; CHECK-NEXT: .LBB0_7: +; CHECK-NEXT: fstp %st(0) +; CHECK-NEXT: .LBB0_10: ; CHECK-NEXT: fldz -; CHECK-NEXT: fxch %st(1) -; CHECK-NEXT: .LBB0_13: -; CHECK-NEXT: fstp %st(1) +; CHECK-NEXT: .LBB0_11: +; CHECK-NEXT: fstp %st(0) ; CHECK-NEXT: fstps (%esp) ; CHECK-NEXT: calll __truncsfhf2 ; CHECK-NEXT: addl $24, %esp @@ -84,68 +73,57 @@ define half @maximum_half(half %x, half %y) nounwind { define float @maximum_float(float %x, float %y) nounwind { ; CHECK-LABEL: maximum_float: ; CHECK: # %bb.0: -; CHECK-NEXT: subl $8, %esp +; CHECK-NEXT: pushl %eax ; CHECK-NEXT: flds {{[0-9]+}}(%esp) ; CHECK-NEXT: flds {{[0-9]+}}(%esp) ; CHECK-NEXT: fsts (%esp) -; CHECK-NEXT: fxch %st(1) -; CHECK-NEXT: fsts {{[0-9]+}}(%esp) -; CHECK-NEXT: fxch %st(1) -; CHECK-NEXT: fucom %st(1) -; CHECK-NEXT: fnstsw %ax -; CHECK-NEXT: # kill: def $ah killed $ah killed $ax -; CHECK-NEXT: sahf +; CHECK-NEXT: cmpl $0, (%esp) ; CHECK-NEXT: fld %st(0) -; CHECK-NEXT: ja .LBB1_2 +; CHECK-NEXT: je .LBB1_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: fstp %st(0) ; CHECK-NEXT: fld %st(1) ; CHECK-NEXT: .LBB1_2: -; CHECK-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} -; CHECK-NEXT: jp .LBB1_4 +; CHECK-NEXT: fxch %st(1) +; CHECK-NEXT: fucom %st(2) +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: ja .LBB1_4 ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: fstp %st(0) ; CHECK-NEXT: fldz -; CHECK-NEXT: fxch %st(1) +; CHECK-NEXT: fxch %st(2) ; CHECK-NEXT: .LBB1_4: -; CHECK-NEXT: fstp %st(1) -; CHECK-NEXT: cmpl $0, (%esp) -; CHECK-NEXT: je .LBB1_6 +; CHECK-NEXT: fstp %st(2) +; CHECK-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} +; CHECK-NEXT: jp .LBB1_6 ; CHECK-NEXT: # %bb.5: -; CHECK-NEXT: fstp %st(1) -; CHECK-NEXT: fld %st(0) -; CHECK-NEXT: fxch %st(1) +; CHECK-NEXT: fstp %st(0) +; CHECK-NEXT: fldz +; CHECK-NEXT: fxch %st(2) ; CHECK-NEXT: .LBB1_6: -; CHECK-NEXT: cmpl $0, {{[0-9]+}}(%esp) -; CHECK-NEXT: je .LBB1_8 -; CHECK-NEXT: # %bb.7: ; CHECK-NEXT: fstp %st(2) ; CHECK-NEXT: fldz -; CHECK-NEXT: fxch %st(1) ; CHECK-NEXT: fxch %st(2) -; CHECK-NEXT: .LBB1_8: -; CHECK-NEXT: fstp %st(1) -; CHECK-NEXT: fldz -; CHECK-NEXT: fxch %st(1) -; CHECK-NEXT: fucom %st(1) -; CHECK-NEXT: fstp %st(1) +; CHECK-NEXT: fucom %st(2) +; CHECK-NEXT: fstp %st(2) ; CHECK-NEXT: fnstsw %ax ; CHECK-NEXT: # kill: def $ah killed $ah killed $ax ; CHECK-NEXT: sahf -; CHECK-NEXT: jne .LBB1_9 -; CHECK-NEXT: # %bb.10: -; CHECK-NEXT: jp .LBB1_13 -; CHECK-NEXT: # %bb.11: -; CHECK-NEXT: fstp %st(0) -; CHECK-NEXT: jmp .LBB1_12 -; CHECK-NEXT: .LBB1_9: +; CHECK-NEXT: jne .LBB1_7 +; CHECK-NEXT: # %bb.8: +; CHECK-NEXT: jp .LBB1_11 +; CHECK-NEXT: # %bb.9: ; CHECK-NEXT: fstp %st(1) -; CHECK-NEXT: .LBB1_12: +; CHECK-NEXT: jmp .LBB1_10 +; CHECK-NEXT: .LBB1_7: +; CHECK-NEXT: fstp %st(0) +; CHECK-NEXT: .LBB1_10: ; CHECK-NEXT: fldz -; CHECK-NEXT: fxch %st(1) -; CHECK-NEXT: .LBB1_13: -; CHECK-NEXT: fstp %st(1) -; CHECK-NEXT: addl $8, %esp +; CHECK-NEXT: .LBB1_11: +; CHECK-NEXT: fstp %st(0) +; CHECK-NEXT: popl %eax ; CHECK-NEXT: retl %res = call float @llvm.maximum.f32(float %x, float %y) ret float %res @@ -154,68 +132,57 @@ define float @maximum_float(float %x, float %y) nounwind { define double @maximum_double(double %x, double %y) nounwind { ; CHECK-LABEL: maximum_double: ; CHECK: # %bb.0: -; CHECK-NEXT: subl $20, %esp +; CHECK-NEXT: subl $12, %esp ; CHECK-NEXT: fldl {{[0-9]+}}(%esp) ; CHECK-NEXT: fldl {{[0-9]+}}(%esp) -; CHECK-NEXT: fstl {{[0-9]+}}(%esp) -; CHECK-NEXT: fxch %st(1) ; CHECK-NEXT: fstl (%esp) -; CHECK-NEXT: fxch %st(1) -; CHECK-NEXT: fucom %st(1) -; CHECK-NEXT: fnstsw %ax -; CHECK-NEXT: # kill: def $ah killed $ah killed $ax -; CHECK-NEXT: sahf +; CHECK-NEXT: cmpb $0, {{[0-9]+}}(%esp) ; CHECK-NEXT: fld %st(0) -; CHECK-NEXT: ja .LBB2_2 +; CHECK-NEXT: je .LBB2_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: fstp %st(0) ; CHECK-NEXT: fld %st(1) ; CHECK-NEXT: .LBB2_2: -; CHECK-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} -; CHECK-NEXT: jp .LBB2_4 +; CHECK-NEXT: fxch %st(1) +; CHECK-NEXT: fucom %st(2) +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: ja .LBB2_4 ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: fstp %st(0) ; CHECK-NEXT: fldz -; CHECK-NEXT: fxch %st(1) +; CHECK-NEXT: fxch %st(2) ; CHECK-NEXT: .LBB2_4: -; CHECK-NEXT: fstp %st(1) -; CHECK-NEXT: cmpb $0, {{[0-9]+}}(%esp) -; CHECK-NEXT: je .LBB2_6 +; CHECK-NEXT: fstp %st(2) +; CHECK-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} +; CHECK-NEXT: jp .LBB2_6 ; CHECK-NEXT: # %bb.5: -; CHECK-NEXT: fstp %st(1) -; CHECK-NEXT: fld %st(0) -; CHECK-NEXT: fxch %st(1) +; CHECK-NEXT: fstp %st(0) +; CHECK-NEXT: fldz +; CHECK-NEXT: fxch %st(2) ; CHECK-NEXT: .LBB2_6: -; CHECK-NEXT: cmpb $0, {{[0-9]+}}(%esp) -; CHECK-NEXT: je .LBB2_8 -; CHECK-NEXT: # %bb.7: ; CHECK-NEXT: fstp %st(2) ; CHECK-NEXT: fldz -; CHECK-NEXT: fxch %st(1) ; CHECK-NEXT: fxch %st(2) -; CHECK-NEXT: .LBB2_8: -; CHECK-NEXT: fstp %st(1) -; CHECK-NEXT: fldz -; CHECK-NEXT: fxch %st(1) -; CHECK-NEXT: fucom %st(1) -; CHECK-NEXT: fstp %st(1) +; CHECK-NEXT: fucom %st(2) +; CHECK-NEXT: fstp %st(2) ; CHECK-NEXT: fnstsw %ax ; CHECK-NEXT: # kill: def $ah killed $ah killed $ax ; CHECK-NEXT: sahf -; CHECK-NEXT: jne .LBB2_9 -; CHECK-NEXT: # %bb.10: -; CHECK-NEXT: jp .LBB2_13 -; CHECK-NEXT: # %bb.11: -; CHECK-NEXT: fstp %st(0) -; CHECK-NEXT: jmp .LBB2_12 -; CHECK-NEXT: .LBB2_9: +; CHECK-NEXT: jne .LBB2_7 +; CHECK-NEXT: # %bb.8: +; CHECK-NEXT: jp .LBB2_11 +; CHECK-NEXT: # %bb.9: ; CHECK-NEXT: fstp %st(1) -; CHECK-NEXT: .LBB2_12: +; CHECK-NEXT: jmp .LBB2_10 +; CHECK-NEXT: .LBB2_7: +; CHECK-NEXT: fstp %st(0) +; CHECK-NEXT: .LBB2_10: ; CHECK-NEXT: fldz -; CHECK-NEXT: fxch %st(1) -; CHECK-NEXT: .LBB2_13: -; CHECK-NEXT: fstp %st(1) -; CHECK-NEXT: addl $20, %esp +; CHECK-NEXT: .LBB2_11: +; CHECK-NEXT: fstp %st(0) +; CHECK-NEXT: addl $12, %esp ; CHECK-NEXT: retl %res = call double @llvm.maximum.f64(double %x, double %y) ret double %res @@ -272,63 +239,57 @@ define half @minimum_half(half %x, half %y) nounwind { ; CHECK-NEXT: fstps {{[0-9]+}}(%esp) ; CHECK-NEXT: calll __extendhfsf2 ; CHECK-NEXT: flds {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; CHECK-NEXT: fxch %st(1) -; CHECK-NEXT: fsts {{[0-9]+}}(%esp) +; CHECK-NEXT: cmpl $0, {{[0-9]+}}(%esp) +; CHECK-NEXT: fld %st(0) +; CHECK-NEXT: jne .LBB4_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: fstp %st(0) +; CHECK-NEXT: fld %st(1) +; CHECK-NEXT: .LBB4_2: +; CHECK-NEXT: fxch %st(2) ; CHECK-NEXT: fucom %st(1) ; CHECK-NEXT: fnstsw %ax ; CHECK-NEXT: # kill: def $ah killed $ah killed $ax ; CHECK-NEXT: sahf ; CHECK-NEXT: fld %st(1) -; CHECK-NEXT: ja .LBB4_2 -; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ja .LBB4_4 +; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: fstp %st(0) ; CHECK-NEXT: fld %st(0) -; CHECK-NEXT: .LBB4_2: +; CHECK-NEXT: .LBB4_4: ; CHECK-NEXT: fxch %st(2) -; CHECK-NEXT: fucom %st(1) +; CHECK-NEXT: fucompp ; CHECK-NEXT: fnstsw %ax ; CHECK-NEXT: # kill: def $ah killed $ah killed $ax ; CHECK-NEXT: sahf ; CHECK-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} -; CHECK-NEXT: jp .LBB4_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: fstp %st(0) -; CHECK-NEXT: fldz -; CHECK-NEXT: fxch %st(3) -; CHECK-NEXT: .LBB4_4: -; CHECK-NEXT: fstp %st(3) -; CHECK-NEXT: cmpl $0, {{[0-9]+}}(%esp) -; CHECK-NEXT: jne .LBB4_6 +; CHECK-NEXT: jp .LBB4_6 ; CHECK-NEXT: # %bb.5: ; CHECK-NEXT: fstp %st(0) -; CHECK-NEXT: fld %st(1) +; CHECK-NEXT: fldz +; CHECK-NEXT: fxch %st(1) ; CHECK-NEXT: .LBB4_6: -; CHECK-NEXT: cmpl $0, {{[0-9]+}}(%esp) -; CHECK-NEXT: jne .LBB4_8 -; CHECK-NEXT: # %bb.7: ; CHECK-NEXT: fstp %st(1) ; CHECK-NEXT: fldz -; CHECK-NEXT: .LBB4_8: -; CHECK-NEXT: fstp %st(0) -; CHECK-NEXT: fldz -; CHECK-NEXT: fxch %st(2) -; CHECK-NEXT: fucom %st(2) -; CHECK-NEXT: fstp %st(2) +; CHECK-NEXT: fxch %st(1) +; CHECK-NEXT: fucom %st(1) +; CHECK-NEXT: fstp %st(1) ; CHECK-NEXT: fnstsw %ax ; CHECK-NEXT: # kill: def $ah killed $ah killed $ax ; CHECK-NEXT: sahf -; CHECK-NEXT: jne .LBB4_9 -; CHECK-NEXT: # %bb.10: -; CHECK-NEXT: jp .LBB4_13 -; CHECK-NEXT: # %bb.11: -; CHECK-NEXT: fstp %st(1) -; CHECK-NEXT: jmp .LBB4_12 -; CHECK-NEXT: .LBB4_9: +; CHECK-NEXT: jne .LBB4_7 +; CHECK-NEXT: # %bb.8: +; CHECK-NEXT: jp .LBB4_11 +; CHECK-NEXT: # %bb.9: ; CHECK-NEXT: fstp %st(0) -; CHECK-NEXT: .LBB4_12: +; CHECK-NEXT: jmp .LBB4_10 +; CHECK-NEXT: .LBB4_7: +; CHECK-NEXT: fstp %st(1) +; CHECK-NEXT: .LBB4_10: ; CHECK-NEXT: fldz -; CHECK-NEXT: .LBB4_13: -; CHECK-NEXT: fstp %st(0) +; CHECK-NEXT: fxch %st(1) +; CHECK-NEXT: .LBB4_11: +; CHECK-NEXT: fstp %st(1) ; CHECK-NEXT: fstps (%esp) ; CHECK-NEXT: calll __truncsfhf2 ; CHECK-NEXT: addl $24, %esp @@ -341,68 +302,62 @@ define half @minimum_half(half %x, half %y) nounwind { define float @minimum_float(float %x, float %y) nounwind { ; CHECK-LABEL: minimum_float: ; CHECK: # %bb.0: -; CHECK-NEXT: subl $8, %esp +; CHECK-NEXT: pushl %eax ; CHECK-NEXT: flds {{[0-9]+}}(%esp) ; CHECK-NEXT: flds {{[0-9]+}}(%esp) ; CHECK-NEXT: fsts (%esp) -; CHECK-NEXT: fxch %st(1) -; CHECK-NEXT: fsts {{[0-9]+}}(%esp) +; CHECK-NEXT: cmpl $0, (%esp) +; CHECK-NEXT: fld %st(0) +; CHECK-NEXT: jne .LBB5_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: fstp %st(0) +; CHECK-NEXT: fld %st(1) +; CHECK-NEXT: .LBB5_2: +; CHECK-NEXT: fxch %st(2) ; CHECK-NEXT: fucom %st(1) ; CHECK-NEXT: fnstsw %ax ; CHECK-NEXT: # kill: def $ah killed $ah killed $ax ; CHECK-NEXT: sahf ; CHECK-NEXT: fld %st(1) -; CHECK-NEXT: ja .LBB5_2 -; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ja .LBB5_4 +; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: fstp %st(0) ; CHECK-NEXT: fld %st(0) -; CHECK-NEXT: .LBB5_2: +; CHECK-NEXT: .LBB5_4: ; CHECK-NEXT: fxch %st(2) -; CHECK-NEXT: fucom %st(1) +; CHECK-NEXT: fucompp ; CHECK-NEXT: fnstsw %ax ; CHECK-NEXT: # kill: def $ah killed $ah killed $ax ; CHECK-NEXT: sahf ; CHECK-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} -; CHECK-NEXT: jp .LBB5_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: fstp %st(0) -; CHECK-NEXT: fldz -; CHECK-NEXT: fxch %st(3) -; CHECK-NEXT: .LBB5_4: -; CHECK-NEXT: fstp %st(3) -; CHECK-NEXT: cmpl $0, (%esp) -; CHECK-NEXT: jne .LBB5_6 +; CHECK-NEXT: jp .LBB5_6 ; CHECK-NEXT: # %bb.5: ; CHECK-NEXT: fstp %st(0) -; CHECK-NEXT: fld %st(1) +; CHECK-NEXT: fldz +; CHECK-NEXT: fxch %st(1) ; CHECK-NEXT: .LBB5_6: -; CHECK-NEXT: cmpl $0, {{[0-9]+}}(%esp) -; CHECK-NEXT: jne .LBB5_8 -; CHECK-NEXT: # %bb.7: ; CHECK-NEXT: fstp %st(1) ; CHECK-NEXT: fldz -; CHECK-NEXT: .LBB5_8: -; CHECK-NEXT: fstp %st(0) -; CHECK-NEXT: fldz -; CHECK-NEXT: fxch %st(2) -; CHECK-NEXT: fucom %st(2) -; CHECK-NEXT: fstp %st(2) +; CHECK-NEXT: fxch %st(1) +; CHECK-NEXT: fucom %st(1) +; CHECK-NEXT: fstp %st(1) ; CHECK-NEXT: fnstsw %ax ; CHECK-NEXT: # kill: def $ah killed $ah killed $ax ; CHECK-NEXT: sahf -; CHECK-NEXT: jne .LBB5_9 -; CHECK-NEXT: # %bb.10: -; CHECK-NEXT: jp .LBB5_13 -; CHECK-NEXT: # %bb.11: -; CHECK-NEXT: fstp %st(1) -; CHECK-NEXT: jmp .LBB5_12 -; CHECK-NEXT: .LBB5_9: +; CHECK-NEXT: jne .LBB5_7 +; CHECK-NEXT: # %bb.8: +; CHECK-NEXT: jp .LBB5_11 +; CHECK-NEXT: # %bb.9: ; CHECK-NEXT: fstp %st(0) -; CHECK-NEXT: .LBB5_12: +; CHECK-NEXT: jmp .LBB5_10 +; CHECK-NEXT: .LBB5_7: +; CHECK-NEXT: fstp %st(1) +; CHECK-NEXT: .LBB5_10: ; CHECK-NEXT: fldz -; CHECK-NEXT: .LBB5_13: -; CHECK-NEXT: fstp %st(0) -; CHECK-NEXT: addl $8, %esp +; CHECK-NEXT: fxch %st(1) +; CHECK-NEXT: .LBB5_11: +; CHECK-NEXT: fstp %st(1) +; CHECK-NEXT: popl %eax ; CHECK-NEXT: retl %res = call float @llvm.minimum.f32(float %x, float %y) ret float %res @@ -411,68 +366,62 @@ define float @minimum_float(float %x, float %y) nounwind { define double @minimum_double(double %x, double %y) nounwind { ; CHECK-LABEL: minimum_double: ; CHECK: # %bb.0: -; CHECK-NEXT: subl $20, %esp +; CHECK-NEXT: subl $12, %esp ; CHECK-NEXT: fldl {{[0-9]+}}(%esp) ; CHECK-NEXT: fldl {{[0-9]+}}(%esp) -; CHECK-NEXT: fstl {{[0-9]+}}(%esp) -; CHECK-NEXT: fxch %st(1) ; CHECK-NEXT: fstl (%esp) +; CHECK-NEXT: cmpb $0, {{[0-9]+}}(%esp) +; CHECK-NEXT: fld %st(0) +; CHECK-NEXT: jne .LBB6_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: fstp %st(0) +; CHECK-NEXT: fld %st(1) +; CHECK-NEXT: .LBB6_2: +; CHECK-NEXT: fxch %st(2) ; CHECK-NEXT: fucom %st(1) ; CHECK-NEXT: fnstsw %ax ; CHECK-NEXT: # kill: def $ah killed $ah killed $ax ; CHECK-NEXT: sahf ; CHECK-NEXT: fld %st(1) -; CHECK-NEXT: ja .LBB6_2 -; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ja .LBB6_4 +; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: fstp %st(0) ; CHECK-NEXT: fld %st(0) -; CHECK-NEXT: .LBB6_2: +; CHECK-NEXT: .LBB6_4: ; CHECK-NEXT: fxch %st(2) -; CHECK-NEXT: fucom %st(1) +; CHECK-NEXT: fucompp ; CHECK-NEXT: fnstsw %ax ; CHECK-NEXT: # kill: def $ah killed $ah killed $ax ; CHECK-NEXT: sahf ; CHECK-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} -; CHECK-NEXT: jp .LBB6_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: fstp %st(0) -; CHECK-NEXT: fldz -; CHECK-NEXT: fxch %st(3) -; CHECK-NEXT: .LBB6_4: -; CHECK-NEXT: fstp %st(3) -; CHECK-NEXT: cmpb $0, {{[0-9]+}}(%esp) -; CHECK-NEXT: jne .LBB6_6 +; CHECK-NEXT: jp .LBB6_6 ; CHECK-NEXT: # %bb.5: ; CHECK-NEXT: fstp %st(0) -; CHECK-NEXT: fld %st(1) +; CHECK-NEXT: fldz +; CHECK-NEXT: fxch %st(1) ; CHECK-NEXT: .LBB6_6: -; CHECK-NEXT: cmpb $0, {{[0-9]+}}(%esp) -; CHECK-NEXT: jne .LBB6_8 -; CHECK-NEXT: # %bb.7: ; CHECK-NEXT: fstp %st(1) ; CHECK-NEXT: fldz -; CHECK-NEXT: .LBB6_8: -; CHECK-NEXT: fstp %st(0) -; CHECK-NEXT: fldz -; CHECK-NEXT: fxch %st(2) -; CHECK-NEXT: fucom %st(2) -; CHECK-NEXT: fstp %st(2) +; CHECK-NEXT: fxch %st(1) +; CHECK-NEXT: fucom %st(1) +; CHECK-NEXT: fstp %st(1) ; CHECK-NEXT: fnstsw %ax ; CHECK-NEXT: # kill: def $ah killed $ah killed $ax ; CHECK-NEXT: sahf -; CHECK-NEXT: jne .LBB6_9 -; CHECK-NEXT: # %bb.10: -; CHECK-NEXT: jp .LBB6_13 -; CHECK-NEXT: # %bb.11: -; CHECK-NEXT: fstp %st(1) -; CHECK-NEXT: jmp .LBB6_12 -; CHECK-NEXT: .LBB6_9: +; CHECK-NEXT: jne .LBB6_7 +; CHECK-NEXT: # %bb.8: +; CHECK-NEXT: jp .LBB6_11 +; CHECK-NEXT: # %bb.9: ; CHECK-NEXT: fstp %st(0) -; CHECK-NEXT: .LBB6_12: +; CHECK-NEXT: jmp .LBB6_10 +; CHECK-NEXT: .LBB6_7: +; CHECK-NEXT: fstp %st(1) +; CHECK-NEXT: .LBB6_10: ; CHECK-NEXT: fldz -; CHECK-NEXT: .LBB6_13: -; CHECK-NEXT: fstp %st(0) -; CHECK-NEXT: addl $20, %esp +; CHECK-NEXT: fxch %st(1) +; CHECK-NEXT: .LBB6_11: +; CHECK-NEXT: fstp %st(1) +; CHECK-NEXT: addl $12, %esp ; CHECK-NEXT: retl %res = call double @llvm.minimum.f64(double %x, double %y) ret double %res diff --git a/llvm/test/CodeGen/X86/fminimum-fmaximum.ll b/llvm/test/CodeGen/X86/fminimum-fmaximum.ll index 80e3a017a44e3..ccebd3c5db062 100644 --- a/llvm/test/CodeGen/X86/fminimum-fmaximum.ll +++ b/llvm/test/CodeGen/X86/fminimum-fmaximum.ll @@ -1972,10 +1972,8 @@ define <4 x half> @test_fmaximum_v4f16(<4 x half> %x, <4 x half> %y) nounwind { ; AVX512-NEXT: cmovpl %eax, %ecx ; AVX512-NEXT: vpinsrw $7, %ecx, %xmm3, %xmm3 ; AVX512-NEXT: vpxor %xmm4, %xmm4, %xmm4 -; AVX512-NEXT: vpcmpeqw %xmm4, %xmm0, %xmm5 -; AVX512-NEXT: vpblendvb %xmm5, %xmm0, %xmm2, %xmm0 -; AVX512-NEXT: vpcmpeqw %xmm4, %xmm1, %xmm4 -; AVX512-NEXT: vpblendvb %xmm4, %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vpcmpeqw %xmm4, %xmm0, %xmm4 +; AVX512-NEXT: vpblendvb %xmm4, %xmm0, %xmm1, %xmm0 ; AVX512-NEXT: vpblendvb %xmm3, %xmm0, %xmm2, %xmm0 ; AVX512-NEXT: popq %rbx ; AVX512-NEXT: popq %r12 @@ -2654,150 +2652,135 @@ define fp128 @maximum_fp128(fp128 %x, fp128 %y) nounwind { ; SSE2-LABEL: maximum_fp128: ; SSE2: # %bb.0: ; SSE2-NEXT: subq $88, %rsp -; SSE2-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; SSE2-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill ; SSE2-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; SSE2-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) -; SSE2-NEXT: callq __gttf2@PLT -; SSE2-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload -; SSE2-NEXT: testl %eax, %eax -; SSE2-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload -; SSE2-NEXT: jg .LBB35_2 +; SSE2-NEXT: cmpb $0, {{[0-9]+}}(%rsp) +; SSE2-NEXT: movaps %xmm0, %xmm2 +; SSE2-NEXT: je .LBB35_2 ; SSE2-NEXT: # %bb.1: -; SSE2-NEXT: movaps %xmm1, %xmm0 +; SSE2-NEXT: movaps %xmm1, %xmm2 ; SSE2-NEXT: .LBB35_2: +; SSE2-NEXT: movaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; SSE2-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; SSE2-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload ; SSE2-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; SSE2-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload -; SSE2-NEXT: callq __unordtf2@PLT -; SSE2-NEXT: testl %eax, %eax -; SSE2-NEXT: jne .LBB35_3 -; SSE2-NEXT: # %bb.4: +; SSE2-NEXT: callq __gttf2@PLT ; SSE2-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload -; SSE2-NEXT: jmp .LBB35_5 -; SSE2-NEXT: .LBB35_3: -; SSE2-NEXT: movaps {{.*#+}} xmm0 = [NaN] -; SSE2-NEXT: .LBB35_5: -; SSE2-NEXT: cmpb $0, {{[0-9]+}}(%rsp) ; SSE2-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload -; SSE2-NEXT: je .LBB35_7 +; SSE2-NEXT: testl %eax, %eax +; SSE2-NEXT: movaps %xmm0, %xmm2 +; SSE2-NEXT: jg .LBB35_4 +; SSE2-NEXT: # %bb.3: +; SSE2-NEXT: movaps %xmm1, %xmm2 +; SSE2-NEXT: .LBB35_4: +; SSE2-NEXT: movaps %xmm2, (%rsp) # 16-byte Spill +; SSE2-NEXT: callq __unordtf2@PLT +; SSE2-NEXT: testl %eax, %eax +; SSE2-NEXT: jne .LBB35_5 ; SSE2-NEXT: # %bb.6: -; SSE2-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; SSE2-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload +; SSE2-NEXT: jmp .LBB35_7 +; SSE2-NEXT: .LBB35_5: +; SSE2-NEXT: movaps {{.*#+}} xmm0 = [NaN] ; SSE2-NEXT: .LBB35_7: -; SSE2-NEXT: cmpb $0, {{[0-9]+}}(%rsp) -; SSE2-NEXT: je .LBB35_9 -; SSE2-NEXT: # %bb.8: -; SSE2-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload -; SSE2-NEXT: .LBB35_9: -; SSE2-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; SSE2-NEXT: xorps %xmm1, %xmm1 -; SSE2-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; SSE2-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill ; SSE2-NEXT: callq __eqtf2@PLT -; SSE2-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload ; SSE2-NEXT: testl %eax, %eax -; SSE2-NEXT: je .LBB35_11 -; SSE2-NEXT: # %bb.10: ; SSE2-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload -; SSE2-NEXT: .LBB35_11: +; SSE2-NEXT: je .LBB35_9 +; SSE2-NEXT: # %bb.8: +; SSE2-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload +; SSE2-NEXT: .LBB35_9: ; SSE2-NEXT: addq $88, %rsp ; SSE2-NEXT: retq ; ; AVX-LABEL: maximum_fp128: ; AVX: # %bb.0: ; AVX-NEXT: subq $88, %rsp -; AVX-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill ; AVX-NEXT: vmovaps %xmm0, {{[0-9]+}}(%rsp) -; AVX-NEXT: vmovaps %xmm1, {{[0-9]+}}(%rsp) -; AVX-NEXT: callq __gttf2@PLT -; AVX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload -; AVX-NEXT: testl %eax, %eax -; AVX-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload -; AVX-NEXT: jg .LBB35_2 +; AVX-NEXT: cmpb $0, {{[0-9]+}}(%rsp) +; AVX-NEXT: vmovaps %xmm0, %xmm2 +; AVX-NEXT: je .LBB35_2 ; AVX-NEXT: # %bb.1: -; AVX-NEXT: vmovaps %xmm1, %xmm0 +; AVX-NEXT: vmovaps %xmm1, %xmm2 ; AVX-NEXT: .LBB35_2: +; AVX-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload ; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; AVX-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload -; AVX-NEXT: callq __unordtf2@PLT -; AVX-NEXT: testl %eax, %eax -; AVX-NEXT: jne .LBB35_3 -; AVX-NEXT: # %bb.4: +; AVX-NEXT: callq __gttf2@PLT ; AVX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload -; AVX-NEXT: jmp .LBB35_5 -; AVX-NEXT: .LBB35_3: -; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [NaN] -; AVX-NEXT: .LBB35_5: -; AVX-NEXT: cmpb $0, {{[0-9]+}}(%rsp) ; AVX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload -; AVX-NEXT: je .LBB35_7 +; AVX-NEXT: testl %eax, %eax +; AVX-NEXT: vmovaps %xmm0, %xmm2 +; AVX-NEXT: jg .LBB35_4 +; AVX-NEXT: # %bb.3: +; AVX-NEXT: vmovaps %xmm1, %xmm2 +; AVX-NEXT: .LBB35_4: +; AVX-NEXT: vmovaps %xmm2, (%rsp) # 16-byte Spill +; AVX-NEXT: callq __unordtf2@PLT +; AVX-NEXT: testl %eax, %eax +; AVX-NEXT: jne .LBB35_5 ; AVX-NEXT: # %bb.6: -; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; AVX-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload +; AVX-NEXT: jmp .LBB35_7 +; AVX-NEXT: .LBB35_5: +; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [NaN] ; AVX-NEXT: .LBB35_7: -; AVX-NEXT: cmpb $0, {{[0-9]+}}(%rsp) -; AVX-NEXT: je .LBB35_9 -; AVX-NEXT: # %bb.8: -; AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload -; AVX-NEXT: .LBB35_9: -; AVX-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill ; AVX-NEXT: callq __eqtf2@PLT -; AVX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload ; AVX-NEXT: testl %eax, %eax -; AVX-NEXT: je .LBB35_11 -; AVX-NEXT: # %bb.10: ; AVX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload -; AVX-NEXT: .LBB35_11: +; AVX-NEXT: je .LBB35_9 +; AVX-NEXT: # %bb.8: +; AVX-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload +; AVX-NEXT: .LBB35_9: ; AVX-NEXT: addq $88, %rsp ; AVX-NEXT: retq ; ; AVX10_2-LABEL: maximum_fp128: ; AVX10_2: # %bb.0: ; AVX10_2-NEXT: subq $88, %rsp -; AVX10_2-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; AVX10_2-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill ; AVX10_2-NEXT: vmovaps %xmm0, {{[0-9]+}}(%rsp) -; AVX10_2-NEXT: vmovaps %xmm1, {{[0-9]+}}(%rsp) -; AVX10_2-NEXT: callq __gttf2@PLT -; AVX10_2-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload -; AVX10_2-NEXT: testl %eax, %eax -; AVX10_2-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload -; AVX10_2-NEXT: jg .LBB35_2 +; AVX10_2-NEXT: cmpb $0, {{[0-9]+}}(%rsp) +; AVX10_2-NEXT: vmovaps %xmm0, %xmm2 +; AVX10_2-NEXT: je .LBB35_2 ; AVX10_2-NEXT: # %bb.1: -; AVX10_2-NEXT: vmovaps %xmm1, %xmm0 +; AVX10_2-NEXT: vmovaps %xmm1, %xmm2 ; AVX10_2-NEXT: .LBB35_2: +; AVX10_2-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX10_2-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX10_2-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload ; AVX10_2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; AVX10_2-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload -; AVX10_2-NEXT: callq __unordtf2@PLT -; AVX10_2-NEXT: testl %eax, %eax -; AVX10_2-NEXT: jne .LBB35_3 -; AVX10_2-NEXT: # %bb.4: +; AVX10_2-NEXT: callq __gttf2@PLT ; AVX10_2-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload -; AVX10_2-NEXT: jmp .LBB35_5 -; AVX10_2-NEXT: .LBB35_3: -; AVX10_2-NEXT: vmovaps {{.*#+}} xmm0 = [NaN] -; AVX10_2-NEXT: .LBB35_5: -; AVX10_2-NEXT: cmpb $0, {{[0-9]+}}(%rsp) ; AVX10_2-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload -; AVX10_2-NEXT: je .LBB35_7 +; AVX10_2-NEXT: testl %eax, %eax +; AVX10_2-NEXT: vmovaps %xmm0, %xmm2 +; AVX10_2-NEXT: jg .LBB35_4 +; AVX10_2-NEXT: # %bb.3: +; AVX10_2-NEXT: vmovaps %xmm1, %xmm2 +; AVX10_2-NEXT: .LBB35_4: +; AVX10_2-NEXT: vmovaps %xmm2, (%rsp) # 16-byte Spill +; AVX10_2-NEXT: callq __unordtf2@PLT +; AVX10_2-NEXT: testl %eax, %eax +; AVX10_2-NEXT: jne .LBB35_5 ; AVX10_2-NEXT: # %bb.6: -; AVX10_2-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; AVX10_2-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload +; AVX10_2-NEXT: jmp .LBB35_7 +; AVX10_2-NEXT: .LBB35_5: +; AVX10_2-NEXT: vmovaps {{.*#+}} xmm0 = [NaN] ; AVX10_2-NEXT: .LBB35_7: -; AVX10_2-NEXT: cmpb $0, {{[0-9]+}}(%rsp) -; AVX10_2-NEXT: je .LBB35_9 -; AVX10_2-NEXT: # %bb.8: -; AVX10_2-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload -; AVX10_2-NEXT: .LBB35_9: -; AVX10_2-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; AVX10_2-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; AVX10_2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX10_2-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill ; AVX10_2-NEXT: callq __eqtf2@PLT -; AVX10_2-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload ; AVX10_2-NEXT: testl %eax, %eax -; AVX10_2-NEXT: je .LBB35_11 -; AVX10_2-NEXT: # %bb.10: ; AVX10_2-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload -; AVX10_2-NEXT: .LBB35_11: +; AVX10_2-NEXT: je .LBB35_9 +; AVX10_2-NEXT: # %bb.8: +; AVX10_2-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload +; AVX10_2-NEXT: .LBB35_9: ; AVX10_2-NEXT: addq $88, %rsp ; AVX10_2-NEXT: retq ;