diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h index a98e46c587273..7dc70d5c50c03 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAG.h +++ b/llvm/include/llvm/CodeGen/SelectionDAG.h @@ -215,6 +215,20 @@ class SDDbgInfo { LLVM_ABI void checkForCycles(const SelectionDAG *DAG, bool force = false); +/// Keeps track of state when getting the sign of a floating-point value as an +/// integer. +struct FloatSignAsInt { + EVT FloatVT; + SDValue Chain; + SDValue FloatPtr; + SDValue IntPtr; + MachinePointerInfo IntPointerInfo; + MachinePointerInfo FloatPointerInfo; + SDValue IntValue; + APInt SignMask; + uint8_t SignBit; +}; + /// This is used to represent a portion of an LLVM function in a low-level /// Data Dependence DAG representation suitable for instruction selection. /// This DAG is constructed as the first step of instruction selection in order @@ -2017,6 +2031,16 @@ class SelectionDAG { /// value types. LLVM_ABI SDValue CreateStackTemporary(EVT VT1, EVT VT2); + /// Bitcast a floating-point value to an integer value. Only bitcast the part + /// containing the sign bit if the target has no integer value capable of + /// holding all bits of the floating-point value. + void getSignAsIntValue(FloatSignAsInt &State, const SDLoc &DL, SDValue Value); + + /// Replace the integer value produced by getSignAsIntValue() with a new value + /// and cast the result back to a floating-point type. + SDValue modifySignAsInt(const FloatSignAsInt &State, const SDLoc &DL, + SDValue NewIntValue); + LLVM_ABI SDValue FoldSymbolOffset(unsigned Opcode, EVT VT, const GlobalAddressSDNode *GA, const SDNode *N2); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 528c07cc5549d..2c41a871b6d6c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -59,20 +59,6 @@ using namespace llvm; namespace { -/// Keeps track of state when getting the sign of a floating-point value as an -/// integer. -struct FloatSignAsInt { - EVT FloatVT; - SDValue Chain; - SDValue FloatPtr; - SDValue IntPtr; - MachinePointerInfo IntPointerInfo; - MachinePointerInfo FloatPointerInfo; - SDValue IntValue; - APInt SignMask; - uint8_t SignBit; -}; - //===----------------------------------------------------------------------===// /// This takes an arbitrary SelectionDAG as input and /// hacks on it until the target machine can handle it. This involves @@ -166,10 +152,6 @@ class SelectionDAGLegalize { SDValue ExpandSCALAR_TO_VECTOR(SDNode *Node); void ExpandDYNAMIC_STACKALLOC(SDNode *Node, SmallVectorImpl &Results); - void getSignAsIntValue(FloatSignAsInt &State, const SDLoc &DL, - SDValue Value) const; - SDValue modifySignAsInt(const FloatSignAsInt &State, const SDLoc &DL, - SDValue NewIntValue) const; SDValue ExpandFCOPYSIGN(SDNode *Node) const; SDValue ExpandFABS(SDNode *Node) const; SDValue ExpandFNEG(SDNode *Node) const; @@ -1620,74 +1602,6 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { return DAG.getLoad(VT, dl, StoreChain, FIPtr, PtrInfo); } -/// Bitcast a floating-point value to an integer value. Only bitcast the part -/// containing the sign bit if the target has no integer value capable of -/// holding all bits of the floating-point value. -void SelectionDAGLegalize::getSignAsIntValue(FloatSignAsInt &State, - const SDLoc &DL, - SDValue Value) const { - EVT FloatVT = Value.getValueType(); - unsigned NumBits = FloatVT.getScalarSizeInBits(); - State.FloatVT = FloatVT; - EVT IVT = EVT::getIntegerVT(*DAG.getContext(), NumBits); - // Convert to an integer of the same size. - if (TLI.isTypeLegal(IVT)) { - State.IntValue = DAG.getNode(ISD::BITCAST, DL, IVT, Value); - State.SignMask = APInt::getSignMask(NumBits); - State.SignBit = NumBits - 1; - return; - } - - auto &DataLayout = DAG.getDataLayout(); - // Store the float to memory, then load the sign part out as an integer. - MVT LoadTy = TLI.getRegisterType(MVT::i8); - // First create a temporary that is aligned for both the load and store. - SDValue StackPtr = DAG.CreateStackTemporary(FloatVT, LoadTy); - int FI = cast(StackPtr.getNode())->getIndex(); - // Then store the float to it. - State.FloatPtr = StackPtr; - MachineFunction &MF = DAG.getMachineFunction(); - State.FloatPointerInfo = MachinePointerInfo::getFixedStack(MF, FI); - State.Chain = DAG.getStore(DAG.getEntryNode(), DL, Value, State.FloatPtr, - State.FloatPointerInfo); - - SDValue IntPtr; - if (DataLayout.isBigEndian()) { - assert(FloatVT.isByteSized() && "Unsupported floating point type!"); - // Load out a legal integer with the same sign bit as the float. - IntPtr = StackPtr; - State.IntPointerInfo = State.FloatPointerInfo; - } else { - // Advance the pointer so that the loaded byte will contain the sign bit. - unsigned ByteOffset = (NumBits / 8) - 1; - IntPtr = - DAG.getMemBasePlusOffset(StackPtr, TypeSize::getFixed(ByteOffset), DL); - State.IntPointerInfo = MachinePointerInfo::getFixedStack(MF, FI, - ByteOffset); - } - - State.IntPtr = IntPtr; - State.IntValue = DAG.getExtLoad(ISD::EXTLOAD, DL, LoadTy, State.Chain, IntPtr, - State.IntPointerInfo, MVT::i8); - State.SignMask = APInt::getOneBitSet(LoadTy.getScalarSizeInBits(), 7); - State.SignBit = 7; -} - -/// Replace the integer value produced by getSignAsIntValue() with a new value -/// and cast the result back to a floating-point type. -SDValue SelectionDAGLegalize::modifySignAsInt(const FloatSignAsInt &State, - const SDLoc &DL, - SDValue NewIntValue) const { - if (!State.Chain) - return DAG.getNode(ISD::BITCAST, DL, State.FloatVT, NewIntValue); - - // Override the part containing the sign bit in the value stored on the stack. - SDValue Chain = DAG.getTruncStore(State.Chain, DL, NewIntValue, State.IntPtr, - State.IntPointerInfo, MVT::i8); - return DAG.getLoad(State.FloatVT, DL, Chain, State.FloatPtr, - State.FloatPointerInfo); -} - SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode *Node) const { SDLoc DL(Node); SDValue Mag = Node->getOperand(0); @@ -1695,7 +1609,7 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode *Node) const { // Get sign bit into an integer value. FloatSignAsInt SignAsInt; - getSignAsIntValue(SignAsInt, DL, Sign); + DAG.getSignAsIntValue(SignAsInt, DL, Sign); EVT IntVT = SignAsInt.IntValue.getValueType(); SDValue SignMask = DAG.getConstant(SignAsInt.SignMask, DL, IntVT); @@ -1716,7 +1630,7 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode *Node) const { // Transform Mag value to integer, and clear the sign bit. FloatSignAsInt MagAsInt; - getSignAsIntValue(MagAsInt, DL, Mag); + DAG.getSignAsIntValue(MagAsInt, DL, Mag); EVT MagVT = MagAsInt.IntValue.getValueType(); SDValue ClearSignMask = DAG.getConstant(~MagAsInt.SignMask, DL, MagVT); SDValue ClearedSign = DAG.getNode(ISD::AND, DL, MagVT, MagAsInt.IntValue, @@ -1746,14 +1660,14 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode *Node) const { SDValue CopiedSign = DAG.getNode(ISD::OR, DL, MagVT, ClearedSign, SignBit, SDNodeFlags::Disjoint); - return modifySignAsInt(MagAsInt, DL, CopiedSign); + return DAG.modifySignAsInt(MagAsInt, DL, CopiedSign); } SDValue SelectionDAGLegalize::ExpandFNEG(SDNode *Node) const { // Get the sign bit as an integer. SDLoc DL(Node); FloatSignAsInt SignAsInt; - getSignAsIntValue(SignAsInt, DL, Node->getOperand(0)); + DAG.getSignAsIntValue(SignAsInt, DL, Node->getOperand(0)); EVT IntVT = SignAsInt.IntValue.getValueType(); // Flip the sign. @@ -1762,7 +1676,7 @@ SDValue SelectionDAGLegalize::ExpandFNEG(SDNode *Node) const { DAG.getNode(ISD::XOR, DL, IntVT, SignAsInt.IntValue, SignMask); // Convert back to float. - return modifySignAsInt(SignAsInt, DL, SignFlip); + return DAG.modifySignAsInt(SignAsInt, DL, SignFlip); } SDValue SelectionDAGLegalize::ExpandFABS(SDNode *Node) const { @@ -1778,12 +1692,12 @@ SDValue SelectionDAGLegalize::ExpandFABS(SDNode *Node) const { // Transform value to integer, clear the sign bit and transform back. FloatSignAsInt ValueAsInt; - getSignAsIntValue(ValueAsInt, DL, Value); + DAG.getSignAsIntValue(ValueAsInt, DL, Value); EVT IntVT = ValueAsInt.IntValue.getValueType(); SDValue ClearSignMask = DAG.getConstant(~ValueAsInt.SignMask, DL, IntVT); SDValue ClearedSign = DAG.getNode(ISD::AND, DL, IntVT, ValueAsInt.IntValue, ClearSignMask); - return modifySignAsInt(ValueAsInt, DL, ClearedSign); + return DAG.modifySignAsInt(ValueAsInt, DL, ClearedSign); } void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node, diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 1506bc4ee187d..023d53f24ed19 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -2760,6 +2760,66 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) { return CreateStackTemporary(Bytes, Align); } +void SelectionDAG::getSignAsIntValue(FloatSignAsInt &State, const SDLoc &DL, + SDValue Value) { + EVT FloatVT = Value.getValueType(); + unsigned NumBits = FloatVT.getScalarSizeInBits(); + State.FloatVT = FloatVT; + EVT IVT = FloatVT.changeTypeToInteger(); + // Convert to an integer of the same size. + if (TLI->isTypeLegal(IVT)) { + State.IntValue = getNode(ISD::BITCAST, DL, IVT, Value); + State.SignMask = APInt::getSignMask(NumBits); + State.SignBit = NumBits - 1; + return; + } + + auto &DataLayout = getDataLayout(); + // Store the float to memory, then load the sign part out as an integer. + MVT LoadTy = TLI->getRegisterType(MVT::i8); + // First create a temporary that is aligned for both the load and store. + SDValue StackPtr = CreateStackTemporary(FloatVT, LoadTy); + int FI = cast(StackPtr.getNode())->getIndex(); + // Then store the float to it. + State.FloatPtr = StackPtr; + MachineFunction &MF = getMachineFunction(); + State.FloatPointerInfo = MachinePointerInfo::getFixedStack(MF, FI); + State.Chain = getStore(getEntryNode(), DL, Value, State.FloatPtr, + State.FloatPointerInfo); + + SDValue IntPtr; + if (DataLayout.isBigEndian()) { + assert(FloatVT.isByteSized() && "Unsupported floating point type!"); + // Load out a legal integer with the same sign bit as the float. + IntPtr = StackPtr; + State.IntPointerInfo = State.FloatPointerInfo; + } else { + // Advance the pointer so that the loaded byte will contain the sign bit. + unsigned ByteOffset = (NumBits / 8) - 1; + IntPtr = getMemBasePlusOffset(StackPtr, TypeSize::getFixed(ByteOffset), DL); + State.IntPointerInfo = + MachinePointerInfo::getFixedStack(MF, FI, ByteOffset); + } + + State.IntPtr = IntPtr; + State.IntValue = getExtLoad(ISD::EXTLOAD, DL, LoadTy, State.Chain, IntPtr, + State.IntPointerInfo, MVT::i8); + State.SignMask = APInt::getOneBitSet(LoadTy.getScalarSizeInBits(), 7); + State.SignBit = 7; +} + +SDValue SelectionDAG::modifySignAsInt(const FloatSignAsInt &State, + const SDLoc &DL, SDValue NewIntValue) { + if (!State.Chain) + return getNode(ISD::BITCAST, DL, State.FloatVT, NewIntValue); + + // Override the part containing the sign bit in the value stored on the stack. + SDValue Chain = getTruncStore(State.Chain, DL, NewIntValue, State.IntPtr, + State.IntPointerInfo, MVT::i8); + return getLoad(State.FloatVT, DL, Chain, State.FloatPtr, + State.FloatPointerInfo); +} + SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2, ISD::CondCode Cond, const SDLoc &dl) { EVT OpVT = N1.getValueType(); diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 22d0bc9914585..13315fed7ed2a 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -8610,16 +8610,18 @@ SDValue TargetLowering::expandFMINIMUM_FMAXIMUM(SDNode *N, // fminimum/fmaximum requires -0.0 less than +0.0 if (!MinMaxMustRespectOrderedZero && !N->getFlags().hasNoSignedZeros() && !DAG.isKnownNeverZeroFloat(RHS) && !DAG.isKnownNeverZeroFloat(LHS)) { + auto IsSpecificZero = [&](SDValue F) { + FloatSignAsInt State; + DAG.getSignAsIntValue(State, DL, F); + return DAG.getSetCC(DL, CCVT, State.IntValue, + DAG.getConstant(0, DL, State.IntValue.getValueType()), + IsMax ? ISD::SETEQ : ISD::SETNE); + }; SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax, DAG.getConstantFP(0.0, DL, VT), ISD::SETOEQ); - SDValue TestZero = - DAG.getTargetConstant(IsMax ? fcPosZero : fcNegZero, DL, MVT::i32); - SDValue LCmp = DAG.getSelect( - DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, LHS, TestZero), LHS, - MinMax, Flags); - SDValue RCmp = DAG.getSelect( - DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, RHS, TestZero), RHS, - LCmp, Flags); + SDValue LCmp = + DAG.getSelect(DL, VT, IsSpecificZero(LHS), LHS, MinMax, Flags); + SDValue RCmp = DAG.getSelect(DL, VT, IsSpecificZero(RHS), RHS, LCmp, Flags); MinMax = DAG.getSelect(DL, VT, IsZero, RCmp, MinMax, Flags); } diff --git a/llvm/test/CodeGen/AArch64/fmaximum-legalization.ll b/llvm/test/CodeGen/AArch64/fmaximum-legalization.ll index 86c1474068482..9f542abcb80f7 100644 --- a/llvm/test/CodeGen/AArch64/fmaximum-legalization.ll +++ b/llvm/test/CodeGen/AArch64/fmaximum-legalization.ll @@ -41,3 +41,59 @@ define <4 x half> @fmaximum_v4f16(<4 x half> %x, <4 x half> %y) { %r = call <4 x half> @llvm.maximum.v4f16(<4 x half> %x, <4 x half> %y) ret <4 x half> %r } + +define fp128 @maximum_fp128(fp128 %x, fp128 %y) nounwind { +; CHECK-LABEL: maximum_fp128: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #96 +; CHECK-NEXT: str x30, [sp, #80] // 8-byte Folded Spill +; CHECK-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill +; CHECK-NEXT: stp q1, q0, [sp, #48] +; CHECK-NEXT: bl __gttf2 +; CHECK-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: b.le .LBB1_2 +; CHECK-NEXT: // %bb.1: +; CHECK-NEXT: mov v1.16b, v0.16b +; CHECK-NEXT: .LBB1_2: +; CHECK-NEXT: str q1, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: bl __unordtf2 +; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: b.eq .LBB1_4 +; CHECK-NEXT: // %bb.3: +; CHECK-NEXT: adrp x8, .LCPI1_0 +; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI1_0] +; CHECK-NEXT: .LBB1_4: +; CHECK-NEXT: ldrb w8, [sp, #79] +; CHECK-NEXT: mov v1.16b, v0.16b +; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: b.ne .LBB1_6 +; CHECK-NEXT: // %bb.5: +; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NEXT: .LBB1_6: +; CHECK-NEXT: ldrb w8, [sp, #63] +; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: b.ne .LBB1_8 +; CHECK-NEXT: // %bb.7: +; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: .LBB1_8: +; CHECK-NEXT: adrp x8, .LCPI1_1 +; CHECK-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI1_1] +; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: bl __eqtf2 +; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: b.ne .LBB1_10 +; CHECK-NEXT: // %bb.9: +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: .LBB1_10: +; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload +; CHECK-NEXT: add sp, sp, #96 +; CHECK-NEXT: ret + %res = call fp128 @llvm.maximum.f128(fp128 %x, fp128 %y) + ret fp128 %res +} diff --git a/llvm/test/CodeGen/ARM/fp-maximum-legalization.ll b/llvm/test/CodeGen/ARM/fp-maximum-legalization.ll new file mode 100644 index 0000000000000..a3ab144356e16 --- /dev/null +++ b/llvm/test/CodeGen/ARM/fp-maximum-legalization.ll @@ -0,0 +1,49 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=armv7 | FileCheck %s + +define double @maximum_double(double %x, double %y) nounwind { +; CHECK-LABEL: maximum_double: +; CHECK: @ %bb.0: +; CHECK-NEXT: sub sp, sp, #16 +; CHECK-NEXT: vmov d17, r2, r3 +; CHECK-NEXT: mov r2, #0 +; CHECK-NEXT: vmov d16, r0, r1 +; CHECK-NEXT: mov r3, #0 +; CHECK-NEXT: vcmp.f64 d16, d17 +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vstr d16, [sp, #8] +; CHECK-NEXT: vstr d17, [sp] +; CHECK-NEXT: ldrb r1, [sp, #15] +; CHECK-NEXT: vmov.f64 d19, d17 +; CHECK-NEXT: clz r1, r1 +; CHECK-NEXT: vldr d18, .LCPI0_0 +; CHECK-NEXT: movwvs r2, #1 +; CHECK-NEXT: movwgt r3, #1 +; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: vmovne.f64 d19, d16 +; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: ldrb r2, [sp, #7] +; CHECK-NEXT: vmovne.f64 d19, d18 +; CHECK-NEXT: lsrs r1, r1, #5 +; CHECK-NEXT: clz r1, r2 +; CHECK-NEXT: vcmp.f64 d19, #0 +; CHECK-NEXT: vmov.f64 d18, d19 +; CHECK-NEXT: vmovne.f64 d18, d16 +; CHECK-NEXT: lsrs r1, r1, #5 +; CHECK-NEXT: vmovne.f64 d18, d17 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: movweq r0, #1 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: vmovne.f64 d19, d18 +; CHECK-NEXT: vmov r0, r1, d19 +; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 3 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI0_0: +; CHECK-NEXT: .long 0 @ double NaN +; CHECK-NEXT: .long 2146959360 + %res = call double @llvm.maximum(double %x, double %y) + ret double %res +} diff --git a/llvm/test/CodeGen/NVPTX/bf16-instructions.ll b/llvm/test/CodeGen/NVPTX/bf16-instructions.ll index 32225ed04e2d9..096649e5bde43 100644 --- a/llvm/test/CodeGen/NVPTX/bf16-instructions.ll +++ b/llvm/test/CodeGen/NVPTX/bf16-instructions.ll @@ -1476,7 +1476,7 @@ define <2 x bfloat> @test_maximum_v2(<2 x bfloat> %a, <2 x bfloat> %b) { ; SM70-LABEL: test_maximum_v2( ; SM70: { ; SM70-NEXT: .reg .pred %p<11>; -; SM70-NEXT: .reg .b16 %rs<15>; +; SM70-NEXT: .reg .b16 %rs<19>; ; SM70-NEXT: .reg .b32 %r<16>; ; SM70-EMPTY: ; SM70-NEXT: // %bb.0: @@ -1493,30 +1493,30 @@ define <2 x bfloat> @test_maximum_v2(<2 x bfloat> %a, <2 x bfloat> %b) { ; SM70-NEXT: setp.nan.f32 %p2, %r6, %r4; ; SM70-NEXT: selp.b16 %rs6, 0x7FC0, %rs5, %p2; ; SM70-NEXT: setp.eq.s16 %p3, %rs4, 0; -; SM70-NEXT: selp.b16 %rs7, %rs4, %rs6, %p3; +; SM70-NEXT: selp.b16 %rs9, %rs4, %rs6, %p3; ; SM70-NEXT: setp.eq.s16 %p4, %rs2, 0; -; SM70-NEXT: selp.b16 %rs8, %rs2, %rs7, %p4; +; SM70-NEXT: selp.b16 %rs12, %rs2, %rs9, %p4; ; SM70-NEXT: cvt.u32.u16 %r7, %rs6; ; SM70-NEXT: shl.b32 %r8, %r7, 16; ; SM70-NEXT: setp.eq.f32 %p5, %r8, 0f00000000; -; SM70-NEXT: selp.b16 %rs9, %rs8, %rs6, %p5; +; SM70-NEXT: selp.b16 %rs13, %rs12, %rs6, %p5; ; SM70-NEXT: cvt.u32.u16 %r9, %rs1; ; SM70-NEXT: shl.b32 %r10, %r9, 16; ; SM70-NEXT: cvt.u32.u16 %r11, %rs3; ; SM70-NEXT: shl.b32 %r12, %r11, 16; ; SM70-NEXT: setp.gt.f32 %p6, %r12, %r10; -; SM70-NEXT: selp.b16 %rs10, %rs3, %rs1, %p6; +; SM70-NEXT: selp.b16 %rs14, %rs3, %rs1, %p6; ; SM70-NEXT: setp.nan.f32 %p7, %r12, %r10; -; SM70-NEXT: selp.b16 %rs11, 0x7FC0, %rs10, %p7; +; SM70-NEXT: selp.b16 %rs15, 0x7FC0, %rs14, %p7; ; SM70-NEXT: setp.eq.s16 %p8, %rs3, 0; -; SM70-NEXT: selp.b16 %rs12, %rs3, %rs11, %p8; +; SM70-NEXT: selp.b16 %rs16, %rs3, %rs15, %p8; ; SM70-NEXT: setp.eq.s16 %p9, %rs1, 0; -; SM70-NEXT: selp.b16 %rs13, %rs1, %rs12, %p9; -; SM70-NEXT: cvt.u32.u16 %r13, %rs11; +; SM70-NEXT: selp.b16 %rs17, %rs1, %rs16, %p9; +; SM70-NEXT: cvt.u32.u16 %r13, %rs15; ; SM70-NEXT: shl.b32 %r14, %r13, 16; ; SM70-NEXT: setp.eq.f32 %p10, %r14, 0f00000000; -; SM70-NEXT: selp.b16 %rs14, %rs13, %rs11, %p10; -; SM70-NEXT: mov.b32 %r15, {%rs14, %rs9}; +; SM70-NEXT: selp.b16 %rs18, %rs17, %rs15, %p10; +; SM70-NEXT: mov.b32 %r15, {%rs18, %rs13}; ; SM70-NEXT: st.param.b32 [func_retval0], %r15; ; SM70-NEXT: ret; ; diff --git a/llvm/test/CodeGen/NVPTX/math-intrins.ll b/llvm/test/CodeGen/NVPTX/math-intrins.ll index 71af7a7d475d3..441fdec7ce5c0 100644 --- a/llvm/test/CodeGen/NVPTX/math-intrins.ll +++ b/llvm/test/CodeGen/NVPTX/math-intrins.ll @@ -625,9 +625,9 @@ define half @minimum_half(half %a, half %b) { ; CHECK-NOF16-NEXT: selp.b16 %rs3, %rs1, %rs2, %p1; ; CHECK-NOF16-NEXT: setp.nan.f32 %p2, %r2, %r1; ; CHECK-NOF16-NEXT: selp.b16 %rs4, 0x7E00, %rs3, %p2; -; CHECK-NOF16-NEXT: setp.eq.s16 %p3, %rs1, -32768; +; CHECK-NOF16-NEXT: setp.ne.s16 %p3, %rs1, 0; ; CHECK-NOF16-NEXT: selp.b16 %rs5, %rs1, %rs4, %p3; -; CHECK-NOF16-NEXT: setp.eq.s16 %p4, %rs2, -32768; +; CHECK-NOF16-NEXT: setp.ne.s16 %p4, %rs2, 0; ; CHECK-NOF16-NEXT: selp.b16 %rs6, %rs2, %rs5, %p4; ; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs4; ; CHECK-NOF16-NEXT: setp.eq.f32 %p5, %r3, 0f00000000; @@ -661,9 +661,9 @@ define half @minimum_half(half %a, half %b) { ; CHECK-SM80-NOF16-NEXT: selp.b16 %rs3, %rs1, %rs2, %p1; ; CHECK-SM80-NOF16-NEXT: setp.nan.f32 %p2, %r2, %r1; ; CHECK-SM80-NOF16-NEXT: selp.b16 %rs4, 0x7E00, %rs3, %p2; -; CHECK-SM80-NOF16-NEXT: setp.eq.s16 %p3, %rs1, -32768; +; CHECK-SM80-NOF16-NEXT: setp.ne.s16 %p3, %rs1, 0; ; CHECK-SM80-NOF16-NEXT: selp.b16 %rs5, %rs1, %rs4, %p3; -; CHECK-SM80-NOF16-NEXT: setp.eq.s16 %p4, %rs2, -32768; +; CHECK-SM80-NOF16-NEXT: setp.ne.s16 %p4, %rs2, 0; ; CHECK-SM80-NOF16-NEXT: selp.b16 %rs6, %rs2, %rs5, %p4; ; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r3, %rs4; ; CHECK-SM80-NOF16-NEXT: setp.eq.f32 %p5, %r3, 0f00000000; @@ -686,9 +686,9 @@ define float @minimum_float(float %a, float %b) { ; CHECK-NOF16-NEXT: setp.nan.f32 %p1, %r1, %r2; ; CHECK-NOF16-NEXT: min.f32 %r3, %r1, %r2; ; CHECK-NOF16-NEXT: selp.f32 %r4, 0f7FC00000, %r3, %p1; -; CHECK-NOF16-NEXT: setp.eq.s32 %p2, %r1, -2147483648; +; CHECK-NOF16-NEXT: setp.ne.s32 %p2, %r1, 0; ; CHECK-NOF16-NEXT: selp.f32 %r5, %r1, %r4, %p2; -; CHECK-NOF16-NEXT: setp.eq.s32 %p3, %r2, -2147483648; +; CHECK-NOF16-NEXT: setp.ne.s32 %p3, %r2, 0; ; CHECK-NOF16-NEXT: selp.f32 %r6, %r2, %r5, %p3; ; CHECK-NOF16-NEXT: setp.eq.f32 %p4, %r4, 0f00000000; ; CHECK-NOF16-NEXT: selp.f32 %r7, %r6, %r4, %p4; @@ -731,7 +731,7 @@ define float @minimum_imm1(float %a) { ; CHECK-NOF16-NEXT: setp.nan.f32 %p1, %r1, %r1; ; CHECK-NOF16-NEXT: min.f32 %r2, %r1, 0f00000000; ; CHECK-NOF16-NEXT: selp.f32 %r3, 0f7FC00000, %r2, %p1; -; CHECK-NOF16-NEXT: setp.eq.s32 %p2, %r1, -2147483648; +; CHECK-NOF16-NEXT: setp.ne.s32 %p2, %r1, 0; ; CHECK-NOF16-NEXT: selp.f32 %r4, %r1, %r3, %p2; ; CHECK-NOF16-NEXT: setp.eq.f32 %p3, %r3, 0f00000000; ; CHECK-NOF16-NEXT: selp.f32 %r5, %r4, %r3, %p3; @@ -772,7 +772,7 @@ define float @minimum_imm2(float %a) { ; CHECK-NOF16-NEXT: setp.nan.f32 %p1, %r1, %r1; ; CHECK-NOF16-NEXT: min.f32 %r2, %r1, 0f00000000; ; CHECK-NOF16-NEXT: selp.f32 %r3, 0f7FC00000, %r2, %p1; -; CHECK-NOF16-NEXT: setp.eq.s32 %p2, %r1, -2147483648; +; CHECK-NOF16-NEXT: setp.ne.s32 %p2, %r1, 0; ; CHECK-NOF16-NEXT: selp.f32 %r4, %r1, %r3, %p2; ; CHECK-NOF16-NEXT: setp.eq.f32 %p3, %r3, 0f00000000; ; CHECK-NOF16-NEXT: selp.f32 %r5, %r4, %r3, %p3; @@ -814,9 +814,9 @@ define float @minimum_float_ftz(float %a, float %b) #1 { ; CHECK-NOF16-NEXT: setp.nan.ftz.f32 %p1, %r1, %r2; ; CHECK-NOF16-NEXT: min.ftz.f32 %r3, %r1, %r2; ; CHECK-NOF16-NEXT: selp.f32 %r4, 0f7FC00000, %r3, %p1; -; CHECK-NOF16-NEXT: setp.eq.s32 %p2, %r1, -2147483648; +; CHECK-NOF16-NEXT: setp.ne.s32 %p2, %r1, 0; ; CHECK-NOF16-NEXT: selp.f32 %r5, %r1, %r4, %p2; -; CHECK-NOF16-NEXT: setp.eq.s32 %p3, %r2, -2147483648; +; CHECK-NOF16-NEXT: setp.ne.s32 %p3, %r2, 0; ; CHECK-NOF16-NEXT: selp.f32 %r6, %r2, %r5, %p3; ; CHECK-NOF16-NEXT: setp.eq.ftz.f32 %p4, %r4, 0f00000000; ; CHECK-NOF16-NEXT: selp.f32 %r7, %r6, %r4, %p4; @@ -860,9 +860,9 @@ define double @minimum_double(double %a, double %b) { ; CHECK-NEXT: setp.nan.f64 %p1, %rd1, %rd2; ; CHECK-NEXT: min.f64 %rd3, %rd1, %rd2; ; CHECK-NEXT: selp.f64 %rd4, 0d7FF8000000000000, %rd3, %p1; -; CHECK-NEXT: setp.eq.s64 %p2, %rd1, -9223372036854775808; +; CHECK-NEXT: setp.ne.s64 %p2, %rd1, 0; ; CHECK-NEXT: selp.f64 %rd5, %rd1, %rd4, %p2; -; CHECK-NEXT: setp.eq.s64 %p3, %rd2, -9223372036854775808; +; CHECK-NEXT: setp.ne.s64 %p3, %rd2, 0; ; CHECK-NEXT: selp.f64 %rd6, %rd2, %rd5, %p3; ; CHECK-NEXT: setp.eq.f64 %p4, %rd4, 0d0000000000000000; ; CHECK-NEXT: selp.f64 %rd7, %rd6, %rd4, %p4; @@ -876,7 +876,7 @@ define <2 x half> @minimum_v2half(<2 x half> %a, <2 x half> %b) { ; CHECK-NOF16-LABEL: minimum_v2half( ; CHECK-NOF16: { ; CHECK-NOF16-NEXT: .reg .pred %p<11>; -; CHECK-NOF16-NEXT: .reg .b16 %rs<15>; +; CHECK-NOF16-NEXT: .reg .b16 %rs<19>; ; CHECK-NOF16-NEXT: .reg .b32 %r<10>; ; CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: @@ -890,27 +890,27 @@ define <2 x half> @minimum_v2half(<2 x half> %a, <2 x half> %b) { ; CHECK-NOF16-NEXT: selp.b16 %rs5, %rs4, %rs2, %p1; ; CHECK-NOF16-NEXT: setp.nan.f32 %p2, %r4, %r3; ; CHECK-NOF16-NEXT: selp.b16 %rs6, 0x7E00, %rs5, %p2; -; CHECK-NOF16-NEXT: setp.eq.s16 %p3, %rs4, -32768; -; CHECK-NOF16-NEXT: selp.b16 %rs7, %rs4, %rs6, %p3; -; CHECK-NOF16-NEXT: setp.eq.s16 %p4, %rs2, -32768; -; CHECK-NOF16-NEXT: selp.b16 %rs8, %rs2, %rs7, %p4; +; CHECK-NOF16-NEXT: setp.ne.s16 %p3, %rs4, 0; +; CHECK-NOF16-NEXT: selp.b16 %rs9, %rs4, %rs6, %p3; +; CHECK-NOF16-NEXT: setp.ne.s16 %p4, %rs2, 0; +; CHECK-NOF16-NEXT: selp.b16 %rs12, %rs2, %rs9, %p4; ; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs6; ; CHECK-NOF16-NEXT: setp.eq.f32 %p5, %r5, 0f00000000; -; CHECK-NOF16-NEXT: selp.b16 %rs9, %rs8, %rs6, %p5; +; CHECK-NOF16-NEXT: selp.b16 %rs13, %rs12, %rs6, %p5; ; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs1; ; CHECK-NOF16-NEXT: cvt.f32.f16 %r7, %rs3; ; CHECK-NOF16-NEXT: setp.lt.f32 %p6, %r7, %r6; -; CHECK-NOF16-NEXT: selp.b16 %rs10, %rs3, %rs1, %p6; +; CHECK-NOF16-NEXT: selp.b16 %rs14, %rs3, %rs1, %p6; ; CHECK-NOF16-NEXT: setp.nan.f32 %p7, %r7, %r6; -; CHECK-NOF16-NEXT: selp.b16 %rs11, 0x7E00, %rs10, %p7; -; CHECK-NOF16-NEXT: setp.eq.s16 %p8, %rs3, -32768; -; CHECK-NOF16-NEXT: selp.b16 %rs12, %rs3, %rs11, %p8; -; CHECK-NOF16-NEXT: setp.eq.s16 %p9, %rs1, -32768; -; CHECK-NOF16-NEXT: selp.b16 %rs13, %rs1, %rs12, %p9; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r8, %rs11; +; CHECK-NOF16-NEXT: selp.b16 %rs15, 0x7E00, %rs14, %p7; +; CHECK-NOF16-NEXT: setp.ne.s16 %p8, %rs3, 0; +; CHECK-NOF16-NEXT: selp.b16 %rs16, %rs3, %rs15, %p8; +; CHECK-NOF16-NEXT: setp.ne.s16 %p9, %rs1, 0; +; CHECK-NOF16-NEXT: selp.b16 %rs17, %rs1, %rs16, %p9; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r8, %rs15; ; CHECK-NOF16-NEXT: setp.eq.f32 %p10, %r8, 0f00000000; -; CHECK-NOF16-NEXT: selp.b16 %rs14, %rs13, %rs11, %p10; -; CHECK-NOF16-NEXT: mov.b32 %r9, {%rs14, %rs9}; +; CHECK-NOF16-NEXT: selp.b16 %rs18, %rs17, %rs15, %p10; +; CHECK-NOF16-NEXT: mov.b32 %r9, {%rs18, %rs13}; ; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r9; ; CHECK-NOF16-NEXT: ret; ; @@ -928,7 +928,7 @@ define <2 x half> @minimum_v2half(<2 x half> %a, <2 x half> %b) { ; CHECK-SM80-NOF16-LABEL: minimum_v2half( ; CHECK-SM80-NOF16: { ; CHECK-SM80-NOF16-NEXT: .reg .pred %p<11>; -; CHECK-SM80-NOF16-NEXT: .reg .b16 %rs<15>; +; CHECK-SM80-NOF16-NEXT: .reg .b16 %rs<19>; ; CHECK-SM80-NOF16-NEXT: .reg .b32 %r<10>; ; CHECK-SM80-NOF16-EMPTY: ; CHECK-SM80-NOF16-NEXT: // %bb.0: @@ -942,27 +942,27 @@ define <2 x half> @minimum_v2half(<2 x half> %a, <2 x half> %b) { ; CHECK-SM80-NOF16-NEXT: selp.b16 %rs5, %rs4, %rs2, %p1; ; CHECK-SM80-NOF16-NEXT: setp.nan.f32 %p2, %r4, %r3; ; CHECK-SM80-NOF16-NEXT: selp.b16 %rs6, 0x7E00, %rs5, %p2; -; CHECK-SM80-NOF16-NEXT: setp.eq.s16 %p3, %rs4, -32768; -; CHECK-SM80-NOF16-NEXT: selp.b16 %rs7, %rs4, %rs6, %p3; -; CHECK-SM80-NOF16-NEXT: setp.eq.s16 %p4, %rs2, -32768; -; CHECK-SM80-NOF16-NEXT: selp.b16 %rs8, %rs2, %rs7, %p4; +; CHECK-SM80-NOF16-NEXT: setp.ne.s16 %p3, %rs4, 0; +; CHECK-SM80-NOF16-NEXT: selp.b16 %rs9, %rs4, %rs6, %p3; +; CHECK-SM80-NOF16-NEXT: setp.ne.s16 %p4, %rs2, 0; +; CHECK-SM80-NOF16-NEXT: selp.b16 %rs12, %rs2, %rs9, %p4; ; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r5, %rs6; ; CHECK-SM80-NOF16-NEXT: setp.eq.f32 %p5, %r5, 0f00000000; -; CHECK-SM80-NOF16-NEXT: selp.b16 %rs9, %rs8, %rs6, %p5; +; CHECK-SM80-NOF16-NEXT: selp.b16 %rs13, %rs12, %rs6, %p5; ; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r6, %rs1; ; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r7, %rs3; ; CHECK-SM80-NOF16-NEXT: setp.lt.f32 %p6, %r7, %r6; -; CHECK-SM80-NOF16-NEXT: selp.b16 %rs10, %rs3, %rs1, %p6; +; CHECK-SM80-NOF16-NEXT: selp.b16 %rs14, %rs3, %rs1, %p6; ; CHECK-SM80-NOF16-NEXT: setp.nan.f32 %p7, %r7, %r6; -; CHECK-SM80-NOF16-NEXT: selp.b16 %rs11, 0x7E00, %rs10, %p7; -; CHECK-SM80-NOF16-NEXT: setp.eq.s16 %p8, %rs3, -32768; -; CHECK-SM80-NOF16-NEXT: selp.b16 %rs12, %rs3, %rs11, %p8; -; CHECK-SM80-NOF16-NEXT: setp.eq.s16 %p9, %rs1, -32768; -; CHECK-SM80-NOF16-NEXT: selp.b16 %rs13, %rs1, %rs12, %p9; -; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r8, %rs11; +; CHECK-SM80-NOF16-NEXT: selp.b16 %rs15, 0x7E00, %rs14, %p7; +; CHECK-SM80-NOF16-NEXT: setp.ne.s16 %p8, %rs3, 0; +; CHECK-SM80-NOF16-NEXT: selp.b16 %rs16, %rs3, %rs15, %p8; +; CHECK-SM80-NOF16-NEXT: setp.ne.s16 %p9, %rs1, 0; +; CHECK-SM80-NOF16-NEXT: selp.b16 %rs17, %rs1, %rs16, %p9; +; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r8, %rs15; ; CHECK-SM80-NOF16-NEXT: setp.eq.f32 %p10, %r8, 0f00000000; -; CHECK-SM80-NOF16-NEXT: selp.b16 %rs14, %rs13, %rs11, %p10; -; CHECK-SM80-NOF16-NEXT: mov.b32 %r9, {%rs14, %rs9}; +; CHECK-SM80-NOF16-NEXT: selp.b16 %rs18, %rs17, %rs15, %p10; +; CHECK-SM80-NOF16-NEXT: mov.b32 %r9, {%rs18, %rs13}; ; CHECK-SM80-NOF16-NEXT: st.param.b32 [func_retval0], %r9; ; CHECK-SM80-NOF16-NEXT: ret; %x = call <2 x half> @llvm.minimum.v2f16(<2 x half> %a, <2 x half> %b) @@ -1413,7 +1413,7 @@ define <2 x half> @maximum_v2half(<2 x half> %a, <2 x half> %b) { ; CHECK-NOF16-LABEL: maximum_v2half( ; CHECK-NOF16: { ; CHECK-NOF16-NEXT: .reg .pred %p<11>; -; CHECK-NOF16-NEXT: .reg .b16 %rs<15>; +; CHECK-NOF16-NEXT: .reg .b16 %rs<19>; ; CHECK-NOF16-NEXT: .reg .b32 %r<10>; ; CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: @@ -1428,26 +1428,26 @@ define <2 x half> @maximum_v2half(<2 x half> %a, <2 x half> %b) { ; CHECK-NOF16-NEXT: setp.nan.f32 %p2, %r4, %r3; ; CHECK-NOF16-NEXT: selp.b16 %rs6, 0x7E00, %rs5, %p2; ; CHECK-NOF16-NEXT: setp.eq.s16 %p3, %rs4, 0; -; CHECK-NOF16-NEXT: selp.b16 %rs7, %rs4, %rs6, %p3; +; CHECK-NOF16-NEXT: selp.b16 %rs9, %rs4, %rs6, %p3; ; CHECK-NOF16-NEXT: setp.eq.s16 %p4, %rs2, 0; -; CHECK-NOF16-NEXT: selp.b16 %rs8, %rs2, %rs7, %p4; +; CHECK-NOF16-NEXT: selp.b16 %rs12, %rs2, %rs9, %p4; ; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs6; ; CHECK-NOF16-NEXT: setp.eq.f32 %p5, %r5, 0f00000000; -; CHECK-NOF16-NEXT: selp.b16 %rs9, %rs8, %rs6, %p5; +; CHECK-NOF16-NEXT: selp.b16 %rs13, %rs12, %rs6, %p5; ; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs1; ; CHECK-NOF16-NEXT: cvt.f32.f16 %r7, %rs3; ; CHECK-NOF16-NEXT: setp.gt.f32 %p6, %r7, %r6; -; CHECK-NOF16-NEXT: selp.b16 %rs10, %rs3, %rs1, %p6; +; CHECK-NOF16-NEXT: selp.b16 %rs14, %rs3, %rs1, %p6; ; CHECK-NOF16-NEXT: setp.nan.f32 %p7, %r7, %r6; -; CHECK-NOF16-NEXT: selp.b16 %rs11, 0x7E00, %rs10, %p7; +; CHECK-NOF16-NEXT: selp.b16 %rs15, 0x7E00, %rs14, %p7; ; CHECK-NOF16-NEXT: setp.eq.s16 %p8, %rs3, 0; -; CHECK-NOF16-NEXT: selp.b16 %rs12, %rs3, %rs11, %p8; +; CHECK-NOF16-NEXT: selp.b16 %rs16, %rs3, %rs15, %p8; ; CHECK-NOF16-NEXT: setp.eq.s16 %p9, %rs1, 0; -; CHECK-NOF16-NEXT: selp.b16 %rs13, %rs1, %rs12, %p9; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r8, %rs11; +; CHECK-NOF16-NEXT: selp.b16 %rs17, %rs1, %rs16, %p9; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r8, %rs15; ; CHECK-NOF16-NEXT: setp.eq.f32 %p10, %r8, 0f00000000; -; CHECK-NOF16-NEXT: selp.b16 %rs14, %rs13, %rs11, %p10; -; CHECK-NOF16-NEXT: mov.b32 %r9, {%rs14, %rs9}; +; CHECK-NOF16-NEXT: selp.b16 %rs18, %rs17, %rs15, %p10; +; CHECK-NOF16-NEXT: mov.b32 %r9, {%rs18, %rs13}; ; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r9; ; CHECK-NOF16-NEXT: ret; ; @@ -1465,7 +1465,7 @@ define <2 x half> @maximum_v2half(<2 x half> %a, <2 x half> %b) { ; CHECK-SM80-NOF16-LABEL: maximum_v2half( ; CHECK-SM80-NOF16: { ; CHECK-SM80-NOF16-NEXT: .reg .pred %p<11>; -; CHECK-SM80-NOF16-NEXT: .reg .b16 %rs<15>; +; CHECK-SM80-NOF16-NEXT: .reg .b16 %rs<19>; ; CHECK-SM80-NOF16-NEXT: .reg .b32 %r<10>; ; CHECK-SM80-NOF16-EMPTY: ; CHECK-SM80-NOF16-NEXT: // %bb.0: @@ -1480,26 +1480,26 @@ define <2 x half> @maximum_v2half(<2 x half> %a, <2 x half> %b) { ; CHECK-SM80-NOF16-NEXT: setp.nan.f32 %p2, %r4, %r3; ; CHECK-SM80-NOF16-NEXT: selp.b16 %rs6, 0x7E00, %rs5, %p2; ; CHECK-SM80-NOF16-NEXT: setp.eq.s16 %p3, %rs4, 0; -; CHECK-SM80-NOF16-NEXT: selp.b16 %rs7, %rs4, %rs6, %p3; +; CHECK-SM80-NOF16-NEXT: selp.b16 %rs9, %rs4, %rs6, %p3; ; CHECK-SM80-NOF16-NEXT: setp.eq.s16 %p4, %rs2, 0; -; CHECK-SM80-NOF16-NEXT: selp.b16 %rs8, %rs2, %rs7, %p4; +; CHECK-SM80-NOF16-NEXT: selp.b16 %rs12, %rs2, %rs9, %p4; ; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r5, %rs6; ; CHECK-SM80-NOF16-NEXT: setp.eq.f32 %p5, %r5, 0f00000000; -; CHECK-SM80-NOF16-NEXT: selp.b16 %rs9, %rs8, %rs6, %p5; +; CHECK-SM80-NOF16-NEXT: selp.b16 %rs13, %rs12, %rs6, %p5; ; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r6, %rs1; ; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r7, %rs3; ; CHECK-SM80-NOF16-NEXT: setp.gt.f32 %p6, %r7, %r6; -; CHECK-SM80-NOF16-NEXT: selp.b16 %rs10, %rs3, %rs1, %p6; +; CHECK-SM80-NOF16-NEXT: selp.b16 %rs14, %rs3, %rs1, %p6; ; CHECK-SM80-NOF16-NEXT: setp.nan.f32 %p7, %r7, %r6; -; CHECK-SM80-NOF16-NEXT: selp.b16 %rs11, 0x7E00, %rs10, %p7; +; CHECK-SM80-NOF16-NEXT: selp.b16 %rs15, 0x7E00, %rs14, %p7; ; CHECK-SM80-NOF16-NEXT: setp.eq.s16 %p8, %rs3, 0; -; CHECK-SM80-NOF16-NEXT: selp.b16 %rs12, %rs3, %rs11, %p8; +; CHECK-SM80-NOF16-NEXT: selp.b16 %rs16, %rs3, %rs15, %p8; ; CHECK-SM80-NOF16-NEXT: setp.eq.s16 %p9, %rs1, 0; -; CHECK-SM80-NOF16-NEXT: selp.b16 %rs13, %rs1, %rs12, %p9; -; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r8, %rs11; +; CHECK-SM80-NOF16-NEXT: selp.b16 %rs17, %rs1, %rs16, %p9; +; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r8, %rs15; ; CHECK-SM80-NOF16-NEXT: setp.eq.f32 %p10, %r8, 0f00000000; -; CHECK-SM80-NOF16-NEXT: selp.b16 %rs14, %rs13, %rs11, %p10; -; CHECK-SM80-NOF16-NEXT: mov.b32 %r9, {%rs14, %rs9}; +; CHECK-SM80-NOF16-NEXT: selp.b16 %rs18, %rs17, %rs15, %p10; +; CHECK-SM80-NOF16-NEXT: mov.b32 %r9, {%rs18, %rs13}; ; CHECK-SM80-NOF16-NEXT: st.param.b32 [func_retval0], %r9; ; CHECK-SM80-NOF16-NEXT: ret; %x = call <2 x half> @llvm.maximum.v2f16(<2 x half> %a, <2 x half> %b) diff --git a/llvm/test/CodeGen/PowerPC/fminimum-fmaximum-f128.ll b/llvm/test/CodeGen/PowerPC/fminimum-fmaximum-f128.ll index 6d9eb13376827..48107c8f63727 100644 --- a/llvm/test/CodeGen/PowerPC/fminimum-fmaximum-f128.ll +++ b/llvm/test/CodeGen/PowerPC/fminimum-fmaximum-f128.ll @@ -4,44 +4,42 @@ define fp128 @f128_minimum(fp128 %a, fp128 %b) { ; CHECK-LABEL: f128_minimum: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: stxv 34, -16(1) +; CHECK-NEXT: stxv 35, -32(1) ; CHECK-NEXT: xscmpuqp 0, 2, 3 ; CHECK-NEXT: vmr 4, 2 -; CHECK-NEXT: bge 0, .LBB0_8 +; CHECK-NEXT: blt 0, .LBB0_2 ; CHECK-NEXT: # %bb.1: # %entry -; CHECK-NEXT: bun 0, .LBB0_9 +; CHECK-NEXT: vmr 4, 3 ; CHECK-NEXT: .LBB0_2: # %entry -; CHECK-NEXT: xststdcqp 0, 2, 4 -; CHECK-NEXT: bc 4, 2, .LBB0_10 -; CHECK-NEXT: .LBB0_3: # %entry -; CHECK-NEXT: xststdcqp 0, 3, 4 -; CHECK-NEXT: bc 12, 2, .LBB0_5 +; CHECK-NEXT: bnu 0, .LBB0_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: addis 3, 2, .LCPI0_0@toc@ha +; CHECK-NEXT: addi 3, 3, .LCPI0_0@toc@l +; CHECK-NEXT: lxv 36, 0(3) ; CHECK-NEXT: .LBB0_4: # %entry +; CHECK-NEXT: lbz 3, -1(1) +; CHECK-NEXT: cmplwi 3, 0 +; CHECK-NEXT: bne 0, .LBB0_6 +; CHECK-NEXT: # %bb.5: # %entry +; CHECK-NEXT: vmr 2, 4 +; CHECK-NEXT: .LBB0_6: # %entry +; CHECK-NEXT: lbz 3, -17(1) +; CHECK-NEXT: cmplwi 3, 0 +; CHECK-NEXT: bne 0, .LBB0_8 +; CHECK-NEXT: # %bb.7: # %entry ; CHECK-NEXT: vmr 3, 2 -; CHECK-NEXT: .LBB0_5: # %entry +; CHECK-NEXT: .LBB0_8: # %entry ; CHECK-NEXT: addis 3, 2, .LCPI0_1@toc@ha ; CHECK-NEXT: addi 3, 3, .LCPI0_1@toc@l ; CHECK-NEXT: lxv 34, 0(3) ; CHECK-NEXT: xscmpuqp 0, 4, 2 -; CHECK-NEXT: beq 0, .LBB0_7 -; CHECK-NEXT: # %bb.6: # %entry +; CHECK-NEXT: beq 0, .LBB0_10 +; CHECK-NEXT: # %bb.9: # %entry ; CHECK-NEXT: vmr 3, 4 -; CHECK-NEXT: .LBB0_7: # %entry +; CHECK-NEXT: .LBB0_10: # %entry ; CHECK-NEXT: vmr 2, 3 ; CHECK-NEXT: blr -; CHECK-NEXT: .LBB0_8: # %entry -; CHECK-NEXT: vmr 4, 3 -; CHECK-NEXT: bnu 0, .LBB0_2 -; CHECK-NEXT: .LBB0_9: -; CHECK-NEXT: addis 3, 2, .LCPI0_0@toc@ha -; CHECK-NEXT: addi 3, 3, .LCPI0_0@toc@l -; CHECK-NEXT: lxv 36, 0(3) -; CHECK-NEXT: xststdcqp 0, 2, 4 -; CHECK-NEXT: bc 12, 2, .LBB0_3 -; CHECK-NEXT: .LBB0_10: # %entry -; CHECK-NEXT: vmr 2, 4 -; CHECK-NEXT: xststdcqp 0, 3, 4 -; CHECK-NEXT: bc 4, 2, .LBB0_4 -; CHECK-NEXT: b .LBB0_5 entry: %m = call fp128 @llvm.minimum.f128(fp128 %a, fp128 %b) ret fp128 %m @@ -50,44 +48,42 @@ entry: define fp128 @f128_maximum(fp128 %a, fp128 %b) { ; CHECK-LABEL: f128_maximum: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: stxv 34, -16(1) +; CHECK-NEXT: stxv 35, -32(1) ; CHECK-NEXT: xscmpuqp 0, 2, 3 ; CHECK-NEXT: vmr 4, 2 -; CHECK-NEXT: ble 0, .LBB1_8 +; CHECK-NEXT: bgt 0, .LBB1_2 ; CHECK-NEXT: # %bb.1: # %entry -; CHECK-NEXT: bun 0, .LBB1_9 +; CHECK-NEXT: vmr 4, 3 ; CHECK-NEXT: .LBB1_2: # %entry -; CHECK-NEXT: xststdcqp 0, 2, 8 -; CHECK-NEXT: bc 4, 2, .LBB1_10 -; CHECK-NEXT: .LBB1_3: # %entry -; CHECK-NEXT: xststdcqp 0, 3, 8 -; CHECK-NEXT: bc 12, 2, .LBB1_5 +; CHECK-NEXT: bnu 0, .LBB1_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: addis 3, 2, .LCPI1_0@toc@ha +; CHECK-NEXT: addi 3, 3, .LCPI1_0@toc@l +; CHECK-NEXT: lxv 36, 0(3) ; CHECK-NEXT: .LBB1_4: # %entry +; CHECK-NEXT: lbz 3, -1(1) +; CHECK-NEXT: cmplwi 3, 0 +; CHECK-NEXT: beq 0, .LBB1_6 +; CHECK-NEXT: # %bb.5: # %entry +; CHECK-NEXT: vmr 2, 4 +; CHECK-NEXT: .LBB1_6: # %entry +; CHECK-NEXT: lbz 3, -17(1) +; CHECK-NEXT: cmplwi 3, 0 +; CHECK-NEXT: beq 0, .LBB1_8 +; CHECK-NEXT: # %bb.7: # %entry ; CHECK-NEXT: vmr 3, 2 -; CHECK-NEXT: .LBB1_5: # %entry +; CHECK-NEXT: .LBB1_8: # %entry ; CHECK-NEXT: addis 3, 2, .LCPI1_1@toc@ha ; CHECK-NEXT: addi 3, 3, .LCPI1_1@toc@l ; CHECK-NEXT: lxv 34, 0(3) ; CHECK-NEXT: xscmpuqp 0, 4, 2 -; CHECK-NEXT: beq 0, .LBB1_7 -; CHECK-NEXT: # %bb.6: # %entry +; CHECK-NEXT: beq 0, .LBB1_10 +; CHECK-NEXT: # %bb.9: # %entry ; CHECK-NEXT: vmr 3, 4 -; CHECK-NEXT: .LBB1_7: # %entry +; CHECK-NEXT: .LBB1_10: # %entry ; CHECK-NEXT: vmr 2, 3 ; CHECK-NEXT: blr -; CHECK-NEXT: .LBB1_8: # %entry -; CHECK-NEXT: vmr 4, 3 -; CHECK-NEXT: bnu 0, .LBB1_2 -; CHECK-NEXT: .LBB1_9: -; CHECK-NEXT: addis 3, 2, .LCPI1_0@toc@ha -; CHECK-NEXT: addi 3, 3, .LCPI1_0@toc@l -; CHECK-NEXT: lxv 36, 0(3) -; CHECK-NEXT: xststdcqp 0, 2, 8 -; CHECK-NEXT: bc 12, 2, .LBB1_3 -; CHECK-NEXT: .LBB1_10: # %entry -; CHECK-NEXT: vmr 2, 4 -; CHECK-NEXT: xststdcqp 0, 3, 8 -; CHECK-NEXT: bc 4, 2, .LBB1_4 -; CHECK-NEXT: b .LBB1_5 entry: %m = call fp128 @llvm.maximum.f128(fp128 %a, fp128 %b) ret fp128 %m diff --git a/llvm/test/CodeGen/PowerPC/fminimum-fmaximum.ll b/llvm/test/CodeGen/PowerPC/fminimum-fmaximum.ll index a99c25a4e4479..e199a1eab49d5 100644 --- a/llvm/test/CodeGen/PowerPC/fminimum-fmaximum.ll +++ b/llvm/test/CodeGen/PowerPC/fminimum-fmaximum.ll @@ -7,40 +7,39 @@ define float @f32_minimum(float %a, float %b) { ; NOVSX-LABEL: f32_minimum: ; NOVSX: # %bb.0: # %entry ; NOVSX-NEXT: fcmpu 0, 1, 2 -; NOVSX-NEXT: fmr 0, 1 -; NOVSX-NEXT: stfs 2, -8(1) -; NOVSX-NEXT: stfs 1, -4(1) +; NOVSX-NEXT: fmr 3, 1 +; NOVSX-NEXT: stfs 1, -8(1) +; NOVSX-NEXT: stfs 2, -4(1) ; NOVSX-NEXT: bc 12, 0, .LBB0_2 ; NOVSX-NEXT: # %bb.1: # %entry -; NOVSX-NEXT: fmr 0, 2 +; NOVSX-NEXT: fmr 3, 2 ; NOVSX-NEXT: .LBB0_2: # %entry -; NOVSX-NEXT: lwz 3, -4(1) ; NOVSX-NEXT: bc 4, 3, .LBB0_4 ; NOVSX-NEXT: # %bb.3: -; NOVSX-NEXT: addis 4, 2, .LCPI0_0@toc@ha -; NOVSX-NEXT: lfs 0, .LCPI0_0@toc@l(4) +; NOVSX-NEXT: addis 3, 2, .LCPI0_0@toc@ha +; NOVSX-NEXT: lfs 3, .LCPI0_0@toc@l(3) ; NOVSX-NEXT: .LBB0_4: # %entry -; NOVSX-NEXT: xoris 3, 3, 32768 -; NOVSX-NEXT: lwz 4, -8(1) -; NOVSX-NEXT: cmplwi 3, 0 +; NOVSX-NEXT: lwz 3, -8(1) +; NOVSX-NEXT: fmr 0, 3 +; NOVSX-NEXT: cmpwi 3, 0 ; NOVSX-NEXT: bc 12, 2, .LBB0_6 ; NOVSX-NEXT: # %bb.5: # %entry -; NOVSX-NEXT: fmr 1, 0 +; NOVSX-NEXT: fmr 0, 1 ; NOVSX-NEXT: .LBB0_6: # %entry -; NOVSX-NEXT: xoris 3, 4, 32768 -; NOVSX-NEXT: cmplwi 3, 0 +; NOVSX-NEXT: lwz 3, -4(1) +; NOVSX-NEXT: cmpwi 3, 0 ; NOVSX-NEXT: bc 12, 2, .LBB0_8 ; NOVSX-NEXT: # %bb.7: # %entry -; NOVSX-NEXT: fmr 2, 1 +; NOVSX-NEXT: fmr 0, 2 ; NOVSX-NEXT: .LBB0_8: # %entry ; NOVSX-NEXT: addis 3, 2, .LCPI0_1@toc@ha ; NOVSX-NEXT: lfs 1, .LCPI0_1@toc@l(3) -; NOVSX-NEXT: fcmpu 0, 0, 1 +; NOVSX-NEXT: fcmpu 0, 3, 1 ; NOVSX-NEXT: bc 12, 2, .LBB0_10 ; NOVSX-NEXT: # %bb.9: # %entry -; NOVSX-NEXT: fmr 2, 0 +; NOVSX-NEXT: fmr 0, 3 ; NOVSX-NEXT: .LBB0_10: # %entry -; NOVSX-NEXT: fmr 1, 2 +; NOVSX-NEXT: fmr 1, 0 ; NOVSX-NEXT: blr ; ; VSX-LABEL: f32_minimum: @@ -76,25 +75,25 @@ define float @f32_maximum(float %a, float %b) { ; NOVSX: # %bb.0: # %entry ; NOVSX-NEXT: fcmpu 0, 1, 2 ; NOVSX-NEXT: fmr 0, 1 -; NOVSX-NEXT: stfs 2, -8(1) -; NOVSX-NEXT: stfs 1, -4(1) +; NOVSX-NEXT: stfs 1, -8(1) +; NOVSX-NEXT: stfs 2, -4(1) ; NOVSX-NEXT: bc 12, 1, .LBB1_2 ; NOVSX-NEXT: # %bb.1: # %entry ; NOVSX-NEXT: fmr 0, 2 ; NOVSX-NEXT: .LBB1_2: # %entry -; NOVSX-NEXT: lwz 3, -4(1) ; NOVSX-NEXT: bc 4, 3, .LBB1_4 ; NOVSX-NEXT: # %bb.3: -; NOVSX-NEXT: addis 4, 2, .LCPI1_0@toc@ha -; NOVSX-NEXT: lfs 0, .LCPI1_0@toc@l(4) +; NOVSX-NEXT: addis 3, 2, .LCPI1_0@toc@ha +; NOVSX-NEXT: lfs 0, .LCPI1_0@toc@l(3) ; NOVSX-NEXT: .LBB1_4: # %entry +; NOVSX-NEXT: lwz 3, -8(1) ; NOVSX-NEXT: cmpwi 3, 0 -; NOVSX-NEXT: lwz 4, -8(1) ; NOVSX-NEXT: bc 12, 2, .LBB1_6 ; NOVSX-NEXT: # %bb.5: # %entry ; NOVSX-NEXT: fmr 1, 0 ; NOVSX-NEXT: .LBB1_6: # %entry -; NOVSX-NEXT: cmpwi 4, 0 +; NOVSX-NEXT: lwz 3, -4(1) +; NOVSX-NEXT: cmpwi 3, 0 ; NOVSX-NEXT: bc 12, 2, .LBB1_8 ; NOVSX-NEXT: # %bb.7: # %entry ; NOVSX-NEXT: fmr 2, 1 @@ -141,40 +140,39 @@ define double @f64_minimum(double %a, double %b) { ; NOVSX-LABEL: f64_minimum: ; NOVSX: # %bb.0: # %entry ; NOVSX-NEXT: fcmpu 0, 1, 2 -; NOVSX-NEXT: fmr 0, 1 -; NOVSX-NEXT: stfd 2, -16(1) -; NOVSX-NEXT: stfd 1, -8(1) +; NOVSX-NEXT: fmr 3, 1 +; NOVSX-NEXT: stfd 1, -16(1) +; NOVSX-NEXT: stfd 2, -8(1) ; NOVSX-NEXT: bc 12, 0, .LBB2_2 ; NOVSX-NEXT: # %bb.1: # %entry -; NOVSX-NEXT: fmr 0, 2 +; NOVSX-NEXT: fmr 3, 2 ; NOVSX-NEXT: .LBB2_2: # %entry -; NOVSX-NEXT: ld 3, -8(1) ; NOVSX-NEXT: bc 4, 3, .LBB2_4 ; NOVSX-NEXT: # %bb.3: -; NOVSX-NEXT: addis 4, 2, .LCPI2_0@toc@ha -; NOVSX-NEXT: lfs 0, .LCPI2_0@toc@l(4) +; NOVSX-NEXT: addis 3, 2, .LCPI2_0@toc@ha +; NOVSX-NEXT: lfs 3, .LCPI2_0@toc@l(3) ; NOVSX-NEXT: .LBB2_4: # %entry -; NOVSX-NEXT: li 5, 1 -; NOVSX-NEXT: ld 4, -16(1) -; NOVSX-NEXT: rldic 5, 5, 63, 0 -; NOVSX-NEXT: cmpd 3, 5 +; NOVSX-NEXT: ld 3, -16(1) +; NOVSX-NEXT: fmr 0, 3 +; NOVSX-NEXT: cmpdi 3, 0 ; NOVSX-NEXT: bc 12, 2, .LBB2_6 ; NOVSX-NEXT: # %bb.5: # %entry -; NOVSX-NEXT: fmr 1, 0 +; NOVSX-NEXT: fmr 0, 1 ; NOVSX-NEXT: .LBB2_6: # %entry -; NOVSX-NEXT: cmpd 4, 5 +; NOVSX-NEXT: ld 3, -8(1) +; NOVSX-NEXT: cmpdi 3, 0 ; NOVSX-NEXT: bc 12, 2, .LBB2_8 ; NOVSX-NEXT: # %bb.7: # %entry -; NOVSX-NEXT: fmr 2, 1 +; NOVSX-NEXT: fmr 0, 2 ; NOVSX-NEXT: .LBB2_8: # %entry ; NOVSX-NEXT: addis 3, 2, .LCPI2_1@toc@ha ; NOVSX-NEXT: lfs 1, .LCPI2_1@toc@l(3) -; NOVSX-NEXT: fcmpu 0, 0, 1 +; NOVSX-NEXT: fcmpu 0, 3, 1 ; NOVSX-NEXT: bc 12, 2, .LBB2_10 ; NOVSX-NEXT: # %bb.9: # %entry -; NOVSX-NEXT: fmr 2, 0 +; NOVSX-NEXT: fmr 0, 3 ; NOVSX-NEXT: .LBB2_10: # %entry -; NOVSX-NEXT: fmr 1, 2 +; NOVSX-NEXT: fmr 1, 0 ; NOVSX-NEXT: blr ; ; VSX-LABEL: f64_minimum: @@ -210,25 +208,25 @@ define double @f64_maximum(double %a, double %b) { ; NOVSX: # %bb.0: # %entry ; NOVSX-NEXT: fcmpu 0, 1, 2 ; NOVSX-NEXT: fmr 0, 1 -; NOVSX-NEXT: stfd 2, -16(1) -; NOVSX-NEXT: stfd 1, -8(1) +; NOVSX-NEXT: stfd 1, -16(1) +; NOVSX-NEXT: stfd 2, -8(1) ; NOVSX-NEXT: bc 12, 1, .LBB3_2 ; NOVSX-NEXT: # %bb.1: # %entry ; NOVSX-NEXT: fmr 0, 2 ; NOVSX-NEXT: .LBB3_2: # %entry -; NOVSX-NEXT: ld 3, -8(1) ; NOVSX-NEXT: bc 4, 3, .LBB3_4 ; NOVSX-NEXT: # %bb.3: -; NOVSX-NEXT: addis 4, 2, .LCPI3_0@toc@ha -; NOVSX-NEXT: lfs 0, .LCPI3_0@toc@l(4) +; NOVSX-NEXT: addis 3, 2, .LCPI3_0@toc@ha +; NOVSX-NEXT: lfs 0, .LCPI3_0@toc@l(3) ; NOVSX-NEXT: .LBB3_4: # %entry +; NOVSX-NEXT: ld 3, -16(1) ; NOVSX-NEXT: cmpdi 3, 0 -; NOVSX-NEXT: ld 4, -16(1) ; NOVSX-NEXT: bc 12, 2, .LBB3_6 ; NOVSX-NEXT: # %bb.5: # %entry ; NOVSX-NEXT: fmr 1, 0 ; NOVSX-NEXT: .LBB3_6: # %entry -; NOVSX-NEXT: cmpdi 4, 0 +; NOVSX-NEXT: ld 3, -8(1) +; NOVSX-NEXT: cmpdi 3, 0 ; NOVSX-NEXT: bc 12, 2, .LBB3_8 ; NOVSX-NEXT: # %bb.7: # %entry ; NOVSX-NEXT: fmr 2, 1 @@ -274,26 +272,26 @@ entry: define <4 x float> @v4f32_minimum(<4 x float> %a, <4 x float> %b) { ; NOVSX-LABEL: v4f32_minimum: ; NOVSX: # %bb.0: # %entry -; NOVSX-NEXT: vcmpeqfp 0, 3, 3 -; NOVSX-NEXT: vcmpeqfp 1, 2, 2 +; NOVSX-NEXT: vcmpeqfp 5, 3, 3 +; NOVSX-NEXT: vcmpeqfp 0, 2, 2 ; NOVSX-NEXT: addis 3, 2, .LCPI4_0@toc@ha ; NOVSX-NEXT: addi 3, 3, .LCPI4_0@toc@l +; NOVSX-NEXT: vnot 5, 5 +; NOVSX-NEXT: vnot 0, 0 +; NOVSX-NEXT: vcmpgtfp 4, 3, 2 +; NOVSX-NEXT: vor 5, 0, 5 +; NOVSX-NEXT: lvx 0, 0, 3 +; NOVSX-NEXT: vsel 4, 3, 2, 4 +; NOVSX-NEXT: vsel 4, 4, 0, 5 +; NOVSX-NEXT: vxor 5, 5, 5 +; NOVSX-NEXT: vcmpequw 0, 2, 5 ; NOVSX-NEXT: vnot 0, 0 -; NOVSX-NEXT: vnot 1, 1 -; NOVSX-NEXT: vspltisb 4, -1 -; NOVSX-NEXT: vcmpgtfp 5, 3, 2 -; NOVSX-NEXT: vslw 4, 4, 4 -; NOVSX-NEXT: vor 0, 1, 0 -; NOVSX-NEXT: lvx 1, 0, 3 -; NOVSX-NEXT: vsel 5, 3, 2, 5 -; NOVSX-NEXT: vsel 5, 5, 1, 0 -; NOVSX-NEXT: vcmpequw 0, 2, 4 -; NOVSX-NEXT: vcmpequw 4, 3, 4 -; NOVSX-NEXT: vsel 2, 5, 2, 0 -; NOVSX-NEXT: vsel 2, 2, 3, 4 -; NOVSX-NEXT: vxor 3, 3, 3 -; NOVSX-NEXT: vcmpeqfp 3, 5, 3 -; NOVSX-NEXT: vsel 2, 5, 2, 3 +; NOVSX-NEXT: vsel 2, 4, 2, 0 +; NOVSX-NEXT: vcmpequw 0, 3, 5 +; NOVSX-NEXT: vnot 0, 0 +; NOVSX-NEXT: vsel 2, 2, 3, 0 +; NOVSX-NEXT: vcmpeqfp 3, 4, 5 +; NOVSX-NEXT: vsel 2, 4, 2, 3 ; NOVSX-NEXT: blr ; ; VSX-LABEL: v4f32_minimum: @@ -301,21 +299,21 @@ define <4 x float> @v4f32_minimum(<4 x float> %a, <4 x float> %b) { ; VSX-NEXT: xvcmpeqsp 1, 35, 35 ; VSX-NEXT: xvcmpeqsp 2, 34, 34 ; VSX-NEXT: addis 3, 2, .LCPI4_0@toc@ha -; VSX-NEXT: xxleqv 36, 36, 36 -; VSX-NEXT: xvminsp 0, 34, 35 -; VSX-NEXT: vslw 4, 4, 4 ; VSX-NEXT: addi 3, 3, .LCPI4_0@toc@l ; VSX-NEXT: xxlnor 1, 1, 1 ; VSX-NEXT: xxlnor 2, 2, 2 +; VSX-NEXT: xxlxor 36, 36, 36 +; VSX-NEXT: xvminsp 0, 34, 35 ; VSX-NEXT: vcmpequw 5, 2, 4 ; VSX-NEXT: xxlor 1, 2, 1 ; VSX-NEXT: lxvd2x 2, 0, 3 ; VSX-NEXT: xxsel 0, 0, 2, 1 -; VSX-NEXT: xxlxor 2, 2, 2 -; VSX-NEXT: xvcmpeqsp 2, 0, 2 -; VSX-NEXT: xxsel 1, 0, 34, 37 +; VSX-NEXT: xxlnor 1, 37, 37 +; VSX-NEXT: xxsel 1, 0, 34, 1 ; VSX-NEXT: vcmpequw 2, 3, 4 -; VSX-NEXT: xxsel 1, 1, 35, 34 +; VSX-NEXT: xxlnor 2, 34, 34 +; VSX-NEXT: xxsel 1, 1, 35, 2 +; VSX-NEXT: xvcmpeqsp 2, 0, 36 ; VSX-NEXT: xxsel 34, 0, 1, 2 ; VSX-NEXT: blr ; @@ -324,20 +322,20 @@ define <4 x float> @v4f32_minimum(<4 x float> %a, <4 x float> %b) { ; AIX-NEXT: xvcmpeqsp 1, 35, 35 ; AIX-NEXT: xvcmpeqsp 2, 34, 34 ; AIX-NEXT: ld 3, L..C4(2) # %const.0 -; AIX-NEXT: xxleqv 36, 36, 36 +; AIX-NEXT: xxlxor 36, 36, 36 ; AIX-NEXT: xvminsp 0, 34, 35 -; AIX-NEXT: vslw 4, 4, 4 ; AIX-NEXT: xxlnor 1, 1, 1 ; AIX-NEXT: xxlnor 2, 2, 2 ; AIX-NEXT: vcmpequw 5, 2, 4 ; AIX-NEXT: xxlor 1, 2, 1 ; AIX-NEXT: lxvw4x 2, 0, 3 ; AIX-NEXT: xxsel 0, 0, 2, 1 -; AIX-NEXT: xxlxor 2, 2, 2 -; AIX-NEXT: xvcmpeqsp 2, 0, 2 -; AIX-NEXT: xxsel 1, 0, 34, 37 +; AIX-NEXT: xxlnor 1, 37, 37 +; AIX-NEXT: xxsel 1, 0, 34, 1 ; AIX-NEXT: vcmpequw 2, 3, 4 -; AIX-NEXT: xxsel 1, 1, 35, 34 +; AIX-NEXT: xxlnor 2, 34, 34 +; AIX-NEXT: xxsel 1, 1, 35, 2 +; AIX-NEXT: xvcmpeqsp 2, 0, 36 ; AIX-NEXT: xxsel 34, 0, 1, 2 ; AIX-NEXT: blr entry: @@ -417,78 +415,76 @@ define <2 x double> @v2f64_minimum(<2 x double> %a, <2 x double> %b) { ; NOVSX-LABEL: v2f64_minimum: ; NOVSX: # %bb.0: # %entry ; NOVSX-NEXT: fcmpu 0, 1, 3 -; NOVSX-NEXT: fmr 6, 1 -; NOVSX-NEXT: stfd 4, -16(1) -; NOVSX-NEXT: stfd 2, -8(1) -; NOVSX-NEXT: stfd 3, -32(1) -; NOVSX-NEXT: stfd 1, -24(1) +; NOVSX-NEXT: fmr 0, 1 +; NOVSX-NEXT: stfd 1, -16(1) +; NOVSX-NEXT: stfd 3, -8(1) +; NOVSX-NEXT: stfd 2, -32(1) +; NOVSX-NEXT: stfd 4, -24(1) ; NOVSX-NEXT: bc 12, 0, .LBB6_2 ; NOVSX-NEXT: # %bb.1: # %entry -; NOVSX-NEXT: fmr 6, 3 +; NOVSX-NEXT: fmr 1, 3 ; NOVSX-NEXT: .LBB6_2: # %entry ; NOVSX-NEXT: addis 3, 2, .LCPI6_0@toc@ha -; NOVSX-NEXT: ld 4, -24(1) -; NOVSX-NEXT: lfs 0, .LCPI6_0@toc@l(3) -; NOVSX-NEXT: fmr 5, 0 +; NOVSX-NEXT: lfs 5, .LCPI6_0@toc@l(3) +; NOVSX-NEXT: fmr 6, 5 ; NOVSX-NEXT: bc 12, 3, .LBB6_4 ; NOVSX-NEXT: # %bb.3: # %entry -; NOVSX-NEXT: fmr 5, 6 +; NOVSX-NEXT: fmr 6, 1 ; NOVSX-NEXT: .LBB6_4: # %entry -; NOVSX-NEXT: li 3, 1 -; NOVSX-NEXT: ld 5, -32(1) -; NOVSX-NEXT: rldic 3, 3, 63, 0 -; NOVSX-NEXT: cmpd 4, 3 +; NOVSX-NEXT: ld 3, -16(1) +; NOVSX-NEXT: fmr 1, 6 +; NOVSX-NEXT: cmpdi 3, 0 ; NOVSX-NEXT: bc 12, 2, .LBB6_6 ; NOVSX-NEXT: # %bb.5: # %entry -; NOVSX-NEXT: fmr 1, 5 +; NOVSX-NEXT: fmr 1, 0 ; NOVSX-NEXT: .LBB6_6: # %entry -; NOVSX-NEXT: cmpd 5, 3 +; NOVSX-NEXT: ld 3, -8(1) +; NOVSX-NEXT: cmpdi 3, 0 ; NOVSX-NEXT: bc 12, 2, .LBB6_8 ; NOVSX-NEXT: # %bb.7: # %entry -; NOVSX-NEXT: fmr 3, 1 +; NOVSX-NEXT: fmr 1, 3 ; NOVSX-NEXT: .LBB6_8: # %entry -; NOVSX-NEXT: addis 4, 2, .LCPI6_1@toc@ha -; NOVSX-NEXT: lfs 1, .LCPI6_1@toc@l(4) -; NOVSX-NEXT: fcmpu 0, 5, 1 +; NOVSX-NEXT: addis 3, 2, .LCPI6_1@toc@ha +; NOVSX-NEXT: lfs 3, .LCPI6_1@toc@l(3) +; NOVSX-NEXT: fcmpu 0, 6, 3 ; NOVSX-NEXT: bc 12, 2, .LBB6_10 ; NOVSX-NEXT: # %bb.9: # %entry -; NOVSX-NEXT: fmr 3, 5 +; NOVSX-NEXT: fmr 1, 6 ; NOVSX-NEXT: .LBB6_10: # %entry ; NOVSX-NEXT: fcmpu 0, 2, 4 -; NOVSX-NEXT: fmr 5, 2 +; NOVSX-NEXT: fmr 0, 2 ; NOVSX-NEXT: bc 12, 0, .LBB6_12 ; NOVSX-NEXT: # %bb.11: # %entry -; NOVSX-NEXT: fmr 5, 4 +; NOVSX-NEXT: fmr 0, 4 ; NOVSX-NEXT: .LBB6_12: # %entry -; NOVSX-NEXT: ld 5, -8(1) ; NOVSX-NEXT: bc 12, 3, .LBB6_14 ; NOVSX-NEXT: # %bb.13: # %entry -; NOVSX-NEXT: fmr 0, 5 +; NOVSX-NEXT: fmr 5, 0 ; NOVSX-NEXT: .LBB6_14: # %entry -; NOVSX-NEXT: cmpd 5, 3 -; NOVSX-NEXT: ld 4, -16(1) -; NOVSX-NEXT: bc 4, 2, .LBB6_19 +; NOVSX-NEXT: ld 3, -32(1) +; NOVSX-NEXT: fmr 0, 5 +; NOVSX-NEXT: cmpdi 3, 0 +; NOVSX-NEXT: bc 12, 2, .LBB6_16 ; NOVSX-NEXT: # %bb.15: # %entry -; NOVSX-NEXT: cmpd 4, 3 -; NOVSX-NEXT: bc 4, 2, .LBB6_20 +; NOVSX-NEXT: fmr 0, 2 ; NOVSX-NEXT: .LBB6_16: # %entry -; NOVSX-NEXT: fcmpu 0, 0, 1 -; NOVSX-NEXT: bc 12, 2, .LBB6_18 -; NOVSX-NEXT: .LBB6_17: # %entry -; NOVSX-NEXT: fmr 4, 0 +; NOVSX-NEXT: ld 3, -24(1) +; NOVSX-NEXT: cmpdi 3, 0 +; NOVSX-NEXT: bc 4, 2, .LBB6_19 +; NOVSX-NEXT: # %bb.17: # %entry +; NOVSX-NEXT: fcmpu 0, 5, 3 +; NOVSX-NEXT: bc 4, 2, .LBB6_20 ; NOVSX-NEXT: .LBB6_18: # %entry -; NOVSX-NEXT: fmr 1, 3 -; NOVSX-NEXT: fmr 2, 4 +; NOVSX-NEXT: fmr 2, 0 ; NOVSX-NEXT: blr ; NOVSX-NEXT: .LBB6_19: # %entry -; NOVSX-NEXT: fmr 2, 0 -; NOVSX-NEXT: cmpd 4, 3 -; NOVSX-NEXT: bc 12, 2, .LBB6_16 +; NOVSX-NEXT: fmr 0, 4 +; NOVSX-NEXT: fcmpu 0, 5, 3 +; NOVSX-NEXT: bc 12, 2, .LBB6_18 ; NOVSX-NEXT: .LBB6_20: # %entry -; NOVSX-NEXT: fmr 4, 2 -; NOVSX-NEXT: fcmpu 0, 0, 1 -; NOVSX-NEXT: bc 4, 2, .LBB6_17 -; NOVSX-NEXT: b .LBB6_18 +; NOVSX-NEXT: fmr 0, 5 +; NOVSX-NEXT: fmr 2, 0 +; NOVSX-NEXT: blr ; ; VSX-LABEL: v2f64_minimum: ; VSX: # %bb.0: # %entry @@ -500,39 +496,38 @@ define <2 x double> @v2f64_minimum(<2 x double> %a, <2 x double> %b) { ; VSX-NEXT: xxlnor 37, 37, 37 ; VSX-NEXT: xvmindp 0, 34, 35 ; VSX-NEXT: lxvd2x 2, 0, 3 -; VSX-NEXT: addis 3, 2, .LCPI6_1@toc@ha ; VSX-NEXT: xxlor 1, 37, 36 -; VSX-NEXT: addi 3, 3, .LCPI6_1@toc@l -; VSX-NEXT: lxvd2x 36, 0, 3 +; VSX-NEXT: xxlxor 36, 36, 36 ; VSX-NEXT: vcmpequd 5, 2, 4 +; VSX-NEXT: xxlnor 37, 37, 37 ; VSX-NEXT: xxsel 0, 0, 2, 1 -; VSX-NEXT: xxlxor 2, 2, 2 ; VSX-NEXT: xxsel 1, 0, 34, 37 ; VSX-NEXT: vcmpequd 2, 3, 4 +; VSX-NEXT: xxlnor 34, 34, 34 ; VSX-NEXT: xxsel 1, 1, 35, 34 -; VSX-NEXT: xvcmpeqdp 34, 0, 2 +; VSX-NEXT: xvcmpeqdp 34, 0, 36 ; VSX-NEXT: xxsel 34, 0, 1, 34 ; VSX-NEXT: blr ; ; AIX-LABEL: v2f64_minimum: ; AIX: # %bb.0: # %entry -; AIX-NEXT: ld 3, L..C6(2) # %const.0 ; AIX-NEXT: xvcmpeqdp 36, 35, 35 ; AIX-NEXT: xvcmpeqdp 37, 34, 34 +; AIX-NEXT: ld 3, L..C6(2) # %const.0 ; AIX-NEXT: lxvd2x 2, 0, 3 -; AIX-NEXT: ld 3, L..C7(2) # %const.1 ; AIX-NEXT: xxlnor 36, 36, 36 ; AIX-NEXT: xxlnor 37, 37, 37 ; AIX-NEXT: xvmindp 0, 34, 35 ; AIX-NEXT: xxlor 1, 37, 36 -; AIX-NEXT: lxvd2x 36, 0, 3 +; AIX-NEXT: xxlxor 36, 36, 36 ; AIX-NEXT: vcmpequd 5, 2, 4 +; AIX-NEXT: xxlnor 37, 37, 37 ; AIX-NEXT: xxsel 0, 0, 2, 1 -; AIX-NEXT: xxlxor 2, 2, 2 ; AIX-NEXT: xxsel 1, 0, 34, 37 ; AIX-NEXT: vcmpequd 2, 3, 4 +; AIX-NEXT: xxlnor 34, 34, 34 ; AIX-NEXT: xxsel 1, 1, 35, 34 -; AIX-NEXT: xvcmpeqdp 34, 0, 2 +; AIX-NEXT: xvcmpeqdp 34, 0, 36 ; AIX-NEXT: xxsel 34, 0, 1, 34 ; AIX-NEXT: blr entry: @@ -545,29 +540,29 @@ define <2 x double> @v2f64_maximum(<2 x double> %a, <2 x double> %b) { ; NOVSX: # %bb.0: # %entry ; NOVSX-NEXT: fcmpu 0, 1, 3 ; NOVSX-NEXT: fmr 6, 1 -; NOVSX-NEXT: stfd 4, -16(1) -; NOVSX-NEXT: stfd 2, -8(1) -; NOVSX-NEXT: stfd 3, -32(1) -; NOVSX-NEXT: stfd 1, -24(1) +; NOVSX-NEXT: stfd 1, -16(1) +; NOVSX-NEXT: stfd 3, -8(1) +; NOVSX-NEXT: stfd 2, -32(1) +; NOVSX-NEXT: stfd 4, -24(1) ; NOVSX-NEXT: bc 12, 1, .LBB7_2 ; NOVSX-NEXT: # %bb.1: # %entry ; NOVSX-NEXT: fmr 6, 3 ; NOVSX-NEXT: .LBB7_2: # %entry -; NOVSX-NEXT: addis 4, 2, .LCPI7_0@toc@ha -; NOVSX-NEXT: ld 3, -24(1) -; NOVSX-NEXT: lfs 0, .LCPI7_0@toc@l(4) +; NOVSX-NEXT: addis 3, 2, .LCPI7_0@toc@ha +; NOVSX-NEXT: lfs 0, .LCPI7_0@toc@l(3) ; NOVSX-NEXT: fmr 5, 0 ; NOVSX-NEXT: bc 12, 3, .LBB7_4 ; NOVSX-NEXT: # %bb.3: # %entry ; NOVSX-NEXT: fmr 5, 6 ; NOVSX-NEXT: .LBB7_4: # %entry +; NOVSX-NEXT: ld 3, -16(1) ; NOVSX-NEXT: cmpdi 3, 0 -; NOVSX-NEXT: ld 4, -32(1) ; NOVSX-NEXT: bc 12, 2, .LBB7_6 ; NOVSX-NEXT: # %bb.5: # %entry ; NOVSX-NEXT: fmr 1, 5 ; NOVSX-NEXT: .LBB7_6: # %entry -; NOVSX-NEXT: cmpdi 4, 0 +; NOVSX-NEXT: ld 3, -8(1) +; NOVSX-NEXT: cmpdi 3, 0 ; NOVSX-NEXT: bc 12, 2, .LBB7_8 ; NOVSX-NEXT: # %bb.7: # %entry ; NOVSX-NEXT: fmr 3, 1 @@ -585,35 +580,30 @@ define <2 x double> @v2f64_maximum(<2 x double> %a, <2 x double> %b) { ; NOVSX-NEXT: # %bb.11: # %entry ; NOVSX-NEXT: fmr 5, 4 ; NOVSX-NEXT: .LBB7_12: # %entry -; NOVSX-NEXT: ld 4, -8(1) ; NOVSX-NEXT: bc 12, 3, .LBB7_14 ; NOVSX-NEXT: # %bb.13: # %entry ; NOVSX-NEXT: fmr 0, 5 ; NOVSX-NEXT: .LBB7_14: # %entry -; NOVSX-NEXT: cmpdi 4, 0 -; NOVSX-NEXT: ld 3, -16(1) -; NOVSX-NEXT: bc 4, 2, .LBB7_19 -; NOVSX-NEXT: # %bb.15: # %entry +; NOVSX-NEXT: ld 3, -32(1) ; NOVSX-NEXT: cmpdi 3, 0 -; NOVSX-NEXT: bc 4, 2, .LBB7_20 +; NOVSX-NEXT: bc 12, 2, .LBB7_16 +; NOVSX-NEXT: # %bb.15: # %entry +; NOVSX-NEXT: fmr 2, 0 ; NOVSX-NEXT: .LBB7_16: # %entry -; NOVSX-NEXT: fcmpu 0, 0, 1 +; NOVSX-NEXT: ld 3, -24(1) +; NOVSX-NEXT: cmpdi 3, 0 ; NOVSX-NEXT: bc 12, 2, .LBB7_18 -; NOVSX-NEXT: .LBB7_17: # %entry -; NOVSX-NEXT: fmr 4, 0 +; NOVSX-NEXT: # %bb.17: # %entry +; NOVSX-NEXT: fmr 4, 2 ; NOVSX-NEXT: .LBB7_18: # %entry +; NOVSX-NEXT: fcmpu 0, 0, 1 +; NOVSX-NEXT: bc 12, 2, .LBB7_20 +; NOVSX-NEXT: # %bb.19: # %entry +; NOVSX-NEXT: fmr 4, 0 +; NOVSX-NEXT: .LBB7_20: # %entry ; NOVSX-NEXT: fmr 1, 3 ; NOVSX-NEXT: fmr 2, 4 ; NOVSX-NEXT: blr -; NOVSX-NEXT: .LBB7_19: # %entry -; NOVSX-NEXT: fmr 2, 0 -; NOVSX-NEXT: cmpdi 3, 0 -; NOVSX-NEXT: bc 12, 2, .LBB7_16 -; NOVSX-NEXT: .LBB7_20: # %entry -; NOVSX-NEXT: fmr 4, 2 -; NOVSX-NEXT: fcmpu 0, 0, 1 -; NOVSX-NEXT: bc 4, 2, .LBB7_17 -; NOVSX-NEXT: b .LBB7_18 ; ; VSX-LABEL: v2f64_maximum: ; VSX: # %bb.0: # %entry @@ -638,7 +628,7 @@ define <2 x double> @v2f64_maximum(<2 x double> %a, <2 x double> %b) { ; ; AIX-LABEL: v2f64_maximum: ; AIX: # %bb.0: # %entry -; AIX-NEXT: ld 3, L..C8(2) # %const.0 +; AIX-NEXT: ld 3, L..C7(2) # %const.0 ; AIX-NEXT: xvcmpeqdp 36, 35, 35 ; AIX-NEXT: xvcmpeqdp 37, 34, 34 ; AIX-NEXT: lxvd2x 2, 0, 3 diff --git a/llvm/test/CodeGen/X86/fminimum-fmaximum-i686.ll b/llvm/test/CodeGen/X86/fminimum-fmaximum-i686.ll new file mode 100644 index 0000000000000..dea455a692420 --- /dev/null +++ b/llvm/test/CodeGen/X86/fminimum-fmaximum-i686.ll @@ -0,0 +1,530 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s + +define half @maximum_half(half %x, half %y) nounwind { +; CHECK-LABEL: maximum_half: +; CHECK: # %bb.0: +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: subl $16, %esp +; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %esi +; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl %eax, (%esp) +; CHECK-NEXT: calll __extendhfsf2 +; CHECK-NEXT: fsts {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; CHECK-NEXT: movl %esi, (%esp) +; CHECK-NEXT: fstps {{[0-9]+}}(%esp) +; CHECK-NEXT: calll __extendhfsf2 +; CHECK-NEXT: flds {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; CHECK-NEXT: fxch %st(1) +; CHECK-NEXT: fsts {{[0-9]+}}(%esp) +; CHECK-NEXT: fxch %st(1) +; CHECK-NEXT: fucom %st(1) +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: fld %st(0) +; CHECK-NEXT: ja .LBB0_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: fstp %st(0) +; CHECK-NEXT: fld %st(1) +; CHECK-NEXT: .LBB0_2: +; CHECK-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} +; CHECK-NEXT: jp .LBB0_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: fstp %st(0) +; CHECK-NEXT: fldz +; CHECK-NEXT: fxch %st(1) +; CHECK-NEXT: .LBB0_4: +; CHECK-NEXT: fstp %st(1) +; CHECK-NEXT: cmpl $0, {{[0-9]+}}(%esp) +; CHECK-NEXT: je .LBB0_6 +; CHECK-NEXT: # %bb.5: +; CHECK-NEXT: fstp %st(1) +; CHECK-NEXT: fld %st(0) +; CHECK-NEXT: fxch %st(1) +; CHECK-NEXT: .LBB0_6: +; CHECK-NEXT: cmpl $0, {{[0-9]+}}(%esp) +; CHECK-NEXT: je .LBB0_8 +; CHECK-NEXT: # %bb.7: +; CHECK-NEXT: fstp %st(2) +; CHECK-NEXT: fldz +; CHECK-NEXT: fxch %st(1) +; CHECK-NEXT: fxch %st(2) +; CHECK-NEXT: .LBB0_8: +; CHECK-NEXT: fstp %st(1) +; CHECK-NEXT: fldz +; CHECK-NEXT: fxch %st(1) +; CHECK-NEXT: fucom %st(1) +; CHECK-NEXT: fstp %st(1) +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: jne .LBB0_9 +; CHECK-NEXT: # %bb.10: +; CHECK-NEXT: jp .LBB0_13 +; CHECK-NEXT: # %bb.11: +; CHECK-NEXT: fstp %st(0) +; CHECK-NEXT: jmp .LBB0_12 +; CHECK-NEXT: .LBB0_9: +; CHECK-NEXT: fstp %st(1) +; CHECK-NEXT: .LBB0_12: +; CHECK-NEXT: fldz +; CHECK-NEXT: fxch %st(1) +; CHECK-NEXT: .LBB0_13: +; CHECK-NEXT: fstp %st(1) +; CHECK-NEXT: fstps (%esp) +; CHECK-NEXT: calll __truncsfhf2 +; CHECK-NEXT: addl $16, %esp +; CHECK-NEXT: popl %esi +; CHECK-NEXT: retl + %res = call half @llvm.maximum.f16(half %x, half %y) + ret half %res +} + +define float @maximum_float(float %x, float %y) nounwind { +; CHECK-LABEL: maximum_float: +; CHECK: # %bb.0: +; CHECK-NEXT: subl $8, %esp +; CHECK-NEXT: flds {{[0-9]+}}(%esp) +; CHECK-NEXT: flds {{[0-9]+}}(%esp) +; CHECK-NEXT: fsts (%esp) +; CHECK-NEXT: fxch %st(1) +; CHECK-NEXT: fsts {{[0-9]+}}(%esp) +; CHECK-NEXT: fxch %st(1) +; CHECK-NEXT: fucom %st(1) +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: fld %st(0) +; CHECK-NEXT: ja .LBB1_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: fstp %st(0) +; CHECK-NEXT: fld %st(1) +; CHECK-NEXT: .LBB1_2: +; CHECK-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} +; CHECK-NEXT: jp .LBB1_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: fstp %st(0) +; CHECK-NEXT: fldz +; CHECK-NEXT: fxch %st(1) +; CHECK-NEXT: .LBB1_4: +; CHECK-NEXT: fstp %st(1) +; CHECK-NEXT: cmpl $0, (%esp) +; CHECK-NEXT: je .LBB1_6 +; CHECK-NEXT: # %bb.5: +; CHECK-NEXT: fstp %st(1) +; CHECK-NEXT: fld %st(0) +; CHECK-NEXT: fxch %st(1) +; CHECK-NEXT: .LBB1_6: +; CHECK-NEXT: cmpl $0, {{[0-9]+}}(%esp) +; CHECK-NEXT: je .LBB1_8 +; CHECK-NEXT: # %bb.7: +; CHECK-NEXT: fstp %st(2) +; CHECK-NEXT: fldz +; CHECK-NEXT: fxch %st(1) +; CHECK-NEXT: fxch %st(2) +; CHECK-NEXT: .LBB1_8: +; CHECK-NEXT: fstp %st(1) +; CHECK-NEXT: fldz +; CHECK-NEXT: fxch %st(1) +; CHECK-NEXT: fucom %st(1) +; CHECK-NEXT: fstp %st(1) +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: jne .LBB1_9 +; CHECK-NEXT: # %bb.10: +; CHECK-NEXT: jp .LBB1_13 +; CHECK-NEXT: # %bb.11: +; CHECK-NEXT: fstp %st(0) +; CHECK-NEXT: jmp .LBB1_12 +; CHECK-NEXT: .LBB1_9: +; CHECK-NEXT: fstp %st(1) +; CHECK-NEXT: .LBB1_12: +; CHECK-NEXT: fldz +; CHECK-NEXT: fxch %st(1) +; CHECK-NEXT: .LBB1_13: +; CHECK-NEXT: fstp %st(1) +; CHECK-NEXT: addl $8, %esp +; CHECK-NEXT: retl + %res = call float @llvm.maximum.f32(float %x, float %y) + ret float %res +} + +define double @maximum_double(double %x, double %y) nounwind { +; CHECK-LABEL: maximum_double: +; CHECK: # %bb.0: +; CHECK-NEXT: pushl %ebp +; CHECK-NEXT: movl %esp, %ebp +; CHECK-NEXT: andl $-8, %esp +; CHECK-NEXT: subl $16, %esp +; CHECK-NEXT: fldl 16(%ebp) +; CHECK-NEXT: fldl 8(%ebp) +; CHECK-NEXT: fstl {{[0-9]+}}(%esp) +; CHECK-NEXT: fxch %st(1) +; CHECK-NEXT: fstl (%esp) +; CHECK-NEXT: fxch %st(1) +; CHECK-NEXT: fucom %st(1) +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: fld %st(0) +; CHECK-NEXT: ja .LBB2_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: fstp %st(0) +; CHECK-NEXT: fld %st(1) +; CHECK-NEXT: .LBB2_2: +; CHECK-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} +; CHECK-NEXT: jp .LBB2_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: fstp %st(0) +; CHECK-NEXT: fldz +; CHECK-NEXT: fxch %st(1) +; CHECK-NEXT: .LBB2_4: +; CHECK-NEXT: fstp %st(1) +; CHECK-NEXT: cmpb $0, {{[0-9]+}}(%esp) +; CHECK-NEXT: je .LBB2_6 +; CHECK-NEXT: # %bb.5: +; CHECK-NEXT: fstp %st(1) +; CHECK-NEXT: fld %st(0) +; CHECK-NEXT: fxch %st(1) +; CHECK-NEXT: .LBB2_6: +; CHECK-NEXT: cmpb $0, {{[0-9]+}}(%esp) +; CHECK-NEXT: je .LBB2_8 +; CHECK-NEXT: # %bb.7: +; CHECK-NEXT: fstp %st(2) +; CHECK-NEXT: fldz +; CHECK-NEXT: fxch %st(1) +; CHECK-NEXT: fxch %st(2) +; CHECK-NEXT: .LBB2_8: +; CHECK-NEXT: fstp %st(1) +; CHECK-NEXT: fldz +; CHECK-NEXT: fxch %st(1) +; CHECK-NEXT: fucom %st(1) +; CHECK-NEXT: fstp %st(1) +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: jne .LBB2_9 +; CHECK-NEXT: # %bb.10: +; CHECK-NEXT: jp .LBB2_13 +; CHECK-NEXT: # %bb.11: +; CHECK-NEXT: fstp %st(0) +; CHECK-NEXT: jmp .LBB2_12 +; CHECK-NEXT: .LBB2_9: +; CHECK-NEXT: fstp %st(1) +; CHECK-NEXT: .LBB2_12: +; CHECK-NEXT: fldz +; CHECK-NEXT: fxch %st(1) +; CHECK-NEXT: .LBB2_13: +; CHECK-NEXT: fstp %st(1) +; CHECK-NEXT: movl %ebp, %esp +; CHECK-NEXT: popl %ebp +; CHECK-NEXT: retl + %res = call double @llvm.maximum.f64(double %x, double %y) + ret double %res +} + +define fp128 @maximum_fp128(fp128 %x, fp128 %y) nounwind { +; CHECK-LABEL: maximum_fp128: +; CHECK: # %bb.0: +; CHECK-NEXT: pushl %ebp +; CHECK-NEXT: movl %esp, %ebp +; CHECK-NEXT: pushl %edi +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: andl $-16, %esp +; CHECK-NEXT: subl $16, %esp +; CHECK-NEXT: movl 8(%ebp), %esi +; CHECK-NEXT: movl %esp, %eax +; CHECK-NEXT: pushl 40(%ebp) +; CHECK-NEXT: pushl 36(%ebp) +; CHECK-NEXT: pushl 32(%ebp) +; CHECK-NEXT: pushl 28(%ebp) +; CHECK-NEXT: pushl 24(%ebp) +; CHECK-NEXT: pushl 20(%ebp) +; CHECK-NEXT: pushl 16(%ebp) +; CHECK-NEXT: pushl 12(%ebp) +; CHECK-NEXT: pushl %eax +; CHECK-NEXT: calll fmaximuml +; CHECK-NEXT: addl $32, %esp +; CHECK-NEXT: movl (%esp), %eax +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi +; CHECK-NEXT: movl %edi, 12(%esi) +; CHECK-NEXT: movl %edx, 8(%esi) +; CHECK-NEXT: movl %ecx, 4(%esi) +; CHECK-NEXT: movl %eax, (%esi) +; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: leal -8(%ebp), %esp +; CHECK-NEXT: popl %esi +; CHECK-NEXT: popl %edi +; CHECK-NEXT: popl %ebp +; CHECK-NEXT: retl $4 + %res = call fp128 @llvm.maximum.f128(fp128 %x, fp128 %y) + ret fp128 %res +} + +define half @minimum_half(half %x, half %y) nounwind { +; CHECK-LABEL: minimum_half: +; CHECK: # %bb.0: +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: subl $16, %esp +; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %esi +; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl %eax, (%esp) +; CHECK-NEXT: calll __extendhfsf2 +; CHECK-NEXT: fsts {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; CHECK-NEXT: movl %esi, (%esp) +; CHECK-NEXT: fstps {{[0-9]+}}(%esp) +; CHECK-NEXT: calll __extendhfsf2 +; CHECK-NEXT: flds {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; CHECK-NEXT: fxch %st(1) +; CHECK-NEXT: fsts {{[0-9]+}}(%esp) +; CHECK-NEXT: fucom %st(1) +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: fld %st(1) +; CHECK-NEXT: ja .LBB4_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: fstp %st(0) +; CHECK-NEXT: fld %st(0) +; CHECK-NEXT: .LBB4_2: +; CHECK-NEXT: fxch %st(2) +; CHECK-NEXT: fucom %st(1) +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} +; CHECK-NEXT: jp .LBB4_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: fstp %st(0) +; CHECK-NEXT: fldz +; CHECK-NEXT: fxch %st(3) +; CHECK-NEXT: .LBB4_4: +; CHECK-NEXT: fstp %st(3) +; CHECK-NEXT: cmpl $0, {{[0-9]+}}(%esp) +; CHECK-NEXT: jne .LBB4_6 +; CHECK-NEXT: # %bb.5: +; CHECK-NEXT: fstp %st(0) +; CHECK-NEXT: fld %st(1) +; CHECK-NEXT: .LBB4_6: +; CHECK-NEXT: cmpl $0, {{[0-9]+}}(%esp) +; CHECK-NEXT: jne .LBB4_8 +; CHECK-NEXT: # %bb.7: +; CHECK-NEXT: fstp %st(1) +; CHECK-NEXT: fldz +; CHECK-NEXT: .LBB4_8: +; CHECK-NEXT: fstp %st(0) +; CHECK-NEXT: fldz +; CHECK-NEXT: fxch %st(2) +; CHECK-NEXT: fucom %st(2) +; CHECK-NEXT: fstp %st(2) +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: jne .LBB4_9 +; CHECK-NEXT: # %bb.10: +; CHECK-NEXT: jp .LBB4_13 +; CHECK-NEXT: # %bb.11: +; CHECK-NEXT: fstp %st(1) +; CHECK-NEXT: jmp .LBB4_12 +; CHECK-NEXT: .LBB4_9: +; CHECK-NEXT: fstp %st(0) +; CHECK-NEXT: .LBB4_12: +; CHECK-NEXT: fldz +; CHECK-NEXT: .LBB4_13: +; CHECK-NEXT: fstp %st(0) +; CHECK-NEXT: fstps (%esp) +; CHECK-NEXT: calll __truncsfhf2 +; CHECK-NEXT: addl $16, %esp +; CHECK-NEXT: popl %esi +; CHECK-NEXT: retl + %res = call half @llvm.minimum.f16(half %x, half %y) + ret half %res +} + +define float @minimum_float(float %x, float %y) nounwind { +; CHECK-LABEL: minimum_float: +; CHECK: # %bb.0: +; CHECK-NEXT: subl $8, %esp +; CHECK-NEXT: flds {{[0-9]+}}(%esp) +; CHECK-NEXT: flds {{[0-9]+}}(%esp) +; CHECK-NEXT: fsts (%esp) +; CHECK-NEXT: fxch %st(1) +; CHECK-NEXT: fsts {{[0-9]+}}(%esp) +; CHECK-NEXT: fucom %st(1) +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: fld %st(1) +; CHECK-NEXT: ja .LBB5_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: fstp %st(0) +; CHECK-NEXT: fld %st(0) +; CHECK-NEXT: .LBB5_2: +; CHECK-NEXT: fxch %st(2) +; CHECK-NEXT: fucom %st(1) +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} +; CHECK-NEXT: jp .LBB5_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: fstp %st(0) +; CHECK-NEXT: fldz +; CHECK-NEXT: fxch %st(3) +; CHECK-NEXT: .LBB5_4: +; CHECK-NEXT: fstp %st(3) +; CHECK-NEXT: cmpl $0, (%esp) +; CHECK-NEXT: jne .LBB5_6 +; CHECK-NEXT: # %bb.5: +; CHECK-NEXT: fstp %st(0) +; CHECK-NEXT: fld %st(1) +; CHECK-NEXT: .LBB5_6: +; CHECK-NEXT: cmpl $0, {{[0-9]+}}(%esp) +; CHECK-NEXT: jne .LBB5_8 +; CHECK-NEXT: # %bb.7: +; CHECK-NEXT: fstp %st(1) +; CHECK-NEXT: fldz +; CHECK-NEXT: .LBB5_8: +; CHECK-NEXT: fstp %st(0) +; CHECK-NEXT: fldz +; CHECK-NEXT: fxch %st(2) +; CHECK-NEXT: fucom %st(2) +; CHECK-NEXT: fstp %st(2) +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: jne .LBB5_9 +; CHECK-NEXT: # %bb.10: +; CHECK-NEXT: jp .LBB5_13 +; CHECK-NEXT: # %bb.11: +; CHECK-NEXT: fstp %st(1) +; CHECK-NEXT: jmp .LBB5_12 +; CHECK-NEXT: .LBB5_9: +; CHECK-NEXT: fstp %st(0) +; CHECK-NEXT: .LBB5_12: +; CHECK-NEXT: fldz +; CHECK-NEXT: .LBB5_13: +; CHECK-NEXT: fstp %st(0) +; CHECK-NEXT: addl $8, %esp +; CHECK-NEXT: retl + %res = call float @llvm.minimum.f32(float %x, float %y) + ret float %res +} + +define double @minimum_double(double %x, double %y) nounwind { +; CHECK-LABEL: minimum_double: +; CHECK: # %bb.0: +; CHECK-NEXT: pushl %ebp +; CHECK-NEXT: movl %esp, %ebp +; CHECK-NEXT: andl $-8, %esp +; CHECK-NEXT: subl $16, %esp +; CHECK-NEXT: fldl 16(%ebp) +; CHECK-NEXT: fldl 8(%ebp) +; CHECK-NEXT: fstl {{[0-9]+}}(%esp) +; CHECK-NEXT: fxch %st(1) +; CHECK-NEXT: fstl (%esp) +; CHECK-NEXT: fucom %st(1) +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: fld %st(1) +; CHECK-NEXT: ja .LBB6_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: fstp %st(0) +; CHECK-NEXT: fld %st(0) +; CHECK-NEXT: .LBB6_2: +; CHECK-NEXT: fxch %st(2) +; CHECK-NEXT: fucom %st(1) +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} +; CHECK-NEXT: jp .LBB6_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: fstp %st(0) +; CHECK-NEXT: fldz +; CHECK-NEXT: fxch %st(3) +; CHECK-NEXT: .LBB6_4: +; CHECK-NEXT: fstp %st(3) +; CHECK-NEXT: cmpb $0, {{[0-9]+}}(%esp) +; CHECK-NEXT: jne .LBB6_6 +; CHECK-NEXT: # %bb.5: +; CHECK-NEXT: fstp %st(0) +; CHECK-NEXT: fld %st(1) +; CHECK-NEXT: .LBB6_6: +; CHECK-NEXT: cmpb $0, {{[0-9]+}}(%esp) +; CHECK-NEXT: jne .LBB6_8 +; CHECK-NEXT: # %bb.7: +; CHECK-NEXT: fstp %st(1) +; CHECK-NEXT: fldz +; CHECK-NEXT: .LBB6_8: +; CHECK-NEXT: fstp %st(0) +; CHECK-NEXT: fldz +; CHECK-NEXT: fxch %st(2) +; CHECK-NEXT: fucom %st(2) +; CHECK-NEXT: fstp %st(2) +; CHECK-NEXT: fnstsw %ax +; CHECK-NEXT: # kill: def $ah killed $ah killed $ax +; CHECK-NEXT: sahf +; CHECK-NEXT: jne .LBB6_9 +; CHECK-NEXT: # %bb.10: +; CHECK-NEXT: jp .LBB6_13 +; CHECK-NEXT: # %bb.11: +; CHECK-NEXT: fstp %st(1) +; CHECK-NEXT: jmp .LBB6_12 +; CHECK-NEXT: .LBB6_9: +; CHECK-NEXT: fstp %st(0) +; CHECK-NEXT: .LBB6_12: +; CHECK-NEXT: fldz +; CHECK-NEXT: .LBB6_13: +; CHECK-NEXT: fstp %st(0) +; CHECK-NEXT: movl %ebp, %esp +; CHECK-NEXT: popl %ebp +; CHECK-NEXT: retl + %res = call double @llvm.minimum.f64(double %x, double %y) + ret double %res +} + +define fp128 @minimum_fp128(fp128 %x, fp128 %y) nounwind { +; CHECK-LABEL: minimum_fp128: +; CHECK: # %bb.0: +; CHECK-NEXT: pushl %ebp +; CHECK-NEXT: movl %esp, %ebp +; CHECK-NEXT: pushl %edi +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: andl $-16, %esp +; CHECK-NEXT: subl $16, %esp +; CHECK-NEXT: movl 8(%ebp), %esi +; CHECK-NEXT: movl %esp, %eax +; CHECK-NEXT: pushl 40(%ebp) +; CHECK-NEXT: pushl 36(%ebp) +; CHECK-NEXT: pushl 32(%ebp) +; CHECK-NEXT: pushl 28(%ebp) +; CHECK-NEXT: pushl 24(%ebp) +; CHECK-NEXT: pushl 20(%ebp) +; CHECK-NEXT: pushl 16(%ebp) +; CHECK-NEXT: pushl 12(%ebp) +; CHECK-NEXT: pushl %eax +; CHECK-NEXT: calll fminimuml +; CHECK-NEXT: addl $32, %esp +; CHECK-NEXT: movl (%esp), %eax +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi +; CHECK-NEXT: movl %edi, 12(%esi) +; CHECK-NEXT: movl %edx, 8(%esi) +; CHECK-NEXT: movl %ecx, 4(%esi) +; CHECK-NEXT: movl %eax, (%esi) +; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: leal -8(%ebp), %esp +; CHECK-NEXT: popl %esi +; CHECK-NEXT: popl %edi +; CHECK-NEXT: popl %ebp +; CHECK-NEXT: retl $4 + %res = call fp128 @llvm.minimum.f128(fp128 %x, fp128 %y) + ret fp128 %res +} diff --git a/llvm/test/CodeGen/X86/fminimum-fmaximum.ll b/llvm/test/CodeGen/X86/fminimum-fmaximum.ll index 989aabc9e87bd..80e3a017a44e3 100644 --- a/llvm/test/CodeGen/X86/fminimum-fmaximum.ll +++ b/llvm/test/CodeGen/X86/fminimum-fmaximum.ll @@ -2649,3 +2649,180 @@ define <4 x bfloat> @test_fmaximum_v4bf16(<4 x bfloat> %x, <4 x bfloat> %y) { %r = call <4 x bfloat> @llvm.maximum.v4bf16(<4 x bfloat> %x, <4 x bfloat> %y) ret <4 x bfloat> %r } + +define fp128 @maximum_fp128(fp128 %x, fp128 %y) nounwind { +; SSE2-LABEL: maximum_fp128: +; SSE2: # %bb.0: +; SSE2-NEXT: subq $88, %rsp +; SSE2-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; SSE2-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; SSE2-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) +; SSE2-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) +; SSE2-NEXT: callq __gttf2@PLT +; SSE2-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; SSE2-NEXT: testl %eax, %eax +; SSE2-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload +; SSE2-NEXT: jg .LBB35_2 +; SSE2-NEXT: # %bb.1: +; SSE2-NEXT: movaps %xmm1, %xmm0 +; SSE2-NEXT: .LBB35_2: +; SSE2-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; SSE2-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload +; SSE2-NEXT: callq __unordtf2@PLT +; SSE2-NEXT: testl %eax, %eax +; SSE2-NEXT: jne .LBB35_3 +; SSE2-NEXT: # %bb.4: +; SSE2-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; SSE2-NEXT: jmp .LBB35_5 +; SSE2-NEXT: .LBB35_3: +; SSE2-NEXT: movaps {{.*#+}} xmm0 = [NaN] +; SSE2-NEXT: .LBB35_5: +; SSE2-NEXT: cmpb $0, {{[0-9]+}}(%rsp) +; SSE2-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; SSE2-NEXT: je .LBB35_7 +; SSE2-NEXT: # %bb.6: +; SSE2-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; SSE2-NEXT: .LBB35_7: +; SSE2-NEXT: cmpb $0, {{[0-9]+}}(%rsp) +; SSE2-NEXT: je .LBB35_9 +; SSE2-NEXT: # %bb.8: +; SSE2-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload +; SSE2-NEXT: .LBB35_9: +; SSE2-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; SSE2-NEXT: xorps %xmm1, %xmm1 +; SSE2-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; SSE2-NEXT: callq __eqtf2@PLT +; SSE2-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; SSE2-NEXT: testl %eax, %eax +; SSE2-NEXT: je .LBB35_11 +; SSE2-NEXT: # %bb.10: +; SSE2-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; SSE2-NEXT: .LBB35_11: +; SSE2-NEXT: addq $88, %rsp +; SSE2-NEXT: retq +; +; AVX-LABEL: maximum_fp128: +; AVX: # %bb.0: +; AVX-NEXT: subq $88, %rsp +; AVX-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; AVX-NEXT: vmovaps %xmm0, {{[0-9]+}}(%rsp) +; AVX-NEXT: vmovaps %xmm1, {{[0-9]+}}(%rsp) +; AVX-NEXT: callq __gttf2@PLT +; AVX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; AVX-NEXT: testl %eax, %eax +; AVX-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload +; AVX-NEXT: jg .LBB35_2 +; AVX-NEXT: # %bb.1: +; AVX-NEXT: vmovaps %xmm1, %xmm0 +; AVX-NEXT: .LBB35_2: +; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload +; AVX-NEXT: callq __unordtf2@PLT +; AVX-NEXT: testl %eax, %eax +; AVX-NEXT: jne .LBB35_3 +; AVX-NEXT: # %bb.4: +; AVX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; AVX-NEXT: jmp .LBB35_5 +; AVX-NEXT: .LBB35_3: +; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [NaN] +; AVX-NEXT: .LBB35_5: +; AVX-NEXT: cmpb $0, {{[0-9]+}}(%rsp) +; AVX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; AVX-NEXT: je .LBB35_7 +; AVX-NEXT: # %bb.6: +; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; AVX-NEXT: .LBB35_7: +; AVX-NEXT: cmpb $0, {{[0-9]+}}(%rsp) +; AVX-NEXT: je .LBB35_9 +; AVX-NEXT: # %bb.8: +; AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload +; AVX-NEXT: .LBB35_9: +; AVX-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX-NEXT: callq __eqtf2@PLT +; AVX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; AVX-NEXT: testl %eax, %eax +; AVX-NEXT: je .LBB35_11 +; AVX-NEXT: # %bb.10: +; AVX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; AVX-NEXT: .LBB35_11: +; AVX-NEXT: addq $88, %rsp +; AVX-NEXT: retq +; +; AVX10_2-LABEL: maximum_fp128: +; AVX10_2: # %bb.0: +; AVX10_2-NEXT: subq $88, %rsp +; AVX10_2-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX10_2-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; AVX10_2-NEXT: vmovaps %xmm0, {{[0-9]+}}(%rsp) +; AVX10_2-NEXT: vmovaps %xmm1, {{[0-9]+}}(%rsp) +; AVX10_2-NEXT: callq __gttf2@PLT +; AVX10_2-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; AVX10_2-NEXT: testl %eax, %eax +; AVX10_2-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload +; AVX10_2-NEXT: jg .LBB35_2 +; AVX10_2-NEXT: # %bb.1: +; AVX10_2-NEXT: vmovaps %xmm1, %xmm0 +; AVX10_2-NEXT: .LBB35_2: +; AVX10_2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX10_2-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload +; AVX10_2-NEXT: callq __unordtf2@PLT +; AVX10_2-NEXT: testl %eax, %eax +; AVX10_2-NEXT: jne .LBB35_3 +; AVX10_2-NEXT: # %bb.4: +; AVX10_2-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; AVX10_2-NEXT: jmp .LBB35_5 +; AVX10_2-NEXT: .LBB35_3: +; AVX10_2-NEXT: vmovaps {{.*#+}} xmm0 = [NaN] +; AVX10_2-NEXT: .LBB35_5: +; AVX10_2-NEXT: cmpb $0, {{[0-9]+}}(%rsp) +; AVX10_2-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; AVX10_2-NEXT: je .LBB35_7 +; AVX10_2-NEXT: # %bb.6: +; AVX10_2-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; AVX10_2-NEXT: .LBB35_7: +; AVX10_2-NEXT: cmpb $0, {{[0-9]+}}(%rsp) +; AVX10_2-NEXT: je .LBB35_9 +; AVX10_2-NEXT: # %bb.8: +; AVX10_2-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload +; AVX10_2-NEXT: .LBB35_9: +; AVX10_2-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX10_2-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; AVX10_2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX10_2-NEXT: callq __eqtf2@PLT +; AVX10_2-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; AVX10_2-NEXT: testl %eax, %eax +; AVX10_2-NEXT: je .LBB35_11 +; AVX10_2-NEXT: # %bb.10: +; AVX10_2-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; AVX10_2-NEXT: .LBB35_11: +; AVX10_2-NEXT: addq $88, %rsp +; AVX10_2-NEXT: retq +; +; X86-LABEL: maximum_fp128: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: pushl %esi +; X86-NEXT: andl $-16, %esp +; X86-NEXT: subl $80, %esp +; X86-NEXT: movl 8(%ebp), %esi +; X86-NEXT: vmovups 12(%ebp), %ymm0 +; X86-NEXT: vmovups %ymm0, {{[0-9]+}}(%esp) +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, (%esp) +; X86-NEXT: vzeroupper +; X86-NEXT: calll fmaximuml +; X86-NEXT: subl $4, %esp +; X86-NEXT: vmovaps {{[0-9]+}}(%esp), %xmm0 +; X86-NEXT: vmovaps %xmm0, (%esi) +; X86-NEXT: movl %esi, %eax +; X86-NEXT: leal -4(%ebp), %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %ebp +; X86-NEXT: retl $4 + %res = call fp128 @llvm.maximum.f128(fp128 %x, fp128 %y) + ret fp128 %res +}