-
Notifications
You must be signed in to change notification settings - Fork 13.4k
AMDGPU: Fix handling of -0 in round lowering #65761
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
That's not visible in any of the codegen tests is it? We should have something that shows the final codegen for globalisel. Code change looks good. |
Yes, this one is inconsistently tested and only has the mir legalizer test |
e9e1ed6
to
71f3a06
Compare
@llvm/pr-subscribers-backend-amdgpu ChangesMove the select of the offset to the copysign input, so we end up with -0 + -0 which preserves the sign. The GlobalISel output leaves something to be desired, it emits some trivially foldable bit ops and also fails to form BFI. Fixes #65629Patch is 393.03 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/65761.diff 9 Files Affected:
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index cfb95955d1f888b..5fcebf2b8df59cd 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -6575,23 +6575,25 @@ LegalizerHelper::lowerIntrinsicRound(MachineInstr &MI) { // round(x) => // t = trunc(x); // d = fabs(x - t); - // o = copysign(1.0f, x); - // return t + (d >= 0.5 ? o : 0.0); + // o = copysign(d >= 0.5 ? 1.0 : 0.0, x); + // return t + o; auto T = MIRBuilder.buildIntrinsicTrunc(Ty, X, Flags); auto Diff = MIRBuilder.buildFSub(Ty, X, T, Flags); auto AbsDiff = MIRBuilder.buildFAbs(Ty, Diff, Flags); - auto Zero = MIRBuilder.buildFConstant(Ty, 0.0); - auto One = MIRBuilder.buildFConstant(Ty, 1.0); + auto Half = MIRBuilder.buildFConstant(Ty, 0.5); - auto SignOne = MIRBuilder.buildFCopysign(Ty, One, X); + auto Cmp = + MIRBuilder.buildFCmp(CmpInst::FCMP_OGE, CondTy, AbsDiff, Half, Flags); - auto Cmp = MIRBuilder.buildFCmp(CmpInst::FCMP_OGE, CondTy, AbsDiff, Half, - Flags); - auto Sel = MIRBuilder.buildSelect(Ty, Cmp, SignOne, Zero, Flags); + // Could emit G_UITOFP instead + auto One = MIRBuilder.buildFConstant(Ty, 1.0); + auto Zero = MIRBuilder.buildFConstant(Ty, 0.0); + auto BoolFP = MIRBuilder.buildSelect(Ty, Cmp, One, Zero); + auto SignedOffset = MIRBuilder.buildFCopysign(Ty, BoolFP, X); - MIRBuilder.buildFAdd(DstReg, T, Sel, Flags); + MIRBuilder.buildFAdd(DstReg, T, SignedOffset, Flags); MI.eraseFromParent(); return Legalized; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index d84a50975430514..02332a9fe9c0c40 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -2429,18 +2429,16 @@ SDValue AMDGPUTargetLowering::LowerFROUND(SDValue Op, SelectionDAG &DAG) const { const SDValue Zero = DAG.getConstantFP(0.0, SL, VT); const SDValue One = DAG.getConstantFP(1.0, SL, VT); - const SDValue Half = DAG.getConstantFP(0.5, SL, VT); - - SDValue SignOne = DAG.getNode(ISD::FCOPYSIGN, SL, VT, One, X); EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); + const SDValue Half = DAG.getConstantFP(0.5, SL, VT); SDValue Cmp = DAG.getSetCC(SL, SetCCVT, AbsDiff, Half, ISD::SETOGE); + SDValue OneOrZeroFP = DAG.getNode(ISD::SELECT, SL, VT, Cmp, One, Zero); - SDValue Sel = DAG.getNode(ISD::SELECT, SL, VT, Cmp, SignOne, Zero); - - return DAG.getNode(ISD::FADD, SL, VT, T, Sel); + SDValue SignedOffset = DAG.getNode(ISD::FCOPYSIGN, SL, VT, OneOrZeroFP, X); + return DAG.getNode(ISD::FADD, SL, VT, T, SignedOffset); } SDValue AMDGPUTargetLowering::LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const { diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-intrinsic-round.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-intrinsic-round.mir index 1a70e500d36f54c..b97d04e809a6f7d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-intrinsic-round.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-intrinsic-round.mir @@ -18,16 +18,19 @@ body: | ; GFX6-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[COPY]] ; GFX6-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[COPY]], [[INTRINSIC_TRUNC]] ; GFX6-NEXT: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[FSUB]] - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 5.000000e-01 - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1065353216 - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C2]] - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C3]], [[AND]] - ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s32), [[C1]] - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[OR]], [[C]] - ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 5.000000e-01 + ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s32), [[C]] + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C1]], [[C2]] + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SELECT]], [[C4]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C3]] + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[AND1]] + ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[INTRINSIC_TRUNC]], [[OR]] ; GFX6-NEXT: $vgpr0 = COPY [[FADD]](s32) + ; ; GFX8-LABEL: name: test_intrinsic_round_s32 ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} @@ -35,16 +38,19 @@ body: | ; GFX8-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[COPY]] ; GFX8-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[COPY]], [[INTRINSIC_TRUNC]] ; GFX8-NEXT: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[FSUB]] - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 5.000000e-01 - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1065353216 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C2]] - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C3]], [[AND]] - ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s32), [[C1]] - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[OR]], [[C]] - ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 5.000000e-01 + ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s32), [[C]] + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C1]], [[C2]] + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SELECT]], [[C4]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C3]] + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[AND1]] + ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[INTRINSIC_TRUNC]], [[OR]] ; GFX8-NEXT: $vgpr0 = COPY [[FADD]](s32) + ; ; GFX9-LABEL: name: test_intrinsic_round_s32 ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -52,15 +58,17 @@ body: | ; GFX9-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[COPY]] ; GFX9-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[COPY]], [[INTRINSIC_TRUNC]] ; GFX9-NEXT: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[FSUB]] - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 5.000000e-01 - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1065353216 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C2]] - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C3]], [[AND]] - ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s32), [[C1]] - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[OR]], [[C]] - ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 5.000000e-01 + ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s32), [[C]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C1]], [[C2]] + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SELECT]], [[C4]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C3]] + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[AND1]] + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[INTRINSIC_TRUNC]], [[OR]] ; GFX9-NEXT: $vgpr0 = COPY [[FADD]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = G_INTRINSIC_ROUND %0 @@ -80,16 +88,19 @@ body: | ; GFX6-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = nsz G_INTRINSIC_TRUNC [[COPY]] ; GFX6-NEXT: [[FSUB:%[0-9]+]]:_(s32) = nsz G_FSUB [[COPY]], [[INTRINSIC_TRUNC]] ; GFX6-NEXT: [[FABS:%[0-9]+]]:_(s32) = nsz G_FABS [[FSUB]] - ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 5.000000e-01 - ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1065353216 - ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C2]] - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C3]], [[AND]] - ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(s1) = nsz G_FCMP floatpred(oge), [[FABS]](s32), [[C1]] - ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = nsz G_SELECT [[FCMP]](s1), [[OR]], [[C]] - ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = nsz G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 5.000000e-01 + ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(s1) = nsz G_FCMP floatpred(oge), [[FABS]](s32), [[C]] + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C1]], [[C2]] + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SELECT]], [[C4]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C3]] + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[AND1]] + ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = nsz G_FADD [[INTRINSIC_TRUNC]], [[OR]] ; GFX6-NEXT: $vgpr0 = COPY [[FADD]](s32) + ; ; GFX8-LABEL: name: test_intrinsic_round_s32_flags ; GFX8: liveins: $vgpr0 ; GFX8-NEXT: {{ $}} @@ -97,16 +108,19 @@ body: | ; GFX8-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = nsz G_INTRINSIC_TRUNC [[COPY]] ; GFX8-NEXT: [[FSUB:%[0-9]+]]:_(s32) = nsz G_FSUB [[COPY]], [[INTRINSIC_TRUNC]] ; GFX8-NEXT: [[FABS:%[0-9]+]]:_(s32) = nsz G_FABS [[FSUB]] - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 5.000000e-01 - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1065353216 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C2]] - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C3]], [[AND]] - ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(s1) = nsz G_FCMP floatpred(oge), [[FABS]](s32), [[C1]] - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = nsz G_SELECT [[FCMP]](s1), [[OR]], [[C]] - ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s32) = nsz G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 5.000000e-01 + ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(s1) = nsz G_FCMP floatpred(oge), [[FABS]](s32), [[C]] + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C1]], [[C2]] + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SELECT]], [[C4]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C3]] + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[AND1]] + ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s32) = nsz G_FADD [[INTRINSIC_TRUNC]], [[OR]] ; GFX8-NEXT: $vgpr0 = COPY [[FADD]](s32) + ; ; GFX9-LABEL: name: test_intrinsic_round_s32_flags ; GFX9: liveins: $vgpr0 ; GFX9-NEXT: {{ $}} @@ -114,15 +128,17 @@ body: | ; GFX9-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = nsz G_INTRINSIC_TRUNC [[COPY]] ; GFX9-NEXT: [[FSUB:%[0-9]+]]:_(s32) = nsz G_FSUB [[COPY]], [[INTRINSIC_TRUNC]] ; GFX9-NEXT: [[FABS:%[0-9]+]]:_(s32) = nsz G_FABS [[FSUB]] - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 5.000000e-01 - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1065353216 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C2]] - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C3]], [[AND]] - ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(s1) = nsz G_FCMP floatpred(oge), [[FABS]](s32), [[C1]] - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = nsz G_SELECT [[FCMP]](s1), [[OR]], [[C]] - ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = nsz G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 5.000000e-01 + ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(s1) = nsz G_FCMP floatpred(oge), [[FABS]](s32), [[C]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[C1]], [[C2]] + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SELECT]], [[C4]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C3]] + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[AND1]] + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = nsz G_FADD [[INTRINSIC_TRUNC]], [[OR]] ; GFX9-NEXT: $vgpr0 = COPY [[FADD]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = nsz G_INTRINSIC_ROUND %0 @@ -162,16 +178,19 @@ body: | ; GFX6-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[SELECT1]] ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[COPY]], [[FNEG]] ; GFX6-NEXT: [[FABS:%[0-9]+]]:_(s64) = G_FABS [[FADD]] - ; GFX6-NEXT: [[C8:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00 - ; GFX6-NEXT: [[C9:%[0-9]+]]:_(s64) = G_FCONSTANT double 5.000000e-01 - ; GFX6-NEXT: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; GFX6-NEXT: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 4607182418800017408 - ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C10]] - ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[C11]], [[AND2]] - ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s64), [[C9]] - ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[FCMP]](s1), [[OR]], [[C8]] - ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(s64) = G_FADD [[SELECT1]], [[SELECT2]] + ; GFX6-NEXT: [[C8:%[0-9]+]]:_(s64) = G_FCONSTANT double 5.000000e-01 + ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s64), [[C8]] + ; GFX6-NEXT: [[C9:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 + ; GFX6-NEXT: [[C10:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00 + ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[FCMP]](s1), [[C9]], [[C10]] + ; GFX6-NEXT: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 + ; GFX6-NEXT: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807 + ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[SELECT2]], [[C12]] + ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C11]] + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[AND2]], [[AND3]] + ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(s64) = G_FADD [[SELECT1]], [[OR]] ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[FADD1]](s64) + ; ; GFX8-LABEL: name: test_intrinsic_round_s64 ; GFX8: liveins: $vgpr0_vgpr1 ; GFX8-NEXT: {{ $}} @@ -180,16 +199,19 @@ body: | ; GFX8-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INTRINSIC_TRUNC]] ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[COPY]], [[FNEG]] ; GFX8-NEXT: [[FABS:%[0-9]+]]:_(s64) = G_FABS [[FADD]] - ; GFX8-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00 - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 5.000000e-01 - ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4607182418800017408 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C2]] - ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[C3]], [[AND]] - ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s64), [[C1]] - ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[FCMP]](s1), [[OR]], [[C]] - ; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(s64) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 5.000000e-01 + ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s64), [[C]] + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00 + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[FCMP]](s1), [[C1]], [[C2]] + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 + ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[SELECT]], [[C4]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C3]] + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[AND]], [[AND1]] + ; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(s64) = G_FADD [[INTRINSIC_TRUNC]], [[OR]] ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[FADD1]](s64) + ; ; GFX9-LABEL: name: test_intrinsic_round_s64 ; GFX9: liveins: $vgpr0_vgpr1 ; GFX9-NEXT: {{ $}} @@ -198,15 +220,17 @@ body: | ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INTRINSIC_TRUNC]] ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[COPY]], [[FNEG]] ; GFX9-NEXT: [[FABS:%[0-9]+]]:_(s64) = G_FABS [[FADD]] - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00 - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 5.000000e-01 - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4607182418800017408 - ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C2]] - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[C3]], [[AND]] - ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s64), [[C1]] - ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[FCMP]](s1), [[OR]], [[C]] - ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(s64) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 5.000000e-01 + ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s64), [[C]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00 + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[FCMP]](s1), [[C1]], [[C2]] + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807 + ; GFX9-NEXT: [[AND:%[0-9... |
@@ -41,6 +42,57 @@ define amdgpu_kernel void @v_fneg_add_f32(ptr addrspace(1) %out, ptr addrspace(1 | |||
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]] | |||
; GCN-NEXT: s_waitcnt vmcnt(0) | |||
define amdgpu_kernel void @v_fneg_add_store_use_add_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Does this test pass? Looks like you should have deleted the manual checks above this line, now that there are autogenerated checks below this line.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This isn't supposed to be here
Move the select of the offset to the copysign input, so we end up with -0 + -0 which preserves the sign. The GlobalISel output leaves something to be desired, it emits some trivially foldable bit ops and also fails to form BFI. Fixes llvm#65629
71f3a06
to
85fc1b5
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This LGTM and it seems the CI is happy.
Move the select of the offset to the copysign input, so we end up with -0 + -0 which preserves the sign.
The GlobalISel output leaves something to be desired, it emits some trivially foldable bit ops and also fails to form BFI.
Fixes #65629