diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index db50f132b1349..c6e1764edffc3 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -42524,6 +42524,27 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode( SDValue Insert = insertSubVector(UndefVec, ExtOp, 0, TLO.DAG, DL, ExtSizeInBits); return TLO.CombineTo(Op, Insert); + } + // Conversions. + // TODO: Add more CVT opcodes when we have test coverage. + case X86ISD::CVTTP2SI: + case X86ISD::CVTTP2UI: + case X86ISD::CVTPH2PS: { + SDLoc DL(Op); + unsigned Scale = SizeInBits / ExtSizeInBits; + SDValue SrcOp = Op.getOperand(0); + MVT SrcVT = SrcOp.getSimpleValueType(); + // std::max needs <unsigned>: quotient and int literal differ in type. + unsigned SrcExtSize = + std::max<unsigned>(SrcVT.getSizeInBits() / Scale, 128); + MVT ExtVT = MVT::getVectorVT(VT.getSimpleVT().getScalarType(), + ExtSizeInBits / VT.getScalarSizeInBits()); + SDValue ExtOp = TLO.DAG.getNode( + Opc, DL, ExtVT, extractSubVector(SrcOp, 0, TLO.DAG, DL, SrcExtSize)); + SDValue UndefVec = TLO.DAG.getUNDEF(VT); + SDValue Insert = + insertSubVector(UndefVec, ExtOp, 0, TLO.DAG, DL, ExtSizeInBits); + return TLO.CombineTo(Op, Insert); } // Zero upper elements. 
case X86ISD::VZEXT_MOVL: diff --git a/llvm/test/CodeGen/X86/vector-half-conversions.ll b/llvm/test/CodeGen/X86/vector-half-conversions.ll index e87814ebb1dbe..ef0f3f3e816df 100644 --- a/llvm/test/CodeGen/X86/vector-half-conversions.ll +++ b/llvm/test/CodeGen/X86/vector-half-conversions.ll @@ -4990,6 +4990,7 @@ define <4 x i32> @fptosi_2f16_to_4i32(<2 x half> %a) nounwind { ret <4 x i32> %ext } +; PR83402 define <4 x i32> @fptosi_4f16_to_4i32(<4 x half> %a) nounwind { ; AVX-LABEL: fptosi_4f16_to_4i32: ; AVX: # %bb.0: @@ -5024,16 +5025,14 @@ define <4 x i32> @fptosi_4f16_to_4i32(<4 x half> %a) nounwind { ; ; F16C-LABEL: fptosi_4f16_to_4i32: ; F16C: # %bb.0: -; F16C-NEXT: vcvtph2ps %xmm0, %ymm0 +; F16C-NEXT: vcvtph2ps %xmm0, %xmm0 ; F16C-NEXT: vcvttps2dq %xmm0, %xmm0 -; F16C-NEXT: vzeroupper ; F16C-NEXT: retq ; ; AVX512-LABEL: fptosi_4f16_to_4i32: ; AVX512: # %bb.0: -; AVX512-NEXT: vcvtph2ps %xmm0, %ymm0 +; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0 ; AVX512-NEXT: vcvttps2dq %xmm0, %xmm0 -; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %cvt = fptosi <4 x half> %a to <4 x i32> ret <4 x i32> %cvt @@ -5213,13 +5212,12 @@ define <4 x i32> @fptoui_4f16_to_4i32(<4 x half> %a) nounwind { ; ; F16C-LABEL: fptoui_4f16_to_4i32: ; F16C: # %bb.0: -; F16C-NEXT: vcvtph2ps %xmm0, %ymm0 -; F16C-NEXT: vcvttps2dq %ymm0, %ymm1 -; F16C-NEXT: vsubps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; F16C-NEXT: vcvttps2dq %ymm0, %ymm0 +; F16C-NEXT: vcvtph2ps %xmm0, %xmm0 +; F16C-NEXT: vcvttps2dq %xmm0, %xmm1 +; F16C-NEXT: vsubps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; F16C-NEXT: vcvttps2dq %xmm0, %xmm0 ; F16C-NEXT: vorps %xmm0, %xmm1, %xmm0 ; F16C-NEXT: vblendvps %xmm1, %xmm0, %xmm1, %xmm0 -; F16C-NEXT: vzeroupper ; F16C-NEXT: retq ; ; AVX512F-LABEL: fptoui_4f16_to_4i32: @@ -5232,9 +5230,8 @@ define <4 x i32> @fptoui_4f16_to_4i32(<4 x half> %a) nounwind { ; ; AVX512-FASTLANE-LABEL: fptoui_4f16_to_4i32: ; AVX512-FASTLANE: # %bb.0: -; AVX512-FASTLANE-NEXT: vcvtph2ps %xmm0, %ymm0 +; 
AVX512-FASTLANE-NEXT: vcvtph2ps %xmm0, %xmm0 ; AVX512-FASTLANE-NEXT: vcvttps2udq %xmm0, %xmm0 -; AVX512-FASTLANE-NEXT: vzeroupper ; AVX512-FASTLANE-NEXT: retq %cvt = fptoui <4 x half> %a to <4 x i32> ret <4 x i32> %cvt