diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 8708489ac4fef..52e42932fc751 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -2607,8 +2607,6 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
   ///
   /// e.g., <2 x i32> @llvm.aarch64.neon.saddlp.v2i32.v4i16(<4 x i16>)
   ///       <16 x i8> @llvm.aarch64.neon.addp.v16i8(<16 x i8>, <16 x i8>)
-  ///
-  /// TODO: adapt this function to handle horizontal add/sub?
   void handlePairwiseShadowOrIntrinsic(IntrinsicInst &I) {
     assert(I.arg_size() == 1 || I.arg_size() == 2);
 
@@ -2617,8 +2615,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
 
     FixedVectorType *ParamType =
         cast<FixedVectorType>(I.getArgOperand(0)->getType());
-    if (I.arg_size() == 2)
-      assert(ParamType == cast<FixedVectorType>(I.getArgOperand(1)->getType()));
+    assert((I.arg_size() != 2) ||
+           (ParamType == cast<FixedVectorType>(I.getArgOperand(1)->getType())));
     [[maybe_unused]] FixedVectorType *ReturnType =
         cast<FixedVectorType>(I.getType());
     assert(ParamType->getNumElements() * I.arg_size() ==
@@ -2656,6 +2654,82 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     setOriginForNaryOp(I);
   }
 
+  /// Propagate shadow for 1- or 2-vector intrinsics that combine adjacent
+  /// fields, with the parameters reinterpreted to have elements of a specified
+  /// width. For example:
+  ///     @llvm.x86.ssse3.phadd.w(<1 x i64> [[VAR1]], <1 x i64> [[VAR2]])
+  /// conceptually operates on
+  ///     (<4 x i16> [[VAR1]], <4 x i16> [[VAR2]])
+  /// and can be handled with ReinterpretElemWidth == 16.
+  void handlePairwiseShadowOrIntrinsic(IntrinsicInst &I,
+                                       int ReinterpretElemWidth) {
+    assert(I.arg_size() == 1 || I.arg_size() == 2);
+
+    assert(I.getType()->isVectorTy());
+    assert(I.getArgOperand(0)->getType()->isVectorTy());
+
+    FixedVectorType *ParamType =
+        cast<FixedVectorType>(I.getArgOperand(0)->getType());
+    assert((I.arg_size() != 2) ||
+           (ParamType == cast<FixedVectorType>(I.getArgOperand(1)->getType())));
+
+    [[maybe_unused]] FixedVectorType *ReturnType =
+        cast<FixedVectorType>(I.getType());
+    assert(ParamType->getNumElements() * I.arg_size() ==
+           2 * ReturnType->getNumElements());
+
+    IRBuilder<> IRB(&I);
+
+    unsigned TotalNumElems = ParamType->getNumElements() * I.arg_size();
+    FixedVectorType *ReinterpretShadowTy = nullptr;
+    assert(isAligned(Align(ReinterpretElemWidth),
+                     ParamType->getPrimitiveSizeInBits()));
+    ReinterpretShadowTy = FixedVectorType::get(
+        IRB.getIntNTy(ReinterpretElemWidth),
+        ParamType->getPrimitiveSizeInBits() / ReinterpretElemWidth);
+    TotalNumElems = ReinterpretShadowTy->getNumElements() * I.arg_size();
+
+    // Horizontal OR of shadow
+    SmallVector<int> EvenMask;
+    SmallVector<int> OddMask;
+    for (unsigned X = 0; X < TotalNumElems - 1; X += 2) {
+      EvenMask.push_back(X);
+      OddMask.push_back(X + 1);
+    }
+
+    Value *FirstArgShadow = getShadow(&I, 0);
+    FirstArgShadow = IRB.CreateBitCast(FirstArgShadow, ReinterpretShadowTy);
+
+    // If we had two parameters each with an odd number of elements, the total
+    // number of elements is even, but we have never seen this in extant
+    // instruction sets, so we enforce that each parameter must have an even
+    // number of elements.
+    assert(isAligned(
+        Align(2),
+        cast<FixedVectorType>(FirstArgShadow->getType())->getNumElements()));
+
+    Value *EvenShadow;
+    Value *OddShadow;
+    if (I.arg_size() == 2) {
+      Value *SecondArgShadow = getShadow(&I, 1);
+      SecondArgShadow = IRB.CreateBitCast(SecondArgShadow, ReinterpretShadowTy);
+
+      EvenShadow =
+          IRB.CreateShuffleVector(FirstArgShadow, SecondArgShadow, EvenMask);
+      OddShadow =
+          IRB.CreateShuffleVector(FirstArgShadow, SecondArgShadow, OddMask);
+    } else {
+      EvenShadow = IRB.CreateShuffleVector(FirstArgShadow, EvenMask);
+      OddShadow = IRB.CreateShuffleVector(FirstArgShadow, OddMask);
+    }
+
+    Value *OrShadow = IRB.CreateOr(EvenShadow, OddShadow);
+    OrShadow = CreateShadowCast(IRB, OrShadow, getShadowTy(&I));
+
+    setShadow(&I, OrShadow);
+    setOriginForNaryOp(I);
+  }
+
   void visitFNeg(UnaryOperator &I) { handleShadowOr(I); }
 
   // Handle multiplication by constant.
@@ -4156,87 +4230,6 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     setOriginForNaryOp(I);
   }
 
-  void handleAVXHorizontalAddSubIntrinsic(IntrinsicInst &I) {
-    // Approximation only:
-    //   output = horizontal_add/sub(A, B)
-    //   => shadow[output] = horizontal_add(shadow[A], shadow[B])
-    //
-    // We always use horizontal add instead of subtract, because subtracting
-    // a fully uninitialized shadow would result in a fully initialized shadow.
-    //
-    // - If we add two adjacent zero (initialized) shadow values, the
-    //   result always be zero i.e., no false positives.
-    // - If we add two shadows, one of which is uninitialized, the
-    //   result will always be non-zero i.e., no false negatives.
-    // - However, we can have false negatives if we do an addition that wraps
-    //   to zero; we consider this an acceptable tradeoff for performance.
-    //
-    // To make shadow propagation precise, we want the equivalent of
-    // "horizontal OR", but this is not available for SSE3/SSSE3/AVX/AVX2.
-
-    Intrinsic::ID shadowIntrinsicID = I.getIntrinsicID();
-
-    switch (I.getIntrinsicID()) {
-    case Intrinsic::x86_sse3_hsub_ps:
-      shadowIntrinsicID = Intrinsic::x86_sse3_hadd_ps;
-      break;
-
-    case Intrinsic::x86_sse3_hsub_pd:
-      shadowIntrinsicID = Intrinsic::x86_sse3_hadd_pd;
-      break;
-
-    case Intrinsic::x86_ssse3_phsub_d:
-      shadowIntrinsicID = Intrinsic::x86_ssse3_phadd_d;
-      break;
-
-    case Intrinsic::x86_ssse3_phsub_d_128:
-      shadowIntrinsicID = Intrinsic::x86_ssse3_phadd_d_128;
-      break;
-
-    case Intrinsic::x86_ssse3_phsub_w:
-      shadowIntrinsicID = Intrinsic::x86_ssse3_phadd_w;
-      break;
-
-    case Intrinsic::x86_ssse3_phsub_w_128:
-      shadowIntrinsicID = Intrinsic::x86_ssse3_phadd_w_128;
-      break;
-
-    case Intrinsic::x86_ssse3_phsub_sw:
-      shadowIntrinsicID = Intrinsic::x86_ssse3_phadd_sw;
-      break;
-
-    case Intrinsic::x86_ssse3_phsub_sw_128:
-      shadowIntrinsicID = Intrinsic::x86_ssse3_phadd_sw_128;
-      break;
-
-    case Intrinsic::x86_avx_hsub_pd_256:
-      shadowIntrinsicID = Intrinsic::x86_avx_hadd_pd_256;
-      break;
-
-    case Intrinsic::x86_avx_hsub_ps_256:
-      shadowIntrinsicID = Intrinsic::x86_avx_hadd_ps_256;
-      break;
-
-    case Intrinsic::x86_avx2_phsub_d:
-      shadowIntrinsicID = Intrinsic::x86_avx2_phadd_d;
-      break;
-
-    case Intrinsic::x86_avx2_phsub_w:
-      shadowIntrinsicID = Intrinsic::x86_avx2_phadd_w;
-      break;
-
-    case Intrinsic::x86_avx2_phsub_sw:
-      shadowIntrinsicID = Intrinsic::x86_avx2_phadd_sw;
-      break;
-
-    default:
-      break;
-    }
-
-    return handleIntrinsicByApplyingToShadow(I, shadowIntrinsicID,
-                                             /*trailingVerbatimArgs*/ 0);
-  }
-
   /// Handle Arm NEON vector store intrinsics (vst{2,3,4}, vst1x_{2,3,4},
   /// and vst{2,3,4}lane).
/// @@ -4783,33 +4776,49 @@ struct MemorySanitizerVisitor : public InstVisitor { handleVtestIntrinsic(I); break; - case Intrinsic::x86_sse3_hadd_ps: - case Intrinsic::x86_sse3_hadd_pd: - case Intrinsic::x86_ssse3_phadd_d: - case Intrinsic::x86_ssse3_phadd_d_128: + // Packed Horizontal Add/Subtract case Intrinsic::x86_ssse3_phadd_w: case Intrinsic::x86_ssse3_phadd_w_128: + case Intrinsic::x86_avx2_phadd_w: + case Intrinsic::x86_ssse3_phsub_w: + case Intrinsic::x86_ssse3_phsub_w_128: + case Intrinsic::x86_avx2_phsub_w: { + handlePairwiseShadowOrIntrinsic(I, /*ReinterpretElemWidth=*/16); + break; + } + + // Packed Horizontal Add/Subtract + case Intrinsic::x86_ssse3_phadd_d: + case Intrinsic::x86_ssse3_phadd_d_128: + case Intrinsic::x86_avx2_phadd_d: + case Intrinsic::x86_ssse3_phsub_d: + case Intrinsic::x86_ssse3_phsub_d_128: + case Intrinsic::x86_avx2_phsub_d: { + handlePairwiseShadowOrIntrinsic(I, /*ReinterpretElemWidth=*/32); + break; + } + + // Packed Horizontal Add/Subtract and Saturate case Intrinsic::x86_ssse3_phadd_sw: case Intrinsic::x86_ssse3_phadd_sw_128: + case Intrinsic::x86_avx2_phadd_sw: + case Intrinsic::x86_ssse3_phsub_sw: + case Intrinsic::x86_ssse3_phsub_sw_128: + case Intrinsic::x86_avx2_phsub_sw: { + handlePairwiseShadowOrIntrinsic(I, /*ReinterpretElemWidth=*/16); + break; + } + + // Packed Single/Double Precision Floating-Point Horizontal Add + case Intrinsic::x86_sse3_hadd_ps: + case Intrinsic::x86_sse3_hadd_pd: case Intrinsic::x86_avx_hadd_pd_256: case Intrinsic::x86_avx_hadd_ps_256: - case Intrinsic::x86_avx2_phadd_d: - case Intrinsic::x86_avx2_phadd_w: - case Intrinsic::x86_avx2_phadd_sw: case Intrinsic::x86_sse3_hsub_ps: case Intrinsic::x86_sse3_hsub_pd: - case Intrinsic::x86_ssse3_phsub_d: - case Intrinsic::x86_ssse3_phsub_d_128: - case Intrinsic::x86_ssse3_phsub_w: - case Intrinsic::x86_ssse3_phsub_w_128: - case Intrinsic::x86_ssse3_phsub_sw: - case Intrinsic::x86_ssse3_phsub_sw_128: case Intrinsic::x86_avx_hsub_pd_256: - case Intrinsic::x86_avx_hsub_ps_256: - case Intrinsic::x86_avx2_phsub_d: - case Intrinsic::x86_avx2_phsub_w: - case Intrinsic::x86_avx2_phsub_sw: { - handleAVXHorizontalAddSubIntrinsic(I); + case Intrinsic::x86_avx_hsub_ps_256: { + handlePairwiseShadowOrIntrinsic(I); break; } diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx-intrinsics-x86.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx-intrinsics-x86.ll index 26e9c39696f70..d85ab2c2c4bad 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx-intrinsics-x86.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx-intrinsics-x86.ll @@ -435,10 +435,9 @@ define <4 x double> @test_x86_avx_hadd_pd_256(<4 x double> %a0, <4 x double> %a1 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[A0:%.*]] = bitcast <4 x i64> [[TMP1]] to <4 x double> -; CHECK-NEXT: [[A1:%.*]] = bitcast <4 x i64> [[TMP2]] to <4 x double> -; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> [[A0]], <4 x double> [[A1]]) -; CHECK-NEXT: [[_MSPROP:%.*]] = bitcast <4 x double> [[RES]] to <4 x i64> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> [[TMP2]], <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> [[TMP2]], <4 x i32> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i64> [[TMP3]], [[TMP4]] ; 
CHECK-NEXT: [[RES1:%.*]] = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> [[A2:%.*]], <4 x double> [[A3:%.*]]) ; CHECK-NEXT: store <4 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x double> [[RES1]] @@ -454,10 +453,9 @@ define <8 x float> @test_x86_avx_hadd_ps_256(<8 x float> %a0, <8 x float> %a1) # ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[A0:%.*]] = bitcast <8 x i32> [[TMP1]] to <8 x float> -; CHECK-NEXT: [[A1:%.*]] = bitcast <8 x i32> [[TMP2]] to <8 x float> -; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> [[A0]], <8 x float> [[A1]]) -; CHECK-NEXT: [[_MSPROP:%.*]] = bitcast <8 x float> [[RES]] to <8 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i32> [[TMP3]], [[TMP4]] ; CHECK-NEXT: [[RES1:%.*]] = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> [[A2:%.*]], <8 x float> [[A3:%.*]]) ; CHECK-NEXT: store <8 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x float> [[RES1]] @@ -473,10 +471,9 @@ define <4 x double> @test_x86_avx_hsub_pd_256(<4 x double> %a0, <4 x double> %a1 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[A0:%.*]] = bitcast <4 x i64> [[TMP1]] to <4 x double> -; CHECK-NEXT: [[A1:%.*]] = bitcast <4 x i64> [[TMP2]] to <4 x double> -; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> [[A0]], <4 x double> [[A1]]) -; CHECK-NEXT: [[_MSPROP:%.*]] = bitcast <4 x double> [[RES]] to <4 x i64> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> [[TMP2]], <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> [[TMP2]], <4 x i32> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i64> [[TMP3]], [[TMP4]] ; CHECK-NEXT: [[RES1:%.*]] = call <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> [[A2:%.*]], <4 x double> [[A3:%.*]]) ; CHECK-NEXT: store <4 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x double> [[RES1]] @@ -492,10 +489,9 @@ define <8 x float> @test_x86_avx_hsub_ps_256(<8 x float> %a0, <8 x float> %a1) # ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[A0:%.*]] = bitcast <8 x i32> [[TMP1]] to <8 x float> -; CHECK-NEXT: [[A1:%.*]] = bitcast <8 x i32> [[TMP2]] to <8 x float> -; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> [[A0]], <8 x float> [[A1]]) -; CHECK-NEXT: [[_MSPROP:%.*]] = bitcast <8 x float> [[RES]] to <8 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i32> [[TMP3]], [[TMP4]] ; CHECK-NEXT: [[RES1:%.*]] = 
call <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> [[A2:%.*]], <8 x float> [[A3:%.*]]) ; CHECK-NEXT: store <8 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x float> [[RES1]] diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx2-intrinsics-x86.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx2-intrinsics-x86.ll index 5597a9c96611f..f916130fe53e5 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx2-intrinsics-x86.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx2-intrinsics-x86.ll @@ -569,7 +569,9 @@ define <8 x i32> @test_x86_avx2_phadd_d(<8 x i32> %a0, <8 x i32> %a1) #0 { ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> [[TMP1]], <8 x i32> [[TMP2]]) +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i32> [[TMP9]], [[TMP10]] ; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> [[A0:%.*]], <8 x i32> [[A1:%.*]]) ; CHECK-NEXT: store <8 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i32> [[RES]] @@ -585,7 +587,9 @@ define <16 x i16> @test_x86_avx2_phadd_sw(<16 x i16> %a0, <16 x i16> %a1) #0 { ; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = call <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16> [[TMP1]], <16 x i16> [[TMP2]]) +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <16 x i16> [[TMP1]], <16 x i16> [[TMP2]], <16 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <16 x i16> [[TMP1]], <16 x i16> [[TMP2]], <16 x i32> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i16> [[TMP9]], [[TMP10]] ; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]]) ; CHECK-NEXT: store <16 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i16> [[RES]] @@ -601,7 +605,9 @@ define <16 x i16> @test_x86_avx2_phadd_w(<16 x i16> %a0, <16 x i16> %a1) #0 { ; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = call <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16> [[TMP1]], <16 x i16> [[TMP2]]) +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <16 x i16> [[TMP1]], <16 x i16> [[TMP2]], <16 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <16 x i16> [[TMP1]], <16 x i16> [[TMP2]], <16 x i32> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i16> [[TMP9]], [[TMP10]] ; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]]) ; CHECK-NEXT: store <16 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i16> [[RES]] @@ -617,7 +623,9 @@ define <8 x i32> @test_x86_avx2_phsub_d(<8 x i32> %a0, <8 x i32> %a1) #0 { ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr 
@__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> [[TMP1]], <8 x i32> [[TMP2]]) +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i32> [[TMP9]], [[TMP10]] ; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32> [[A0:%.*]], <8 x i32> [[A1:%.*]]) ; CHECK-NEXT: store <8 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i32> [[RES]] @@ -633,7 +641,9 @@ define <16 x i16> @test_x86_avx2_phsub_sw(<16 x i16> %a0, <16 x i16> %a1) #0 { ; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = call <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16> [[TMP1]], <16 x i16> [[TMP2]]) +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <16 x i16> [[TMP1]], <16 x i16> [[TMP2]], <16 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <16 x i16> [[TMP1]], <16 x i16> [[TMP2]], <16 x i32> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i16> [[TMP9]], [[TMP10]] ; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]]) ; CHECK-NEXT: store <16 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i16> [[RES]] @@ -649,7 +659,9 @@ define <16 x i16> @test_x86_avx2_phsub_w(<16 x i16> %a0, <16 x i16> %a1) #0 { ; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = call <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16> [[TMP1]], <16 x i16> [[TMP2]]) +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <16 x i16> [[TMP1]], <16 x i16> [[TMP2]], <16 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <16 x i16> [[TMP1]], <16 x i16> [[TMP2]], <16 x i32> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i16> [[TMP9]], [[TMP10]] ; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]]) ; CHECK-NEXT: store <16 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i16> [[RES]] diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/mmx-intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/mmx-intrinsics.ll index 2b6c6ff2e2b92..2ce4c49250d3f 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/X86/mmx-intrinsics.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/X86/mmx-intrinsics.ll @@ -21,7 +21,13 @@ define i64 @test1(<1 x i64> %a, <1 x i64> %b) #0 { ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64> ; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP12]] to <1 x i64> ; CHECK-NEXT: [[TMP17:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64> -; CHECK-NEXT: [[_MSPROP:%.*]] = call <1 x i64> @llvm.x86.ssse3.phadd.w(<1 x i64> [[TMP16]], <1 x i64> [[TMP8]]) +; CHECK-NEXT: [[TMP21:%.*]] = bitcast <1 x i64> [[TMP16]] to <4 x i16> +; CHECK-NEXT: [[TMP23:%.*]] = bitcast <1 x i64> 
[[TMP8]] to <4 x i16> +; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <4 x i16> [[TMP21]], <4 x i16> [[TMP23]], <4 x i32> +; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <4 x i16> [[TMP21]], <4 x i16> [[TMP23]], <4 x i32> +; CHECK-NEXT: [[TMP26:%.*]] = or <4 x i16> [[TMP24]], [[TMP25]] +; CHECK-NEXT: [[TMP27:%.*]] = bitcast <4 x i16> [[TMP26]] to i64 +; CHECK-NEXT: [[_MSPROP:%.*]] = bitcast i64 [[TMP27]] to <1 x i64> ; CHECK-NEXT: [[TMP18:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.phadd.w(<1 x i64> [[TMP2]], <1 x i64> [[TMP17]]) #[[ATTR5:[0-9]+]] ; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16> ; CHECK-NEXT: [[TMP19:%.*]] = bitcast <1 x i64> [[TMP18]] to <4 x i16> @@ -3339,7 +3345,13 @@ define i64 @test6(<1 x i64> %a, <1 x i64> %b) #0 { ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64> ; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP12]] to <1 x i64> ; CHECK-NEXT: [[TMP17:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64> -; CHECK-NEXT: [[_MSPROP:%.*]] = call <1 x i64> @llvm.x86.ssse3.phadd.sw(<1 x i64> [[TMP16]], <1 x i64> [[TMP8]]) +; CHECK-NEXT: [[TMP21:%.*]] = bitcast <1 x i64> [[TMP16]] to <4 x i16> +; CHECK-NEXT: [[TMP23:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16> +; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <4 x i16> [[TMP21]], <4 x i16> [[TMP23]], <4 x i32> +; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <4 x i16> [[TMP21]], <4 x i16> [[TMP23]], <4 x i32> +; CHECK-NEXT: [[TMP26:%.*]] = or <4 x i16> [[TMP24]], [[TMP25]] +; CHECK-NEXT: [[TMP27:%.*]] = bitcast <4 x i16> [[TMP26]] to i64 +; CHECK-NEXT: [[_MSPROP:%.*]] = bitcast i64 [[TMP27]] to <1 x i64> ; CHECK-NEXT: [[TMP18:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.phsub.sw(<1 x i64> [[TMP2]], <1 x i64> [[TMP17]]) #[[ATTR5]] ; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16> ; CHECK-NEXT: [[TMP19:%.*]] = bitcast <1 x i64> [[TMP18]] to <4 x i16> @@ -3379,7 +3391,13 @@ define i64 @test5(<1 x i64> %a, <1 x i64> %b) #0 { ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP1]] to <1 x i64> ; CHECK-NEXT: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP12]] to <1 x i64> ; CHECK-NEXT: [[TMP17:%.*]] = bitcast <2 x i32> [[TMP0]] to <1 x i64> -; CHECK-NEXT: [[_MSPROP:%.*]] = call <1 x i64> @llvm.x86.ssse3.phadd.d(<1 x i64> [[TMP16]], <1 x i64> [[TMP8]]) +; CHECK-NEXT: [[TMP21:%.*]] = bitcast <1 x i64> [[TMP16]] to <2 x i32> +; CHECK-NEXT: [[TMP23:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32> +; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <2 x i32> [[TMP21]], <2 x i32> [[TMP23]], <2 x i32> +; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <2 x i32> [[TMP21]], <2 x i32> [[TMP23]], <2 x i32> +; CHECK-NEXT: [[TMP26:%.*]] = or <2 x i32> [[TMP24]], [[TMP25]] +; CHECK-NEXT: [[TMP27:%.*]] = bitcast <2 x i32> [[TMP26]] to i64 +; CHECK-NEXT: [[_MSPROP:%.*]] = bitcast i64 [[TMP27]] to <1 x i64> ; CHECK-NEXT: [[TMP18:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.phsub.d(<1 x i64> [[TMP2]], <1 x i64> [[TMP17]]) #[[ATTR5]] ; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <2 x i32> ; CHECK-NEXT: [[TMP19:%.*]] = bitcast <1 x i64> [[TMP18]] to <2 x i32> @@ -3419,7 +3437,13 @@ define i64 @test4(<1 x i64> %a, <1 x i64> %b) #0 { ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64> ; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP12]] to <1 x i64> ; CHECK-NEXT: [[TMP17:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64> -; CHECK-NEXT: [[_MSPROP:%.*]] = call <1 x i64> @llvm.x86.ssse3.phadd.w(<1 x i64> [[TMP16]], <1 x i64> [[TMP8]]) +; CHECK-NEXT: [[TMP21:%.*]] = bitcast <1 x i64> [[TMP16]] 
to <4 x i16> +; CHECK-NEXT: [[TMP23:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16> +; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <4 x i16> [[TMP21]], <4 x i16> [[TMP23]], <4 x i32> +; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <4 x i16> [[TMP21]], <4 x i16> [[TMP23]], <4 x i32> +; CHECK-NEXT: [[TMP26:%.*]] = or <4 x i16> [[TMP24]], [[TMP25]] +; CHECK-NEXT: [[TMP27:%.*]] = bitcast <4 x i16> [[TMP26]] to i64 +; CHECK-NEXT: [[_MSPROP:%.*]] = bitcast i64 [[TMP27]] to <1 x i64> ; CHECK-NEXT: [[TMP18:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.phsub.w(<1 x i64> [[TMP2]], <1 x i64> [[TMP17]]) #[[ATTR5]] ; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16> ; CHECK-NEXT: [[TMP19:%.*]] = bitcast <1 x i64> [[TMP18]] to <4 x i16> @@ -3459,7 +3483,13 @@ define i64 @test3(<1 x i64> %a, <1 x i64> %b) #0 { ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64> ; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP12]] to <1 x i64> ; CHECK-NEXT: [[TMP17:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64> -; CHECK-NEXT: [[_MSPROP:%.*]] = call <1 x i64> @llvm.x86.ssse3.phadd.sw(<1 x i64> [[TMP16]], <1 x i64> [[TMP8]]) +; CHECK-NEXT: [[TMP21:%.*]] = bitcast <1 x i64> [[TMP16]] to <4 x i16> +; CHECK-NEXT: [[TMP23:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16> +; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <4 x i16> [[TMP21]], <4 x i16> [[TMP23]], <4 x i32> +; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <4 x i16> [[TMP21]], <4 x i16> [[TMP23]], <4 x i32> +; CHECK-NEXT: [[TMP26:%.*]] = or <4 x i16> [[TMP24]], [[TMP25]] +; CHECK-NEXT: [[TMP27:%.*]] = bitcast <4 x i16> [[TMP26]] to i64 +; CHECK-NEXT: [[_MSPROP:%.*]] = bitcast i64 [[TMP27]] to <1 x i64> ; CHECK-NEXT: [[TMP18:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.phadd.sw(<1 x i64> [[TMP2]], <1 x i64> [[TMP17]]) #[[ATTR5]] ; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16> ; CHECK-NEXT: [[TMP19:%.*]] = bitcast <1 x i64> [[TMP18]] to <4 x i16> @@ -3499,7 +3529,13 @@ define i64 @test2(<1 x i64> %a, <1 x i64> %b) #0 { ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP1]] to <1 x i64> ; CHECK-NEXT: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP12]] to <1 x i64> ; CHECK-NEXT: [[TMP17:%.*]] = bitcast <2 x i32> [[TMP0]] to <1 x i64> -; CHECK-NEXT: [[_MSPROP:%.*]] = call <1 x i64> @llvm.x86.ssse3.phadd.d(<1 x i64> [[TMP16]], <1 x i64> [[TMP8]]) +; CHECK-NEXT: [[TMP21:%.*]] = bitcast <1 x i64> [[TMP16]] to <2 x i32> +; CHECK-NEXT: [[TMP23:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32> +; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <2 x i32> [[TMP21]], <2 x i32> [[TMP23]], <2 x i32> +; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <2 x i32> [[TMP21]], <2 x i32> [[TMP23]], <2 x i32> +; CHECK-NEXT: [[TMP26:%.*]] = or <2 x i32> [[TMP24]], [[TMP25]] +; CHECK-NEXT: [[TMP27:%.*]] = bitcast <2 x i32> [[TMP26]] to i64 +; CHECK-NEXT: [[_MSPROP:%.*]] = bitcast i64 [[TMP27]] to <1 x i64> ; CHECK-NEXT: [[TMP18:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.phadd.d(<1 x i64> [[TMP2]], <1 x i64> [[TMP17]]) #[[ATTR5]] ; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <2 x i32> ; CHECK-NEXT: [[TMP19:%.*]] = bitcast <1 x i64> [[TMP18]] to <2 x i32> diff --git a/llvm/test/Instrumentation/MemorySanitizer/i386/avx-intrinsics-i386.ll b/llvm/test/Instrumentation/MemorySanitizer/i386/avx-intrinsics-i386.ll index 08dd27ffcaaf1..c059a2f49617d 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/i386/avx-intrinsics-i386.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/i386/avx-intrinsics-i386.ll @@ -449,10 +449,9 @@ define <4 x double> 
@test_x86_avx_hadd_pd_256(<4 x double> %a0, <4 x double> %a1 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[A0:%.*]] = bitcast <4 x i64> [[TMP1]] to <4 x double> -; CHECK-NEXT: [[A1:%.*]] = bitcast <4 x i64> [[TMP2]] to <4 x double> -; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> [[A0]], <4 x double> [[A1]]) -; CHECK-NEXT: [[_MSPROP:%.*]] = bitcast <4 x double> [[RES]] to <4 x i64> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> [[TMP2]], <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> [[TMP2]], <4 x i32> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i64> [[TMP4]], [[TMP5]] ; CHECK-NEXT: [[RES1:%.*]] = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> [[A2:%.*]], <4 x double> [[A3:%.*]]) ; CHECK-NEXT: store <4 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x double> [[RES1]] @@ -469,10 +468,9 @@ define <8 x float> @test_x86_avx_hadd_ps_256(<8 x float> %a0, <8 x float> %a1) # ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[A0:%.*]] = bitcast <8 x i32> [[TMP1]] to <8 x float> -; CHECK-NEXT: [[A1:%.*]] = bitcast <8 x i32> [[TMP2]] to <8 x float> -; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> [[A0]], <8 x float> [[A1]]) -; CHECK-NEXT: [[_MSPROP:%.*]] = bitcast <8 x float> [[RES]] to <8 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i32> [[TMP4]], [[TMP5]] ; CHECK-NEXT: [[RES1:%.*]] = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> [[A2:%.*]], <8 x float> [[A3:%.*]]) ; CHECK-NEXT: store <8 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x float> [[RES1]] @@ -489,10 +487,9 @@ define <4 x double> @test_x86_avx_hsub_pd_256(<4 x double> %a0, <4 x double> %a1 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[A0:%.*]] = bitcast <4 x i64> [[TMP1]] to <4 x double> -; CHECK-NEXT: [[A1:%.*]] = bitcast <4 x i64> [[TMP2]] to <4 x double> -; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> [[A0]], <4 x double> [[A1]]) -; CHECK-NEXT: [[_MSPROP:%.*]] = bitcast <4 x double> [[RES]] to <4 x i64> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> [[TMP2]], <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> [[TMP2]], <4 x i32> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i64> [[TMP4]], [[TMP5]] ; CHECK-NEXT: [[RES1:%.*]] = call <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> [[A2:%.*]], <4 x double> [[A3:%.*]]) ; CHECK-NEXT: store <4 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x double> [[RES1]] @@ -509,10 +506,9 @@ define <8 x 
float> @test_x86_avx_hsub_ps_256(<8 x float> %a0, <8 x float> %a1) # ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[A0:%.*]] = bitcast <8 x i32> [[TMP1]] to <8 x float> -; CHECK-NEXT: [[A1:%.*]] = bitcast <8 x i32> [[TMP2]] to <8 x float> -; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> [[A0]], <8 x float> [[A1]]) -; CHECK-NEXT: [[_MSPROP:%.*]] = bitcast <8 x float> [[RES]] to <8 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i32> [[TMP4]], [[TMP5]] ; CHECK-NEXT: [[RES1:%.*]] = call <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> [[A2:%.*]], <8 x float> [[A3:%.*]]) ; CHECK-NEXT: store <8 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x float> [[RES1]] diff --git a/llvm/test/Instrumentation/MemorySanitizer/i386/avx2-intrinsics-i386.ll b/llvm/test/Instrumentation/MemorySanitizer/i386/avx2-intrinsics-i386.ll index 109166975eee1..5cc56baf0e0de 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/i386/avx2-intrinsics-i386.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/i386/avx2-intrinsics-i386.ll @@ -600,7 +600,9 @@ define <8 x i32> @test_x86_avx2_phadd_d(<8 x i32> %a0, <8 x i32> %a1) #0 { ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> [[TMP1]], <8 x i32> [[TMP2]]) +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i32> [[TMP10]], [[TMP11]] ; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> [[A0:%.*]], <8 x i32> [[A1:%.*]]) ; CHECK-NEXT: store <8 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i32> [[RES]] @@ -617,7 +619,9 @@ define <16 x i16> @test_x86_avx2_phadd_sw(<16 x i16> %a0, <16 x i16> %a1) #0 { ; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = call <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16> [[TMP1]], <16 x i16> [[TMP2]]) +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <16 x i16> [[TMP1]], <16 x i16> [[TMP2]], <16 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <16 x i16> [[TMP1]], <16 x i16> [[TMP2]], <16 x i32> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i16> [[TMP10]], [[TMP11]] ; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]]) ; CHECK-NEXT: store <16 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i16> [[RES]] @@ -634,7 +638,9 @@ define <16 x i16> @test_x86_avx2_phadd_w(<16 x i16> %a0, <16 x i16> %a1) #0 { ; CHECK-NEXT: [[TMP2:%.*]] = load 
<16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = call <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16> [[TMP1]], <16 x i16> [[TMP2]]) +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <16 x i16> [[TMP1]], <16 x i16> [[TMP2]], <16 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <16 x i16> [[TMP1]], <16 x i16> [[TMP2]], <16 x i32> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i16> [[TMP10]], [[TMP11]] ; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]]) ; CHECK-NEXT: store <16 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i16> [[RES]] @@ -651,7 +657,9 @@ define <8 x i32> @test_x86_avx2_phsub_d(<8 x i32> %a0, <8 x i32> %a1) #0 { ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> [[TMP1]], <8 x i32> [[TMP2]]) +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i32> [[TMP10]], [[TMP11]] ; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32> [[A0:%.*]], <8 x i32> [[A1:%.*]]) ; CHECK-NEXT: store <8 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i32> [[RES]] @@ -668,7 +676,9 @@ define <16 x i16> @test_x86_avx2_phsub_sw(<16 x i16> %a0, <16 x i16> %a1) #0 { ; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = call <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16> [[TMP1]], <16 x i16> [[TMP2]]) +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <16 x i16> [[TMP1]], <16 x i16> [[TMP2]], <16 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <16 x i16> [[TMP1]], <16 x i16> [[TMP2]], <16 x i32> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i16> [[TMP10]], [[TMP11]] ; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]]) ; CHECK-NEXT: store <16 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i16> [[RES]] @@ -685,7 +695,9 @@ define <16 x i16> @test_x86_avx2_phsub_w(<16 x i16> %a0, <16 x i16> %a1) #0 { ; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[_MSPROP:%.*]] = call <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16> [[TMP1]], <16 x i16> [[TMP2]]) +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <16 x i16> [[TMP1]], <16 x i16> [[TMP2]], <16 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <16 x i16> [[TMP1]], <16 x i16> [[TMP2]], <16 x i32> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i16> [[TMP10]], [[TMP11]] ; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> 
@llvm.x86.avx2.phsub.w(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]]) ; CHECK-NEXT: store <16 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i16> [[RES]] diff --git a/llvm/test/Instrumentation/MemorySanitizer/i386/mmx-intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/i386/mmx-intrinsics.ll index 15bd1755d479b..5aafe10fd575a 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/i386/mmx-intrinsics.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/i386/mmx-intrinsics.ll @@ -22,7 +22,13 @@ define i64 @test1(<1 x i64> %a, <1 x i64> %b) #0 { ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64> ; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP12]] to <1 x i64> ; CHECK-NEXT: [[TMP17:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64> -; CHECK-NEXT: [[_MSPROP:%.*]] = call <1 x i64> @llvm.x86.ssse3.phadd.w(<1 x i64> [[TMP16]], <1 x i64> [[TMP8]]) +; CHECK-NEXT: [[TMP21:%.*]] = bitcast <1 x i64> [[TMP16]] to <4 x i16> +; CHECK-NEXT: [[TMP23:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16> +; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <4 x i16> [[TMP21]], <4 x i16> [[TMP23]], <4 x i32> +; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <4 x i16> [[TMP21]], <4 x i16> [[TMP23]], <4 x i32> +; CHECK-NEXT: [[TMP26:%.*]] = or <4 x i16> [[TMP24]], [[TMP25]] +; CHECK-NEXT: [[TMP27:%.*]] = bitcast <4 x i16> [[TMP26]] to i64 +; CHECK-NEXT: [[_MSPROP:%.*]] = bitcast i64 [[TMP27]] to <1 x i64> ; CHECK-NEXT: [[TMP18:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.phadd.w(<1 x i64> [[TMP2]], <1 x i64> [[TMP17]]) #[[ATTR5:[0-9]+]] ; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16> ; CHECK-NEXT: [[TMP19:%.*]] = bitcast <1 x i64> [[TMP18]] to <4 x i16> @@ -3426,7 +3432,13 @@ define i64 @test6(<1 x i64> %a, <1 x i64> %b) #0 { ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64> ; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP12]] to <1 x i64> ; CHECK-NEXT: [[TMP17:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64> -; CHECK-NEXT: [[_MSPROP:%.*]] = call <1 x i64> @llvm.x86.ssse3.phadd.sw(<1 x i64> [[TMP16]], <1 x i64> [[TMP8]]) +; CHECK-NEXT: [[TMP21:%.*]] = bitcast <1 x i64> [[TMP16]] to <4 x i16> +; CHECK-NEXT: [[TMP23:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16> +; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <4 x i16> [[TMP21]], <4 x i16> [[TMP23]], <4 x i32> +; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <4 x i16> [[TMP21]], <4 x i16> [[TMP23]], <4 x i32> +; CHECK-NEXT: [[TMP26:%.*]] = or <4 x i16> [[TMP24]], [[TMP25]] +; CHECK-NEXT: [[TMP27:%.*]] = bitcast <4 x i16> [[TMP26]] to i64 +; CHECK-NEXT: [[_MSPROP:%.*]] = bitcast i64 [[TMP27]] to <1 x i64> ; CHECK-NEXT: [[TMP18:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.phsub.sw(<1 x i64> [[TMP2]], <1 x i64> [[TMP17]]) #[[ATTR5]] ; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16> ; CHECK-NEXT: [[TMP19:%.*]] = bitcast <1 x i64> [[TMP18]] to <4 x i16> @@ -3467,7 +3479,13 @@ define i64 @test5(<1 x i64> %a, <1 x i64> %b) #0 { ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP1]] to <1 x i64> ; CHECK-NEXT: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP12]] to <1 x i64> ; CHECK-NEXT: [[TMP17:%.*]] = bitcast <2 x i32> [[TMP0]] to <1 x i64> -; CHECK-NEXT: [[_MSPROP:%.*]] = call <1 x i64> @llvm.x86.ssse3.phadd.d(<1 x i64> [[TMP16]], <1 x i64> [[TMP8]]) +; CHECK-NEXT: [[TMP21:%.*]] = bitcast <1 x i64> [[TMP16]] to <2 x i32> +; CHECK-NEXT: [[TMP23:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32> +; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <2 x i32> [[TMP21]], <2 x i32> [[TMP23]], <2 x 
i32> +; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <2 x i32> [[TMP21]], <2 x i32> [[TMP23]], <2 x i32> +; CHECK-NEXT: [[TMP26:%.*]] = or <2 x i32> [[TMP24]], [[TMP25]] +; CHECK-NEXT: [[TMP27:%.*]] = bitcast <2 x i32> [[TMP26]] to i64 +; CHECK-NEXT: [[_MSPROP:%.*]] = bitcast i64 [[TMP27]] to <1 x i64> ; CHECK-NEXT: [[TMP18:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.phsub.d(<1 x i64> [[TMP2]], <1 x i64> [[TMP17]]) #[[ATTR5]] ; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <2 x i32> ; CHECK-NEXT: [[TMP19:%.*]] = bitcast <1 x i64> [[TMP18]] to <2 x i32> @@ -3508,7 +3526,13 @@ define i64 @test4(<1 x i64> %a, <1 x i64> %b) #0 { ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64> ; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP12]] to <1 x i64> ; CHECK-NEXT: [[TMP17:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64> -; CHECK-NEXT: [[_MSPROP:%.*]] = call <1 x i64> @llvm.x86.ssse3.phadd.w(<1 x i64> [[TMP16]], <1 x i64> [[TMP8]]) +; CHECK-NEXT: [[TMP21:%.*]] = bitcast <1 x i64> [[TMP16]] to <4 x i16> +; CHECK-NEXT: [[TMP23:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16> +; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <4 x i16> [[TMP21]], <4 x i16> [[TMP23]], <4 x i32> +; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <4 x i16> [[TMP21]], <4 x i16> [[TMP23]], <4 x i32> +; CHECK-NEXT: [[TMP26:%.*]] = or <4 x i16> [[TMP24]], [[TMP25]] +; CHECK-NEXT: [[TMP27:%.*]] = bitcast <4 x i16> [[TMP26]] to i64 +; CHECK-NEXT: [[_MSPROP:%.*]] = bitcast i64 [[TMP27]] to <1 x i64> ; CHECK-NEXT: [[TMP18:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.phsub.w(<1 x i64> [[TMP2]], <1 x i64> [[TMP17]]) #[[ATTR5]] ; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16> ; CHECK-NEXT: [[TMP19:%.*]] = bitcast <1 x i64> [[TMP18]] to <4 x i16> @@ -3549,7 +3573,13 @@ define i64 @test3(<1 x i64> %a, <1 x i64> %b) #0 { ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64> ; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP12]] to <1 x i64> ; CHECK-NEXT: [[TMP17:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64> -; CHECK-NEXT: [[_MSPROP:%.*]] = call <1 x i64> @llvm.x86.ssse3.phadd.sw(<1 x i64> [[TMP16]], <1 x i64> [[TMP8]]) +; CHECK-NEXT: [[TMP21:%.*]] = bitcast <1 x i64> [[TMP16]] to <4 x i16> +; CHECK-NEXT: [[TMP23:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16> +; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <4 x i16> [[TMP21]], <4 x i16> [[TMP23]], <4 x i32> +; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <4 x i16> [[TMP21]], <4 x i16> [[TMP23]], <4 x i32> +; CHECK-NEXT: [[TMP26:%.*]] = or <4 x i16> [[TMP24]], [[TMP25]] +; CHECK-NEXT: [[TMP27:%.*]] = bitcast <4 x i16> [[TMP26]] to i64 +; CHECK-NEXT: [[_MSPROP:%.*]] = bitcast i64 [[TMP27]] to <1 x i64> ; CHECK-NEXT: [[TMP18:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.phadd.sw(<1 x i64> [[TMP2]], <1 x i64> [[TMP17]]) #[[ATTR5]] ; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16> ; CHECK-NEXT: [[TMP19:%.*]] = bitcast <1 x i64> [[TMP18]] to <4 x i16> @@ -3590,7 +3620,13 @@ define i64 @test2(<1 x i64> %a, <1 x i64> %b) #0 { ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP1]] to <1 x i64> ; CHECK-NEXT: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP12]] to <1 x i64> ; CHECK-NEXT: [[TMP17:%.*]] = bitcast <2 x i32> [[TMP0]] to <1 x i64> -; CHECK-NEXT: [[_MSPROP:%.*]] = call <1 x i64> @llvm.x86.ssse3.phadd.d(<1 x i64> [[TMP16]], <1 x i64> [[TMP8]]) +; CHECK-NEXT: [[TMP21:%.*]] = bitcast <1 x i64> [[TMP16]] to <2 x i32> +; CHECK-NEXT: [[TMP23:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32> +; CHECK-NEXT: [[TMP24:%.*]] = 
shufflevector <2 x i32> [[TMP21]], <2 x i32> [[TMP23]], <2 x i32> +; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <2 x i32> [[TMP21]], <2 x i32> [[TMP23]], <2 x i32> +; CHECK-NEXT: [[TMP26:%.*]] = or <2 x i32> [[TMP24]], [[TMP25]] +; CHECK-NEXT: [[TMP27:%.*]] = bitcast <2 x i32> [[TMP26]] to i64 +; CHECK-NEXT: [[_MSPROP:%.*]] = bitcast i64 [[TMP27]] to <1 x i64> ; CHECK-NEXT: [[TMP18:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.phadd.d(<1 x i64> [[TMP2]], <1 x i64> [[TMP17]]) #[[ATTR5]] ; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <2 x i32> ; CHECK-NEXT: [[TMP19:%.*]] = bitcast <1 x i64> [[TMP18]] to <2 x i32>
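
Note (not part of the patch): a minimal sketch of the shadow IR that the generalized handlePairwiseShadowOrIntrinsic path emits, using @llvm.x86.ssse3.phadd.d.128 on <4 x i32> operands as an example. The value names (%a0, %a1, %s0, %s1, %even, %odd, %res_shadow) are illustrative only, and the spelled-out even/odd shuffle masks are derived from the EvenMask/OddMask loop in the handler rather than copied from the generated tests above.

    ; %s0 and %s1 are the shadows of the two <4 x i32> operands %a0 and %a1.
    ; Each result element of a horizontal add/sub combines one adjacent pair of
    ; source elements, so the shadow of each result element is the OR of the
    ; two corresponding shadow elements: gather the even-indexed and odd-indexed
    ; lanes of the concatenated shadows, then OR them.
    %even = shufflevector <4 x i32> %s0, <4 x i32> %s1, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
    %odd  = shufflevector <4 x i32> %s0, <4 x i32> %s1, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
    %res_shadow = or <4 x i32> %even, %odd
    ; The original instruction itself is left unchanged:
    %res = call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %a0, <4 x i32> %a1)

For the MMX variants the same pattern applies after the <1 x i64> shadows are bitcast to <4 x i16> or <2 x i32> (ReinterpretElemWidth == 16 or 32) and the OR'd result is bitcast back to <1 x i64>, as shown in the mmx-intrinsics test updates above.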