diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 60f3893f20a79..04b076d21ffba 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -2602,6 +2602,59 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     SC.Done(&I);
   }
 
+  /// Propagate shadow for 1- or 2-vector intrinsics that combine adjacent
+  /// fields.
+  ///
+  /// e.g., <2 x i32> @llvm.aarch64.neon.saddlp.v2i32.v4i16(<4 x i16>)
+  ///       <16 x i8> @llvm.aarch64.neon.addp.v16i8(<16 x i8>, <16 x i8>)
+  ///
+  /// TODO: adapt this function to handle horizontal add/sub?
+  void handlePairwiseShadowOrIntrinsic(IntrinsicInst &I) {
+    assert(I.arg_size() == 1 || I.arg_size() == 2);
+
+    assert(I.getType()->isVectorTy());
+    assert(I.getArgOperand(0)->getType()->isVectorTy());
+
+    FixedVectorType *ParamType =
+        cast<FixedVectorType>(I.getArgOperand(0)->getType());
+    if (I.arg_size() == 2)
+      assert(ParamType == cast<FixedVectorType>(I.getArgOperand(1)->getType()));
+    FixedVectorType *ReturnType = cast<FixedVectorType>(I.getType());
+    assert(ParamType->getNumElements() * I.arg_size() ==
+           2 * ReturnType->getNumElements());
+
+    IRBuilder<> IRB(&I);
+    unsigned Width = ParamType->getNumElements() * I.arg_size();
+
+    // Horizontal OR of shadow
+    SmallVector<int, 8> EvenMask;
+    SmallVector<int, 8> OddMask;
+    for (unsigned X = 0; X < Width; X += 2) {
+      EvenMask.push_back(X);
+      OddMask.push_back(X + 1);
+    }
+
+    Value *FirstArgShadow = getShadow(&I, 0);
+    Value *EvenShadow;
+    Value *OddShadow;
+    if (I.arg_size() == 2) {
+      Value *SecondArgShadow = getShadow(&I, 1);
+      EvenShadow =
+          IRB.CreateShuffleVector(FirstArgShadow, SecondArgShadow, EvenMask);
+      OddShadow =
+          IRB.CreateShuffleVector(FirstArgShadow, SecondArgShadow, OddMask);
+    } else {
+      EvenShadow = IRB.CreateShuffleVector(FirstArgShadow, EvenMask);
+      OddShadow = IRB.CreateShuffleVector(FirstArgShadow, OddMask);
+    }
+
+    Value *OrShadow = IRB.CreateOr(EvenShadow, OddShadow);
+    OrShadow = CreateShadowCast(IRB, OrShadow, getShadowTy(&I));
+
+    setShadow(&I, OrShadow);
+    setOriginForNaryOp(I);
+  }
+
   void visitFNeg(UnaryOperator &I) { handleShadowOr(I); }
 
   // Handle multiplication by constant.
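Illustrative sketch, not part of the patch: for a one-argument case such as <4 x i16> @llvm.aarch64.neon.saddlp.v4i16.v8i8(<8 x i8>), the handler above pairs adjacent lanes of the argument shadow with even/odd shuffles, ORs them, and widens the result to the return shadow type. Assuming %s names the <8 x i8> argument shadow, the emitted shadow computation is roughly:

  ; assumed: %s holds the <8 x i8> shadow of the intrinsic's argument
  %even = shufflevector <8 x i8> %s, <8 x i8> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %odd  = shufflevector <8 x i8> %s, <8 x i8> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %or   = or <4 x i8> %even, %odd
  %ret  = zext <4 x i8> %or to <4 x i16>

The updated CHECK lines in arm64-vadd.ll below verify this shuffle/or/zext pattern for the saddlp, uaddlp, addp, and faddp tests.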
@@ -4781,6 +4834,17 @@ struct MemorySanitizerVisitor : public InstVisitor { setOrigin(&I, getCleanOrigin()); break; + // Add Pairwise + case Intrinsic::aarch64_neon_addp: + // Floating-point Add Pairwise + case Intrinsic::aarch64_neon_faddp: + // Add Long Pairwise + case Intrinsic::aarch64_neon_saddlp: + case Intrinsic::aarch64_neon_uaddlp: { + handlePairwiseShadowOrIntrinsic(I); + break; + } + case Intrinsic::aarch64_neon_st1x2: case Intrinsic::aarch64_neon_st1x3: case Intrinsic::aarch64_neon_st1x4: diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-vadd.ll b/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-vadd.ll index f9b223dc420b9..ad0856d38c1e9 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-vadd.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-vadd.ll @@ -1601,7 +1601,7 @@ define <4 x i16> @saddlp4h(ptr %A) nounwind #0 { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -1611,15 +1611,12 @@ define <4 x i16> @saddlp4h(ptr %A) nounwind #0 { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[_MSLD]] to i64 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP7]], 0 -; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] -; CHECK: 8: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] -; CHECK-NEXT: unreachable -; CHECK: 9: +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x i8> [[_MSLD]], <8 x i8> poison, <4 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x i8> [[_MSLD]], <8 x i8> poison, <4 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = or <4 x i8> [[TMP7]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = zext <4 x i8> [[TMP9]] to <4 x i16> ; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.saddlp.v4i16.v8i8(<8 x i8> [[TMPVAR1]]) -; CHECK-NEXT: store <4 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <4 x i16> [[TMP10]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i16> [[TMP3]] ; %tmpvar1 = load <8 x i8>, ptr %A @@ -1633,7 +1630,7 @@ define <2 x i32> @saddlp2s(ptr %A) nounwind #0 { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -1643,15 +1640,12 @@ define <2 x i32> @saddlp2s(ptr %A) nounwind #0 { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[_MSLD]] to i64 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP7]], 0 -; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], 
!prof [[PROF1]] -; CHECK: 8: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] -; CHECK-NEXT: unreachable -; CHECK: 9: +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i16> [[_MSLD]], <4 x i16> poison, <2 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i16> [[_MSLD]], <4 x i16> poison, <2 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = or <2 x i16> [[TMP7]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = zext <2 x i16> [[TMP9]] to <2 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.saddlp.v2i32.v4i16(<4 x i16> [[TMPVAR1]]) -; CHECK-NEXT: store <2 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <2 x i32> [[TMP10]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x i32> [[TMP3]] ; %tmpvar1 = load <4 x i16>, ptr %A @@ -1665,7 +1659,7 @@ define <1 x i64> @saddlp1d(ptr %A) nounwind #0 { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -1675,15 +1669,12 @@ define <1 x i64> @saddlp1d(ptr %A) nounwind #0 { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[_MSLD]] to i64 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP7]], 0 -; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] -; CHECK: 8: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] -; CHECK-NEXT: unreachable -; CHECK: 9: +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[_MSLD]], <2 x i32> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x i32> [[_MSLD]], <2 x i32> poison, <1 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = or <1 x i32> [[TMP7]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = zext <1 x i32> [[TMP9]] to <1 x i64> ; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.saddlp.v1i64.v2i32(<2 x i32> [[TMPVAR1]]) -; CHECK-NEXT: store <1 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <1 x i64> [[TMP10]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <1 x i64> [[TMP3]] ; %tmpvar1 = load <2 x i32>, ptr %A @@ -1697,7 +1688,7 @@ define <8 x i16> @saddlp8h(ptr %A) nounwind #0 { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -1707,15 +1698,12 @@ define <8 x i16> @saddlp8h(ptr %A) nounwind #0 { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP6]], align 16 -; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[_MSLD]] to i128 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0 -; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] -; 
CHECK: 8: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] -; CHECK-NEXT: unreachable -; CHECK: 9: +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <16 x i8> [[_MSLD]], <16 x i8> poison, <8 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <16 x i8> [[_MSLD]], <16 x i8> poison, <8 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i8> [[TMP7]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = zext <8 x i8> [[TMP9]] to <8 x i16> ; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.saddlp.v8i16.v16i8(<16 x i8> [[TMPVAR1]]) -; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <8 x i16> [[TMP10]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i16> [[TMP3]] ; %tmpvar1 = load <16 x i8>, ptr %A @@ -1729,7 +1717,7 @@ define <4 x i32> @saddlp4s(ptr %A) nounwind #0 { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -1739,15 +1727,12 @@ define <4 x i32> @saddlp4s(ptr %A) nounwind #0 { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16 -; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i16> [[_MSLD]] to i128 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0 -; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] -; CHECK: 8: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] -; CHECK-NEXT: unreachable -; CHECK: 9: +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x i16> [[_MSLD]], <8 x i16> poison, <4 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x i16> [[_MSLD]], <8 x i16> poison, <4 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = or <4 x i16> [[TMP7]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = zext <4 x i16> [[TMP9]] to <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.saddlp.v4i32.v8i16(<8 x i16> [[TMPVAR1]]) -; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <4 x i32> [[TMP10]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i32> [[TMP3]] ; %tmpvar1 = load <8 x i16>, ptr %A @@ -1761,7 +1746,7 @@ define <2 x i64> @saddlp2d(ptr %A) nounwind #0 { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -1771,15 +1756,12 @@ define <2 x i64> @saddlp2d(ptr %A) nounwind #0 { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16 -; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[_MSLD]] to i128 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0 -; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] -; CHECK: 8: -; CHECK-NEXT: call void 
@__msan_warning_noreturn() #[[ATTR4]] -; CHECK-NEXT: unreachable -; CHECK: 9: +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[_MSLD]], <4 x i32> poison, <2 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[_MSLD]], <4 x i32> poison, <2 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = or <2 x i32> [[TMP7]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = zext <2 x i32> [[TMP9]] to <2 x i64> ; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.saddlp.v2i64.v4i32(<4 x i32> [[TMPVAR1]]) -; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <2 x i64> [[TMP10]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x i64> [[TMP3]] ; %tmpvar1 = load <4 x i32>, ptr %A @@ -1801,7 +1783,7 @@ define <4 x i16> @uaddlp4h(ptr %A) nounwind #0 { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -1811,15 +1793,12 @@ define <4 x i16> @uaddlp4h(ptr %A) nounwind #0 { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[_MSLD]] to i64 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP7]], 0 -; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] -; CHECK: 8: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] -; CHECK-NEXT: unreachable -; CHECK: 9: +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x i8> [[_MSLD]], <8 x i8> poison, <4 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x i8> [[_MSLD]], <8 x i8> poison, <4 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = or <4 x i8> [[TMP7]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = zext <4 x i8> [[TMP9]] to <4 x i16> ; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.uaddlp.v4i16.v8i8(<8 x i8> [[TMPVAR1]]) -; CHECK-NEXT: store <4 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <4 x i16> [[TMP10]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i16> [[TMP3]] ; %tmpvar1 = load <8 x i8>, ptr %A @@ -1833,7 +1812,7 @@ define <2 x i32> @uaddlp2s(ptr %A) nounwind #0 { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -1843,15 +1822,12 @@ define <2 x i32> @uaddlp2s(ptr %A) nounwind #0 { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[_MSLD]] to i64 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP7]], 0 -; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] -; CHECK: 8: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] -; CHECK-NEXT: 
unreachable -; CHECK: 9: +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i16> [[_MSLD]], <4 x i16> poison, <2 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i16> [[_MSLD]], <4 x i16> poison, <2 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = or <2 x i16> [[TMP7]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = zext <2 x i16> [[TMP9]] to <2 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.uaddlp.v2i32.v4i16(<4 x i16> [[TMPVAR1]]) -; CHECK-NEXT: store <2 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <2 x i32> [[TMP10]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x i32> [[TMP3]] ; %tmpvar1 = load <4 x i16>, ptr %A @@ -1865,7 +1841,7 @@ define <1 x i64> @uaddlp1d(ptr %A) nounwind #0 { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -1875,15 +1851,12 @@ define <1 x i64> @uaddlp1d(ptr %A) nounwind #0 { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[_MSLD]] to i64 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP7]], 0 -; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] -; CHECK: 8: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] -; CHECK-NEXT: unreachable -; CHECK: 9: +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[_MSLD]], <2 x i32> poison, <1 x i32> zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x i32> [[_MSLD]], <2 x i32> poison, <1 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = or <1 x i32> [[TMP7]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = zext <1 x i32> [[TMP9]] to <1 x i64> ; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.uaddlp.v1i64.v2i32(<2 x i32> [[TMPVAR1]]) -; CHECK-NEXT: store <1 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <1 x i64> [[TMP10]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <1 x i64> [[TMP3]] ; %tmpvar1 = load <2 x i32>, ptr %A @@ -1897,7 +1870,7 @@ define <8 x i16> @uaddlp8h(ptr %A) nounwind #0 { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -1907,15 +1880,12 @@ define <8 x i16> @uaddlp8h(ptr %A) nounwind #0 { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP6]], align 16 -; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[_MSLD]] to i128 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0 -; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] -; CHECK: 8: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] -; CHECK-NEXT: unreachable -; CHECK: 
9: +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <16 x i8> [[_MSLD]], <16 x i8> poison, <8 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <16 x i8> [[_MSLD]], <16 x i8> poison, <8 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i8> [[TMP7]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = zext <8 x i8> [[TMP9]] to <8 x i16> ; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.uaddlp.v8i16.v16i8(<16 x i8> [[TMPVAR1]]) -; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <8 x i16> [[TMP10]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i16> [[TMP3]] ; %tmpvar1 = load <16 x i8>, ptr %A @@ -1929,7 +1899,7 @@ define <4 x i32> @uaddlp4s(ptr %A) nounwind #0 { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -1939,15 +1909,12 @@ define <4 x i32> @uaddlp4s(ptr %A) nounwind #0 { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16 -; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i16> [[_MSLD]] to i128 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0 -; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] -; CHECK: 8: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] -; CHECK-NEXT: unreachable -; CHECK: 9: +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x i16> [[_MSLD]], <8 x i16> poison, <4 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x i16> [[_MSLD]], <8 x i16> poison, <4 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = or <4 x i16> [[TMP7]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = zext <4 x i16> [[TMP9]] to <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.uaddlp.v4i32.v8i16(<8 x i16> [[TMPVAR1]]) -; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <4 x i32> [[TMP10]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i32> [[TMP3]] ; %tmpvar1 = load <8 x i16>, ptr %A @@ -1961,7 +1928,7 @@ define <2 x i64> @uaddlp2d(ptr %A) nounwind #0 { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; CHECK: 2: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -1971,15 +1938,12 @@ define <2 x i64> @uaddlp2d(ptr %A) nounwind #0 { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16 -; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[_MSLD]] to i128 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0 -; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] -; CHECK: 8: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] -; CHECK-NEXT: unreachable -; CHECK: 9: +; CHECK-NEXT: [[TMP7:%.*]] = 
shufflevector <4 x i32> [[_MSLD]], <4 x i32> poison, <2 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[_MSLD]], <4 x i32> poison, <2 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = or <2 x i32> [[TMP7]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = zext <2 x i32> [[TMP9]] to <2 x i64> ; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.uaddlp.v2i64.v4i32(<4 x i32> [[TMPVAR1]]) -; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <2 x i64> [[TMP10]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x i64> [[TMP3]] ; %tmpvar1 = load <4 x i32>, ptr %A @@ -2012,26 +1976,23 @@ define <4 x i16> @sadalp4h(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8 -; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[_MSLD]] to i64 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP8]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] -; CHECK: 9: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] -; CHECK-NEXT: unreachable -; CHECK: 10: +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x i8> [[_MSLD]], <8 x i8> poison, <4 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <8 x i8> [[_MSLD]], <8 x i8> poison, <4 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = or <4 x i8> [[TMP8]], [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = zext <4 x i8> [[TMP10]] to <4 x i16> ; CHECK-NEXT: [[TMPVAR3:%.*]] = call <4 x i16> @llvm.aarch64.neon.saddlp.v4i16.v8i8(<8 x i8> [[TMPVAR1]]) ; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]] -; CHECK: 11: +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP12:%.*]], label [[TMP17:%.*]], !prof [[PROF1]] +; CHECK: 12: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable -; CHECK: 12: +; CHECK: 13: ; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i16>, ptr [[B]], align 8 ; CHECK-NEXT: [[TMP13:%.*]] = ptrtoint ptr [[B]] to i64 ; CHECK-NEXT: [[TMP14:%.*]] = xor i64 [[TMP13]], 193514046488576 ; CHECK-NEXT: [[TMP15:%.*]] = inttoptr i64 [[TMP14]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP15]], align 8 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i16> zeroinitializer, [[_MSLD1]] +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i16> [[TMP11]], [[_MSLD1]] ; CHECK-NEXT: [[TMPVAR5:%.*]] = add <4 x i16> [[TMPVAR3]], [[TMP4]] ; CHECK-NEXT: store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i16> [[TMPVAR5]] @@ -2060,26 +2021,23 @@ define <2 x i32> @sadalp2s(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8 -; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i16> [[_MSLD]] to i64 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP8]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] -; CHECK: 9: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] -; CHECK-NEXT: unreachable -; CHECK: 10: +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i16> [[_MSLD]], <4 x i16> poison, <2 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x i16> [[_MSLD]], <4 x i16> poison, <2 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = or <2 x i16> [[TMP8]], [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = zext <2 x i16> 
[[TMP10]] to <2 x i32> ; CHECK-NEXT: [[TMPVAR3:%.*]] = call <2 x i32> @llvm.aarch64.neon.saddlp.v2i32.v4i16(<4 x i16> [[TMPVAR1]]) ; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]] -; CHECK: 11: +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP12:%.*]], label [[TMP17:%.*]], !prof [[PROF1]] +; CHECK: 12: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable -; CHECK: 12: +; CHECK: 13: ; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i32>, ptr [[B]], align 8 ; CHECK-NEXT: [[TMP13:%.*]] = ptrtoint ptr [[B]] to i64 ; CHECK-NEXT: [[TMP14:%.*]] = xor i64 [[TMP13]], 193514046488576 ; CHECK-NEXT: [[TMP15:%.*]] = inttoptr i64 [[TMP14]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP15]], align 8 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> zeroinitializer, [[_MSLD1]] +; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[TMP11]], [[_MSLD1]] ; CHECK-NEXT: [[TMPVAR5:%.*]] = add <2 x i32> [[TMPVAR3]], [[TMP4]] ; CHECK-NEXT: store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x i32> [[TMPVAR5]] @@ -2108,26 +2066,23 @@ define <8 x i16> @sadalp8h(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16 -; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[_MSLD]] to i128 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP8]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] -; CHECK: 9: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] -; CHECK-NEXT: unreachable -; CHECK: 10: +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <16 x i8> [[_MSLD]], <16 x i8> poison, <8 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <16 x i8> [[_MSLD]], <16 x i8> poison, <8 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i8> [[TMP8]], [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = zext <8 x i8> [[TMP10]] to <8 x i16> ; CHECK-NEXT: [[TMPVAR3:%.*]] = call <8 x i16> @llvm.aarch64.neon.saddlp.v8i16.v16i8(<16 x i8> [[TMPVAR1]]) ; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]] -; CHECK: 11: +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP12:%.*]], label [[TMP17:%.*]], !prof [[PROF1]] +; CHECK: 12: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable -; CHECK: 12: +; CHECK: 13: ; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i16>, ptr [[B]], align 16 ; CHECK-NEXT: [[TMP13:%.*]] = ptrtoint ptr [[B]] to i64 ; CHECK-NEXT: [[TMP14:%.*]] = xor i64 [[TMP13]], 193514046488576 ; CHECK-NEXT: [[TMP15:%.*]] = inttoptr i64 [[TMP14]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP15]], align 16 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> zeroinitializer, [[_MSLD1]] +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[TMP11]], [[_MSLD1]] ; CHECK-NEXT: [[TMPVAR5:%.*]] = add <8 x i16> [[TMPVAR3]], [[TMP4]] ; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i16> [[TMPVAR5]] @@ -2156,26 +2111,23 @@ define <4 x i32> @sadalp4s(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16 -; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i16> [[_MSLD]] to i128 -; 
CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP8]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] -; CHECK: 9: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] -; CHECK-NEXT: unreachable -; CHECK: 10: +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x i16> [[_MSLD]], <8 x i16> poison, <4 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <8 x i16> [[_MSLD]], <8 x i16> poison, <4 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = or <4 x i16> [[TMP8]], [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = zext <4 x i16> [[TMP10]] to <4 x i32> ; CHECK-NEXT: [[TMPVAR3:%.*]] = call <4 x i32> @llvm.aarch64.neon.saddlp.v4i32.v8i16(<8 x i16> [[TMPVAR1]]) ; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]] -; CHECK: 11: +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP12:%.*]], label [[TMP17:%.*]], !prof [[PROF1]] +; CHECK: 12: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable -; CHECK: 12: +; CHECK: 13: ; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[B]], align 16 ; CHECK-NEXT: [[TMP13:%.*]] = ptrtoint ptr [[B]] to i64 ; CHECK-NEXT: [[TMP14:%.*]] = xor i64 [[TMP13]], 193514046488576 ; CHECK-NEXT: [[TMP15:%.*]] = inttoptr i64 [[TMP14]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP15]], align 16 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> zeroinitializer, [[_MSLD1]] +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[TMP11]], [[_MSLD1]] ; CHECK-NEXT: [[TMPVAR5:%.*]] = add <4 x i32> [[TMPVAR3]], [[TMP4]] ; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i32> [[TMPVAR5]] @@ -2204,26 +2156,23 @@ define <2 x i64> @sadalp2d(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16 -; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i32> [[_MSLD]] to i128 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP8]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] -; CHECK: 9: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] -; CHECK-NEXT: unreachable -; CHECK: 10: +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[_MSLD]], <4 x i32> poison, <2 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x i32> [[_MSLD]], <4 x i32> poison, <2 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = or <2 x i32> [[TMP8]], [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = zext <2 x i32> [[TMP10]] to <2 x i64> ; CHECK-NEXT: [[TMPVAR3:%.*]] = call <2 x i64> @llvm.aarch64.neon.saddlp.v2i64.v4i32(<4 x i32> [[TMPVAR1]]) ; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]] -; CHECK: 11: +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP12:%.*]], label [[TMP17:%.*]], !prof [[PROF1]] +; CHECK: 12: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable -; CHECK: 12: +; CHECK: 13: ; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i64>, ptr [[B]], align 16 ; CHECK-NEXT: [[TMP13:%.*]] = ptrtoint ptr [[B]] to i64 ; CHECK-NEXT: [[TMP14:%.*]] = xor i64 [[TMP13]], 193514046488576 ; CHECK-NEXT: [[TMP15:%.*]] = inttoptr i64 [[TMP14]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP15]], align 16 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> zeroinitializer, [[_MSLD1]] +; CHECK-NEXT: [[_MSPROP:%.*]] = or 
<2 x i64> [[TMP11]], [[_MSLD1]] ; CHECK-NEXT: [[TMPVAR5:%.*]] = add <2 x i64> [[TMPVAR3]], [[TMP4]] ; CHECK-NEXT: store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x i64> [[TMPVAR5]] @@ -2252,26 +2201,23 @@ define <4 x i16> @uadalp4h(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8 -; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[_MSLD]] to i64 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP8]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] -; CHECK: 9: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] -; CHECK-NEXT: unreachable -; CHECK: 10: +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x i8> [[_MSLD]], <8 x i8> poison, <4 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <8 x i8> [[_MSLD]], <8 x i8> poison, <4 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = or <4 x i8> [[TMP8]], [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = zext <4 x i8> [[TMP10]] to <4 x i16> ; CHECK-NEXT: [[TMPVAR3:%.*]] = call <4 x i16> @llvm.aarch64.neon.uaddlp.v4i16.v8i8(<8 x i8> [[TMPVAR1]]) ; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]] -; CHECK: 11: +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP12:%.*]], label [[TMP17:%.*]], !prof [[PROF1]] +; CHECK: 12: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable -; CHECK: 12: +; CHECK: 13: ; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i16>, ptr [[B]], align 8 ; CHECK-NEXT: [[TMP13:%.*]] = ptrtoint ptr [[B]] to i64 ; CHECK-NEXT: [[TMP14:%.*]] = xor i64 [[TMP13]], 193514046488576 ; CHECK-NEXT: [[TMP15:%.*]] = inttoptr i64 [[TMP14]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP15]], align 8 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i16> zeroinitializer, [[_MSLD1]] +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i16> [[TMP11]], [[_MSLD1]] ; CHECK-NEXT: [[TMPVAR5:%.*]] = add <4 x i16> [[TMPVAR3]], [[TMP4]] ; CHECK-NEXT: store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i16> [[TMPVAR5]] @@ -2300,26 +2246,23 @@ define <2 x i32> @uadalp2s(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8 -; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i16> [[_MSLD]] to i64 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP8]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] -; CHECK: 9: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] -; CHECK-NEXT: unreachable -; CHECK: 10: +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i16> [[_MSLD]], <4 x i16> poison, <2 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x i16> [[_MSLD]], <4 x i16> poison, <2 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = or <2 x i16> [[TMP8]], [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = zext <2 x i16> [[TMP10]] to <2 x i32> ; CHECK-NEXT: [[TMPVAR3:%.*]] = call <2 x i32> @llvm.aarch64.neon.uaddlp.v2i32.v4i16(<4 x i16> [[TMPVAR1]]) ; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]] -; CHECK: 11: +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP12:%.*]], label [[TMP17:%.*]], !prof [[PROF1]] +; CHECK: 12: ; 
CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable -; CHECK: 12: +; CHECK: 13: ; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i32>, ptr [[B]], align 8 ; CHECK-NEXT: [[TMP13:%.*]] = ptrtoint ptr [[B]] to i64 ; CHECK-NEXT: [[TMP14:%.*]] = xor i64 [[TMP13]], 193514046488576 ; CHECK-NEXT: [[TMP15:%.*]] = inttoptr i64 [[TMP14]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP15]], align 8 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> zeroinitializer, [[_MSLD1]] +; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[TMP11]], [[_MSLD1]] ; CHECK-NEXT: [[TMPVAR5:%.*]] = add <2 x i32> [[TMPVAR3]], [[TMP4]] ; CHECK-NEXT: store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x i32> [[TMPVAR5]] @@ -2348,26 +2291,23 @@ define <8 x i16> @uadalp8h(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16 -; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[_MSLD]] to i128 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP8]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] -; CHECK: 9: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] -; CHECK-NEXT: unreachable -; CHECK: 10: +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <16 x i8> [[_MSLD]], <16 x i8> poison, <8 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <16 x i8> [[_MSLD]], <16 x i8> poison, <8 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i8> [[TMP8]], [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = zext <8 x i8> [[TMP10]] to <8 x i16> ; CHECK-NEXT: [[TMPVAR3:%.*]] = call <8 x i16> @llvm.aarch64.neon.uaddlp.v8i16.v16i8(<16 x i8> [[TMPVAR1]]) ; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]] -; CHECK: 11: +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP12:%.*]], label [[TMP17:%.*]], !prof [[PROF1]] +; CHECK: 12: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable -; CHECK: 12: +; CHECK: 13: ; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i16>, ptr [[B]], align 16 ; CHECK-NEXT: [[TMP13:%.*]] = ptrtoint ptr [[B]] to i64 ; CHECK-NEXT: [[TMP14:%.*]] = xor i64 [[TMP13]], 193514046488576 ; CHECK-NEXT: [[TMP15:%.*]] = inttoptr i64 [[TMP14]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP15]], align 16 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> zeroinitializer, [[_MSLD1]] +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[TMP11]], [[_MSLD1]] ; CHECK-NEXT: [[TMPVAR5:%.*]] = add <8 x i16> [[TMPVAR3]], [[TMP4]] ; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i16> [[TMPVAR5]] @@ -2396,26 +2336,23 @@ define <4 x i32> @uadalp4s(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16 -; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i16> [[_MSLD]] to i128 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP8]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] -; CHECK: 9: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] -; CHECK-NEXT: unreachable -; CHECK: 10: +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x i16> [[_MSLD]], <8 x i16> poison, <4 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <8 x 
i16> [[_MSLD]], <8 x i16> poison, <4 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = or <4 x i16> [[TMP8]], [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = zext <4 x i16> [[TMP10]] to <4 x i32> ; CHECK-NEXT: [[TMPVAR3:%.*]] = call <4 x i32> @llvm.aarch64.neon.uaddlp.v4i32.v8i16(<8 x i16> [[TMPVAR1]]) ; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]] -; CHECK: 11: +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP12:%.*]], label [[TMP17:%.*]], !prof [[PROF1]] +; CHECK: 12: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable -; CHECK: 12: +; CHECK: 13: ; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[B]], align 16 ; CHECK-NEXT: [[TMP13:%.*]] = ptrtoint ptr [[B]] to i64 ; CHECK-NEXT: [[TMP14:%.*]] = xor i64 [[TMP13]], 193514046488576 ; CHECK-NEXT: [[TMP15:%.*]] = inttoptr i64 [[TMP14]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP15]], align 16 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> zeroinitializer, [[_MSLD1]] +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[TMP11]], [[_MSLD1]] ; CHECK-NEXT: [[TMPVAR5:%.*]] = add <4 x i32> [[TMPVAR3]], [[TMP4]] ; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i32> [[TMPVAR5]] @@ -2444,26 +2381,23 @@ define <2 x i64> @uadalp2d(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16 -; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i32> [[_MSLD]] to i128 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP8]], 0 -; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] -; CHECK: 9: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] -; CHECK-NEXT: unreachable -; CHECK: 10: +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[_MSLD]], <4 x i32> poison, <2 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x i32> [[_MSLD]], <4 x i32> poison, <2 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = or <2 x i32> [[TMP8]], [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = zext <2 x i32> [[TMP10]] to <2 x i64> ; CHECK-NEXT: [[TMPVAR3:%.*]] = call <2 x i64> @llvm.aarch64.neon.uaddlp.v2i64.v4i32(<4 x i32> [[TMPVAR1]]) ; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]] -; CHECK: 11: +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP12:%.*]], label [[TMP17:%.*]], !prof [[PROF1]] +; CHECK: 12: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable -; CHECK: 12: +; CHECK: 13: ; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i64>, ptr [[B]], align 16 ; CHECK-NEXT: [[TMP13:%.*]] = ptrtoint ptr [[B]] to i64 ; CHECK-NEXT: [[TMP14:%.*]] = xor i64 [[TMP13]], 193514046488576 ; CHECK-NEXT: [[TMP15:%.*]] = inttoptr i64 [[TMP14]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP15]], align 16 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> zeroinitializer, [[_MSLD1]] +; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP11]], [[_MSLD1]] ; CHECK-NEXT: [[TMPVAR5:%.*]] = add <2 x i64> [[TMPVAR3]], [[TMP4]] ; CHECK-NEXT: store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x i64> [[TMPVAR5]] @@ -2482,7 +2416,7 @@ define <8 x i8> @addp_8b(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 
8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP13:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP15:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -2503,7 +2437,9 @@ define <8 x i8> @addp_8b(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i8>, ptr [[TMP12]], align 8 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i8> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <8 x i8> [[_MSLD]], <8 x i8> [[_MSLD1]], <8 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <8 x i8> [[_MSLD]], <8 x i8> [[_MSLD1]], <8 x i32> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i8> [[TMP13]], [[TMP14]] ; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.addp.v8i8(<8 x i8> [[TMPVAR1]], <8 x i8> [[TMPVAR2]]) ; CHECK-NEXT: store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i8> [[TMP3]] @@ -2521,7 +2457,7 @@ define <16 x i8> @addp_16b(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP13:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP15:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -2542,7 +2478,9 @@ define <16 x i8> @addp_16b(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <16 x i8>, ptr [[TMP12]], align 16 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <16 x i8> [[_MSLD]], <16 x i8> [[_MSLD1]], <16 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <16 x i8> [[_MSLD]], <16 x i8> [[_MSLD1]], <16 x i32> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP13]], [[TMP14]] ; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.addp.v16i8(<16 x i8> [[TMPVAR1]], <16 x i8> [[TMPVAR2]]) ; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i8> [[TMP3]] @@ -2560,7 +2498,7 @@ define <4 x i16> @addp_4h(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP13:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP15:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -2581,7 +2519,9 @@ define <4 x i16> @addp_4h(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP12]], align 8 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i16> 
[[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i16> [[_MSLD]], <4 x i16> [[_MSLD1]], <4 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i16> [[_MSLD]], <4 x i16> [[_MSLD1]], <4 x i32> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i16> [[TMP13]], [[TMP14]] ; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.addp.v4i16(<4 x i16> [[TMPVAR1]], <4 x i16> [[TMPVAR2]]) ; CHECK-NEXT: store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i16> [[TMP3]] @@ -2599,7 +2539,7 @@ define <8 x i16> @addp_8h(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP13:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP15:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -2620,7 +2560,9 @@ define <8 x i16> @addp_8h(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <8 x i16> [[_MSLD]], <8 x i16> [[_MSLD1]], <8 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <8 x i16> [[_MSLD]], <8 x i16> [[_MSLD1]], <8 x i32> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[TMP13]], [[TMP14]] ; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.addp.v8i16(<8 x i16> [[TMPVAR1]], <8 x i16> [[TMPVAR2]]) ; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i16> [[TMP3]] @@ -2638,7 +2580,7 @@ define <2 x i32> @addp_2s(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP13:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP15:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] ; CHECK-NEXT: unreachable @@ -2659,7 +2601,9 @@ define <2 x i32> @addp_2s(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr ; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP12]], align 8 -; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x i32> [[_MSLD]], <2 x i32> [[_MSLD1]], <2 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x i32> [[_MSLD]], <2 x i32> [[_MSLD1]], <2 x i32> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[TMP13]], [[TMP14]] ; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.addp.v2i32(<2 x i32> [[TMPVAR1]], <2 x i32> [[TMPVAR2]]) ; CHECK-NEXT: store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x i32> [[TMP3]] @@ -2677,7 +2621,7 @@ define <4 x i32> @addp_4s(ptr %A, ptr %B) nounwind #0 { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) 
to ptr), align 8
 ; CHECK-NEXT: call void @llvm.donothing()
 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP13:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP15:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK: 3:
 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
 ; CHECK-NEXT: unreachable
@@ -2698,7 +2642,9 @@ define <4 x i32> @addp_4s(ptr %A, ptr %B) nounwind #0 {
 ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
 ; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16
-; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[_MSLD]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i32> [[_MSLD]], <4 x i32> [[_MSLD1]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i32> [[_MSLD]], <4 x i32> [[_MSLD1]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[TMP13]], [[TMP14]]
 ; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.addp.v4i32(<4 x i32> [[TMPVAR1]], <4 x i32> [[TMPVAR2]])
 ; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT: ret <4 x i32> [[TMP3]]
@@ -2716,7 +2662,7 @@ define <2 x i64> @addp_2d(ptr %A, ptr %B) nounwind #0 {
 ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT: call void @llvm.donothing()
 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP13:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP15:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK: 3:
 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
 ; CHECK-NEXT: unreachable
@@ -2737,7 +2683,9 @@ define <2 x i64> @addp_2d(ptr %A, ptr %B) nounwind #0 {
 ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
 ; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16
-; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[_MSLD]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x i64> [[_MSLD]], <2 x i64> [[_MSLD1]], <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x i64> [[_MSLD]], <2 x i64> [[_MSLD1]], <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP13]], [[TMP14]]
 ; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> [[TMPVAR1]], <2 x i64> [[TMPVAR2]])
 ; CHECK-NEXT: store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT: ret <2 x i64> [[TMP3]]
@@ -2763,7 +2711,7 @@ define <2 x float> @faddp_2s(ptr %A, ptr %B) nounwind #0 {
 ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT: call void @llvm.donothing()
 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP13:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP15:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK: 3:
 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
 ; CHECK-NEXT: unreachable
@@ -2784,7 +2732,9 @@ define <2 x float> @faddp_2s(ptr %A, ptr %B) nounwind #0 {
 ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
 ; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP12]], align 8
-; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[_MSLD]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x i32> [[_MSLD]], <2 x i32> [[_MSLD1]], <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x i32> [[_MSLD]], <2 x i32> [[_MSLD1]], <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[TMP13]], [[TMP14]]
 ; CHECK-NEXT: [[TMP3:%.*]] = call <2 x float> @llvm.aarch64.neon.faddp.v2f32(<2 x float> [[TMPVAR1]], <2 x float> [[TMPVAR2]])
 ; CHECK-NEXT: store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT: ret <2 x float> [[TMP3]]
@@ -2802,7 +2752,7 @@ define <4 x float> @faddp_4s(ptr %A, ptr %B) nounwind #0 {
 ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT: call void @llvm.donothing()
 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP13:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP15:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK: 3:
 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
 ; CHECK-NEXT: unreachable
@@ -2823,7 +2773,9 @@ define <4 x float> @faddp_4s(ptr %A, ptr %B) nounwind #0 {
 ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
 ; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16
-; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[_MSLD]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i32> [[_MSLD]], <4 x i32> [[_MSLD1]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i32> [[_MSLD]], <4 x i32> [[_MSLD1]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[TMP13]], [[TMP14]]
 ; CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.aarch64.neon.faddp.v4f32(<4 x float> [[TMPVAR1]], <4 x float> [[TMPVAR2]])
 ; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT: ret <4 x float> [[TMP3]]
@@ -2841,7 +2793,7 @@ define <2 x double> @faddp_2d(ptr %A, ptr %B) nounwind #0 {
 ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT: call void @llvm.donothing()
 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP13:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP15:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK: 3:
 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
 ; CHECK-NEXT: unreachable
@@ -2862,7 +2814,9 @@ define <2 x double> @faddp_2d(ptr %A, ptr %B) nounwind #0 {
 ; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
 ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
 ; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16
-; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[_MSLD]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x i64> [[_MSLD]], <2 x i64> [[_MSLD1]], <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x i64> [[_MSLD]], <2 x i64> [[_MSLD1]], <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP13]], [[TMP14]]
 ; CHECK-NEXT: [[TMP3:%.*]] = call <2 x double> @llvm.aarch64.neon.faddp.v2f64(<2 x double> [[TMPVAR1]], <2 x double> [[TMPVAR2]])
 ; CHECK-NEXT: store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT: ret <2 x double> [[TMP3]]
diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-vshift.ll b/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-vshift.ll
index 8a1e3551b2741..7fa9b412b0f03 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-vshift.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-vshift.ll
@@ -9405,17 +9405,19 @@ define void @sqshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) sanit
 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
 ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
 ; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP10]], [[TMP4]]
 ; CHECK-NEXT: [[VPADDQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> [[A]], <2 x i64> [[B]])
 ; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> [[_MSPROP]], <2 x i64> zeroinitializer)
 ; CHECK-NEXT: [[_MSPROP1:%.*]] = or <2 x i64> [[TMP3]], zeroinitializer
 ; CHECK-NEXT: [[VSHLQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> [[VPADDQ_V2_I_I]], <2 x i64> zeroinitializer)
 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
 ; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
-; CHECK: 5:
+; CHECK: 8:
 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT: unreachable
-; CHECK: 6:
+; CHECK: 9:
 ; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[DST]] to i64
 ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
@@ -9438,17 +9440,19 @@ define void @uqshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) sanit
 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
 ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
 ; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP10]], [[TMP4]]
 ; CHECK-NEXT: [[VPADDQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> [[A]], <2 x i64> [[B]])
 ; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> [[_MSPROP]], <2 x i64> zeroinitializer)
 ; CHECK-NEXT: [[_MSPROP1:%.*]] = or <2 x i64> [[TMP3]], zeroinitializer
 ; CHECK-NEXT: [[VSHLQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> [[VPADDQ_V2_I_I]], <2 x i64> zeroinitializer)
 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
 ; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
-; CHECK: 5:
+; CHECK: 8:
 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT: unreachable
-; CHECK: 6:
+; CHECK: 9:
 ; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[DST]] to i64
 ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
@@ -9471,17 +9475,19 @@ define void @srshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) sanit
 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
 ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
 ; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP10]], [[TMP4]]
 ; CHECK-NEXT: [[VPADDQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> [[A]], <2 x i64> [[B]])
 ; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[_MSPROP]], <2 x i64> zeroinitializer)
 ; CHECK-NEXT: [[_MSPROP1:%.*]] = or <2 x i64> [[TMP3]], zeroinitializer
 ; CHECK-NEXT: [[VSHLQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[VPADDQ_V2_I_I]], <2 x i64> zeroinitializer)
 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
 ; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
-; CHECK: 5:
+; CHECK: 8:
 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT: unreachable
-; CHECK: 6:
+; CHECK: 9:
 ; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[DST]] to i64
 ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
@@ -9504,17 +9510,19 @@ define void @urshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) sanit
 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
 ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
 ; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP10]], [[TMP4]]
 ; CHECK-NEXT: [[VPADDQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> [[A]], <2 x i64> [[B]])
 ; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[_MSPROP]], <2 x i64> zeroinitializer)
 ; CHECK-NEXT: [[_MSPROP1:%.*]] = or <2 x i64> [[TMP3]], zeroinitializer
 ; CHECK-NEXT: [[VSHLQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[VPADDQ_V2_I_I]], <2 x i64> zeroinitializer)
 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
 ; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
-; CHECK: 5:
+; CHECK: 8:
 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT: unreachable
-; CHECK: 6:
+; CHECK: 9:
 ; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[DST]] to i64
 ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
@@ -9537,17 +9545,19 @@ define void @sqshlu_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) sani
 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
 ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
 ; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP10]], [[TMP4]]
 ; CHECK-NEXT: [[VPADDQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> [[A]], <2 x i64> [[B]])
 ; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> [[_MSPROP]], <2 x i64> zeroinitializer)
 ; CHECK-NEXT: [[_MSPROP1:%.*]] = or <2 x i64> [[TMP3]], zeroinitializer
 ; CHECK-NEXT: [[VSHLQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> [[VPADDQ_V2_I_I]], <2 x i64> zeroinitializer)
 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
 ; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
-; CHECK: 5:
+; CHECK: 8:
 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT: unreachable
-; CHECK: 6:
+; CHECK: 9:
 ; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[DST]] to i64
 ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
@@ -9570,17 +9580,19 @@ define void @sshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) saniti
 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
 ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
 ; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP10]], [[TMP4]]
 ; CHECK-NEXT: [[VPADDQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> [[A]], <2 x i64> [[B]])
 ; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> [[_MSPROP]], <2 x i64> zeroinitializer)
 ; CHECK-NEXT: [[_MSPROP1:%.*]] = or <2 x i64> [[TMP3]], zeroinitializer
 ; CHECK-NEXT: [[VSHLQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> [[VPADDQ_V2_I_I]], <2 x i64> zeroinitializer)
 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
 ; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
-; CHECK: 5:
+; CHECK: 8:
 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT: unreachable
-; CHECK: 6:
+; CHECK: 9:
 ; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[DST]] to i64
 ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
@@ -9603,17 +9615,19 @@ define void @ushl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) saniti
 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
 ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
 ; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP10]], [[TMP4]]
 ; CHECK-NEXT: [[VPADDQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> [[A]], <2 x i64> [[B]])
 ; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64> [[_MSPROP]], <2 x i64> zeroinitializer)
 ; CHECK-NEXT: [[_MSPROP1:%.*]] = or <2 x i64> [[TMP3]], zeroinitializer
 ; CHECK-NEXT: [[VSHLQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64> [[VPADDQ_V2_I_I]], <2 x i64> zeroinitializer)
 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
 ; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
-; CHECK: 5:
+; CHECK: 8:
 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT: unreachable
-; CHECK: 6:
+; CHECK: 9:
 ; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[DST]] to i64
 ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr