[msan] Add handlePairwiseShadowOrIntrinsic and use it to handle Arm NEON pairwise add #126008
Author: Thurston Dang (thurstond)

Changes

This patch adds a function, handlePairwiseShadowOrIntrinsic, that ORs pairs of adjacent shadow values; this is suitable for propagating shadow for 1- or 2-vector intrinsics that combine adjacent fields. It then applies handlePairwiseShadowOrIntrinsic to Arm NEON pairwise add: llvm.aarch64.neon.{addp, faddp} (currently incorrectly handled) and llvm.aarch64.neon.{saddlp, uaddlp} (currently suboptimally handled).

Updates the tests from #125820.

Patch is 75.43 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/126008.diff

3 Files Affected:
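As background for the diff below: the shadow rule the new handler implements is that each output shadow lane is the OR of the two adjacent input shadow lanes, widened (via zext) to the return element type. A minimal stand-alone C++ model of the one-operand case, using llvm.aarch64.neon.saddlp.v4i16.v8i8 as the running example (illustrative only, not MSan code):

```cpp
#include <array>
#include <cassert>
#include <cstddef>
#include <cstdint>

// Shadow model for <4 x i16> @llvm.aarch64.neon.saddlp.v4i16.v8i8(<8 x i8>):
// output shadow lane i = zext(Shadow[2*i] | Shadow[2*i + 1]).
std::array<uint16_t, 4> pairwiseShadowOr(const std::array<uint8_t, 8> &Shadow) {
  std::array<uint16_t, 4> Out{};
  for (std::size_t I = 0; I < Out.size(); ++I)
    Out[I] = static_cast<uint16_t>(Shadow[2 * I] | Shadow[2 * I + 1]);
  return Out;
}

int main() {
  // Only input lane 3 is poisoned (all-ones shadow byte).
  std::array<uint8_t, 8> Shadow{0, 0, 0, 0xff, 0, 0, 0, 0};
  std::array<uint16_t, 4> Out = pairwiseShadowOr(Shadow);
  // Lanes 2 and 3 pair into output lane 1; only that lane becomes poisoned.
  assert(Out[0] == 0 && Out[1] == 0x00ff && Out[2] == 0 && Out[3] == 0);
  return 0;
}
```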
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 5df51de9e24cb3..fc5f5079c59f0d 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -2602,6 +2602,57 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
SC.Done(&I);
}
+ /// Propagate shadow for 1- or 2-vector intrinsics that combine adjacent
+ /// fields.
+ ///
+ /// e.g., <2 x i32> @llvm.aarch64.neon.saddlp.v2i32.v4i16(<4 x i16>)
+ /// <16 x i8> @llvm.aarch64.neon.addp.v16i8(<16 x i8>, <16 x i8>)
+ ///
+ /// TODO: adapt this function to handle horizontal add/sub?
+ void handlePairwiseShadowOrIntrinsic(IntrinsicInst &I) {
+ assert(I.arg_size() == 1 || I.arg_size() == 2);
+
+ assert(I.getType()->isVectorTy());
+ assert(I.getArgOperand(0)->getType()->isVectorTy());
+
+ FixedVectorType *ParamType =
+ cast<FixedVectorType>(I.getArgOperand(0)->getType());
+ if (I.arg_size() == 2)
+ assert(ParamType == cast<FixedVectorType>(I.getArgOperand(1)->getType()));
+ FixedVectorType *ReturnType = cast<FixedVectorType>(I.getType());
+ assert(ParamType->getNumElements() * I.arg_size() ==
+ 2 * ReturnType->getNumElements());
+
+ IRBuilder<> IRB(&I);
+ unsigned Width = ParamType->getNumElements() * I.arg_size();
+
+ // Horizontal OR of shadow
+ SmallVector<int, 8> EvenMask;
+ SmallVector<int, 8> OddMask;
+ for (unsigned X = 0; X < Width; X += 2) {
+ EvenMask.append(1, X);
+ OddMask.append(1, X + 1);
+ }
+
+ Value *EvenShadow;
+ Value *OddShadow;
+ if (I.arg_size() == 2) {
+ EvenShadow =
+ IRB.CreateShuffleVector(getShadow(&I, 0), getShadow(&I, 1), EvenMask);
+ OddShadow =
+ IRB.CreateShuffleVector(getShadow(&I, 0), getShadow(&I, 1), OddMask);
+ } else {
+ EvenShadow = IRB.CreateShuffleVector(getShadow(&I, 0), EvenMask);
+ OddShadow = IRB.CreateShuffleVector(getShadow(&I, 0), OddMask);
+ }
+
+ Value *OrShadow = IRB.CreateOr(EvenShadow, OddShadow);
+ OrShadow = CreateShadowCast(IRB, OrShadow, getShadowTy(&I));
+
+ setShadow(&I, OrShadow);
+ setOriginForNaryOp(I);
+ }
+
void visitFNeg(UnaryOperator &I) { handleShadowOr(I); }
// Handle multiplication by constant.
@@ -4776,6 +4827,17 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
setOrigin(&I, getCleanOrigin());
break;
+ // Add Pairwise
+ case Intrinsic::aarch64_neon_addp:
+ // Floating-point Add Pairwise
+ case Intrinsic::aarch64_neon_faddp:
+ // Add Long Pairwise
+ case Intrinsic::aarch64_neon_saddlp:
+ case Intrinsic::aarch64_neon_uaddlp: {
+ handlePairwiseShadowOrIntrinsic(I);
+ break;
+ }
+
case Intrinsic::aarch64_neon_st1x2:
case Intrinsic::aarch64_neon_st1x3:
case Intrinsic::aarch64_neon_st1x4:
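A note on the two-operand case (e.g., <16 x i8> @llvm.aarch64.neon.addp.v16i8): the shuffles index into the concatenation of both operand shadows, so the masks run over Width = NumElements * arg_size() lanes. A small C++ sketch of the mask construction, mirroring the loop in the handler above (illustrative only):

```cpp
#include <cstdio>
#include <vector>

// Build the even/odd shuffle masks used to pair adjacent shadow lanes.
// For a two-operand intrinsic, shufflevector treats the two shadow
// vectors as one concatenated vector, so Width = NumElements * NumArgs.
int main() {
  unsigned NumElements = 4, NumArgs = 2;
  unsigned Width = NumElements * NumArgs;
  std::vector<int> EvenMask, OddMask;
  for (unsigned X = 0; X < Width; X += 2) {
    EvenMask.push_back(X);
    OddMask.push_back(X + 1);
  }
  // Prints "even: 0 2 4 6 / odd: 1 3 5 7": lanes 2i and 2i+1 are paired,
  // spanning the first operand's shadow (0..3) then the second's (4..7).
  std::printf("even:");
  for (int M : EvenMask)
    std::printf(" %d", M);
  std::printf(" / odd:");
  for (int M : OddMask)
    std::printf(" %d", M);
  std::printf("\n");
}
```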
diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-vadd.ll b/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-vadd.ll
index f9b223dc420b92..ad0856d38c1e9a 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-vadd.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-vadd.ll
@@ -1601,7 +1601,7 @@ define <4 x i16> @saddlp4h(ptr %A) nounwind #0 {
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
; CHECK: 2:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
; CHECK-NEXT: unreachable
@@ -1611,15 +1611,12 @@ define <4 x i16> @saddlp4h(ptr %A) nounwind #0 {
; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP6]], align 8
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[_MSLD]] to i64
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP7]], 0
-; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
-; CHECK: 8:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
-; CHECK-NEXT: unreachable
-; CHECK: 9:
+; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x i8> [[_MSLD]], <8 x i8> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x i8> [[_MSLD]], <8 x i8> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT: [[TMP9:%.*]] = or <4 x i8> [[TMP7]], [[TMP8]]
+; CHECK-NEXT: [[TMP10:%.*]] = zext <4 x i8> [[TMP9]] to <4 x i16>
; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.saddlp.v4i16.v8i8(<8 x i8> [[TMPVAR1]])
-; CHECK-NEXT: store <4 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store <4 x i16> [[TMP10]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x i16> [[TMP3]]
;
%tmpvar1 = load <8 x i8>, ptr %A
@@ -1633,7 +1630,7 @@ define <2 x i32> @saddlp2s(ptr %A) nounwind #0 {
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
; CHECK: 2:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
; CHECK-NEXT: unreachable
@@ -1643,15 +1640,12 @@ define <2 x i32> @saddlp2s(ptr %A) nounwind #0 {
; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP6]], align 8
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[_MSLD]] to i64
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP7]], 0
-; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
-; CHECK: 8:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
-; CHECK-NEXT: unreachable
-; CHECK: 9:
+; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i16> [[_MSLD]], <4 x i16> poison, <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i16> [[_MSLD]], <4 x i16> poison, <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT: [[TMP9:%.*]] = or <2 x i16> [[TMP7]], [[TMP8]]
+; CHECK-NEXT: [[TMP10:%.*]] = zext <2 x i16> [[TMP9]] to <2 x i32>
; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.saddlp.v2i32.v4i16(<4 x i16> [[TMPVAR1]])
-; CHECK-NEXT: store <2 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store <2 x i32> [[TMP10]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <2 x i32> [[TMP3]]
;
%tmpvar1 = load <4 x i16>, ptr %A
@@ -1665,7 +1659,7 @@ define <1 x i64> @saddlp1d(ptr %A) nounwind #0 {
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
; CHECK: 2:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
; CHECK-NEXT: unreachable
@@ -1675,15 +1669,12 @@ define <1 x i64> @saddlp1d(ptr %A) nounwind #0 {
; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[_MSLD]] to i64
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP7]], 0
-; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
-; CHECK: 8:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
-; CHECK-NEXT: unreachable
-; CHECK: 9:
+; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[_MSLD]], <2 x i32> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x i32> [[_MSLD]], <2 x i32> poison, <1 x i32> <i32 1>
+; CHECK-NEXT: [[TMP9:%.*]] = or <1 x i32> [[TMP7]], [[TMP8]]
+; CHECK-NEXT: [[TMP10:%.*]] = zext <1 x i32> [[TMP9]] to <1 x i64>
; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.saddlp.v1i64.v2i32(<2 x i32> [[TMPVAR1]])
-; CHECK-NEXT: store <1 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store <1 x i64> [[TMP10]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <1 x i64> [[TMP3]]
;
%tmpvar1 = load <2 x i32>, ptr %A
@@ -1697,7 +1688,7 @@ define <8 x i16> @saddlp8h(ptr %A) nounwind #0 {
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
; CHECK: 2:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
; CHECK-NEXT: unreachable
@@ -1707,15 +1698,12 @@ define <8 x i16> @saddlp8h(ptr %A) nounwind #0 {
; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP6]], align 16
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[_MSLD]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0
-; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
-; CHECK: 8:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
-; CHECK-NEXT: unreachable
-; CHECK: 9:
+; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <16 x i8> [[_MSLD]], <16 x i8> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <16 x i8> [[_MSLD]], <16 x i8> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i8> [[TMP7]], [[TMP8]]
+; CHECK-NEXT: [[TMP10:%.*]] = zext <8 x i8> [[TMP9]] to <8 x i16>
; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.saddlp.v8i16.v16i8(<16 x i8> [[TMPVAR1]])
-; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store <8 x i16> [[TMP10]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x i16> [[TMP3]]
;
%tmpvar1 = load <16 x i8>, ptr %A
@@ -1729,7 +1717,7 @@ define <4 x i32> @saddlp4s(ptr %A) nounwind #0 {
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
; CHECK: 2:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
; CHECK-NEXT: unreachable
@@ -1739,15 +1727,12 @@ define <4 x i32> @saddlp4s(ptr %A) nounwind #0 {
; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i16> [[_MSLD]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0
-; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
-; CHECK: 8:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
-; CHECK-NEXT: unreachable
-; CHECK: 9:
+; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x i16> [[_MSLD]], <8 x i16> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x i16> [[_MSLD]], <8 x i16> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT: [[TMP9:%.*]] = or <4 x i16> [[TMP7]], [[TMP8]]
+; CHECK-NEXT: [[TMP10:%.*]] = zext <4 x i16> [[TMP9]] to <4 x i32>
; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.saddlp.v4i32.v8i16(<8 x i16> [[TMPVAR1]])
-; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store <4 x i32> [[TMP10]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x i32> [[TMP3]]
;
%tmpvar1 = load <8 x i16>, ptr %A
@@ -1761,7 +1746,7 @@ define <2 x i64> @saddlp2d(ptr %A) nounwind #0 {
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
; CHECK: 2:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
; CHECK-NEXT: unreachable
@@ -1771,15 +1756,12 @@ define <2 x i64> @saddlp2d(ptr %A) nounwind #0 {
; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[_MSLD]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0
-; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
-; CHECK: 8:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
-; CHECK-NEXT: unreachable
-; CHECK: 9:
+; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[_MSLD]], <4 x i32> poison, <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[_MSLD]], <4 x i32> poison, <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT: [[TMP9:%.*]] = or <2 x i32> [[TMP7]], [[TMP8]]
+; CHECK-NEXT: [[TMP10:%.*]] = zext <2 x i32> [[TMP9]] to <2 x i64>
; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.saddlp.v2i64.v4i32(<4 x i32> [[TMPVAR1]])
-; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store <2 x i64> [[TMP10]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <2 x i64> [[TMP3]]
;
%tmpvar1 = load <4 x i32>, ptr %A
@@ -1801,7 +1783,7 @@ define <4 x i16> @uaddlp4h(ptr %A) nounwind #0 {
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
; CHECK: 2:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
; CHECK-NEXT: unreachable
@@ -1811,15 +1793,12 @@ define <4 x i16> @uaddlp4h(ptr %A) nounwind #0 {
; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP6]], align 8
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[_MSLD]] to i64
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP7]], 0
-; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
-; CHECK: 8:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
-; CHECK-NEXT: unreachable
-; CHECK: 9:
+; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x i8> [[_MSLD]], <8 x i8> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x i8> [[_MSLD]], <8 x i8> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT: [[TMP9:%.*]] = or <4 x i8> [[TMP7]], [[TMP8]]
+; CHECK-NEXT: [[TMP10:%.*]] = zext <4 x i8> [[TMP9]] to <4 x i16>
; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.uaddlp.v4i16.v8i8(<8 x i8> [[TMPVAR1]])
-; CHECK-NEXT: store <4 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store <4 x i16> [[TMP10]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x i16> [[TMP3]]
;
%tmpvar1 = load <8 x i8>, ptr %A
@@ -1833,7 +1812,7 @@ define <2 x i32> @uaddlp2s(ptr %A) nounwind #0 {
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
; CHECK: 2:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
; CHECK-NEXT: unreachable
@@ -1843,15 +1822,12 @@ define <2 x i32> @uaddlp2s(ptr %A) nounwind #0 {
; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP6]], align 8
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[_MSLD]] to i64
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP7]], 0
-; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
-; CHECK: 8:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
-; CHECK-NEXT: unreachable
-; CHECK: 9:
+; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i16> [[_MSLD]], <4 x i16> poison, <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i16> [[_MSLD]], <4 x i16> poison, <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT: [[TMP9:%.*]] = or <2 x i16> [[TMP7]], [[TMP8]]
+; CHECK-NEXT: [[TMP10:%.*]] = zext <2 x i16> [[TMP9]] to <2 x i32>
; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.uaddlp.v2i32.v4i16(<4 x i16> [[TMPVAR1]])
-; CHECK-NEXT: store <2 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store <2 x i32> [[TMP10]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <2 x i32> [[TMP3]]
;
%tmpvar1 = load <4 x i16>, ptr %A
@@ -1865,7 +1841,7 @@ define <1 x i64> @uaddlp1d(ptr %A) nounwind #0 {
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
; CHECK: 2:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
; CHECK-NEXT: unreachable
@@ -1875,15 +1851,12 @@ define <1 x i64> @uaddlp1d(ptr %A) nounwind #0 {
; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[_MSLD]] to i64
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP7]], 0
-; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
-; CHECK: 8:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
-; CHECK-NEXT: unreachable
-; CHECK: 9:
+; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[_MSLD]], <2 x i32> poison, <1 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x i32> [[_MSLD]], <2 x i32> poison, <1 x i32> <i32 1>
+; CHECK-NEXT: [[TMP9:%.*]] = or <1 x i32> [[TMP7]], [[TMP8]]
+; CHECK-NEXT: [[TMP10:%.*]] = zext <1 x i32> [[TMP9]] to <1 x i64>
; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.uaddlp.v1i64.v2i32(<2 x i32> [[TMPVAR1]])
-; CHECK-NEXT: store <1 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store <1 x i64> [[TMP10]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <1 x i64> [[TMP3]]
;
%tmpvar1 = load <2 x i32>, ptr %A
@@ -1897,7 +1870,7 @@ define ...
[truncated]
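For contrast, the deleted CHECK lines above encode the previous strict handling: the whole input shadow was bitcast to a single integer, compared against zero, and any poisoned bit branched to __msan_warning_noreturn, while the call's return shadow was stored as zeroinitializer. A minimal C++ model of that old behavior (hypothetical helper name):

```cpp
#include <array>
#include <cstdint>

// Model of the removed strict check: view the <8 x i8> shadow as one i64;
// any nonzero bit triggers an immediate report, and the call's own result
// shadow is left all-clean (zeroinitializer).
bool oldStrictCheck(const std::array<uint8_t, 8> &Shadow) {
  uint64_t Bits = 0;
  for (int I = 0; I < 8; ++I)
    Bits |= static_cast<uint64_t>(Shadow[I]) << (8 * I); // bitcast to i64
  return Bits != 0; // true => branch to __msan_warning_noreturn()
}
```

Under the new scheme, the same poisoned input instead produces a poisoned output lane (see the model after the patch description), and any report is deferred until that lane is actually used.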
LLVM Buildbot has detected a new failure on one of the builders. Full details are available at: https://lab.llvm.org/buildbot/#/builders/186/builds/6453
…rwise add/sub (#127567)

x86 pairwise add and sub are currently handled by applying the pairwise add intrinsic to the shadow (#124835), due to the lack of an x86 pairwise OR intrinsic. handlePairwiseShadowOrIntrinsic was added (#126008) to handle Arm pairwise add, but assumes that the intrinsic operates on each pair of elements as defined by the LLVM type. In contrast, x86 pairwise add/sub may sometimes have e.g. <1 x i64> as a parameter but actually be operating on <2 x i32>. This patch generalizes handlePairwiseShadowOrIntrinsic to allow reinterpreting the parameters as a vector of a specified element size, and then uses this function to handle x86 pairwise add/sub.
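A rough C++ model of that generalization (hypothetical, not the MSan code): a shadow operand typed <1 x i64> is reinterpreted as <2 x i32> before the pairwise OR, so lanes are paired at the width the instruction actually operates on.

```cpp
#include <cstdint>

// Hypothetical model: a pairwise op whose operands are typed <1 x i64> but
// which really operates on <2 x i32> fields. Reinterpret each 64-bit shadow
// as two 32-bit lanes, OR adjacent lanes across the concatenated operands
// (A0|A1 and B0|B1), and repack the result as <1 x i64>.
uint64_t pairwiseShadowOr32(uint64_t A, uint64_t B) {
  uint32_t Lanes[4] = {static_cast<uint32_t>(A), static_cast<uint32_t>(A >> 32),
                       static_cast<uint32_t>(B), static_cast<uint32_t>(B >> 32)};
  uint64_t Lo = static_cast<uint64_t>(Lanes[0] | Lanes[1]); // output field 0
  uint64_t Hi = static_cast<uint64_t>(Lanes[2] | Lanes[3]); // output field 1
  return Lo | (Hi << 32);
}
```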