
[msan] Support NEON vector multiplication instructions #117944


Merged
thurstond merged 2 commits into llvm:main on Dec 9, 2024

Conversation

thurstond
Contributor

Approximates shadow propagation by OR'ing the shadows of the operands.

Updates the neon_vmul.ll test introduced in #117935
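For context, here is a minimal conceptual sketch of what OR-based shadow propagation means for these intrinsics. This is an illustration, not the code from MemorySanitizer.cpp, and `orShadows` is a hypothetical helper: any poisoned bit in either operand lane poisons the corresponding result lane, and for widening multiplies such as smull/umull the combined shadow is zero-extended to the result element width, matching the `or` + `zext` pattern visible in the generated test below.

```cpp
// Hypothetical sketch of OR-based shadow propagation for a two-operand
// widening multiply (e.g. llvm.aarch64.neon.smull.v8i16). Not the actual
// MemorySanitizer.cpp implementation.
#include "llvm/IR/IRBuilder.h"
using namespace llvm;

static Value *orShadows(IRBuilder<> &IRB, Value *ShadowA, Value *ShadowB,
                        Type *WideResultTy) {
  // If any shadow bit of either operand is set, the combined shadow is set.
  Value *Combined = IRB.CreateOr(ShadowA, ShadowB, "_msprop");
  // Widen the shadow to match the result type, e.g. <8 x i8> -> <8 x i16>.
  return IRB.CreateZExt(Combined, WideResultTy);
}
```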

@llvmbot
Member

llvmbot commented Nov 27, 2024

@llvm/pr-subscribers-llvm-transforms

@llvm/pr-subscribers-compiler-rt-sanitizer

Author: Thurston Dang (thurstond)

Changes

Approximates shadow propagation by OR'ing the shadows of the operands.

Updates the neon_vmul.ll test introduced in #117935


Patch is 382.48 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/117944.diff

2 Files Affected:

  • (modified) llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp (+13)
  • (added) llvm/test/Instrumentation/MemorySanitizer/AArch64/neon_vmul.ll (+6173)
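Throughout the generated CHECK lines in the diff below, shadow addresses are computed by XOR'ing the application pointer with 193514046488576 (0xB00000000000), the MSan shadow mask for this aarch64 target. A minimal sketch of that mapping (the standalone function form and its name are illustrative):

```cpp
#include <cstdint>

// Illustrative only: mirrors the ptrtoint/xor/inttoptr sequence in the
// CHECK lines below, where 0xB00000000000 (193514046488576) is the XOR
// mask used for this aarch64 configuration.
static uint64_t shadowAddress(uint64_t appAddr) {
  return appAddr ^ 0xB00000000000ULL;
}
```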
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index dca6bf1adfde8a..8c87e54a366f99 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -4012,6 +4012,9 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     setOriginForNaryOp(I);
   }
 
+  // Approximation only
+  void handleNEONVectorMultiplyIntrinsic(IntrinsicInst &I) { handleShadowOr(I); }
+
   void visitIntrinsicInst(IntrinsicInst &I) {
     switch (I.getIntrinsicID()) {
     case Intrinsic::uadd_with_overflow:
@@ -4409,6 +4412,16 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
       break;
     }
 
+    case Intrinsic::aarch64_neon_fmulx:
+    case Intrinsic::aarch64_neon_pmul:
+    case Intrinsic::aarch64_neon_pmull:
+    case Intrinsic::aarch64_neon_smull:
+    case Intrinsic::aarch64_neon_pmull64:
+    case Intrinsic::aarch64_neon_umull: {
+      handleNEONVectorMultiplyIntrinsic(I);
+      break;
+    }
+
     default:
       if (!handleUnknownIntrinsic(I))
         visitInstruction(I);
diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/neon_vmul.ll b/llvm/test/Instrumentation/MemorySanitizer/AArch64/neon_vmul.ll
new file mode 100644
index 00000000000000..38d66696715094
--- /dev/null
+++ b/llvm/test/Instrumentation/MemorySanitizer/AArch64/neon_vmul.ll
@@ -0,0 +1,6173 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes=msan -S | FileCheck %s
+;
+; Test memory sanitizer instrumentation for Arm vector multiplication
+; instructions.
+;
+; Forked from llvm/test/CodeGen/AArch64/arm64-vmul.ll
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-android9001"
+
+define <8 x i16> @smull8h(ptr %A, ptr %B) nounwind sanitize_memory {
+; CHECK-LABEL: define <8 x i16> @smull8h(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    [[TMP4:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    [[TMP8:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP4]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1:![0-9]+]]
+; CHECK:       [[BB3]]:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR8:[0-9]+]]
+; CHECK-NEXT:    unreachable
+; CHECK:       [[BB4]]:
+; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8
+; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8
+; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i64 [[TMP8]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP3]], label %[[BB8:.*]], label %[[BB9:.*]], !prof [[PROF1]]
+; CHECK:       [[BB8]]:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT:    unreachable
+; CHECK:       [[BB9]]:
+; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[B]], align 8
+; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT:    [[_MSLD1:%.*]] = load <8 x i8>, ptr [[TMP12]], align 8
+; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i8> [[_MSLD]], [[_MSLD1]]
+; CHECK-NEXT:    [[_MSPROP2:%.*]] = or <8 x i8> [[_MSPROP]], zeroinitializer
+; CHECK-NEXT:    [[TMP13:%.*]] = zext <8 x i8> [[_MSPROP2]] to <8 x i16>
+; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
+; CHECK-NEXT:    store <8 x i16> [[TMP13]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret <8 x i16> [[TMP3]]
+;
+  %temp1 = load <8 x i8>, ptr %A
+  %temp2 = load <8 x i8>, ptr %B
+  %temp3 = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %temp1, <8 x i8> %temp2)
+  ret <8 x i16> %temp3
+}
+
+define <4 x i32> @smull4s(ptr %A, ptr %B) nounwind sanitize_memory {
+; CHECK-LABEL: define <4 x i32> @smull4s(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP4:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    [[TMP8:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP4]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]]
+; CHECK:       [[BB3]]:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT:    unreachable
+; CHECK:       [[BB4]]:
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8
+; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8
+; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i64 [[TMP8]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP3]], label %[[BB8:.*]], label %[[BB9:.*]], !prof [[PROF1]]
+; CHECK:       [[BB8]]:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT:    unreachable
+; CHECK:       [[BB9]]:
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i16>, ptr [[B]], align 8
+; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT:    [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP12]], align 8
+; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i16> [[_MSLD]], [[_MSLD1]]
+; CHECK-NEXT:    [[_MSPROP2:%.*]] = or <4 x i16> [[_MSPROP]], zeroinitializer
+; CHECK-NEXT:    [[TMP13:%.*]] = zext <4 x i16> [[_MSPROP2]] to <4 x i32>
+; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[TMP1]], <4 x i16> [[TMP2]])
+; CHECK-NEXT:    store <4 x i32> [[TMP13]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
+;
+  %temp1 = load <4 x i16>, ptr %A
+  %temp2 = load <4 x i16>, ptr %B
+  %temp3 = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %temp1, <4 x i16> %temp2)
+  ret <4 x i32> %temp3
+}
+
+define <2 x i64> @smull2d(ptr %A, ptr %B) nounwind sanitize_memory {
+; CHECK-LABEL: define <2 x i64> @smull2d(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP4:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    [[TMP8:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP4]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]]
+; CHECK:       [[BB3]]:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT:    unreachable
+; CHECK:       [[BB4]]:
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8
+; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8
+; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i64 [[TMP8]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP3]], label %[[BB8:.*]], label %[[BB9:.*]], !prof [[PROF1]]
+; CHECK:       [[BB8]]:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT:    unreachable
+; CHECK:       [[BB9]]:
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i32>, ptr [[B]], align 8
+; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT:    [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP12]], align 8
+; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i32> [[_MSLD]], [[_MSLD1]]
+; CHECK-NEXT:    [[_MSPROP2:%.*]] = or <2 x i32> [[_MSPROP]], zeroinitializer
+; CHECK-NEXT:    [[TMP13:%.*]] = zext <2 x i32> [[_MSPROP2]] to <2 x i64>
+; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[TMP1]], <2 x i32> [[TMP2]])
+; CHECK-NEXT:    store <2 x i64> [[TMP13]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
+;
+  %temp1 = load <2 x i32>, ptr %A
+  %temp2 = load <2 x i32>, ptr %B
+  %temp3 = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %temp1, <2 x i32> %temp2)
+  ret <2 x i64> %temp3
+}
+
+declare <8 x i16>  @llvm.aarch64.neon.smull.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
+
+define <8 x i16> @umull8h(ptr %A, ptr %B) nounwind sanitize_memory {
+; CHECK-LABEL: define <8 x i16> @umull8h(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP4:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    [[TMP8:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP4]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]]
+; CHECK:       [[BB3]]:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT:    unreachable
+; CHECK:       [[BB4]]:
+; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8
+; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8
+; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i64 [[TMP8]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP3]], label %[[BB8:.*]], label %[[BB9:.*]], !prof [[PROF1]]
+; CHECK:       [[BB8]]:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT:    unreachable
+; CHECK:       [[BB9]]:
+; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[B]], align 8
+; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT:    [[_MSLD1:%.*]] = load <8 x i8>, ptr [[TMP12]], align 8
+; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i8> [[_MSLD]], [[_MSLD1]]
+; CHECK-NEXT:    [[_MSPROP2:%.*]] = or <8 x i8> [[_MSPROP]], zeroinitializer
+; CHECK-NEXT:    [[TMP13:%.*]] = zext <8 x i8> [[_MSPROP2]] to <8 x i16>
+; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
+; CHECK-NEXT:    store <8 x i16> [[TMP13]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret <8 x i16> [[TMP3]]
+;
+  %temp1 = load <8 x i8>, ptr %A
+  %temp2 = load <8 x i8>, ptr %B
+  %temp3 = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %temp1, <8 x i8> %temp2)
+  ret <8 x i16> %temp3
+}
+
+define <4 x i32> @umull4s(ptr %A, ptr %B) nounwind sanitize_memory {
+; CHECK-LABEL: define <4 x i32> @umull4s(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP4:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    [[TMP8:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP4]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]]
+; CHECK:       [[BB3]]:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT:    unreachable
+; CHECK:       [[BB4]]:
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8
+; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8
+; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i64 [[TMP8]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP3]], label %[[BB8:.*]], label %[[BB9:.*]], !prof [[PROF1]]
+; CHECK:       [[BB8]]:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT:    unreachable
+; CHECK:       [[BB9]]:
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i16>, ptr [[B]], align 8
+; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT:    [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP12]], align 8
+; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i16> [[_MSLD]], [[_MSLD1]]
+; CHECK-NEXT:    [[_MSPROP2:%.*]] = or <4 x i16> [[_MSPROP]], zeroinitializer
+; CHECK-NEXT:    [[TMP13:%.*]] = zext <4 x i16> [[_MSPROP2]] to <4 x i32>
+; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[TMP1]], <4 x i16> [[TMP2]])
+; CHECK-NEXT:    store <4 x i32> [[TMP13]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
+;
+  %temp1 = load <4 x i16>, ptr %A
+  %temp2 = load <4 x i16>, ptr %B
+  %temp3 = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %temp1, <4 x i16> %temp2)
+  ret <4 x i32> %temp3
+}
+
+define <2 x i64> @umull2d(ptr %A, ptr %B) nounwind sanitize_memory {
+; CHECK-LABEL: define <2 x i64> @umull2d(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP4:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    [[TMP8:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP4]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]]
+; CHECK:       [[BB3]]:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT:    unreachable
+; CHECK:       [[BB4]]:
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8
+; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8
+; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i64 [[TMP8]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP3]], label %[[BB8:.*]], label %[[BB9:.*]], !prof [[PROF1]]
+; CHECK:       [[BB8]]:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT:    unreachable
+; CHECK:       [[BB9]]:
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i32>, ptr [[B]], align 8
+; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT:    [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP12]], align 8
+; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i32> [[_MSLD]], [[_MSLD1]]
+; CHECK-NEXT:    [[_MSPROP2:%.*]] = or <2 x i32> [[_MSPROP]], zeroinitializer
+; CHECK-NEXT:    [[TMP13:%.*]] = zext <2 x i32> [[_MSPROP2]] to <2 x i64>
+; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[TMP1]], <2 x i32> [[TMP2]])
+; CHECK-NEXT:    store <2 x i64> [[TMP13]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
+;
+  %temp1 = load <2 x i32>, ptr %A
+  %temp2 = load <2 x i32>, ptr %B
+  %temp3 = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %temp1, <2 x i32> %temp2)
+  ret <2 x i64> %temp3
+}
+
+declare <8 x i16>  @llvm.aarch64.neon.umull.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
+
+define <4 x i32> @sqdmull4s(ptr %A, ptr %B) nounwind sanitize_memory {
+; CHECK-LABEL: define <4 x i32> @sqdmull4s(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP4:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    [[TMP8:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP4]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]]
+; CHECK:       [[BB3]]:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT:    unreachable
+; CHECK:       [[BB4]]:
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8
+; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8
+; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP8]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label %[[BB8:.*]], label %[[BB9:.*]], !prof [[PROF1]]
+; CHECK:       [[BB8]]:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT:    unreachable
+; CHECK:       [[BB9]]:
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i16>, ptr [[B]], align 8
+; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT:    [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP12]], align 8
+; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <4 x i16> [[_MSLD]] to i64
+; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i64 [[TMP13]], 0
+; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <4 x i16> [[_MSLD1]] to i64
+; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i64 [[TMP14]], 0
+; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]]
+; CHECK-NEXT:    br i1 [[_MSOR]], label %[[BB15:.*]], label %[[BB16:.*]], !prof [[PROF1]]
+; CHECK:       [[BB15]]:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT:    unreachable
+; CHECK:       [[BB16]]:
+; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP1]], <4 x i16> [[TMP2]])
+; CHECK-NEXT:    store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
+;
+  %temp1 = load <4 x i16>, ptr %A
+  %temp2 = load <4 x i16>, ptr %B
+  %temp3 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %temp1, <4 x i16> %temp2)
+  ret <4 x i32> %temp3
+}
+
+define <2 x i64> @sqdmull2d(ptr %A, ptr %B) nounwind sanitize_memory {
+; CHECK-LABEL: define <2 x i64> @sqdmull2d(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP4:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    [[TMP8:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP4]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]]
+; CHECK:       [[BB3]]:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT:    unreachable
+; CHECK:       [[BB4]]:
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8
+; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8
+; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP8]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label %[[BB8:.*]], label %[[BB9:.*]], !prof [[PROF1]]
+; CHECK:       [[BB8]]:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: ...
[truncated]
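As a user-facing illustration (an assumed example, not part of the PR): with this handler in place, uninitialized data fed to a NEON widening multiply propagates poison into the result, and MSan reports when that result is used. The demo and build line below are hypothetical.

```cpp
// Hypothetical demo, not from this PR. vmull_s8 lowers to
// llvm.aarch64.neon.smull.v8i16, one of the intrinsics handled above.
// Build sketch:
//   clang++ --target=aarch64-linux-gnu -fsanitize=memory -O1 demo.cpp
#include <arm_neon.h>
#include <cstdio>

int main() {
  int8_t a[8] = {1, 2, 3, 4, 5, 6, 7, 8};
  int8_t b[8];                       // deliberately left uninitialized
  int8x8_t va = vld1_s8(a);
  int8x8_t vb = vld1_s8(b);          // poisoned shadow
  int16x8_t prod = vmull_s8(va, vb); // shadow ORs into every lane of prod
  // Branching on the poisoned result is what triggers the MSan report.
  if (vgetq_lane_s16(prod, 0) > 0)
    std::puts("positive");
  return 0;
}
```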


github-actions bot commented Nov 28, 2024

✅ With the latest revision this PR passed the C/C++ code formatter.

Approximates the shadow propagation via OR'ing.

Updates the neon_vmul.ll test introduced in llvm#117935
thurstond merged commit 3b74abd into llvm:main on Dec 9, 2024
6 of 7 checks passed