[Clang][LLVM][AArch64] Add intrinsic for LUTI4 SME2 instruction #97755

CarolineConcatto · 2024-07-04T17:18:09Z

This patch adds these intrinsics:

// Variants are also available for: _s8
svuint8x4_t svluti4_zt_u8_x4(uint64_t zt0, svuint8x2_t zn) __arm_streaming __arm_in("zt0");

according to PR#324[1]
[1]ARM-software/acle#324

This patch adds these intrinsics: // Variants are also available for: _s8 svuint8x4_t svluti4_zt_u8_x4(uint64_t zt0, svuint8x2_t zn) __arm_streaming __arm_in("zt0"); according to PR#324[1] [1]ARM-software/acle#324

llvmbot · 2024-07-04T17:18:43Z

@llvm/pr-subscribers-llvm-ir
@llvm/pr-subscribers-clang

@llvm/pr-subscribers-backend-aarch64

Author: None (CarolineConcatto)

Changes

This patch adds these intrinsics:

// Variants are also available for: _s8
svuint8x4_t svluti4_zt_u8_x4(uint64_t zt0, svuint8x2_t zn) __arm_streaming __arm_in("zt0");

according to PR#324[1]
[1]ARM-software/acle#324

Full diff: https://github.com/llvm/llvm-project/pull/97755.diff

7 Files Affected:

(modified) clang/include/clang/Basic/arm_sme.td (+5)
(added) clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_zt.c (+82)
(modified) clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp (+5)
(modified) llvm/include/llvm/IR/IntrinsicsAArch64.td (+6)
(modified) llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp (+16-3)
(modified) llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td (+1-1)
(added) llvm/test/CodeGen/AArch64/sme2-intrinsics-write-zt.ll (+17)

diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td
index ce8908f566f2fd..e4a61caae733ec 100644
--- a/clang/include/clang/Basic/arm_sme.td
+++ b/clang/include/clang/Basic/arm_sme.td
@@ -817,4 +817,9 @@ multiclass ZAReadzArray<string vg_num>{
 
 defm SVREADZ_VG2 :  ZAReadzArray<"2">;
 defm SVREADZ_VG4 :  ZAReadzArray<"4">;
+
+let SMETargetGuard = "sme2,sme-lutv2" in {
+  def SVLUTI4_ZT_X4 : SInst<"svluti4_zt_{d}_x4", "4i2", "cUc", MergeNone, "aarch64_sme_luti4_zt_x4", [IsStreaming, IsInOutZT0], [ImmCheck<0, ImmCheck0_0>]>;
+}
+
 } // let SVETargetGuard = InvalidMode
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_zt.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_zt.c
new file mode 100644
index 00000000000000..2e7cd0939f516b
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_zt.c
@@ -0,0 +1,82 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1  -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-lutv2 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -x c++  -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature  +sme-lutv2  -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s -check-prefix CHECK-CXX
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS   -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature  +sme-lutv2  -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -x c++  -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-lutv2 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s -check-prefix CHECK-CXX
+
+// RUN: %clang_cc1  -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-lutv2 -O2 -S -Werror -Wall -o /dev/null %s
+// REQUIRES: aarch64-registered-target
+
+#include <arm_sme.h>
+
+// CHECK-LABEL: define dso_local <vscale x 64 x i8> @test_luti4_zt_u8_x4(
+// CHECK-SAME: <vscale x 32 x i8> [[OP:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[OP]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[OP]], i64 16)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.luti4.zt.x4.nxv16i8(i32 0, <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], 0
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> poison, <vscale x 16 x i8> [[TMP3]], i64 0)
+// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], 1
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP4]], <vscale x 16 x i8> [[TMP5]], i64 16)
+// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], 2
+// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP6]], <vscale x 16 x i8> [[TMP7]], i64 32)
+// CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], 3
+// CHECK-NEXT:    [[TMP10:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP8]], <vscale x 16 x i8> [[TMP9]], i64 48)
+// CHECK-NEXT:    ret <vscale x 64 x i8> [[TMP10]]
+//
+// CHECK-CXX-LABEL: define dso_local <vscale x 64 x i8> @_Z19test_luti4_zt_u8_x411svuint8x2_t(
+// CHECK-CXX-SAME: <vscale x 32 x i8> [[OP:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-CXX-NEXT:  [[ENTRY:.*:]]
+// CHECK-CXX-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[OP]], i64 0)
+// CHECK-CXX-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[OP]], i64 16)
+// CHECK-CXX-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.luti4.zt.x4.nxv16i8(i32 0, <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]])
+// CHECK-CXX-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], 0
+// CHECK-CXX-NEXT:    [[TMP4:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> poison, <vscale x 16 x i8> [[TMP3]], i64 0)
+// CHECK-CXX-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], 1
+// CHECK-CXX-NEXT:    [[TMP6:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP4]], <vscale x 16 x i8> [[TMP5]], i64 16)
+// CHECK-CXX-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], 2
+// CHECK-CXX-NEXT:    [[TMP8:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP6]], <vscale x 16 x i8> [[TMP7]], i64 32)
+// CHECK-CXX-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], 3
+// CHECK-CXX-NEXT:    [[TMP10:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP8]], <vscale x 16 x i8> [[TMP9]], i64 48)
+// CHECK-CXX-NEXT:    ret <vscale x 64 x i8> [[TMP10]]
+//
+svuint8x4_t test_luti4_zt_u8_x4(svuint8x2_t op)  __arm_streaming __arm_in("zt0") {
+  return svluti4_zt_u8_x4(0, op);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 64 x i8> @test_luti4_zt_s8_x4(
+// CHECK-SAME: <vscale x 32 x i8> [[OP:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[OP]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[OP]], i64 16)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.luti4.zt.x4.nxv16i8(i32 0, <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], 0
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> poison, <vscale x 16 x i8> [[TMP3]], i64 0)
+// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], 1
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP4]], <vscale x 16 x i8> [[TMP5]], i64 16)
+// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], 2
+// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP6]], <vscale x 16 x i8> [[TMP7]], i64 32)
+// CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], 3
+// CHECK-NEXT:    [[TMP10:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP8]], <vscale x 16 x i8> [[TMP9]], i64 48)
+// CHECK-NEXT:    ret <vscale x 64 x i8> [[TMP10]]
+//
+// CHECK-CXX-LABEL: define dso_local <vscale x 64 x i8> @_Z19test_luti4_zt_s8_x410svint8x2_t(
+// CHECK-CXX-SAME: <vscale x 32 x i8> [[OP:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CXX-NEXT:  [[ENTRY:.*:]]
+// CHECK-CXX-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[OP]], i64 0)
+// CHECK-CXX-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[OP]], i64 16)
+// CHECK-CXX-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.luti4.zt.x4.nxv16i8(i32 0, <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]])
+// CHECK-CXX-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], 0
+// CHECK-CXX-NEXT:    [[TMP4:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> poison, <vscale x 16 x i8> [[TMP3]], i64 0)
+// CHECK-CXX-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], 1
+// CHECK-CXX-NEXT:    [[TMP6:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP4]], <vscale x 16 x i8> [[TMP5]], i64 16)
+// CHECK-CXX-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], 2
+// CHECK-CXX-NEXT:    [[TMP8:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP6]], <vscale x 16 x i8> [[TMP7]], i64 32)
+// CHECK-CXX-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP2]], 3
+// CHECK-CXX-NEXT:    [[TMP10:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP8]], <vscale x 16 x i8> [[TMP9]], i64 48)
+// CHECK-CXX-NEXT:    ret <vscale x 64 x i8> [[TMP10]]
+//
+svint8x4_t test_luti4_zt_s8_x4(svint8x2_t op)  __arm_streaming __arm_in("zt0") {
+  return svluti4_zt_s8_x4(0, op);
+}
diff --git a/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp b/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp
index 5de97649af5d3a..567273ab141f25 100644
--- a/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp
+++ b/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp
@@ -350,3 +350,8 @@ void test_svdot_multi_za32_bad_lane(uint32_t slice_base, svuint16_t z_u16,
   svsudot_lane_za32_s8_vg1x2(slice_base, z_s8x2, z_u8, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
   svsudot_lane_za32_s8_vg1x4(slice_base, z_s8x4, z_u8, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
 }
+
+void test_rluti4_zt_x4(svuint8x2_t op) __arm_streaming __arm_inout("zt0") {
+  // Check Zt tile 0
+  svluti4_zt_u8_x4(1, op);  // expected-error {{argument value 1 is outside the valid range [0, 0]}}
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 6f3694cf952d47..e65ebd5207d0af 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -3678,6 +3678,12 @@ let TargetPrefix = "aarch64" in {
     : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
                             [llvm_i32_ty, llvm_nxv16i8_ty, llvm_i32_ty],
                             [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, IntrReadMem]>;
+
+  def int_aarch64_sme_luti4_zt_x4
+    : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
+                            [llvm_i32_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty],
+                            [ImmArg<ArgIndex<0>>, IntrNoMem, IntrHasSideEffects]>;
+
 }
 
 // SVE2.1 - ZIPQ1, ZIPQ2, UZPQ1, UZPQ2
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 59cfd8d6c27d25..9874a20fa20d4f 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -410,7 +410,7 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {
   }
 
   void SelectMultiVectorLuti(SDNode *Node, unsigned NumOutVecs, unsigned Opc,
-                             uint32_t MaxImm);
+                             uint32_t MaxImm, bool IsMultiVector = false);
 
   template <unsigned MaxIdx, unsigned Scale>
   bool SelectSMETileSlice(SDValue N, SDValue &Vector, SDValue &Offset) {
@@ -1896,15 +1896,23 @@ void AArch64DAGToDAGISel::SelectFrintFromVT(SDNode *N, unsigned NumVecs,
 
 void AArch64DAGToDAGISel::SelectMultiVectorLuti(SDNode *Node,
                                                 unsigned NumOutVecs,
-                                                unsigned Opc, uint32_t MaxImm) {
+                                                unsigned Opc, uint32_t MaxImm,
+                                                bool IsMultiVector) {
   if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Node->getOperand(4)))
     if (Imm->getZExtValue() > MaxImm)
       return;
 
   SDValue ZtValue;
+  SmallVector<SDValue, 4> Ops;
   if (!ImmToReg<AArch64::ZT0, 0>(Node->getOperand(2), ZtValue))
     return;
-  SDValue Ops[] = {ZtValue, Node->getOperand(3), Node->getOperand(4)};
+  Ops.push_back(ZtValue);
+  if (IsMultiVector) {
+    Ops.push_back(createZMulTuple({Node->getOperand(3), Node->getOperand(4)}));
+  } else {
+    Ops.push_back(Node->getOperand(3));
+    Ops.push_back(Node->getOperand(4));
+  }
   SDLoc DL(Node);
   EVT VT = Node->getValueType(0);
 
@@ -5415,6 +5423,11 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
         SelectMultiVectorLuti(Node, 2, Opc, 3);
       return;
     }
+    case Intrinsic::aarch64_sme_luti4_zt_x4: {
+      // Does not have immediate but it has 2ZPR input
+      SelectMultiVectorLuti(Node, 4, AArch64::LUTI4_4ZZT2Z, 0, true);
+      return;
+    }
     }
   } break;
   case ISD::INTRINSIC_WO_CHAIN: {
diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
index 709a98d3a8cb4d..111deefec860fc 100644
--- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
@@ -936,7 +936,7 @@ defm FAMIN_4Z4Z : sme2_fp_sve_destructive_vector_vg4_multi<"famin", 0b0010101>;
 
 let Predicates = [HasSME2, HasSME_LUTv2] in {
 defm MOVT : sme2_movt_zt_to_zt<"movt",  0b0011111>;
-def LUTI4_4ZZT2Z    : sme2_luti4_vector_vg4<0b00, 0b00,"luti4">;
+def LUTI4_4ZZT2Z : sme2_luti4_vector_vg4<0b00, 0b00,"luti4">;
 } //[HasSME2, HasSME_LUTv2]
 
 let Predicates = [HasSME2p1, HasSME_LUTv2] in {
diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-write-zt.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-write-zt.ll
new file mode 100644
index 00000000000000..778f31194baf45
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-write-zt.ll
@@ -0,0 +1,17 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -verify-machineinstrs -force-streaming < %s | FileCheck %s
+
+target triple = "aarch64-linux"
+
+define {<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>}  @test_luti4_zt_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1) #0 {
+; CHECK-LABEL: test_luti4_zt_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    luti4 { z0.b - z3.b }, zt0, { z0, z1 }
+; CHECK-NEXT:    ret
+  %res = call {<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>} @llvm.aarch64.sme.luti4.zt.x4.nxv16i8(i32 0, <vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1)
+  ret {<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>} %res
+}
+
+attributes #0 = { "target-features"="+sme2,+sme-lutv2"}

SpencerAbson · 2024-07-29T17:23:53Z

clang/include/clang/Basic/arm_sme.td

@@ -817,4 +817,9 @@ multiclass ZAReadzArray<string vg_num>{

 defm SVREADZ_VG2 :  ZAReadzArray<"2">;
 defm SVREADZ_VG4 :  ZAReadzArray<"4">;
+
+let SMETargetGuard = "sme2,sme-lutv2" in {
+  def SVLUTI4_ZT_X4 : SInst<"svluti4_zt_{d}_x4", "4i2.u", "cUc", MergeNone, "aarch64_sme_luti4_zt_x4", [IsStreaming, IsInOutZT0], [ImmCheck<0, ImmCheck0_0>]>;


Should the ZT0 diagnostics flag not be IsInZT0, as described in the ACLE? We are not changing the state of ZT0.

Thank you Spencer!

clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp

CarolineConcatto · 2024-09-10T09:53:39Z

The ACLE PR has been merged, so this patch can be merged once it is approved.
Hence the ping.

SpencerAbson

Thank you - just a few nits but it LGTM.

clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp

SpencerAbson · 2024-09-10T15:47:22Z

llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp

@@ -410,7 +410,7 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {
  }

  void SelectMultiVectorLuti(SDNode *Node, unsigned NumOutVecs, unsigned Opc,
-                             uint32_t MaxImm);
+                             uint32_t MaxImm, bool IsMultiVector = false);


nit: Could this bool be renamed to something like AreIndicesMultiVector?

There are now two functions, one for the lane variant and one without, so this parameter is no longer used.

SpencerAbson · 2024-09-10T16:29:50Z

llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp

@@ -5415,6 +5423,11 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
        SelectMultiVectorLuti(Node, 2, Opc, 3);
      return;
    }
+    case Intrinsic::aarch64_sme_luti4_zt_x4: {
+      // Does not have immediate but it has 2ZPR input
+      SelectMultiVectorLuti(Node, 4, AArch64::LUTI4_4ZZT2Z, 0, true);


No strong opinion, but as IsMultiVector and MaxImm are mutually exclusive parameters (i.e., if you supply one, it makes no sense to supply the other), you could split this into two separate functions.

I created a SelectMultiVectorLutiLane and a SelectMultiVectorLuti.

…on (#97755)" Going to revert in order to fix the clang test, as it is failing. This reverts commit 445d8b2.

…#97755) This patch was reverted because of a failing C test. That has now been fixed, so the patch can be merged into main again. This patch adds these intrinsics: // Variants are also available for: _s8 svuint8x4_t svluti4_zt_u8_x4(uint64_t zt0, svuint8x2_t zn) __arm_streaming __arm_in("zt0"); according to PR#324[1] [1]ARM-software/acle#324 OBS.: Fix the clang test run line

…#97755) This patch was reverted because of a failing C test. That has now been fixed, so the patch can be merged into main again. This patch adds these intrinsics: // Variants are also available for: _s8 svuint8x4_t svluti4_zt_u8_x4(uint64_t zt0, svuint8x2_t zn) __arm_streaming __arm_in("zt0"); according to PR#324[1] [1]ARM-software/acle#324 OBS.: Fix the clang test run line. Address comments about the functions SelectMultiVectorLuti

llvm-ci · 2024-09-25T12:47:07Z

LLVM Buildbot has detected a new failure on builder premerge-monolithic-linux running on premerge-linux-1 while building clang,llvm at step 7 "test-build-unified-tree-check-all".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/153/builds/10010

Here is the relevant piece of the build log for the reference

Step 7 (test-build-unified-tree-check-all) failure: test (failure)
******************** TEST 'Clang :: CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_zt.c' FAILED ********************
Exit Code: 1

Command Output (stderr):
--
RUN: at line 2: /build/buildbot/premerge-monolithic-linux/build/bin/clang -cc1 -internal-isystem /build/buildbot/premerge-monolithic-linux/build/lib/clang/20/include -nostdsysteminc  -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-lutv2 -O2 -Werror -Wall -emit-llvm -o - /build/buildbot/premerge-monolithic-linux/llvm-project/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_zt.c | /build/buildbot/premerge-monolithic-linux/build/bin/FileCheck /build/buildbot/premerge-monolithic-linux/llvm-project/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_zt.c
+ /build/buildbot/premerge-monolithic-linux/build/bin/clang -cc1 -internal-isystem /build/buildbot/premerge-monolithic-linux/build/lib/clang/20/include -nostdsysteminc -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-lutv2 -O2 -Werror -Wall -emit-llvm -o - /build/buildbot/premerge-monolithic-linux/llvm-project/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_zt.c
+ /build/buildbot/premerge-monolithic-linux/build/bin/FileCheck /build/buildbot/premerge-monolithic-linux/llvm-project/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_zt.c
/build/buildbot/premerge-monolithic-linux/llvm-project/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_zt.c:12:17: error: CHECK-LABEL: expected string not found in input
// CHECK-LABEL: define dso_local <vscale x 64 x i8> @test_luti4_zt_u8_x4(
                ^
<stdin>:1:1: note: scanning from here
; ModuleID = '/build/buildbot/premerge-monolithic-linux/llvm-project/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_zt.c'
^
<stdin>:7:65: note: possible intended match here
define dso_local { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @test_luti4_zt_u8_x4(<vscale x 16 x i8> %op.coerce0, <vscale x 16 x i8> %op.coerce1) local_unnamed_addr #0 {
                                                                ^

Input file: <stdin>
Check file: /build/buildbot/premerge-monolithic-linux/llvm-project/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_zt.c

-dump-input=help explains the following input dump.

Input was:
<<<<<<
            1: ; ModuleID = '/build/buildbot/premerge-monolithic-linux/llvm-project/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_zt.c' 
label:12'0     X~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ error: no match found
            2: source_filename = "/build/buildbot/premerge-monolithic-linux/llvm-project/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_zt.c" 
label:12'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            3: target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32" 
label:12'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            4: target triple = "aarch64-none-linux-gnu" 
label:12'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            5:  
label:12'0     ~
            6: ; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn 
label:12'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            7: define dso_local { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @test_luti4_zt_u8_x4(<vscale x 16 x i8> %op.coerce0, <vscale x 16 x i8> %op.coerce1) local_unnamed_addr #0 { 
label:12'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
label:12'1                                                                     ?                                                                                                                                                 possible intended match
            8: entry: 
label:12'0     ~~~~~~~
            9:  %retval = alloca { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16 
label:12'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
           10:  %0 = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.luti4.zt.x4.nxv16i8(i32 0, <vscale x 16 x i8> %op.coerce0, <vscale x 16 x i8> %op.coerce1) 
label:12'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
           11:  %1 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %0, 0 
label:12'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
           12:  %2 = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> poison, <vscale x 16 x i8> %1, i64 0) 
label:12'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
...

llvm-ci · 2024-09-26T05:57:41Z

LLVM Buildbot has detected a new failure on builder clang-x86_64-debian-fast running on gribozavr4 while building clang,llvm at step 6 "test-build-unified-tree-check-all".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/56/builds/8336

Here is the relevant piece of the build log for the reference

Step 6 (test-build-unified-tree-check-all) failure: test (failure)
******************** TEST 'Clang :: CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_zt.c' FAILED ********************
Exit Code: 1

Command Output (stderr):
--
RUN: at line 2: /b/1/clang-x86_64-debian-fast/llvm.obj/bin/clang -cc1 -internal-isystem /b/1/clang-x86_64-debian-fast/llvm.obj/lib/clang/20/include -nostdsysteminc  -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-lutv2 -O2 -Werror -Wall -emit-llvm -o - /b/1/clang-x86_64-debian-fast/llvm.src/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_zt.c | /b/1/clang-x86_64-debian-fast/llvm.obj/bin/FileCheck /b/1/clang-x86_64-debian-fast/llvm.src/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_zt.c
+ /b/1/clang-x86_64-debian-fast/llvm.obj/bin/clang -cc1 -internal-isystem /b/1/clang-x86_64-debian-fast/llvm.obj/lib/clang/20/include -nostdsysteminc -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-lutv2 -O2 -Werror -Wall -emit-llvm -o - /b/1/clang-x86_64-debian-fast/llvm.src/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_zt.c
+ /b/1/clang-x86_64-debian-fast/llvm.obj/bin/FileCheck /b/1/clang-x86_64-debian-fast/llvm.src/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_zt.c
/b/1/clang-x86_64-debian-fast/llvm.src/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_zt.c:12:17: error: CHECK-LABEL: expected string not found in input
// CHECK-LABEL: define dso_local <vscale x 64 x i8> @test_luti4_zt_u8_x4(
                ^
<stdin>:1:1: note: scanning from here
; ModuleID = '/b/1/clang-x86_64-debian-fast/llvm.src/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_zt.c'
^
<stdin>:7:65: note: possible intended match here
define dso_local { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @test_luti4_zt_u8_x4(<vscale x 16 x i8> %op.coerce0, <vscale x 16 x i8> %op.coerce1) local_unnamed_addr #0 {
                                                                ^

Input file: <stdin>
Check file: /b/1/clang-x86_64-debian-fast/llvm.src/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_zt.c

-dump-input=help explains the following input dump.

Input was:
<<<<<<
            1: ; ModuleID = '/b/1/clang-x86_64-debian-fast/llvm.src/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_zt.c' 
label:12'0     X~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ error: no match found
            2: source_filename = "/b/1/clang-x86_64-debian-fast/llvm.src/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_zt.c" 
label:12'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            3: target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32" 
label:12'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            4: target triple = "aarch64-none-linux-gnu" 
label:12'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            5:  
label:12'0     ~
            6: ; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn 
label:12'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            7: define dso_local { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @test_luti4_zt_u8_x4(<vscale x 16 x i8> %op.coerce0, <vscale x 16 x i8> %op.coerce1) local_unnamed_addr #0 { 
label:12'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
label:12'1                                                                     ?                                                                                                                                                 possible intended match
            8: entry: 
label:12'0     ~~~~~~~
            9:  %retval = alloca { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16 
label:12'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
           10:  %0 = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.luti4.zt.x4.nxv16i8(i32 0, <vscale x 16 x i8> %op.coerce0, <vscale x 16 x i8> %op.coerce1) 
label:12'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
           11:  %1 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %0, 0 
label:12'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
           12:  %2 = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> poison, <vscale x 16 x i8> %1, i64 0) 
label:12'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
...

llvm-ci · 2024-09-26T06:10:45Z

LLVM Buildbot has detected a new failure on builder llvm-x86_64-debian-dylib running on gribozavr4 while building clang,llvm at step 6 "test-build-unified-tree-check-clang".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/60/builds/8580

Here is the relevant piece of the build log for the reference

Step 6 (test-build-unified-tree-check-clang) failure: test (failure)
******************** TEST 'Clang :: CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_zt.c' FAILED ********************
Exit Code: 1

Command Output (stderr):
--
RUN: at line 2: /b/1/llvm-x86_64-debian-dylib/build/bin/clang -cc1 -internal-isystem /b/1/llvm-x86_64-debian-dylib/build/lib/clang/20/include -nostdsysteminc  -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-lutv2 -O2 -Werror -Wall -emit-llvm -o - /b/1/llvm-x86_64-debian-dylib/llvm-project/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_zt.c | /b/1/llvm-x86_64-debian-dylib/build/bin/FileCheck /b/1/llvm-x86_64-debian-dylib/llvm-project/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_zt.c
+ /b/1/llvm-x86_64-debian-dylib/build/bin/clang -cc1 -internal-isystem /b/1/llvm-x86_64-debian-dylib/build/lib/clang/20/include -nostdsysteminc -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-lutv2 -O2 -Werror -Wall -emit-llvm -o - /b/1/llvm-x86_64-debian-dylib/llvm-project/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_zt.c
+ /b/1/llvm-x86_64-debian-dylib/build/bin/FileCheck /b/1/llvm-x86_64-debian-dylib/llvm-project/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_zt.c
/b/1/llvm-x86_64-debian-dylib/llvm-project/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_zt.c:12:17: error: CHECK-LABEL: expected string not found in input
// CHECK-LABEL: define dso_local <vscale x 64 x i8> @test_luti4_zt_u8_x4(
                ^
<stdin>:1:1: note: scanning from here
; ModuleID = '/b/1/llvm-x86_64-debian-dylib/llvm-project/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_zt.c'
^
<stdin>:7:65: note: possible intended match here
define dso_local { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @test_luti4_zt_u8_x4(<vscale x 16 x i8> %op.coerce0, <vscale x 16 x i8> %op.coerce1) local_unnamed_addr #0 {
                                                                ^

Input file: <stdin>
Check file: /b/1/llvm-x86_64-debian-dylib/llvm-project/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_zt.c

-dump-input=help explains the following input dump.

Input was:
<<<<<<
            1: ; ModuleID = '/b/1/llvm-x86_64-debian-dylib/llvm-project/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_zt.c' 
label:12'0     X~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ error: no match found
            2: source_filename = "/b/1/llvm-x86_64-debian-dylib/llvm-project/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_zt.c" 
label:12'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            3: target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32" 
label:12'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            4: target triple = "aarch64-none-linux-gnu" 
label:12'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            5:  
label:12'0     ~
            6: ; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn 
label:12'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            7: define dso_local { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @test_luti4_zt_u8_x4(<vscale x 16 x i8> %op.coerce0, <vscale x 16 x i8> %op.coerce1) local_unnamed_addr #0 { 
label:12'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
label:12'1                                                                     ?                                                                                                                                                 possible intended match
            8: entry: 
label:12'0     ~~~~~~~
            9:  %retval = alloca { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16 
label:12'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
           10:  %0 = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.luti4.zt.x4.nxv16i8(i32 0, <vscale x 16 x i8> %op.coerce0, <vscale x 16 x i8> %op.coerce1) 
label:12'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
           11:  %1 = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %0, 0 
label:12'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
           12:  %2 = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> poison, <vscale x 16 x i8> %1, i64 0) 
label:12'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
...

…) (#109953) This patch was reverted because of a failing C test. That has now been fixed, so the patch can be merged into main again. This patch adds these intrinsics: // Variants are also available for: _s8 svuint8x4_t svluti4_zt_u8_x4(uint64_t zt0, svuint8x2_t zn) __arm_streaming __arm_in("zt0"); according to PR#324[1] [1]ARM-software/acle#324

[Clang][LLVM][AArch64] Add intrinsic for LUTI4 SME2 instruction

22f5bb7

This patch adds these intrinsics: // Variants are also available for: _s8 svuint8x4_t svluti4_zt_u8_x4(uint64_t zt0, svuint8x2_t zn) __arm_streaming __arm_in("zt0"); according to PR#324[1] [1]ARM-software/acle#324

llvmbot added clang Clang issues not falling into any other category backend:AArch64 clang:frontend Language frontend issues, e.g. anything involving "Sema" llvm:ir labels Jul 4, 2024

CarolineConcatto requested review from momchil-velikov and Lukacma July 4, 2024 17:18

LUT index should always be unsigned

1163ccf

SpencerAbson reviewed Jul 30, 2024

View reviewed changes

Fix ZT0 attribute in clang

6a8be2f

SpencerAbson approved these changes Sep 10, 2024

View reviewed changes

CarolineConcatto merged commit 445d8b2 into llvm:main Sep 25, 2024
8 checks passed

CarolineConcatto deleted the luti4 branch September 25, 2024 08:53

CarolineConcatto added a commit that referenced this pull request Sep 25, 2024

Revert "[Clang][LLVM][AArch64] Add intrinsic for LUTI4 SME2 instructi…

02f46d7

…on (#97755)" Reverting in order to fix the clang test, as it is failing. This reverts commit 445d8b2.

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[Clang][LLVM][AArch64] Add intrinsic for LUTI4 SME2 instruction #97755

[Clang][LLVM][AArch64] Add intrinsic for LUTI4 SME2 instruction #97755

Uh oh!

CarolineConcatto commented Jul 4, 2024

Uh oh!

llvmbot commented Jul 4, 2024 •

edited

Loading

Uh oh!

SpencerAbson Jul 29, 2024

Uh oh!

CarolineConcatto Aug 7, 2024

Uh oh!

Uh oh!

CarolineConcatto commented Sep 10, 2024

Uh oh!

SpencerAbson left a comment

Uh oh!

Uh oh!

SpencerAbson Sep 10, 2024

Uh oh!

CarolineConcatto Sep 23, 2024

Uh oh!

SpencerAbson Sep 10, 2024

Uh oh!

CarolineConcatto Sep 23, 2024

Uh oh!

Uh oh!

llvm-ci commented Sep 25, 2024

Uh oh!

llvm-ci commented Sep 26, 2024

Uh oh!

llvm-ci commented Sep 26, 2024

Uh oh!

Uh oh!

[Clang][LLVM][AArch64] Add intrinsic for LUTI4 SME2 instruction #97755

[Clang][LLVM][AArch64] Add intrinsic for LUTI4 SME2 instruction #97755

Uh oh!

Conversation

CarolineConcatto commented Jul 4, 2024

Uh oh!

llvmbot commented Jul 4, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

SpencerAbson Jul 29, 2024

Choose a reason for hiding this comment

Uh oh!

CarolineConcatto Aug 7, 2024

Choose a reason for hiding this comment

Uh oh!

Uh oh!

CarolineConcatto commented Sep 10, 2024

Uh oh!

SpencerAbson left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

SpencerAbson Sep 10, 2024

Choose a reason for hiding this comment

Uh oh!

CarolineConcatto Sep 23, 2024

Choose a reason for hiding this comment

Uh oh!

SpencerAbson Sep 10, 2024

Choose a reason for hiding this comment

Uh oh!

CarolineConcatto Sep 23, 2024

Choose a reason for hiding this comment

Uh oh!

Uh oh!

llvm-ci commented Sep 25, 2024

Uh oh!

llvm-ci commented Sep 26, 2024

Uh oh!

llvm-ci commented Sep 26, 2024

Uh oh!

Uh oh!

llvmbot commented Jul 4, 2024 •

edited

Loading