Skip to content

Commit 2119666

Browse files
CarolineConcatto authored and puja2196 committed
[Clang][LLVM][AArch64] Add intrinsic for MOVT SME2 instruction (#97602)
This patch adds these intrinsics:

  // Variants are also available for:
  // [_s8], [_u16], [_s16], [_u32], [_s32], [_u64], [_s64]
  // [_bf16], [_f16], [_f32], [_f64]
  void svwrite_lane_zt[_u8](uint64_t zt0, svuint8_t zt, uint64_t idx) __arm_streaming __arm_inout("zt0");
  void svwrite_zt[_u8](uint64_t zt0, svuint8_t zt) __arm_streaming __arm_inout("zt0");

according to PR#324 [1].

[1] ARM-software/acle#324
1 parent a1a3cf2 commit 2119666

File tree

8 files changed

+601
-4
lines changed

8 files changed

+601
-4
lines changed

clang/include/clang/Basic/arm_sme.td

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -819,6 +819,8 @@ defm SVREADZ_VG2 : ZAReadzArray<"2">;
819819
defm SVREADZ_VG4 : ZAReadzArray<"4">;
820820

821821
let SMETargetGuard = "sme2,sme-lutv2" in {
822+
def SVWRITE_LANE_ZT : SInst<"svwrite_lane_zt[_{d}]", "vidi", "cUcsUsiUilUlfhdb", MergeNone, "aarch64_sme_write_lane_zt", [IsStreaming, IsInOutZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck1_3>]>;
823+
def SVWRITE_ZT : SInst<"svwrite_zt[_{d}]", "vid", "cUcsUsiUilUlfhdb", MergeNone, "aarch64_sme_write_zt", [IsStreaming, IsOutZT0], [ImmCheck<0, ImmCheck0_0>]>;
822824
def SVLUTI4_ZT_X4 : SInst<"svluti4_zt_{d}_x4", "4i2.u", "cUc", MergeNone, "aarch64_sme_luti4_zt_x4", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>]>;
823825
}
824826

clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_write_lane_zt.c

Lines changed: 402 additions & 0 deletions
Large diffs are not rendered by default.

clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu \
2-
// RUN: -target-feature +bf16 -target-feature +sve -target-feature +sme -target-feature +sve2 -target-feature +sme2 -target-feature +sme-i16i64 -target-feature +sme-f64f64 -fsyntax-only -verify %s
2+
// RUN: -target-feature +bf16 -target-feature +sve -target-feature +sme -target-feature +sve2 -target-feature +sme2 -target-feature +sme-i16i64 -target-feature +sme-f64f64 -target-feature +sme-lutv2 -fsyntax-only -verify %s
33

44
// REQUIRES: aarch64-registered-target
55

@@ -351,6 +351,15 @@ void test_svdot_multi_za32_bad_lane(uint32_t slice_base, svuint16_t z_u16,
351351
svsudot_lane_za32_s8_vg1x4(slice_base, z_s8x4, z_u8, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
352352
}
353353

354+
void test_write_zt() __arm_streaming __arm_inout("zt0") {
355+
// Check Zt tile 0
356+
svwrite_lane_zt(1, svundef_s8(), 1); // expected-error {{argument value 1 is outside the valid range [0, 0]}}
357+
svwrite_zt(1, svundef_s8()); // expected-error {{argument value 1 is outside the valid range [0, 0]}}
358+
// Check index
359+
svwrite_lane_zt(0, svundef_s8(), 0); // expected-error {{argument value 0 is outside the valid range [1, 3]}}
360+
svwrite_lane_zt(0, svundef_s8(), 4); // expected-error {{argument value 4 is outside the valid range [1, 3]}}
361+
}
362+
354363
void test_luti4_zt_x4(svuint8x2_t op) __arm_streaming __arm_in("zt0") {
355364
// Check Zt tile 0
356365
svluti4_zt_u8_x4(1, op); // expected-error {{argument value 1 is outside the valid range [0, 0]}}

llvm/include/llvm/IR/IntrinsicsAArch64.td

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2961,6 +2961,15 @@ let TargetPrefix = "aarch64" in {
29612961
[llvm_i32_ty],
29622962
[IntrNoMem, IntrHasSideEffects]>;
29632963

2964+
def int_aarch64_sme_write_lane_zt
2965+
: DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_anyvector_ty, llvm_i32_ty],
2966+
[ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, IntrNoMem, IntrHasSideEffects]>;
2967+
2968+
def int_aarch64_sme_write_zt
2969+
: DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_anyvector_ty],
2970+
[ImmArg<ArgIndex<0>>, IntrNoMem, IntrHasSideEffects]>;
2971+
2972+
29642973
def int_aarch64_sme_zero : DefaultAttrsIntrinsic<[], [llvm_i32_ty], [ImmArg<ArgIndex<0>>]>;
29652974

29662975
class SME_OuterProduct_Intrinsic

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3242,6 +3242,8 @@ MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
32423242
return EmitZero(MI, BB);
32433243
case AArch64::ZERO_T_PSEUDO:
32443244
return EmitZTInstr(MI, BB, AArch64::ZERO_T, /*Op0IsDef=*/true);
3245+
case AArch64::MOVT_TIZ_PSEUDO:
3246+
return EmitZTInstr(MI, BB, AArch64::MOVT_TIZ, /*Op0IsDef=*/true);
32453247
}
32463248
}
32473249

llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -939,8 +939,8 @@ defm FAMIN_4Z4Z : sme2_fp_sve_destructive_vector_vg4_multi<"famin", 0b0010101>;
939939
} //[HasSME2, HasFAMINMAX]
940940

941941
let Predicates = [HasSME2, HasSME_LUTv2] in {
942-
defm MOVT : sme2_movt_zt_to_zt<"movt", 0b0011111>;
943-
def LUTI4_4ZZT2Z : sme2_luti4_vector_vg4<0b00, 0b00,"luti4">;
942+
defm MOVT_TIZ : sme2_movt_zt_to_zt<"movt", 0b0011111, int_aarch64_sme_write_lane_zt, int_aarch64_sme_write_zt>;
943+
def LUTI4_4ZZT2Z : sme2_luti4_vector_vg4<0b00, 0b00,"luti4">;
944944
} //[HasSME2, HasSME_LUTv2]
945945

946946
let Predicates = [HasSME2p1, HasSME_LUTv2] in {

llvm/lib/Target/AArch64/SMEInstrFormats.td

Lines changed: 12 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3283,10 +3283,21 @@ class sme2_movt_zt_to_zt<string mnemonic, bits<7> opc>
32833283
let Inst{4-0} = Zt;
32843284
}
32853285

3286-
multiclass sme2_movt_zt_to_zt<string mnemonic, bits<7> opc> {
3286+
multiclass sme2_movt_zt_to_zt<string mnemonic, bits<7> opc, SDPatternOperator intrinsic_lane, SDPatternOperator intrinsic> {
32873287
def NAME : sme2_movt_zt_to_zt<mnemonic, opc>;
3288+
def NAME # _PSEUDO
3289+
: Pseudo<(outs), (ins ZTR:$ZT, sme_elm_idx0_3:$off2, ZPRAny:$Zt), []>, Sched<[]> {
3290+
let usesCustomInserter = 1;
3291+
}
32883292
def : InstAlias<mnemonic # "\t$ZTt, $Zt",
32893293
(!cast<Instruction>(NAME) ZTR:$ZTt, 0, ZPRAny:$Zt), 1>;
3294+
3295+
foreach vt = [nxv16i8, nxv8i16, nxv4i32, nxv2i64, nxv8f16, nxv4f32, nxv2f64, nxv8bf16] in {
3296+
def : Pat<(intrinsic_lane (imm_to_zt untyped:$zt), vt:$zn, sme_elm_idx0_3:$imm),
3297+
(!cast<Instruction>(NAME # _PSEUDO) $zt, $imm, $zn)>;
3298+
def : Pat<(intrinsic (imm_to_zt untyped:$zt), vt:$zn),
3299+
(!cast<Instruction>(NAME # _PSEUDO) $zt, 0, $zn)>;
3300+
}
32903301
}
32913302

32923303
//===----------------------------------------------------------------------===//
(New file — an LLVM IR `llc` codegen test; its path was not captured in this extract.)

Lines changed: 162 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,162 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -verify-machineinstrs -force-streaming < %s | FileCheck %s
3+
4+
target triple = "aarch64-linux"
5+
6+
7+
define void @test_write_zt_i8_0(<vscale x 16 x i8> %zn) #0 {
8+
; CHECK-LABEL: test_write_zt_i8_0:
9+
; CHECK: // %bb.0:
10+
; CHECK-NEXT: movt zt0, z0
11+
; CHECK-NEXT: ret
12+
call void @llvm.aarch64.sme.write.lane.zt.nxv16i8(i32 0, <vscale x 16 x i8> %zn, i32 0)
13+
ret void
14+
}
15+
16+
define void @test_write_zt_i8_1(<vscale x 16 x i8> %zn) #0 {
17+
; CHECK-LABEL: test_write_zt_i8_1:
18+
; CHECK: // %bb.0:
19+
; CHECK-NEXT: movt zt0[1, mul vl], z0
20+
; CHECK-NEXT: ret
21+
call void @llvm.aarch64.sme.write.lane.zt.nxv16i8(i32 0, <vscale x 16 x i8> %zn, i32 1)
22+
ret void
23+
}
24+
25+
define void @test_write_zt_i16_2(<vscale x 8 x i16> %zn) #0 {
26+
; CHECK-LABEL: test_write_zt_i16_2:
27+
; CHECK: // %bb.0:
28+
; CHECK-NEXT: movt zt0[2, mul vl], z0
29+
; CHECK-NEXT: ret
30+
call void @llvm.aarch64.sme.write.lane.zt.nxv8i16(i32 0, <vscale x 8 x i16> %zn, i32 2)
31+
ret void
32+
}
33+
34+
define void @test_write_zt_i32_3(<vscale x 4 x i32> %zn) #0 {
35+
; CHECK-LABEL: test_write_zt_i32_3:
36+
; CHECK: // %bb.0:
37+
; CHECK-NEXT: movt zt0[3, mul vl], z0
38+
; CHECK-NEXT: ret
39+
call void @llvm.aarch64.sme.write.lane.zt.nxv4i32(i32 0, <vscale x 4 x i32> %zn, i32 3)
40+
ret void
41+
}
42+
43+
define void @test_write_zt_i64_1(<vscale x 2 x i64> %zn) #0 {
44+
; CHECK-LABEL: test_write_zt_i64_1:
45+
; CHECK: // %bb.0:
46+
; CHECK-NEXT: movt zt0[1, mul vl], z0
47+
; CHECK-NEXT: ret
48+
call void @llvm.aarch64.sme.write.lane.zt.nxv2i64(i32 0, <vscale x 2 x i64> %zn, i32 1)
49+
ret void
50+
}
51+
52+
define void @test_write_zt_f16_2(<vscale x 8 x half> %zn) #0 {
53+
; CHECK-LABEL: test_write_zt_f16_2:
54+
; CHECK: // %bb.0:
55+
; CHECK-NEXT: movt zt0[2, mul vl], z0
56+
; CHECK-NEXT: ret
57+
call void @llvm.aarch64.sme.write.lane.zt.nxv8f16(i32 0, <vscale x 8 x half> %zn, i32 2)
58+
ret void
59+
}
60+
61+
define void @test_write_zt_f32_3(<vscale x 4 x float> %zn) #0 {
62+
; CHECK-LABEL: test_write_zt_f32_3:
63+
; CHECK: // %bb.0:
64+
; CHECK-NEXT: movt zt0[3, mul vl], z0
65+
; CHECK-NEXT: ret
66+
call void @llvm.aarch64.sme.write.lane.zt.nxv4f32(i32 0, <vscale x 4 x float> %zn, i32 3)
67+
ret void
68+
}
69+
70+
define void @test_write_zt_f64_1(<vscale x 2 x double> %zn) #0 {
71+
; CHECK-LABEL: test_write_zt_f64_1:
72+
; CHECK: // %bb.0:
73+
; CHECK-NEXT: movt zt0[1, mul vl], z0
74+
; CHECK-NEXT: ret
75+
call void @llvm.aarch64.sme.write.lane.zt.nxv2f64(i32 0, <vscale x 2 x double> %zn, i32 1)
76+
ret void
77+
}
78+
79+
define void @test_write_zt_bf16_2(<vscale x 8 x bfloat> %zn) #0 {
80+
; CHECK-LABEL: test_write_zt_bf16_2:
81+
; CHECK: // %bb.0:
82+
; CHECK-NEXT: movt zt0[2, mul vl], z0
83+
; CHECK-NEXT: ret
84+
call void @llvm.aarch64.sme.write.lane.zt.nxv8bf16(i32 0, <vscale x 8 x bfloat> %zn, i32 2)
85+
ret void
86+
}
87+
88+
;; ALIAS
89+
90+
define void @test_write_zt_i8(<vscale x 16 x i8> %v) #0 {
91+
; CHECK-LABEL: test_write_zt_i8:
92+
; CHECK: // %bb.0:
93+
; CHECK-NEXT: movt zt0, z0
94+
; CHECK-NEXT: ret
95+
tail call void @llvm.aarch64.sme.write.zt.nxv16i8(i32 0, <vscale x 16 x i8> %v)
96+
ret void
97+
}
98+
99+
define void @test_write_zt_i16(<vscale x 8 x i16> %v) #0 {
100+
; CHECK-LABEL: test_write_zt_i16:
101+
; CHECK: // %bb.0:
102+
; CHECK-NEXT: movt zt0, z0
103+
; CHECK-NEXT: ret
104+
tail call void @llvm.aarch64.sme.write.zt.nxv8i16(i32 0, <vscale x 8 x i16> %v)
105+
ret void
106+
}
107+
108+
define void @test_write_zt_i32(<vscale x 4 x i32> %v) #0 {
109+
; CHECK-LABEL: test_write_zt_i32:
110+
; CHECK: // %bb.0:
111+
; CHECK-NEXT: movt zt0, z0
112+
; CHECK-NEXT: ret
113+
tail call void @llvm.aarch64.sme.write.zt.nxv4i32(i32 0, <vscale x 4 x i32> %v)
114+
ret void
115+
}
116+
117+
define void @test_write_zt_i64(<vscale x 2 x i64> %v) #0 {
118+
; CHECK-LABEL: test_write_zt_i64:
119+
; CHECK: // %bb.0:
120+
; CHECK-NEXT: movt zt0, z0
121+
; CHECK-NEXT: ret
122+
tail call void @llvm.aarch64.sme.write.zt.nxv2i64(i32 0, <vscale x 2 x i64> %v)
123+
ret void
124+
}
125+
126+
define void @test_write_zt_f16(<vscale x 8 x half> %v) #0 {
127+
; CHECK-LABEL: test_write_zt_f16:
128+
; CHECK: // %bb.0:
129+
; CHECK-NEXT: movt zt0, z0
130+
; CHECK-NEXT: ret
131+
tail call void @llvm.aarch64.sme.write.zt.nxv8f16(i32 0, <vscale x 8 x half> %v)
132+
ret void
133+
}
134+
135+
define void @test_write_zt_bf16(<vscale x 8 x bfloat> %v) #0 {
136+
; CHECK-LABEL: test_write_zt_bf16:
137+
; CHECK: // %bb.0:
138+
; CHECK-NEXT: movt zt0, z0
139+
; CHECK-NEXT: ret
140+
tail call void @llvm.aarch64.sme.write.zt.nxv8bf16(i32 0, <vscale x 8 x bfloat> %v)
141+
ret void
142+
}
143+
144+
define void @test_write_zt_f32(<vscale x 4 x float> %v) #0 {
145+
; CHECK-LABEL: test_write_zt_f32:
146+
; CHECK: // %bb.0:
147+
; CHECK-NEXT: movt zt0, z0
148+
; CHECK-NEXT: ret
149+
tail call void @llvm.aarch64.sme.write.zt.nxv4f32(i32 0, <vscale x 4 x float> %v)
150+
ret void
151+
}
152+
153+
define void @test_write_zt_f64(<vscale x 2 x double> %v) #0 {
154+
; CHECK-LABEL: test_write_zt_f64:
155+
; CHECK: // %bb.0:
156+
; CHECK-NEXT: movt zt0, z0
157+
; CHECK-NEXT: ret
158+
tail call void @llvm.aarch64.sme.write.zt.nxv2f64(i32 0, <vscale x 2 x double> %v)
159+
ret void
160+
}
161+
162+
attributes #0 = { "target-features"="+sme2,+sme-lutv2" }

0 commit comments

Comments
 (0)