|
7 | 7 | ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx90a -amdgpu-kernarg-preload-count=2 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX90a-PRELOAD-2 %s
|
8 | 8 | ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx90a -amdgpu-kernarg-preload-count=8 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX90a-PRELOAD-8 %s
|
9 | 9 |
|
10 |
| -define amdgpu_kernel void @ptr1_i8_kernel_preload_arg(ptr addrspace(1) %out, i8 %arg0) { |
| 10 | +define amdgpu_kernel void @ptr1_i8_kernel_preload_arg(ptr addrspace(1) %out, i8 %arg0) #0 { |
11 | 11 | ; GFX940-NO-PRELOAD-LABEL: ptr1_i8_kernel_preload_arg:
|
12 | 12 | ; GFX940-NO-PRELOAD: ; %bb.0:
|
13 | 13 | ; GFX940-NO-PRELOAD-NEXT: s_load_dword s4, s[0:1], 0x8
|
@@ -74,7 +74,7 @@ define amdgpu_kernel void @ptr1_i8_kernel_preload_arg(ptr addrspace(1) %out, i8
|
74 | 74 | ret void
|
75 | 75 | }
|
76 | 76 |
|
77 |
| -define amdgpu_kernel void @ptr1_i8_zext_kernel_preload_arg(ptr addrspace(1) %out, i8 zeroext %arg0) { |
| 77 | +define amdgpu_kernel void @ptr1_i8_zext_kernel_preload_arg(ptr addrspace(1) %out, i8 zeroext %arg0) #0 { |
78 | 78 | ; GFX940-NO-PRELOAD-LABEL: ptr1_i8_zext_kernel_preload_arg:
|
79 | 79 | ; GFX940-NO-PRELOAD: ; %bb.0:
|
80 | 80 | ; GFX940-NO-PRELOAD-NEXT: s_load_dword s4, s[0:1], 0x8
|
@@ -145,7 +145,7 @@ define amdgpu_kernel void @ptr1_i8_zext_kernel_preload_arg(ptr addrspace(1) %out
|
145 | 145 | ret void
|
146 | 146 | }
|
147 | 147 |
|
148 |
| -define amdgpu_kernel void @ptr1_i16_kernel_preload_arg(ptr addrspace(1) %out, i16 %arg0) { |
| 148 | +define amdgpu_kernel void @ptr1_i16_kernel_preload_arg(ptr addrspace(1) %out, i16 %arg0) #0 { |
149 | 149 | ; GFX940-NO-PRELOAD-LABEL: ptr1_i16_kernel_preload_arg:
|
150 | 150 | ; GFX940-NO-PRELOAD: ; %bb.0:
|
151 | 151 | ; GFX940-NO-PRELOAD-NEXT: s_load_dword s4, s[0:1], 0x8
|
@@ -212,7 +212,7 @@ define amdgpu_kernel void @ptr1_i16_kernel_preload_arg(ptr addrspace(1) %out, i1
|
212 | 212 | ret void
|
213 | 213 | }
|
214 | 214 |
|
215 |
| -define amdgpu_kernel void @ptr1_i32_kernel_preload_arg(ptr addrspace(1) %out, i32 %arg0) { |
| 215 | +define amdgpu_kernel void @ptr1_i32_kernel_preload_arg(ptr addrspace(1) %out, i32 %arg0) #0 { |
216 | 216 | ; GFX940-NO-PRELOAD-LABEL: ptr1_i32_kernel_preload_arg:
|
217 | 217 | ; GFX940-NO-PRELOAD: ; %bb.0:
|
218 | 218 | ; GFX940-NO-PRELOAD-NEXT: s_load_dword s4, s[0:1], 0x8
|
@@ -273,7 +273,7 @@ define amdgpu_kernel void @ptr1_i32_kernel_preload_arg(ptr addrspace(1) %out, i3
|
273 | 273 | }
|
274 | 274 |
|
275 | 275 |
|
276 |
| -define amdgpu_kernel void @i32_ptr1_i32_kernel_preload_arg(i32 %arg0, ptr addrspace(1) %out, i32 %arg1) { |
| 276 | +define amdgpu_kernel void @i32_ptr1_i32_kernel_preload_arg(i32 %arg0, ptr addrspace(1) %out, i32 %arg1) #0 { |
277 | 277 | ; GFX940-NO-PRELOAD-LABEL: i32_ptr1_i32_kernel_preload_arg:
|
278 | 278 | ; GFX940-NO-PRELOAD: ; %bb.0:
|
279 | 279 | ; GFX940-NO-PRELOAD-NEXT: s_load_dword s4, s[0:1], 0x10
|
@@ -346,7 +346,7 @@ define amdgpu_kernel void @i32_ptr1_i32_kernel_preload_arg(i32 %arg0, ptr addrsp
|
346 | 346 | ret void
|
347 | 347 | }
|
348 | 348 |
|
349 |
| -define amdgpu_kernel void @ptr1_i16_i16_kernel_preload_arg(ptr addrspace(1) %out, i16 %arg0, i16 %arg1) { |
| 349 | +define amdgpu_kernel void @ptr1_i16_i16_kernel_preload_arg(ptr addrspace(1) %out, i16 %arg0, i16 %arg1) #0 { |
350 | 350 | ; GFX940-NO-PRELOAD-LABEL: ptr1_i16_i16_kernel_preload_arg:
|
351 | 351 | ; GFX940-NO-PRELOAD: ; %bb.0:
|
352 | 352 | ; GFX940-NO-PRELOAD-NEXT: s_load_dword s4, s[0:1], 0x8
|
@@ -431,7 +431,7 @@ define amdgpu_kernel void @ptr1_i16_i16_kernel_preload_arg(ptr addrspace(1) %out
|
431 | 431 | ret void
|
432 | 432 | }
|
433 | 433 |
|
434 |
| -define amdgpu_kernel void @ptr1_v2i8_kernel_preload_arg(ptr addrspace(1) %out, <2 x i8> %in) { |
| 434 | +define amdgpu_kernel void @ptr1_v2i8_kernel_preload_arg(ptr addrspace(1) %out, <2 x i8> %in) #0 { |
435 | 435 | ; GFX940-NO-PRELOAD-LABEL: ptr1_v2i8_kernel_preload_arg:
|
436 | 436 | ; GFX940-NO-PRELOAD: ; %bb.0:
|
437 | 437 | ; GFX940-NO-PRELOAD-NEXT: s_load_dword s4, s[0:1], 0x8
|
@@ -500,7 +500,7 @@ define amdgpu_kernel void @ptr1_v2i8_kernel_preload_arg(ptr addrspace(1) %out, <
|
500 | 500 | }
|
501 | 501 |
|
502 | 502 |
|
503 |
| -define amdgpu_kernel void @byref_kernel_preload_arg(ptr addrspace(1) %out, ptr addrspace(4) byref(i32) align(256) %in.byref, i32 %after.offset) { |
| 503 | +define amdgpu_kernel void @byref_kernel_preload_arg(ptr addrspace(1) %out, ptr addrspace(4) byref(i32) align(256) %in.byref, i32 %after.offset) #0 { |
504 | 504 | ; GFX940-NO-PRELOAD-LABEL: byref_kernel_preload_arg:
|
505 | 505 | ; GFX940-NO-PRELOAD: ; %bb.0:
|
506 | 506 | ; GFX940-NO-PRELOAD-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x100
|
@@ -595,7 +595,7 @@ define amdgpu_kernel void @byref_kernel_preload_arg(ptr addrspace(1) %out, ptr a
|
595 | 595 | }
|
596 | 596 |
|
597 | 597 |
|
598 |
| -define amdgpu_kernel void @v8i32_kernel_preload_arg(ptr addrspace(1) nocapture %out, <8 x i32> %in) nounwind { |
| 598 | +define amdgpu_kernel void @v8i32_kernel_preload_arg(ptr addrspace(1) nocapture %out, <8 x i32> %in) #0 { |
599 | 599 | ; GFX940-NO-PRELOAD-LABEL: v8i32_kernel_preload_arg:
|
600 | 600 | ; GFX940-NO-PRELOAD: ; %bb.0:
|
601 | 601 | ; GFX940-NO-PRELOAD-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x20
|
@@ -717,7 +717,7 @@ define amdgpu_kernel void @v8i32_kernel_preload_arg(ptr addrspace(1) nocapture %
|
717 | 717 | ret void
|
718 | 718 | }
|
719 | 719 |
|
720 |
| -define amdgpu_kernel void @v3i16_kernel_preload_arg(ptr addrspace(1) nocapture %out, <3 x i16> %in) nounwind { |
| 720 | +define amdgpu_kernel void @v3i16_kernel_preload_arg(ptr addrspace(1) nocapture %out, <3 x i16> %in) #0 { |
721 | 721 | ; GFX940-NO-PRELOAD-LABEL: v3i16_kernel_preload_arg:
|
722 | 722 | ; GFX940-NO-PRELOAD: ; %bb.0:
|
723 | 723 | ; GFX940-NO-PRELOAD-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
|
@@ -787,7 +787,7 @@ define amdgpu_kernel void @v3i16_kernel_preload_arg(ptr addrspace(1) nocapture %
|
787 | 787 | ret void
|
788 | 788 | }
|
789 | 789 |
|
790 |
| -define amdgpu_kernel void @v3i32_kernel_preload_arg(ptr addrspace(1) nocapture %out, <3 x i32> %in) nounwind { |
| 790 | +define amdgpu_kernel void @v3i32_kernel_preload_arg(ptr addrspace(1) nocapture %out, <3 x i32> %in) #0 { |
791 | 791 | ; GFX940-NO-PRELOAD-LABEL: v3i32_kernel_preload_arg:
|
792 | 792 | ; GFX940-NO-PRELOAD: ; %bb.0:
|
793 | 793 | ; GFX940-NO-PRELOAD-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10
|
@@ -859,7 +859,7 @@ define amdgpu_kernel void @v3i32_kernel_preload_arg(ptr addrspace(1) nocapture %
|
859 | 859 | ret void
|
860 | 860 | }
|
861 | 861 |
|
862 |
| -define amdgpu_kernel void @v3f32_kernel_preload_arg(ptr addrspace(1) nocapture %out, <3 x float> %in) nounwind { |
| 862 | +define amdgpu_kernel void @v3f32_kernel_preload_arg(ptr addrspace(1) nocapture %out, <3 x float> %in) #0 { |
863 | 863 | ; GFX940-NO-PRELOAD-LABEL: v3f32_kernel_preload_arg:
|
864 | 864 | ; GFX940-NO-PRELOAD: ; %bb.0:
|
865 | 865 | ; GFX940-NO-PRELOAD-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10
|
@@ -931,7 +931,7 @@ define amdgpu_kernel void @v3f32_kernel_preload_arg(ptr addrspace(1) nocapture %
|
931 | 931 | ret void
|
932 | 932 | }
|
933 | 933 |
|
934 |
| -define amdgpu_kernel void @v5i8_kernel_preload_arg(ptr addrspace(1) nocapture %out, <5 x i8> %in) nounwind { |
| 934 | +define amdgpu_kernel void @v5i8_kernel_preload_arg(ptr addrspace(1) nocapture %out, <5 x i8> %in) #0 { |
935 | 935 | ; GFX940-NO-PRELOAD-LABEL: v5i8_kernel_preload_arg:
|
936 | 936 | ; GFX940-NO-PRELOAD: ; %bb.0:
|
937 | 937 | ; GFX940-NO-PRELOAD-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
|
@@ -1029,7 +1029,7 @@ define amdgpu_kernel void @v5i8_kernel_preload_arg(ptr addrspace(1) nocapture %o
|
1029 | 1029 | ret void
|
1030 | 1030 | }
|
1031 | 1031 |
|
1032 |
| -define amdgpu_kernel void @v5f64_kernel_preload_arg(ptr addrspace(1) nocapture %out, <5 x double> %in) nounwind { |
| 1032 | +define amdgpu_kernel void @v5f64_kernel_preload_arg(ptr addrspace(1) nocapture %out, <5 x double> %in) #0 { |
1033 | 1033 | ; GFX940-NO-PRELOAD-LABEL: v5f64_kernel_preload_arg:
|
1034 | 1034 | ; GFX940-NO-PRELOAD: ; %bb.0:
|
1035 | 1035 | ; GFX940-NO-PRELOAD-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x60
|
@@ -1169,7 +1169,7 @@ define amdgpu_kernel void @v5f64_kernel_preload_arg(ptr addrspace(1) nocapture %
|
1169 | 1169 | ret void
|
1170 | 1170 | }
|
1171 | 1171 |
|
1172 |
| -define amdgpu_kernel void @v8i8_kernel_preload_arg(ptr addrspace(1) %out, <8 x i8> %in) { |
| 1172 | +define amdgpu_kernel void @v8i8_kernel_preload_arg(ptr addrspace(1) %out, <8 x i8> %in) #0 { |
1173 | 1173 | ; GFX940-NO-PRELOAD-LABEL: v8i8_kernel_preload_arg:
|
1174 | 1174 | ; GFX940-NO-PRELOAD: ; %bb.0:
|
1175 | 1175 | ; GFX940-NO-PRELOAD-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
|
@@ -1289,7 +1289,7 @@ define amdgpu_kernel void @v8i8_kernel_preload_arg(ptr addrspace(1) %out, <8 x i
|
1289 | 1289 | ret void
|
1290 | 1290 | }
|
1291 | 1291 |
|
1292 |
| -define amdgpu_kernel void @i64_kernel_preload_arg(ptr addrspace(1) %out, i64 %a) { |
| 1292 | +define amdgpu_kernel void @i64_kernel_preload_arg(ptr addrspace(1) %out, i64 %a) #0 { |
1293 | 1293 | ; GFX940-NO-PRELOAD-LABEL: i64_kernel_preload_arg:
|
1294 | 1294 | ; GFX940-NO-PRELOAD: ; %bb.0:
|
1295 | 1295 | ; GFX940-NO-PRELOAD-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
|
@@ -1349,7 +1349,7 @@ define amdgpu_kernel void @i64_kernel_preload_arg(ptr addrspace(1) %out, i64 %a)
|
1349 | 1349 | ret void
|
1350 | 1350 | }
|
1351 | 1351 |
|
1352 |
| -define amdgpu_kernel void @f64_kernel_preload_arg(ptr addrspace(1) %out, double %in) { |
| 1352 | +define amdgpu_kernel void @f64_kernel_preload_arg(ptr addrspace(1) %out, double %in) #0 { |
1353 | 1353 | ; GFX940-NO-PRELOAD-LABEL: f64_kernel_preload_arg:
|
1354 | 1354 | ; GFX940-NO-PRELOAD: ; %bb.0:
|
1355 | 1355 | ; GFX940-NO-PRELOAD-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
|
@@ -1409,7 +1409,7 @@ define amdgpu_kernel void @f64_kernel_preload_arg(ptr addrspace(1) %out, double
|
1409 | 1409 | ret void
|
1410 | 1410 | }
|
1411 | 1411 |
|
1412 |
| -define amdgpu_kernel void @half_kernel_preload_arg(ptr addrspace(1) %out, half %in) { |
| 1412 | +define amdgpu_kernel void @half_kernel_preload_arg(ptr addrspace(1) %out, half %in) #0 { |
1413 | 1413 | ; GFX940-NO-PRELOAD-LABEL: half_kernel_preload_arg:
|
1414 | 1414 | ; GFX940-NO-PRELOAD: ; %bb.0:
|
1415 | 1415 | ; GFX940-NO-PRELOAD-NEXT: s_load_dword s4, s[0:1], 0x8
|
@@ -1469,7 +1469,7 @@ define amdgpu_kernel void @half_kernel_preload_arg(ptr addrspace(1) %out, half %
|
1469 | 1469 | ret void
|
1470 | 1470 | }
|
1471 | 1471 |
|
1472 |
| -define amdgpu_kernel void @bfloat_kernel_preload_arg(ptr addrspace(1) %out, bfloat %in) { |
| 1472 | +define amdgpu_kernel void @bfloat_kernel_preload_arg(ptr addrspace(1) %out, bfloat %in) #0 { |
1473 | 1473 | ; GFX940-NO-PRELOAD-LABEL: bfloat_kernel_preload_arg:
|
1474 | 1474 | ; GFX940-NO-PRELOAD: ; %bb.0:
|
1475 | 1475 | ; GFX940-NO-PRELOAD-NEXT: s_load_dword s4, s[0:1], 0x8
|
@@ -1529,7 +1529,7 @@ define amdgpu_kernel void @bfloat_kernel_preload_arg(ptr addrspace(1) %out, bflo
|
1529 | 1529 | ret void
|
1530 | 1530 | }
|
1531 | 1531 |
|
1532 |
| -define amdgpu_kernel void @v2bfloat_kernel_preload_arg(ptr addrspace(1) %out, <2 x bfloat> %in) { |
| 1532 | +define amdgpu_kernel void @v2bfloat_kernel_preload_arg(ptr addrspace(1) %out, <2 x bfloat> %in) #0 { |
1533 | 1533 | ; GFX940-NO-PRELOAD-LABEL: v2bfloat_kernel_preload_arg:
|
1534 | 1534 | ; GFX940-NO-PRELOAD: ; %bb.0:
|
1535 | 1535 | ; GFX940-NO-PRELOAD-NEXT: s_load_dword s4, s[0:1], 0x8
|
@@ -1589,7 +1589,7 @@ define amdgpu_kernel void @v2bfloat_kernel_preload_arg(ptr addrspace(1) %out, <2
|
1589 | 1589 | ret void
|
1590 | 1590 | }
|
1591 | 1591 |
|
1592 |
| -define amdgpu_kernel void @v3bfloat_kernel_preload_arg(ptr addrspace(1) %out, <3 x bfloat> %in) { |
| 1592 | +define amdgpu_kernel void @v3bfloat_kernel_preload_arg(ptr addrspace(1) %out, <3 x bfloat> %in) #0 { |
1593 | 1593 | ; GFX940-NO-PRELOAD-LABEL: v3bfloat_kernel_preload_arg:
|
1594 | 1594 | ; GFX940-NO-PRELOAD: ; %bb.0:
|
1595 | 1595 | ; GFX940-NO-PRELOAD-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
|
@@ -1659,7 +1659,7 @@ define amdgpu_kernel void @v3bfloat_kernel_preload_arg(ptr addrspace(1) %out, <3
|
1659 | 1659 | ret void
|
1660 | 1660 | }
|
1661 | 1661 |
|
1662 |
| -define amdgpu_kernel void @v6bfloat_kernel_preload_arg(ptr addrspace(1) %out, <6 x bfloat> %in) { |
| 1662 | +define amdgpu_kernel void @v6bfloat_kernel_preload_arg(ptr addrspace(1) %out, <6 x bfloat> %in) #0 { |
1663 | 1663 | ; GFX940-NO-PRELOAD-LABEL: v6bfloat_kernel_preload_arg:
|
1664 | 1664 | ; GFX940-NO-PRELOAD: ; %bb.0:
|
1665 | 1665 | ; GFX940-NO-PRELOAD-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10
|
@@ -1731,7 +1731,7 @@ define amdgpu_kernel void @v6bfloat_kernel_preload_arg(ptr addrspace(1) %out, <6
|
1731 | 1731 | ret void
|
1732 | 1732 | }
|
1733 | 1733 |
|
1734 |
| -define amdgpu_kernel void @half_v7bfloat_kernel_preload_arg(ptr addrspace(1) %out, half %in, <7 x bfloat> %in2, ptr addrspace(1) %out2) { |
| 1734 | +define amdgpu_kernel void @half_v7bfloat_kernel_preload_arg(ptr addrspace(1) %out, half %in, <7 x bfloat> %in2, ptr addrspace(1) %out2) #0 { |
1735 | 1735 | ; GFX940-NO-PRELOAD-LABEL: half_v7bfloat_kernel_preload_arg:
|
1736 | 1736 | ; GFX940-NO-PRELOAD: ; %bb.0:
|
1737 | 1737 | ; GFX940-NO-PRELOAD-NEXT: s_load_dword s10, s[0:1], 0x8
|
@@ -1840,7 +1840,7 @@ define amdgpu_kernel void @half_v7bfloat_kernel_preload_arg(ptr addrspace(1) %ou
|
1840 | 1840 | ret void
|
1841 | 1841 | }
|
1842 | 1842 |
|
1843 |
| -define amdgpu_kernel void @i1_kernel_preload_arg(ptr addrspace(1) %out, i1 %in) { |
| 1843 | +define amdgpu_kernel void @i1_kernel_preload_arg(ptr addrspace(1) %out, i1 %in) #0 { |
1844 | 1844 | ; GFX940-NO-PRELOAD-LABEL: i1_kernel_preload_arg:
|
1845 | 1845 | ; GFX940-NO-PRELOAD: ; %bb.0:
|
1846 | 1846 | ; GFX940-NO-PRELOAD-NEXT: s_load_dword s4, s[0:1], 0x8
|
@@ -1906,7 +1906,7 @@ define amdgpu_kernel void @i1_kernel_preload_arg(ptr addrspace(1) %out, i1 %in)
|
1906 | 1906 | ret void
|
1907 | 1907 | }
|
1908 | 1908 |
|
1909 |
| -define amdgpu_kernel void @fp128_kernel_preload_arg(ptr addrspace(1) %out, fp128 %in) { |
| 1909 | +define amdgpu_kernel void @fp128_kernel_preload_arg(ptr addrspace(1) %out, fp128 %in) #0 { |
1910 | 1910 | ; GFX940-NO-PRELOAD-LABEL: fp128_kernel_preload_arg:
|
1911 | 1911 | ; GFX940-NO-PRELOAD: ; %bb.0:
|
1912 | 1912 | ; GFX940-NO-PRELOAD-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10
|
@@ -1980,7 +1980,7 @@ define amdgpu_kernel void @fp128_kernel_preload_arg(ptr addrspace(1) %out, fp128
|
1980 | 1980 | ret void
|
1981 | 1981 | }
|
1982 | 1982 |
|
1983 |
| -define amdgpu_kernel void @v7i8_kernel_preload_arg(ptr addrspace(1) %out, <7 x i8> %in) { |
| 1983 | +define amdgpu_kernel void @v7i8_kernel_preload_arg(ptr addrspace(1) %out, <7 x i8> %in) #0 { |
1984 | 1984 | ; GFX940-NO-PRELOAD-LABEL: v7i8_kernel_preload_arg:
|
1985 | 1985 | ; GFX940-NO-PRELOAD: ; %bb.0:
|
1986 | 1986 | ; GFX940-NO-PRELOAD-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
|
@@ -2096,7 +2096,7 @@ define amdgpu_kernel void @v7i8_kernel_preload_arg(ptr addrspace(1) %out, <7 x i
|
2096 | 2096 | ret void
|
2097 | 2097 | }
|
2098 | 2098 |
|
2099 |
| -define amdgpu_kernel void @v7half_kernel_preload_arg(ptr addrspace(1) %out, <7 x half> %in) { |
| 2099 | +define amdgpu_kernel void @v7half_kernel_preload_arg(ptr addrspace(1) %out, <7 x half> %in) #0 { |
2100 | 2100 | ; GFX940-NO-PRELOAD-LABEL: v7half_kernel_preload_arg:
|
2101 | 2101 | ; GFX940-NO-PRELOAD: ; %bb.0:
|
2102 | 2102 | ; GFX940-NO-PRELOAD-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10
|
@@ -2181,7 +2181,7 @@ define amdgpu_kernel void @v7half_kernel_preload_arg(ptr addrspace(1) %out, <7 x
|
2181 | 2181 | }
|
2182 | 2182 |
|
2183 | 2183 | ; Test when previous argument was not dword aligned.
|
2184 |
| -define amdgpu_kernel void @i16_i32_kernel_preload_arg(ptr addrspace(1) %out, i16 %in, i32 %in2, ptr addrspace(1) %out2) { |
| 2184 | +define amdgpu_kernel void @i16_i32_kernel_preload_arg(ptr addrspace(1) %out, i16 %in, i32 %in2, ptr addrspace(1) %out2) #0 { |
2185 | 2185 | ; GFX940-NO-PRELOAD-LABEL: i16_i32_kernel_preload_arg:
|
2186 | 2186 | ; GFX940-NO-PRELOAD: ; %bb.0:
|
2187 | 2187 | ; GFX940-NO-PRELOAD-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0
|
@@ -2260,7 +2260,7 @@ define amdgpu_kernel void @i16_i32_kernel_preload_arg(ptr addrspace(1) %out, i16
|
2260 | 2260 | ret void
|
2261 | 2261 | }
|
2262 | 2262 |
|
2263 |
| -define amdgpu_kernel void @i16_v3i32_kernel_preload_arg(ptr addrspace(1) %out, i16 %in, <3 x i32> %in2, ptr addrspace(1) %out2) { |
| 2263 | +define amdgpu_kernel void @i16_v3i32_kernel_preload_arg(ptr addrspace(1) %out, i16 %in, <3 x i32> %in2, ptr addrspace(1) %out2) #0 { |
2264 | 2264 | ; GFX940-NO-PRELOAD-LABEL: i16_v3i32_kernel_preload_arg:
|
2265 | 2265 | ; GFX940-NO-PRELOAD: ; %bb.0:
|
2266 | 2266 | ; GFX940-NO-PRELOAD-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10
|
@@ -2359,7 +2359,7 @@ define amdgpu_kernel void @i16_v3i32_kernel_preload_arg(ptr addrspace(1) %out, i
|
2359 | 2359 | ret void
|
2360 | 2360 | }
|
2361 | 2361 |
|
2362 |
| -define amdgpu_kernel void @i16_i16_kernel_preload_arg(ptr addrspace(1) %out, i16 %in, i16 %in2, ptr addrspace(1) %out2) { |
| 2362 | +define amdgpu_kernel void @i16_i16_kernel_preload_arg(ptr addrspace(1) %out, i16 %in, i16 %in2, ptr addrspace(1) %out2) #0 { |
2363 | 2363 | ; GFX940-NO-PRELOAD-LABEL: i16_i16_kernel_preload_arg:
|
2364 | 2364 | ; GFX940-NO-PRELOAD: ; %bb.0:
|
2365 | 2365 | ; GFX940-NO-PRELOAD-NEXT: s_load_dword s6, s[0:1], 0x8
|
@@ -2436,7 +2436,7 @@ define amdgpu_kernel void @i16_i16_kernel_preload_arg(ptr addrspace(1) %out, i16
|
2436 | 2436 | ret void
|
2437 | 2437 | }
|
2438 | 2438 |
|
2439 |
| -define amdgpu_kernel void @i16_v2i8_kernel_preload_arg(ptr addrspace(1) %out, i16 %in, <2 x i8> %in2, ptr addrspace(1) %out2) { |
| 2439 | +define amdgpu_kernel void @i16_v2i8_kernel_preload_arg(ptr addrspace(1) %out, i16 %in, <2 x i8> %in2, ptr addrspace(1) %out2) #0 { |
2440 | 2440 | ; GFX940-NO-PRELOAD-LABEL: i16_v2i8_kernel_preload_arg:
|
2441 | 2441 | ; GFX940-NO-PRELOAD: ; %bb.0:
|
2442 | 2442 | ; GFX940-NO-PRELOAD-NEXT: s_load_dword s6, s[0:1], 0x8
|
@@ -2520,3 +2520,5 @@ define amdgpu_kernel void @i16_v2i8_kernel_preload_arg(ptr addrspace(1) %out, i1
|
2520 | 2520 | store <2 x i8> %in2, ptr addrspace(1) %out2
|
2521 | 2521 | ret void
|
2522 | 2522 | }
|
| 2523 | + |
| 2524 | +attributes #0 = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" } |
0 commit comments