Skip to content

Commit 041e842

Browse files
authored
[Clang][AMDGPU] Expose buffer load lds as a clang builtin (#132048)
CK currently uses either inline assembly or inline LLVM-IR builtins to generate buffer_load_dword lds instructions. This patch exposes the instruction as a Clang builtin available on gfx9 and gfx10. Related to SWDEV-519702 and SWDEV-518861.
1 parent 91f3965 commit 041e842

7 files changed

+35
-7
lines changed

clang/include/clang/Basic/BuiltinsAMDGPU.def

+2
Original file line number | Diff line number | Diff line change
@@ -163,6 +163,8 @@ BUILTIN(__builtin_amdgcn_raw_buffer_load_b64, "V2UiQbiiIi", "n")
163163
BUILTIN(__builtin_amdgcn_raw_buffer_load_b96, "V3UiQbiiIi", "n")
164164
BUILTIN(__builtin_amdgcn_raw_buffer_load_b128, "V4UiQbiiIi", "n")
165165

166+
TARGET_BUILTIN(__builtin_amdgcn_raw_ptr_buffer_load_lds, "vQbv*3IUiiiIiIi", "t", "vmem-to-lds-load-insts")
167+
166168
//===----------------------------------------------------------------------===//
167169
// Ballot builtins.
168170
//===----------------------------------------------------------------------===//

clang/include/clang/Basic/DiagnosticSemaKinds.td

+2-2
Original file line number | Diff line number | Diff line change
@@ -13056,6 +13056,6 @@ def err_acc_decl_for_routine
1305613056
: Error<"expected function or lambda declaration for 'routine' construct">;
1305713057

1305813058
// AMDGCN builtins diagnostics
13059-
def err_amdgcn_global_load_lds_size_invalid_value : Error<"invalid size value">;
13060-
def note_amdgcn_global_load_lds_size_valid_value : Note<"size must be %select{1, 2, or 4|1, 2, 4, 12 or 16}0">;
13059+
def err_amdgcn_load_lds_size_invalid_value : Error<"invalid size value">;
13060+
def note_amdgcn_load_lds_size_valid_value : Note<"size must be %select{1, 2, or 4|1, 2, 4, 12 or 16}0">;
1306113061
} // end of sema component.

clang/lib/Sema/SemaAMDGPU.cpp

+3-4
Original file line number | Diff line number | Diff line change
@@ -35,6 +35,7 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
3535
Builtin::evaluateRequiredTargetFeatures("gfx950-insts", CallerFeatureMap);
3636

3737
switch (BuiltinID) {
38+
case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_load_lds:
3839
case AMDGPU::BI__builtin_amdgcn_global_load_lds: {
3940
constexpr const int SizeIdx = 2;
4041
llvm::APSInt Size;
@@ -54,11 +55,9 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
5455
[[fallthrough]];
5556
}
5657
default:
57-
Diag(ArgExpr->getExprLoc(),
58-
diag::err_amdgcn_global_load_lds_size_invalid_value)
58+
Diag(ArgExpr->getExprLoc(), diag::err_amdgcn_load_lds_size_invalid_value)
5959
<< ArgExpr->getSourceRange();
60-
Diag(ArgExpr->getExprLoc(),
61-
diag::note_amdgcn_global_load_lds_size_valid_value)
60+
Diag(ArgExpr->getExprLoc(), diag::note_amdgcn_load_lds_size_valid_value)
6261
<< HasGFX950Insts << ArgExpr->getSourceRange();
6362
return true;
6463
}

clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-load.cl

+9
Original file line number | Diff line number | Diff line change
@@ -170,3 +170,12 @@ v3u32 test_amdgcn_raw_ptr_buffer_load_b96_non_const_soffset(__amdgpu_buffer_rsrc
170170
v4u32 test_amdgcn_raw_ptr_buffer_load_b128_non_const_soffset(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
171171
return __builtin_amdgcn_raw_buffer_load_b128(rsrc, /*offset=*/0, soffset, /*aux=*/0);
172172
}
173+
174+
// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_lds(
175+
// CHECK-NEXT: entry:
176+
// CHECK-NEXT: tail call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) [[RSRC:%.*]], ptr addrspace(3) [[LDS:%.*]], i32 1, i32 [[OFFSET:%.*]], i32 [[SOFFSET:%.*]], i32 2, i32 3)
177+
// CHECK-NEXT: ret void
178+
//
179+
void test_amdgcn_raw_ptr_buffer_load_lds(__amdgpu_buffer_rsrc_t rsrc, __local void * lds, int offset, int soffset) {
180+
__builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, lds, 1, offset, soffset, 2, 3);
181+
}
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,10 @@
1+
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx90a -S -verify=gfx90a,expected -o - %s
2+
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx950 -S -verify=gfx950,expected -o - %s
3+
// REQUIRES: amdgpu-registered-target
4+
5+
void test_amdgcn_raw_ptr_buffer_load_lds(__amdgpu_buffer_rsrc_t rsrc, __local void* lds, int offset, int soffset, int x) {
6+
__builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, lds, x, offset, soffset, 0, 0); //expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_load_lds' must be a constant integer}}
7+
__builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, lds, 4, offset, soffset, x, 0); //expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_load_lds' must be a constant integer}}
8+
__builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, lds, 4, offset, soffset, 0, x); //expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_load_lds' must be a constant integer}}
9+
__builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, lds, 3, offset, soffset, 0, 0); //expected-error{{invalid size value}} gfx950-note{{size must be 1, 2, 4, 12 or 16}} gfx90a-note{{size must be 1, 2, or 4}}
10+
}
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,6 @@
1+
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1100 -S -verify -o - %s
2+
// REQUIRES: amdgpu-registered-target
3+
4+
void test_amdgcn_raw_ptr_buffer_load_lds(__amdgpu_buffer_rsrc_t rsrc, __local void* lds, int offset, int soffset, int x) {
5+
__builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, lds, 4, offset, soffset, 0, 0); //expected-error{{needs target feature vmem-to-lds-load-insts}}
6+
}

llvm/include/llvm/IR/IntrinsicsAMDGPU.td

+3-1
Original file line number | Diff line number | Diff line change
@@ -1863,7 +1863,9 @@ class AMDGPURawBufferLoadLDS : Intrinsic <
18631863
ImmArg<ArgIndex<6>>, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>;
18641864
def int_amdgcn_raw_buffer_load_lds : AMDGPURawBufferLoadLDS;
18651865

1866-
class AMDGPURawPtrBufferLoadLDS : Intrinsic <
1866+
class AMDGPURawPtrBufferLoadLDS :
1867+
ClangBuiltin<"__builtin_amdgcn_raw_ptr_buffer_load_lds">,
1868+
Intrinsic <
18671869
[],
18681870
[AMDGPUBufferRsrcTy, // rsrc(SGPR)
18691871
LLVMQualPointerType<3>, // LDS base offset

0 commit comments

Comments
 (0)