Skip to content

Commit 9cbfce6

Browse files
committed
[AMDGPU] Use a target feature to enable __builtin_amdgcn_global_load_lds on gfx9 and gfx10
1 parent 638583a commit 9cbfce6

File tree

13 files changed

+55
-32
lines changed

13 files changed

+55
-32
lines changed

clang/include/clang/Basic/BuiltinsAMDGPU.def

+1-1
Original file line numberDiff line numberDiff line change
@@ -254,7 +254,7 @@ TARGET_BUILTIN(__builtin_amdgcn_flat_atomic_fadd_v2bf16, "V2sV2s*0V2s", "t", "at
254254
TARGET_BUILTIN(__builtin_amdgcn_global_atomic_fadd_v2bf16, "V2sV2s*1V2s", "t", "atomic-global-pk-add-bf16-inst")
255255
TARGET_BUILTIN(__builtin_amdgcn_ds_atomic_fadd_v2bf16, "V2sV2s*3V2s", "t", "atomic-ds-pk-add-16-insts")
256256
TARGET_BUILTIN(__builtin_amdgcn_ds_atomic_fadd_v2f16, "V2hV2h*3V2h", "t", "atomic-ds-pk-add-16-insts")
257-
TARGET_BUILTIN(__builtin_amdgcn_global_load_lds, "vv*1v*3IUiIiIUi", "t", "gfx940-insts")
257+
TARGET_BUILTIN(__builtin_amdgcn_global_load_lds, "vv*1v*3IUiIiIUi", "t", "mem-to-lds-load-insts")
258258

259259
//===----------------------------------------------------------------------===//
260260
// Deep learning builtins.

clang/lib/Basic/Targets/AMDGPU.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -260,7 +260,7 @@ AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
260260

261261
MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
262262
CUMode = !(GPUFeatures & llvm::AMDGPU::FEATURE_WGP);
263-
for (auto F : {"image-insts", "gws", "lds-buffer-load-insts"})
263+
for (auto F : {"image-insts", "gws", "mem-to-lds-load-insts"})
264264
ReadOnlyFeatures.insert(F);
265265
HalfArgsAndReturns = true;
266266
}

clang/test/CodeGen/link-builtin-bitcode.c

+3-3
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,6 @@ int bar() { return no_attr() + attr_in_target() + attr_not_in_target() + attr_in
4444
// CHECK-SAME: () #[[ATTR_INCOMPATIBLE:[0-9]+]] {
4545

4646
// CHECK: attributes #[[ATTR_BAR]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" }
47-
// CHECK: attributes #[[ATTR_COMPATIBLE]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gws,+image-insts,+lds-buffer-load-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" }
48-
// CHECK: attributes #[[ATTR_EXTEND]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+extended-image-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gws,+image-insts,+lds-buffer-load-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" }
49-
// CHECK: attributes #[[ATTR_INCOMPATIBLE]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx90a-insts,+gws,+image-insts,+lds-buffer-load-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,-gfx9-insts" }
47+
// CHECK: attributes #[[ATTR_COMPATIBLE]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gws,+image-insts,+mai-insts,+mem-to-lds-load-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" }
48+
// CHECK: attributes #[[ATTR_EXTEND]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+extended-image-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gws,+image-insts,+mai-insts,+mem-to-lds-load-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" }
49+
// CHECK: attributes #[[ATTR_INCOMPATIBLE]] = { {{.*}} "target-cpu"="gfx90a" "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx90a-insts,+gws,+image-insts,+mai-insts,+mem-to-lds-load-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,-gfx9-insts" }

clang/test/CodeGenCXX/dynamic-cast-address-space.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -112,9 +112,9 @@ const B& f(A *a) {
112112
// CHECK: attributes #[[ATTR3]] = { nounwind }
113113
// CHECK: attributes #[[ATTR4]] = { noreturn }
114114
//.
115-
// WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR0]] = { mustprogress noinline optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+bf8-cvt-scale-insts,+bitop3-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+f32-to-f16bf16-cvt-sr-insts,+fp4-cvt-scale-insts,+fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-cvt-scale-insts,+fp8-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+gws,+image-insts,+lds-buffer-load-insts,+mai-insts,+permlane16-swap,+permlane32-swap,+prng-inst,+s-memrealtime,+s-memtime-inst,+wavefrontsize32,+wavefrontsize64" }
115+
// WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR0]] = { mustprogress noinline optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+bf8-cvt-scale-insts,+bitop3-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+f32-to-f16bf16-cvt-sr-insts,+fp4-cvt-scale-insts,+fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-cvt-scale-insts,+fp8-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+gws,+image-insts,+mai-insts,+mem-to-lds-load-insts,+permlane16-swap,+permlane32-swap,+prng-inst,+s-memrealtime,+s-memtime-inst,+wavefrontsize32,+wavefrontsize64" }
116116
// WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR1:[0-9]+]] = { nounwind willreturn memory(read) }
117-
// WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR2:[0-9]+]] = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+bf8-cvt-scale-insts,+bitop3-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+f32-to-f16bf16-cvt-sr-insts,+fp4-cvt-scale-insts,+fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-cvt-scale-insts,+fp8-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+gws,+image-insts,+lds-buffer-load-insts,+mai-insts,+permlane16-swap,+permlane32-swap,+prng-inst,+s-memrealtime,+s-memtime-inst,+wavefrontsize32,+wavefrontsize64" }
117+
// WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR2:[0-9]+]] = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+bf8-cvt-scale-insts,+bitop3-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+f32-to-f16bf16-cvt-sr-insts,+fp4-cvt-scale-insts,+fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-cvt-scale-insts,+fp8-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+gws,+image-insts,+mai-insts,+mem-to-lds-load-insts,+permlane16-swap,+permlane32-swap,+prng-inst,+s-memrealtime,+s-memtime-inst,+wavefrontsize32,+wavefrontsize64" }
118118
// WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR3]] = { nounwind }
119119
// WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR4]] = { noreturn }
120120
//.

clang/test/CodeGenOpenCL/builtins-amdgcn-gfx942.cl renamed to clang/test/CodeGenOpenCL/builtins-amdgcn-global-load-lds.cl

+2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
2+
// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -triple amdgcn-unknown-unknown -target-cpu gfx900 -emit-llvm -o - %s | FileCheck %s
23
// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -triple amdgcn-unknown-unknown -target-cpu gfx942 -emit-llvm -o - %s | FileCheck %s
4+
// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -triple amdgcn-unknown-unknown -target-cpu gfx1010 -emit-llvm -o - %s | FileCheck %s
35
// REQUIRES: amdgpu-registered-target
46

57
typedef unsigned int u32;

clang/test/SemaOpenCL/builtins-amdgcn-gfx942-err.cl renamed to clang/test/SemaOpenCL/builtins-amdgcn-global-load-lds-err.cl

+9-7
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
1-
// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -triple amdgcn-unknown-unknown -target-cpu gfx942 -S -verify=gfx942,expected -o - %s
1+
// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -triple amdgcn-unknown-unknown -target-cpu gfx900 -S -verify=gfx,expected -o - %s
2+
// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -triple amdgcn-unknown-unknown -target-cpu gfx942 -S -verify=gfx,expected -o - %s
3+
// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -triple amdgcn-unknown-unknown -target-cpu gfx1010 -S -verify=gfx,expected -o - %s
24
// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -triple amdgcn-unknown-unknown -target-cpu gfx950 -S -verify=gfx950,expected -o - %s
35
// REQUIRES: amdgpu-registered-target
46

@@ -8,12 +10,12 @@ void test_global_load_lds_unsupported_size(global u32* src, local u32 *dst, u32
810
__builtin_amdgcn_global_load_lds(src, dst, size, /*offset=*/0, /*aux=*/0); // expected-error{{argument to '__builtin_amdgcn_global_load_lds' must be a constant integer}}
911
__builtin_amdgcn_global_load_lds(src, dst, /*size=*/4, offset, /*aux=*/0); // expected-error{{argument to '__builtin_amdgcn_global_load_lds' must be a constant integer}}
1012
__builtin_amdgcn_global_load_lds(src, dst, /*size=*/4, /*offset=*/0, aux); // expected-error{{argument to '__builtin_amdgcn_global_load_lds' must be a constant integer}}
11-
__builtin_amdgcn_global_load_lds(src, dst, /*size=*/5, /*offset=*/0, /*aux=*/0); // expected-error{{invalid size value}} gfx942-note {{size must be 1, 2, or 4}} gfx950-note {{size must be 1, 2, 4, 12 or 16}}
12-
__builtin_amdgcn_global_load_lds(src, dst, /*size=*/0, /*offset=*/0, /*aux=*/0); // expected-error{{invalid size value}} gfx942-note {{size must be 1, 2, or 4}} gfx950-note {{size must be 1, 2, 4, 12 or 16}}
13-
__builtin_amdgcn_global_load_lds(src, dst, /*size=*/3, /*offset=*/0, /*aux=*/0); // expected-error{{invalid size value}} gfx942-note {{size must be 1, 2, or 4}} gfx950-note {{size must be 1, 2, 4, 12 or 16}}
14-
__builtin_amdgcn_global_load_lds(src, dst, /*size=*/12, /*offset=*/0, /*aux=*/0); // gfx942-error{{invalid size value}} gfx942-note {{size must be 1, 2, or 4}}
15-
__builtin_amdgcn_global_load_lds(src, dst, /*size=*/16, /*offset=*/0, /*aux=*/0); // gfx942-error{{invalid size value}} gfx942-note {{size must be 1, 2, or 4}}
16-
__builtin_amdgcn_global_load_lds(src, dst, /*size=*/-1, /*offset=*/0, /*aux=*/0); // expected-error{{invalid size value}} gfx942-note {{size must be 1, 2, or 4}} gfx950-note {{size must be 1, 2, 4, 12 or 16}}
13+
__builtin_amdgcn_global_load_lds(src, dst, /*size=*/5, /*offset=*/0, /*aux=*/0); // expected-error{{invalid size value}} gfx-note {{size must be 1, 2, or 4}} gfx950-note {{size must be 1, 2, 4, 12 or 16}}
14+
__builtin_amdgcn_global_load_lds(src, dst, /*size=*/0, /*offset=*/0, /*aux=*/0); // expected-error{{invalid size value}} gfx-note {{size must be 1, 2, or 4}} gfx950-note {{size must be 1, 2, 4, 12 or 16}}
15+
__builtin_amdgcn_global_load_lds(src, dst, /*size=*/3, /*offset=*/0, /*aux=*/0); // expected-error{{invalid size value}} gfx-note {{size must be 1, 2, or 4}} gfx950-note {{size must be 1, 2, 4, 12 or 16}}
16+
__builtin_amdgcn_global_load_lds(src, dst, /*size=*/12, /*offset=*/0, /*aux=*/0); // gfx-error{{invalid size value}} gfx-note {{size must be 1, 2, or 4}}
17+
__builtin_amdgcn_global_load_lds(src, dst, /*size=*/16, /*offset=*/0, /*aux=*/0); // gfx-error{{invalid size value}} gfx-note {{size must be 1, 2, or 4}}
18+
__builtin_amdgcn_global_load_lds(src, dst, /*size=*/-1, /*offset=*/0, /*aux=*/0); // expected-error{{invalid size value}} gfx-note {{size must be 1, 2, or 4}} gfx950-note {{size must be 1, 2, 4, 12 or 16}}
1719
}
1820

1921
__attribute__((target("gfx950-insts")))

flang/test/Lower/OpenMP/target_cpu_features.f90

+1-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
!AMDGCN-SAME: fir.target_features = #llvm.target_features<["+16-bit-insts", "+ci-insts",
1212
!AMDGCN-SAME: "+dl-insts", "+dot1-insts", "+dot10-insts", "+dot2-insts", "+dot3-insts",
1313
!AMDGCN-SAME: "+dot4-insts", "+dot5-insts", "+dot6-insts", "+dot7-insts", "+dpp",
14-
!AMDGCN-SAME: "+gfx8-insts", "+gfx9-insts", "+gws", "+image-insts", "+lds-buffer-load-insts",
14+
!AMDGCN-SAME: "+gfx8-insts", "+gfx9-insts", "+gws", "+image-insts", "+mem-to-lds-load-insts",
1515
!AMDGCN-SAME: "+mai-insts", "+s-memrealtime", "+s-memtime-inst", "+wavefrontsize64"]>
1616

1717
!NVPTX: module attributes {

llvm/lib/Target/AMDGPU/AMDGPU.td

+8-8
Original file line numberDiff line numberDiff line change
@@ -1269,10 +1269,10 @@ def FeatureDynamicVGPRBlockSize32 : SubtargetFeature<"dynamic-vgpr-block-size-32
12691269
"Use a block size of 32 for dynamic VGPR allocation (default is 16)"
12701270
>;
12711271

1272-
def FeatureLDSBufferLoad : SubtargetFeature<"lds-buffer-load-insts",
1273-
"HasLDSBufferLoad",
1272+
def FeatureMemToLDSLoad : SubtargetFeature<"mem-to-lds-load-insts",
1273+
"HasMemToLDSLoad",
12741274
"true",
1275-
"The platform has buffer_load lds instructions"
1275+
"The platform has memory to lds instructions (global_load w/lds bit set, buffer_load w/lds bit set or global_load_lds). This does not include scratch_load_lds."
12761276
>;
12771277

12781278
// Dummy feature used to disable assembler instructions.
@@ -1296,7 +1296,7 @@ def FeatureSouthernIslands : GCNSubtargetFeatureGeneration<"SOUTHERN_ISLANDS",
12961296
FeatureTrigReducedRange, FeatureExtendedImageInsts, FeatureImageInsts,
12971297
FeatureGDS, FeatureGWS, FeatureDefaultComponentZero,
12981298
FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF64GlobalInsts,
1299-
FeatureVmemWriteVgprInOrder, FeatureLDSBufferLoad
1299+
FeatureVmemWriteVgprInOrder
13001300
]
13011301
>;
13021302

@@ -1310,7 +1310,7 @@ def FeatureSeaIslands : GCNSubtargetFeatureGeneration<"SEA_ISLANDS",
13101310
FeatureImageInsts, FeatureGDS, FeatureGWS, FeatureDefaultComponentZero,
13111311
FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF64GlobalInsts,
13121312
FeatureAtomicFMinFMaxF32FlatInsts, FeatureAtomicFMinFMaxF64FlatInsts,
1313-
FeatureVmemWriteVgprInOrder, FeatureLDSBufferLoad
1313+
FeatureVmemWriteVgprInOrder
13141314
]
13151315
>;
13161316

@@ -1326,7 +1326,7 @@ def FeatureVolcanicIslands : GCNSubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
13261326
FeatureGFX7GFX8GFX9Insts, FeatureSMemTimeInst, FeatureMadMacF32Insts,
13271327
FeatureDsSrc2Insts, FeatureExtendedImageInsts, FeatureFastDenormalF32,
13281328
FeatureUnalignedBufferAccess, FeatureImageInsts, FeatureGDS, FeatureGWS,
1329-
FeatureDefaultComponentZero, FeatureVmemWriteVgprInOrder, FeatureLDSBufferLoad
1329+
FeatureDefaultComponentZero, FeatureVmemWriteVgprInOrder
13301330
]
13311331
>;
13321332

@@ -1345,7 +1345,7 @@ def FeatureGFX9 : GCNSubtargetFeatureGeneration<"GFX9",
13451345
FeatureA16, FeatureSMemTimeInst, FeatureFastDenormalF32, FeatureSupportsXNACK,
13461346
FeatureUnalignedBufferAccess, FeatureUnalignedScratchAccess,
13471347
FeatureUnalignedDSAccess, FeatureNegativeScratchOffsetBug, FeatureGWS,
1348-
FeatureDefaultComponentZero,FeatureVmemWriteVgprInOrder, FeatureLDSBufferLoad
1348+
FeatureDefaultComponentZero,FeatureVmemWriteVgprInOrder, FeatureMemToLDSLoad
13491349
]
13501350
>;
13511351

@@ -1369,7 +1369,7 @@ def FeatureGFX10 : GCNSubtargetFeatureGeneration<"GFX10",
13691369
FeatureDefaultComponentZero, FeatureMaxHardClauseLength63,
13701370
FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF64GlobalInsts,
13711371
FeatureAtomicFMinFMaxF32FlatInsts, FeatureAtomicFMinFMaxF64FlatInsts,
1372-
FeatureVmemWriteVgprInOrder, FeatureLDSBufferLoad
1372+
FeatureVmemWriteVgprInOrder, FeatureMemToLDSLoad
13731373
]
13741374
>;
13751375

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

+4-1
Original file line numberDiff line numberDiff line change
@@ -3368,7 +3368,7 @@ bool AMDGPUInstructionSelector::selectG_INSERT_VECTOR_ELT(
33683368
}
33693369

33703370
bool AMDGPUInstructionSelector::selectBufferLoadLds(MachineInstr &MI) const {
3371-
if (!Subtarget->hasLDSBufferLoad())
3371+
if (!Subtarget->hasMemToLDSLoad())
33723372
return false;
33733373
unsigned Opc;
33743374
unsigned Size = MI.getOperand(3).getImm();
@@ -3505,6 +3505,9 @@ static Register matchZeroExtendFromS32(MachineRegisterInfo &MRI, Register Reg) {
35053505
}
35063506

35073507
bool AMDGPUInstructionSelector::selectGlobalLoadLds(MachineInstr &MI) const{
3508+
if (!Subtarget->hasMemToLDSLoad())
3509+
return false;
3510+
35083511
unsigned Opc;
35093512
unsigned Size = MI.getOperand(3).getImm();
35103513

llvm/lib/Target/AMDGPU/GCNSubtarget.h

+2-2
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
193193
bool SupportsSRAMECC = false;
194194
bool DynamicVGPR = false;
195195
bool DynamicVGPRBlockSize32 = false;
196-
bool HasLDSBufferLoad = false;
196+
bool HasMemToLDSLoad = false;
197197

198198
// This should not be used directly. 'TargetID' tracks the dynamic settings
199199
// for SRAMECC.
@@ -1319,7 +1319,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
13191319
return hasGFX950Insts();
13201320
}
13211321

1322-
bool hasLDSBufferLoad() const { return HasLDSBufferLoad; }
1322+
bool hasMemToLDSLoad() const { return HasMemToLDSLoad; }
13231323

13241324
bool hasSALUFloatInsts() const { return HasSALUFloatInsts; }
13251325

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

+4-1
Original file line numberDiff line numberDiff line change
@@ -10104,7 +10104,7 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
1010410104
case Intrinsic::amdgcn_raw_ptr_buffer_load_lds:
1010510105
case Intrinsic::amdgcn_struct_buffer_load_lds:
1010610106
case Intrinsic::amdgcn_struct_ptr_buffer_load_lds: {
10107-
if (!Subtarget->hasLDSBufferLoad())
10107+
if (!Subtarget->hasMemToLDSLoad())
1010810108
return SDValue();
1010910109
unsigned Opc;
1011010110
bool HasVIndex =
@@ -10212,6 +10212,9 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
1021210212
return SDValue(Load, 0);
1021310213
}
1021410214
case Intrinsic::amdgcn_global_load_lds: {
10215+
if (!Subtarget->hasMemToLDSLoad())
10216+
return SDValue();
10217+
1021510218
unsigned Opc;
1021610219
unsigned Size = Op->getConstantOperandVal(4);
1021710220
switch (Size) {

llvm/lib/TargetParser/TargetParser.cpp

+5-5
Original file line numberDiff line numberDiff line change
@@ -374,7 +374,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
374374
Features["prng-inst"] = true;
375375
Features["wavefrontsize32"] = true;
376376
Features["wavefrontsize64"] = true;
377-
Features["lds-buffer-load-insts"] = true;
377+
Features["mem-to-lds-load-insts"] = true;
378378
} else if (T.isAMDGCN()) {
379379
AMDGPU::GPUKind Kind = parseArchAMDGCN(GPU);
380380
switch (Kind) {
@@ -460,7 +460,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
460460
Features["s-memrealtime"] = true;
461461
Features["s-memtime-inst"] = true;
462462
Features["gws"] = true;
463-
Features["lds-buffer-load-insts"] = true;
463+
Features["mem-to-lds-load-insts"] = true;
464464
break;
465465
case GK_GFX1012:
466466
case GK_GFX1011:
@@ -485,7 +485,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
485485
Features["s-memrealtime"] = true;
486486
Features["s-memtime-inst"] = true;
487487
Features["gws"] = true;
488-
Features["lds-buffer-load-insts"] = true;
488+
Features["mem-to-lds-load-insts"] = true;
489489
break;
490490
case GK_GFX950:
491491
Features["bitop3-insts"] = true;
@@ -536,7 +536,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
536536
Features["ci-insts"] = true;
537537
Features["s-memtime-inst"] = true;
538538
Features["gws"] = true;
539-
Features["lds-buffer-load-insts"] = true;
539+
Features["mem-to-lds-load-insts"] = true;
540540
break;
541541
case GK_GFX90A:
542542
Features["gfx90a-insts"] = true;
@@ -589,7 +589,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
589589
Features["image-insts"] = true;
590590
Features["s-memtime-inst"] = true;
591591
Features["gws"] = true;
592-
Features["lds-buffer-load-insts"] = true;
592+
Features["mem-to-lds-load-insts"] = true;
593593
break;
594594
case GK_NONE:
595595
break;

0 commit comments

Comments
 (0)