Skip to content

Commit 61c9004

Browse files
author
anikelal
committed
OpenCL allows a kernel function to call another kernel function.
To facilitate this, we emit a stub version of each kernel function with a different name-mangling scheme, and replace kernel call sites appropriately. #60313 https://ontrack-internal.amd.com/browse/SWDEV-245936
1 parent 55f5d68 commit 61c9004

13 files changed

+160
-33
lines changed

clang/include/clang/AST/GlobalDecl.h

+26-11
Original file line numberDiff line numberDiff line change
@@ -71,14 +71,19 @@ class GlobalDecl {
7171
GlobalDecl(const FunctionDecl *D, unsigned MVIndex = 0)
7272
: MultiVersionIndex(MVIndex) {
7373
if (!D->hasAttr<CUDAGlobalAttr>()) {
74+
if (D->hasAttr<OpenCLKernelAttr>()) {
75+
Value.setPointerAndInt(D, unsigned(KernelReferenceKind::Kernel));
76+
return;
77+
}
7478
Init(D);
7579
return;
7680
}
7781
Value.setPointerAndInt(D, unsigned(getDefaultKernelReference(D)));
7882
}
7983
GlobalDecl(const FunctionDecl *D, KernelReferenceKind Kind)
8084
: Value(D, unsigned(Kind)) {
81-
assert(D->hasAttr<CUDAGlobalAttr>() && "Decl is not a GPU kernel!");
85+
assert((D->hasAttr<CUDAGlobalAttr>() && "Decl is not a GPU kernel!") ||
86+
(D->hasAttr<OpenCLKernelAttr>() && "Decl is not a OpenCL kernel!"));
8287
}
8388
GlobalDecl(const NamedDecl *D) { Init(D); }
8489
GlobalDecl(const BlockDecl *D) { Init(D); }
@@ -130,13 +135,15 @@ class GlobalDecl {
130135
}
131136

132137
KernelReferenceKind getKernelReferenceKind() const {
133-
assert(((isa<FunctionDecl>(getDecl()) &&
134-
cast<FunctionDecl>(getDecl())->hasAttr<CUDAGlobalAttr>()) ||
135-
(isa<FunctionTemplateDecl>(getDecl()) &&
136-
cast<FunctionTemplateDecl>(getDecl())
137-
->getTemplatedDecl()
138-
->hasAttr<CUDAGlobalAttr>())) &&
139-
"Decl is not a GPU kernel!");
138+
assert((((isa<FunctionDecl>(getDecl()) &&
139+
cast<FunctionDecl>(getDecl())->hasAttr<CUDAGlobalAttr>()) ||
140+
(isa<FunctionTemplateDecl>(getDecl()) &&
141+
cast<FunctionTemplateDecl>(getDecl())
142+
->getTemplatedDecl()
143+
->hasAttr<CUDAGlobalAttr>())) &&
144+
"Decl is not a GPU kernel!") ||
145+
(isDeclOpenCLKernel() && "Decl is not a OpenCL kernel!"));
146+
140147
return static_cast<KernelReferenceKind>(Value.getInt());
141148
}
142149

@@ -196,13 +203,21 @@ class GlobalDecl {
196203
}
197204

198205
GlobalDecl getWithKernelReferenceKind(KernelReferenceKind Kind) {
199-
assert(isa<FunctionDecl>(getDecl()) &&
200-
cast<FunctionDecl>(getDecl())->hasAttr<CUDAGlobalAttr>() &&
201-
"Decl is not a GPU kernel!");
206+
assert((isa<FunctionDecl>(getDecl()) &&
207+
cast<FunctionDecl>(getDecl())->hasAttr<CUDAGlobalAttr>() &&
208+
"Decl is not a GPU kernel!") ||
209+
(isDeclOpenCLKernel() && "Decl is not a OpenCL kernel!"));
202210
GlobalDecl Result(*this);
203211
Result.Value.setInt(unsigned(Kind));
204212
return Result;
205213
}
214+
215+
bool isDeclOpenCLKernel() const {
216+
auto FD = dyn_cast<FunctionDecl>(getDecl());
217+
if (FD)
218+
return FD->hasAttr<OpenCLKernelAttr>();
219+
return FD;
220+
}
206221
};
207222

208223
} // namespace clang

clang/lib/AST/Expr.cpp

+2-1
Original file line numberDiff line numberDiff line change
@@ -693,7 +693,8 @@ std::string PredefinedExpr::ComputeName(PredefinedIdentKind IK,
693693
GD = GlobalDecl(CD, Ctor_Base);
694694
else if (const CXXDestructorDecl *DD = dyn_cast<CXXDestructorDecl>(ND))
695695
GD = GlobalDecl(DD, Dtor_Base);
696-
else if (ND->hasAttr<CUDAGlobalAttr>())
696+
else if (ND->hasAttr<CUDAGlobalAttr>() ||
697+
ND->hasAttr<OpenCLKernelAttr>())
697698
GD = GlobalDecl(cast<FunctionDecl>(ND));
698699
else
699700
GD = GlobalDecl(ND);

clang/lib/AST/ItaniumMangle.cpp

+15
Original file line numberDiff line numberDiff line change
@@ -526,6 +526,7 @@ class CXXNameMangler {
526526
void mangleSourceName(const IdentifierInfo *II);
527527
void mangleRegCallName(const IdentifierInfo *II);
528528
void mangleDeviceStubName(const IdentifierInfo *II);
529+
void mangleOCLDeviceStubName(const IdentifierInfo *II);
529530
void mangleSourceNameWithAbiTags(
530531
const NamedDecl *ND, const AbiTagList *AdditionalAbiTags = nullptr);
531532
void mangleLocalName(GlobalDecl GD,
@@ -1561,8 +1562,13 @@ void CXXNameMangler::mangleUnqualifiedName(
15611562
bool IsDeviceStub =
15621563
FD && FD->hasAttr<CUDAGlobalAttr>() &&
15631564
GD.getKernelReferenceKind() == KernelReferenceKind::Stub;
1565+
bool IsOCLDeviceStub =
1566+
FD && FD->hasAttr<OpenCLKernelAttr>() &&
1567+
GD.getKernelReferenceKind() == KernelReferenceKind::Stub;
15641568
if (IsDeviceStub)
15651569
mangleDeviceStubName(II);
1570+
else if (IsOCLDeviceStub)
1571+
mangleOCLDeviceStubName(II);
15661572
else if (IsRegCall)
15671573
mangleRegCallName(II);
15681574
else
@@ -1780,6 +1786,15 @@ void CXXNameMangler::mangleDeviceStubName(const IdentifierInfo *II) {
17801786
<< II->getName();
17811787
}
17821788

1789+
void CXXNameMangler::mangleOCLDeviceStubName(const IdentifierInfo *II) {
1790+
// <source-name> ::= <positive length number> __clang_ocl_kern_imp_
1791+
// <identifier> <number> ::= [n] <non-negative decimal integer> <identifier>
1792+
// ::= <unqualified source code identifier>
1793+
StringRef OCLDeviceStubNamePrefix = "__clang_ocl_kern_imp_";
1794+
Out << II->getLength() + OCLDeviceStubNamePrefix.size() - 1
1795+
<< OCLDeviceStubNamePrefix << II->getName();
1796+
}
1797+
17831798
void CXXNameMangler::mangleSourceName(const IdentifierInfo *II) {
17841799
// <source-name> ::= <positive length number> <identifier>
17851800
// <number> ::= [n] <non-negative decimal integer>

clang/lib/AST/Mangle.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -539,7 +539,7 @@ class ASTNameGenerator::Implementation {
539539
GD = GlobalDecl(CtorD, Ctor_Complete);
540540
else if (const auto *DtorD = dyn_cast<CXXDestructorDecl>(D))
541541
GD = GlobalDecl(DtorD, Dtor_Complete);
542-
else if (D->hasAttr<CUDAGlobalAttr>())
542+
else if (D->hasAttr<CUDAGlobalAttr>() || D->hasAttr<OpenCLKernelAttr>())
543543
GD = GlobalDecl(cast<FunctionDecl>(D));
544544
else
545545
GD = GlobalDecl(D);

clang/lib/AST/MicrosoftMangle.cpp

+6
Original file line numberDiff line numberDiff line change
@@ -1161,9 +1161,15 @@ void MicrosoftCXXNameMangler::mangleUnqualifiedName(GlobalDecl GD,
11611161
->getTemplatedDecl()
11621162
->hasAttr<CUDAGlobalAttr>())) &&
11631163
GD.getKernelReferenceKind() == KernelReferenceKind::Stub;
1164+
bool IsOCLDeviceStub =
1165+
ND && (isa<FunctionDecl>(ND) && ND->hasAttr<OpenCLKernelAttr>()) &&
1166+
GD.getKernelReferenceKind() == KernelReferenceKind::Stub;
11641167
if (IsDeviceStub)
11651168
mangleSourceName(
11661169
(llvm::Twine("__device_stub__") + II->getName()).str());
1170+
else if (IsOCLDeviceStub)
1171+
mangleSourceName(
1172+
(llvm::Twine("__clang_ocl_kern_imp_") + II->getName()).str());
11671173
else
11681174
mangleSourceName(II->getName());
11691175
break;

clang/lib/CodeGen/CGCall.cpp

+9-2
Original file line numberDiff line numberDiff line change
@@ -2343,6 +2343,15 @@ void CodeGenModule::ConstructAttributeList(StringRef Name,
23432343
// Collect function IR attributes from the CC lowering.
23442344
// We'll collect the parameter and result attributes later.
23452345
CallingConv = FI.getEffectiveCallingConvention();
2346+
GlobalDecl GD = CalleeInfo.getCalleeDecl();
2347+
const Decl *TargetDecl = CalleeInfo.getCalleeDecl().getDecl();
2348+
if (TargetDecl) {
2349+
if (auto FD = dyn_cast<FunctionDecl>(TargetDecl)) {
2350+
if (FD->hasAttr<OpenCLKernelAttr>() &&
2351+
GD.getKernelReferenceKind() == KernelReferenceKind::Stub)
2352+
CallingConv = llvm::CallingConv::C;
2353+
}
2354+
}
23462355
if (FI.isNoReturn())
23472356
FuncAttrs.addAttribute(llvm::Attribute::NoReturn);
23482357
if (FI.isCmseNSCall())
@@ -2352,8 +2361,6 @@ void CodeGenModule::ConstructAttributeList(StringRef Name,
23522361
AddAttributesFromFunctionProtoType(getContext(), FuncAttrs,
23532362
CalleeInfo.getCalleeFunctionProtoType());
23542363

2355-
const Decl *TargetDecl = CalleeInfo.getCalleeDecl().getDecl();
2356-
23572364
// Attach assumption attributes to the declaration. If this is a call
23582365
// site, attach assumptions from the caller to the call as well.
23592366
AddAttributesFromOMPAssumes(FuncAttrs, TargetDecl);

clang/lib/CodeGen/CGExpr.cpp

+4-1
Original file line numberDiff line numberDiff line change
@@ -5691,7 +5691,10 @@ CGCallee CodeGenFunction::EmitCallee(const Expr *E) {
56915691
// Resolve direct calls.
56925692
} else if (auto DRE = dyn_cast<DeclRefExpr>(E)) {
56935693
if (auto FD = dyn_cast<FunctionDecl>(DRE->getDecl())) {
5694-
return EmitDirectCallee(*this, FD);
5694+
auto CalleeDecl = FD->hasAttr<OpenCLKernelAttr>()
5695+
? GlobalDecl(FD, KernelReferenceKind::Stub)
5696+
: FD;
5697+
return EmitDirectCallee(*this, CalleeDecl);
56955698
}
56965699
} else if (auto ME = dyn_cast<MemberExpr>(E)) {
56975700
if (auto FD = dyn_cast<FunctionDecl>(ME->getMemberDecl())) {

clang/lib/CodeGen/CGOpenCLRuntime.cpp

+4-1
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,10 @@ static const BlockExpr *getBlockExpr(const Expr *E) {
127127
void CGOpenCLRuntime::recordBlockInfo(const BlockExpr *E,
128128
llvm::Function *InvokeF,
129129
llvm::Value *Block, llvm::Type *BlockTy) {
130-
assert(!EnqueuedBlockMap.contains(E) && "Block expression emitted twice");
130+
131+
// FIXME: Since OpenCL Kernels are emitted twice (kernel version and stub
132+
// version), their constituent BlockExprs will also be emitted twice.
133+
// assert(!EnqueuedBlockMap.contains(E) && "Block expression emitted twice");
131134
assert(isa<llvm::Function>(InvokeF) && "Invalid invoke function");
132135
assert(Block->getType()->isPointerTy() && "Invalid block literal type");
133136
EnqueuedBlockMap[E].InvokeFunc = InvokeF;

clang/lib/CodeGen/CodeGenModule.cpp

+7
Original file line numberDiff line numberDiff line change
@@ -1887,6 +1887,9 @@ static std::string getMangledNameImpl(CodeGenModule &CGM, GlobalDecl GD,
18871887
} else if (FD && FD->hasAttr<CUDAGlobalAttr>() &&
18881888
GD.getKernelReferenceKind() == KernelReferenceKind::Stub) {
18891889
Out << "__device_stub__" << II->getName();
1890+
} else if (FD && FD->hasAttr<OpenCLKernelAttr>() &&
1891+
GD.getKernelReferenceKind() == KernelReferenceKind::Stub) {
1892+
Out << "__clang_ocl_kern_imp_" << II->getName();
18901893
} else {
18911894
Out << II->getName();
18921895
}
@@ -3841,6 +3844,10 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) {
38413844

38423845
// Ignore declarations, they will be emitted on their first use.
38433846
if (const auto *FD = dyn_cast<FunctionDecl>(Global)) {
3847+
3848+
if (FD->hasAttr<OpenCLKernelAttr>() && FD->doesThisDeclarationHaveABody())
3849+
addDeferredDeclToEmit(GlobalDecl(FD, KernelReferenceKind::Stub));
3850+
38443851
// Update deferred annotations with the latest declaration if the function
38453852
// was already used or defined.
38463853
if (FD->hasAttr<AnnotateAttr>()) {
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -emit-llvm -o - %s | FileCheck %s
2+
3+
// CHECK: define dso_local amdgpu_kernel void @callee_kern({{.*}})
4+
__attribute__((noinline)) kernel void callee_kern(global int *A){
5+
*A = 1;
6+
}
7+
8+
__attribute__((noinline)) kernel void ext_callee_kern(global int *A);
9+
10+
// CHECK: define dso_local void @callee_func({{.*}})
11+
__attribute__((noinline)) void callee_func(global int *A){
12+
*A = 2;
13+
}
14+
15+
// CHECK: define dso_local amdgpu_kernel void @caller_kern({{.*}})
16+
kernel void caller_kern(global int* A){
17+
callee_kern(A);
18+
// CHECK: tail call void @__clang_ocl_kern_imp_callee_kern({{.*}})
19+
ext_callee_kern(A);
20+
// CHECK: tail call void @__clang_ocl_kern_imp_ext_callee_kern({{.*}})
21+
callee_func(A);
22+
// CHECK: tail call void @callee_func({{.*}})
23+
24+
}
25+
26+
// CHECK: define dso_local void @__clang_ocl_kern_imp_callee_kern({{.*}})
27+
28+
// CHECK: declare void @__clang_ocl_kern_imp_ext_callee_kern({{.*}})
29+
30+
// CHECK: define dso_local void @caller_func({{.*}})
31+
void caller_func(global int* A){
32+
callee_kern(A);
33+
// CHECK: tail call void @__clang_ocl_kern_imp_callee_kern({{.*}}) #7
34+
ext_callee_kern(A);
35+
// CHECK: tail call void @__clang_ocl_kern_imp_ext_callee_kern({{.*}}) #8
36+
callee_func(A);
37+
// CHECK: tail call void @callee_func({{.*}})
38+
}
39+
40+
// CHECK: define dso_local void @__clang_ocl_kern_imp_caller_kern({{.*}})
41+
// CHECK: tail call void @__clang_ocl_kern_imp_callee_kern({{.*}})
42+
// CHECK: tail call void @__clang_ocl_kern_imp_ext_callee_kern({{.*}})
43+
// CHECK: tail call void @callee_func({{.*}})

clang/test/CodeGenOpenCL/reflect.cl

+1-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ bool device_function() {
1313
}
1414

1515
// CHECK-LABEL: define dso_local spir_kernel void @kernel_function(
16-
// CHECK-SAME: ptr addrspace(1) noundef align 4 [[I:%.*]]) #[[ATTR2:[0-9]+]] !kernel_arg_addr_space !4 !kernel_arg_access_qual !5 !kernel_arg_type !6 !kernel_arg_base_type !6 !kernel_arg_type_qual !7 {
16+
// CHECK-SAME: ptr addrspace(1) noundef align 4 [[I:%.*]]) #[[ATTR2:[0-9]+]] !kernel_arg_addr_space !5 !kernel_arg_access_qual !6 !kernel_arg_type !7 !kernel_arg_base_type !7 !kernel_arg_type_qual !8 {
1717
// CHECK-NEXT: entry:
1818
// CHECK-NEXT: [[I_ADDR:%.*]] = alloca ptr addrspace(1), align 4
1919
// CHECK-NEXT: store ptr addrspace(1) [[I]], ptr [[I_ADDR]], align 4

clang/test/CodeGenOpenCL/spir-calling-conv.cl

+2-2
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@ kernel void foo(global int *A)
1111
// CHECK: %{{[a-z0-9_]+}} = tail call spir_func i32 @get_dummy_id(i32 noundef 0)
1212
A[id] = id;
1313
bar(A);
14-
// CHECK: tail call spir_kernel void @bar(ptr addrspace(1) noundef align 4 %A)
14+
// CHECK: tail call void @__clang_ocl_kern_imp_bar(ptr addrspace(1) noundef align 4 %A)
1515
}
1616

1717
// CHECK: declare spir_func i32 @get_dummy_id(i32 noundef)
18-
// CHECK: declare spir_kernel void @bar(ptr addrspace(1) noundef align 4)
18+
// CHECK: declare void @__clang_ocl_kern_imp_bar(ptr addrspace(1) noundef align 4)

clang/test/CodeGenOpenCL/visibility.cl

+40-13
Original file line numberDiff line numberDiff line change
@@ -85,31 +85,42 @@ __attribute__((visibility("default"))) extern void ext_func_default();
8585
void use() {
8686
glob = ext + ext_hidden + ext_protected + ext_default;
8787
ext_kern();
88+
// FVIS-DEFAULT: tail call void @__clang_ocl_kern_imp_ext_kern()
89+
// FVIS-PROTECTED: tail call void @__clang_ocl_kern_imp_ext_kern()
90+
// FVIS-HIDDEN: tail call void @__clang_ocl_kern_imp_ext_kern()
8891
ext_kern_hidden();
92+
// FVIS-DEFAULT: tail call void @__clang_ocl_kern_imp_ext_kern_hidden()
93+
// FVIS-PROTECTED: tail call void @__clang_ocl_kern_imp_ext_kern_hidden()
94+
// FVIS-HIDDEN: tail call void @__clang_ocl_kern_imp_ext_kern_hidden()
8995
ext_kern_protected();
96+
// FVIS-DEFAULT: tail call void @__clang_ocl_kern_imp_ext_kern_protected()
97+
// FVIS-PROTECTED: tail call void @__clang_ocl_kern_imp_ext_kern_protected()
98+
// FVIS-HIDDEN: tail call void @__clang_ocl_kern_imp_ext_kern_protected()
9099
ext_kern_default();
100+
// FVIS-DEFAULT: tail call void @__clang_ocl_kern_imp_ext_kern_default()
101+
// FVIS-PROTECTED: tail call void @__clang_ocl_kern_imp_ext_kern_default()
102+
// FVIS-HIDDEN: tail call void @__clang_ocl_kern_imp_ext_kern_default()
91103
ext_func();
92104
ext_func_hidden();
93105
ext_func_protected();
94106
ext_func_default();
95107
}
96108

97-
// FVIS-DEFAULT: declare amdgpu_kernel void @ext_kern()
98-
// FVIS-PROTECTED: declare protected amdgpu_kernel void @ext_kern()
99-
// FVIS-HIDDEN: declare protected amdgpu_kernel void @ext_kern()
109+
// FVIS-DEFAULT: declare void @__clang_ocl_kern_imp_ext_kern()
110+
// FVIS-PROTECTED: declare protected void @__clang_ocl_kern_imp_ext_kern()
111+
// FVIS-HIDDEN: declare protected void @__clang_ocl_kern_imp_ext_kern()
100112

101-
// FVIS-DEFAULT: declare protected amdgpu_kernel void @ext_kern_hidden()
102-
// FVIS-PROTECTED: declare protected amdgpu_kernel void @ext_kern_hidden()
103-
// FVIS-HIDDEN: declare protected amdgpu_kernel void @ext_kern_hidden()
113+
// FVIS-DEFAULT: declare protected void @__clang_ocl_kern_imp_ext_kern_hidden()
114+
// FVIS-PROTECTED: declare protected void @__clang_ocl_kern_imp_ext_kern_hidden()
115+
// FVIS-HIDDEN: declare protected void @__clang_ocl_kern_imp_ext_kern_hidden()
104116

105-
// FVIS-DEFAULT: declare protected amdgpu_kernel void @ext_kern_protected()
106-
// FVIS-PROTECTED: declare protected amdgpu_kernel void @ext_kern_protected()
107-
// FVIS-HIDDEN: declare protected amdgpu_kernel void @ext_kern_protected()
108-
109-
// FVIS-DEFAULT: declare amdgpu_kernel void @ext_kern_default()
110-
// FVIS-PROTECTED: declare amdgpu_kernel void @ext_kern_default()
111-
// FVIS-HIDDEN: declare amdgpu_kernel void @ext_kern_default()
117+
// FVIS-DEFAULT: declare protected void @__clang_ocl_kern_imp_ext_kern_protected()
118+
// FVIS-PROTECTED: declare protected void @__clang_ocl_kern_imp_ext_kern_protected()
119+
// FVIS-HIDDEN: declare protected void @__clang_ocl_kern_imp_ext_kern_protected()
112120

121+
// FVIS-DEFAULT: declare void @__clang_ocl_kern_imp_ext_kern_default()
122+
// FVIS-PROTECTED: declare void @__clang_ocl_kern_imp_ext_kern_default()
123+
// FVIS-HIDDEN: declare void @__clang_ocl_kern_imp_ext_kern_default()
113124

114125
// FVIS-DEFAULT: declare void @ext_func()
115126
// FVIS-PROTECTED: declare protected void @ext_func()
@@ -126,3 +137,19 @@ void use() {
126137
// FVIS-DEFAULT: declare void @ext_func_default()
127138
// FVIS-PROTECTED: declare void @ext_func_default()
128139
// FVIS-HIDDEN: declare void @ext_func_default()
140+
141+
// FVIS-DEFAULT: define{{.*}} void @__clang_ocl_kern_imp_kern()
142+
// FVIS-PROTECTED: define protected void @__clang_ocl_kern_imp_kern()
143+
// FVIS-HIDDEN: define protected void @__clang_ocl_kern_imp_kern()
144+
145+
// FVIS-DEFAULT: define protected void @__clang_ocl_kern_imp_kern_hidden()
146+
// FVIS-PROTECTED: define protected void @__clang_ocl_kern_imp_kern_hidden()
147+
// FVIS-HIDDEN: define protected void @__clang_ocl_kern_imp_kern_hidden()
148+
149+
// FVIS-DEFAULT: define protected void @__clang_ocl_kern_imp_kern_protected()
150+
// FVIS-PROTECTED: define protected void @__clang_ocl_kern_imp_kern_protected()
151+
// FVIS-HIDDEN: define protected void @__clang_ocl_kern_imp_kern_protected()
152+
153+
// FVIS-DEFAULT: define{{.*}} void @__clang_ocl_kern_imp_kern_default()
154+
// FVIS-PROTECTED: define{{.*}} void @__clang_ocl_kern_imp_kern_default()
155+
// FVIS-HIDDEN: define{{.*}} void @__clang_ocl_kern_imp_kern_default()

0 commit comments

Comments
 (0)