Skip to content

Commit 1ca3935

Browse files
PawelJurek authored and gfxbot committed
TransformBlocks fixes.
1. Handle the case where the block descriptor structure is not used in the invoked function.
2. SPIR-V OpEnqueueKernel generates LocalSize arguments as pointers; load the value before storing it.
3. Minor refactoring: add an enum for the DeviceEnqueue functions.

Change-Id: I29c169fb92ef3723b9e098e2f4b3cc4ddaa4c855
1 parent bd4f7d0 commit 1ca3935

File tree

1 file changed

+98
-55
lines changed

1 file changed

+98
-55
lines changed

IGC/Compiler/Optimizer/OpenCLPasses/DeviceEnqueueFuncs/TransformBlocks.cpp

Lines changed: 98 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
6363
#include "common/LLVMWarningsPop.hpp"
6464

6565
#include <algorithm>
66+
#include <map>
6667

6768
using namespace llvm;
6869
using namespace IGC;
@@ -108,22 +109,42 @@ namespace //Anonymous
108109
SAMPLER
109110
};
110111

111-
const auto FNAME_ENQUEUE_KERNEL = "_Z14enqueue_kernel";
112-
const auto FNAME_ENQUEUE_KERNEL_BASIC = "__enqueue_kernel_basic";
113-
const auto FNAME_ENQUEUE_KERNEL_VAARGS = "__enqueue_kernel_vaargs";
114-
const auto FNAME_ENQUEUE_KERNEL_EVENTS_VAARGS = "__enqueue_kernel_events_vaargs";
115-
const auto FNAME_WORK_GROUP_SIZE_IMPL = "__get_kernel_work_group_size_impl";
116-
const auto FNAME_PREFERRED_WORK_GROUP_SIZE_MULTIPLE = "_Z45get_kernel_preferred_work_group_size_multiple";
117-
const auto FNAME_PREFERRED_WORK_GROUP_MULTIPLE_IMPL = "__get_kernel_preferred_work_group_multiple_impl";
118-
const auto FNAME_MAX_SUB_GROUP_SIZE_FOR_NDRANGE = "_Z41get_kernel_max_sub_group_size_for_ndrange";
119-
const auto FNAME_SUB_GROUP_COUNT_FOR_NDRANGE = "_Z38get_kernel_sub_group_count_for_ndrange";
120-
121-
const auto FNAME_SPIRV_ENQUEUE_KERNEL = "__builtin_spirv_OpEnqueueKernel";
122-
const auto FNAME_SPIRV_SUB_GROUP_COUNT_FOR_NDRANGE = "__builtin_spirv_OpGetKernelNDrangeSubGroupCount";
123-
const auto FNAME_SPIRV_MAX_SUB_GROUP_SIZE_FOR_NDRANGE = "__builtin_spirv_OpGetKernelNDrangeMaxSubGroupSize";
124-
const auto FNAME_SPIRV_PREFERRED_WORK_GROUP_SIZE_MULTIPLE = "__builtin_spirv_OpGetKernelPreferredWorkGroupSizeMultiple";
125-
const auto FNAME_SPIRV_LOCAL_SIZE_FOR_SUB_GROUP_COUNT = "__builtin_spirv_OpGetKernelLocalSizeForSubgroupCount";
126-
const auto FNAME_SPIRV_MAX_NUM_SUB_GROUPS = "__builtin_spirv_OpGetKernelMaxNumSubgroups";
112+
enum class DeviceEnqueueFunction { // Keys for the device-enqueue functions handled here; their name strings are stored in DeviceEnqueueFunctionNames.
113+
ENQUEUE_KERNEL,
114+
ENQUEUE_KERNEL_BASIC,
115+
ENQUEUE_KERNEL_VAARGS,
116+
ENQUEUE_KERNEL_EVENTS_VAARGS,
117+
WORK_GROUP_SIZE_IMPL,
118+
PREFERRED_WORK_GROUP_SIZE_MULTIPLE,
119+
PREFERRED_WORK_GROUP_MULTIPLE_IMPL,
120+
MAX_SUB_GROUP_SIZE_FOR_NDRANGE,
121+
SUB_GROUP_COUNT_FOR_NDRANGE,
122+
SPIRV_ENQUEUE_KERNEL,
123+
SPIRV_SUB_GROUP_COUNT_FOR_NDRANGE,
124+
SPIRV_MAX_SUB_GROUP_SIZE_FOR_NDRANGE,
125+
SPIRV_PREFERRED_WORK_GROUP_SIZE_MULTIPLE,
126+
SPIRV_LOCAL_SIZE_FOR_SUB_GROUP_COUNT,
127+
SPIRV_MAX_NUM_SUB_GROUPS,
128+
NUM_FUNCTIONS_WITH_BLOCK_ARGS // Sentinel, not a real function: equals the number of enumerators above. registerCallHandler static_asserts that its handler table has exactly this many entries, so keep it last.
129+
};
130+
131+
const std::map<DeviceEnqueueFunction, const char*> DeviceEnqueueFunctionNames = { // Name string for each DeviceEnqueueFunction; callers compare these as prefixes via StringRef::startswith(). NUM_FUNCTIONS_WITH_BLOCK_ARGS has no entry, so .at() on it would throw std::out_of_range.
132+
{ DeviceEnqueueFunction::ENQUEUE_KERNEL, "_Z14enqueue_kernel" },
133+
{ DeviceEnqueueFunction::ENQUEUE_KERNEL_BASIC, "__enqueue_kernel_basic" },
134+
{ DeviceEnqueueFunction::ENQUEUE_KERNEL_VAARGS, "__enqueue_kernel_vaargs" },
135+
{ DeviceEnqueueFunction::ENQUEUE_KERNEL_EVENTS_VAARGS, "__enqueue_kernel_events_vaargs" },
136+
{ DeviceEnqueueFunction::WORK_GROUP_SIZE_IMPL, "__get_kernel_work_group_size_impl" },
137+
{ DeviceEnqueueFunction::PREFERRED_WORK_GROUP_SIZE_MULTIPLE, "_Z45get_kernel_preferred_work_group_size_multiple" },
138+
{ DeviceEnqueueFunction::PREFERRED_WORK_GROUP_MULTIPLE_IMPL, "__get_kernel_preferred_work_group_multiple_impl" },
139+
{ DeviceEnqueueFunction::MAX_SUB_GROUP_SIZE_FOR_NDRANGE, "_Z41get_kernel_max_sub_group_size_for_ndrange" },
140+
{ DeviceEnqueueFunction::SUB_GROUP_COUNT_FOR_NDRANGE, "_Z38get_kernel_sub_group_count_for_ndrange" },
141+
{ DeviceEnqueueFunction::SPIRV_ENQUEUE_KERNEL, "__builtin_spirv_OpEnqueueKernel" },
142+
{ DeviceEnqueueFunction::SPIRV_SUB_GROUP_COUNT_FOR_NDRANGE, "__builtin_spirv_OpGetKernelNDrangeSubGroupCount" },
143+
{ DeviceEnqueueFunction::SPIRV_MAX_SUB_GROUP_SIZE_FOR_NDRANGE, "__builtin_spirv_OpGetKernelNDrangeMaxSubGroupSize" },
144+
{ DeviceEnqueueFunction::SPIRV_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, "__builtin_spirv_OpGetKernelPreferredWorkGroupSizeMultiple" },
145+
{ DeviceEnqueueFunction::SPIRV_LOCAL_SIZE_FOR_SUB_GROUP_COUNT, "__builtin_spirv_OpGetKernelLocalSizeForSubgroupCount" },
146+
{ DeviceEnqueueFunction::SPIRV_MAX_NUM_SUB_GROUPS, "__builtin_spirv_OpGetKernelMaxNumSubgroups" }
147+
};
127148

128149
/////////////////////////////////////////////////////////////////////////////////////////////////
129150
/// helper class to build and query llvm metadata of kernel/dispatcher
@@ -727,10 +748,10 @@ namespace //Anonymous
727748
return invokeFunc;
728749
}
729750

730-
std::vector<llvm::Value*> getLocaSizes() const { return _local_sizes; }
751+
std::vector<llvm::Value*> getLocalSizes() const { return _local_sizes; } // Returns _local_sizes by value, i.e. a fresh copy of the vector on every call.
731752

732753
bool hasEvents() const { return getNumWaitEvents() != nullptr; } // True when getNumWaitEvents() yields a non-null value.
733-
bool hasLocals() const { return getLocaSizes().size() > 0; }
754+
bool hasLocals() const { return getLocalSizes().size() > 0; } // True when at least one local size is stored. NOTE(review): this goes through getLocalSizes(), which copies the vector just to read its size.
734755
};
735756

736757
//////////////////////////////////////////////////////////////////////////
@@ -885,6 +906,11 @@ namespace //Anonymous
885906
for (unsigned i = localSizesStartArgNum; i < argsNum; i++)
886907
{
887908
auto arg = _call.getArgOperand(i);
909+
if (arg->getType()->isPointerTy()) {
910+
IRBuilder<> builder(&_call);
911+
arg = builder.CreateLoad(arg);
912+
}
913+
888914
if (!arg->getType()->isIntegerTy(64) && !arg->getType()->isIntegerTy(32))
889915
report_fatal_error("OpEnqueueKernel signature does not match");
890916

@@ -1354,7 +1380,7 @@ namespace //Anonymous
13541380

13551381
Function* getInvokeFunctionFromKernelWrapper(const Function* invokeFunc, DataContext& dataContext) {
13561382
assert(isInvokeFunctionKernelWrapper(invokeFunc, dataContext));
1357-
const CallInst* inst = dyn_cast<CallInst>(*(invokeFunc->arg_begin())->user_begin());
1383+
const CallInst* inst = dyn_cast<CallInst>(&*(invokeFunc->begin()->begin()));
13581384
if (inst) {
13591385
return inst->getCalledFunction();
13601386
} else {
@@ -1363,11 +1389,21 @@ namespace //Anonymous
13631389
}
13641390

13651391
bool isEnqueueKernelFunction(StringRef funcName) { // True when funcName begins with one of the five enqueue-kernel names (ENQUEUE_KERNEL, SPIRV_ENQUEUE_KERNEL, ENQUEUE_KERNEL_BASIC, ENQUEUE_KERNEL_VAARGS, ENQUEUE_KERNEL_EVENTS_VAARGS).
1366-
return funcName.startswith(FNAME_ENQUEUE_KERNEL) ||
1367-
funcName.startswith(FNAME_SPIRV_ENQUEUE_KERNEL) ||
1368-
funcName.startswith(FNAME_ENQUEUE_KERNEL_BASIC) ||
1369-
funcName.startswith(FNAME_ENQUEUE_KERNEL_VAARGS) ||
1370-
funcName.startswith(FNAME_ENQUEUE_KERNEL_EVENTS_VAARGS);
1392+
1393+
return funcName.startswith(DeviceEnqueueFunctionNames.at(DeviceEnqueueFunction::ENQUEUE_KERNEL)) ||
1394+
funcName.startswith(DeviceEnqueueFunctionNames.at(DeviceEnqueueFunction::SPIRV_ENQUEUE_KERNEL)) ||
1395+
funcName.startswith(DeviceEnqueueFunctionNames.at(DeviceEnqueueFunction::ENQUEUE_KERNEL_BASIC)) ||
1396+
funcName.startswith(DeviceEnqueueFunctionNames.at(DeviceEnqueueFunction::ENQUEUE_KERNEL_VAARGS)) ||
1397+
funcName.startswith(DeviceEnqueueFunctionNames.at(DeviceEnqueueFunction::ENQUEUE_KERNEL_EVENTS_VAARGS));
1398+
}
1399+
1400+
bool isDeviceEnqueueFunction(StringRef funcName) { // True when funcName begins with any name stored in DeviceEnqueueFunctionNames; false otherwise.
1401+
for (auto el : DeviceEnqueueFunctionNames) { // el is a by-value copy of each {enum key, name} entry
1402+
if (funcName.startswith(el.second)) { // prefix match, not full-string equality
1403+
return true;
1404+
}
1405+
}
1406+
return false;
13711407
}
13721408

13731409

@@ -1423,7 +1459,8 @@ namespace //Anonymous
14231459
{
14241460
bool changed = false;
14251461
for (auto &func : M.functions()) {
1426-
if (!isEnqueueKernelFunction(func.getName())) continue;
1462+
if (!isDeviceEnqueueFunction(func.getName())) continue;
1463+
14271464
for (auto user : func.users()) {
14281465
auto callInst = dyn_cast<CallInst>(user);
14291466
if (!callInst) continue;
@@ -1737,70 +1774,72 @@ namespace //Anonymous
17371774
CallHandler* DataContext::registerCallHandler(llvm::CallInst& call)
17381775
{
17391776
// device enqueue call handlers factories registry
1740-
const std::pair<StringRef, std::function<CallHandler*(llvm::CallInst&, DataContext& dm)>> handlers[] =
1777+
const std::pair<DeviceEnqueueFunction, std::function<CallHandler*(llvm::CallInst&, DataContext& dm)>> handlers[] =
17411778
{
17421779
{
1743-
FNAME_MAX_SUB_GROUP_SIZE_FOR_NDRANGE,
1780+
DeviceEnqueueFunction::MAX_SUB_GROUP_SIZE_FOR_NDRANGE,
17441781
[](llvm::CallInst& call, DataContext& dm){ return new KernelSubGroupSizeCall(new ObjCNDRangeAndBlockCallArgs(call, dm)); }
17451782
},
17461783
{
1747-
FNAME_PREFERRED_WORK_GROUP_SIZE_MULTIPLE,
1784+
DeviceEnqueueFunction::PREFERRED_WORK_GROUP_SIZE_MULTIPLE,
17481785
[](llvm::CallInst& call, DataContext& dm){ return new KernelSubGroupSizeCall(new ObjCBlockCallArgs(call, dm)); }
17491786
},
17501787
{
1751-
FNAME_PREFERRED_WORK_GROUP_MULTIPLE_IMPL,
1788+
DeviceEnqueueFunction::PREFERRED_WORK_GROUP_MULTIPLE_IMPL,
17521789
[](llvm::CallInst& call, DataContext& dm) { return new KernelSubGroupSizeCall(new ObjCBlockCallArgs(call, dm)); }
17531790
},
17541791
{
1755-
FNAME_WORK_GROUP_SIZE_IMPL,
1792+
DeviceEnqueueFunction::WORK_GROUP_SIZE_IMPL,
17561793
[](llvm::CallInst& call, DataContext& dm) { return new KernelMaxWorkGroupSizeCall(new ObjCBlockCallArgs(call, dm)); }
17571794
},
17581795
{
1759-
FNAME_SUB_GROUP_COUNT_FOR_NDRANGE,
1796+
DeviceEnqueueFunction::SUB_GROUP_COUNT_FOR_NDRANGE,
17601797
[](llvm::CallInst& call, DataContext& dm){ return new KernelSubGroupCountForNDRangeCall(new ObjCNDRangeAndBlockCallArgs(call, dm)); }
17611798
},
17621799
{
1763-
FNAME_ENQUEUE_KERNEL,
1800+
DeviceEnqueueFunction::ENQUEUE_KERNEL,
17641801
[](llvm::CallInst& call, DataContext& dm){ return new EnqueueKernelCall(new ObjCEnqueueKernelArgs(call, dm)); }
17651802
},
17661803
{
1767-
FNAME_ENQUEUE_KERNEL_BASIC,
1804+
DeviceEnqueueFunction::ENQUEUE_KERNEL_BASIC,
17681805
[](llvm::CallInst& call, DataContext& dm){ return new EnqueueKernelCall(new ObjCEnqueueKernelArgs(call, dm)); }
17691806
},
17701807
{
1771-
FNAME_ENQUEUE_KERNEL_VAARGS,
1808+
DeviceEnqueueFunction::ENQUEUE_KERNEL_VAARGS,
17721809
[](llvm::CallInst& call, DataContext& dm){ return new EnqueueKernelCall(new ObjCEnqueueKernelArgs(call, dm)); }
17731810
},
17741811
{
1775-
FNAME_ENQUEUE_KERNEL_EVENTS_VAARGS,
1812+
DeviceEnqueueFunction::ENQUEUE_KERNEL_EVENTS_VAARGS,
17761813
[](llvm::CallInst& call, DataContext& dm){ return new EnqueueKernelCall(new ObjCEnqueueKernelArgs(call, dm)); }
17771814
},
17781815
{
1779-
FNAME_SPIRV_MAX_SUB_GROUP_SIZE_FOR_NDRANGE,
1816+
DeviceEnqueueFunction::SPIRV_MAX_SUB_GROUP_SIZE_FOR_NDRANGE,
17801817
[](llvm::CallInst& call, DataContext& dm){ return new KernelSubGroupSizeCall(new SPIRVNDRangeAndInvokeCallArgs(call, dm)); }
17811818
},
17821819
{
1783-
FNAME_SPIRV_PREFERRED_WORK_GROUP_SIZE_MULTIPLE,
1820+
DeviceEnqueueFunction::SPIRV_PREFERRED_WORK_GROUP_SIZE_MULTIPLE,
17841821
[](llvm::CallInst& call, DataContext& dm){ return new KernelSubGroupSizeCall(new SPIRVInvokeCallArgs(call, dm)); }
17851822
},
17861823
{
1787-
FNAME_SPIRV_LOCAL_SIZE_FOR_SUB_GROUP_COUNT,
1824+
DeviceEnqueueFunction::SPIRV_LOCAL_SIZE_FOR_SUB_GROUP_COUNT,
17881825
[](llvm::CallInst& call, DataContext& dm){ return new KernelLocalSizeForSubgroupCount(new SPIRVSubgroupCountAndInvokeCallArgs(call, dm)); }
17891826
},
17901827
{
1791-
FNAME_SPIRV_MAX_NUM_SUB_GROUPS,
1828+
DeviceEnqueueFunction::SPIRV_MAX_NUM_SUB_GROUPS,
17921829
[](llvm::CallInst& call, DataContext& dm){ return new KernelMaxNumSubgroups(new SPIRVInvokeCallArgs(call, dm)); }
17931830
},
17941831
{
1795-
FNAME_SPIRV_SUB_GROUP_COUNT_FOR_NDRANGE,
1832+
DeviceEnqueueFunction::SPIRV_SUB_GROUP_COUNT_FOR_NDRANGE,
17961833
[](llvm::CallInst& call, DataContext& dm){ return new KernelSubGroupCountForNDRangeCall(new SPIRVNDRangeAndInvokeCallArgs(call, dm)); }
17971834
},
17981835
{
1799-
FNAME_SPIRV_ENQUEUE_KERNEL,
1836+
DeviceEnqueueFunction::SPIRV_ENQUEUE_KERNEL,
18001837
[](llvm::CallInst& call, DataContext& dm){ return new EnqueueKernelCall(new SPIRVOpEnqueueKernelCallArgs(call, dm)); }
18011838
},
18021839
};
18031840

1841+
static_assert(sizeof(handlers) == sizeof(decltype(handlers[0])) * (size_t)DeviceEnqueueFunction::NUM_FUNCTIONS_WITH_BLOCK_ARGS, "Not all enqueue functions have handlers!");
1842+
18041843
auto calledFunction = call.getCalledFunction();
18051844
//fail indirect calls
18061845
if (calledFunction == nullptr)
@@ -1814,7 +1853,7 @@ namespace //Anonymous
18141853
for (auto& handler_pair : handlers)
18151854
{
18161855
// if called function name matches one of known
1817-
if (calledFunctionName.startswith(handler_pair.first))
1856+
if (calledFunctionName.startswith(DeviceEnqueueFunctionNames.at(handler_pair.first)))
18181857
{
18191858
// use appropriate factory to construct CallHandler
18201859
auto callHandler = handler_pair.second(call, *this);
@@ -2178,16 +2217,20 @@ namespace //Anonymous
21782217
llvm::CallInst* BlockInvoke::EmitBlockInvokeCall(IGCLLVM::IRBuilder<>& builder, llvm::ArrayRef<llvm::Argument*> captures, llvm::ArrayRef<llvm::Argument*> tailingArgs) const
21792218
{
21802219
//IRBuilder: allocate structure
2181-
auto block_descriptor_val = builder.CreateAlloca(_captureStructType, nullptr, ".block_struct");
2182-
auto dl = getFunction()->getParent()->getDataLayout();
2183-
auto blockStructAlign = getPrefStructAlignment(_captureStructType, &dl);
2184-
block_descriptor_val->setAlignment(blockStructAlign);
2185-
//IRBuilder: store arguments to structure
2186-
StoreInstBuilder storeBuilder(builder);
2187-
for (unsigned argIdx = 0; argIdx < getCaptureIndicies().size(); ++argIdx)
2188-
{
2189-
auto srcArg = captures[argIdx];
2190-
storeBuilder.Store(block_descriptor_val, srcArg, getCaptureIndicies()[argIdx]);
2220+
// If _captureStructType was never tracked, the block descriptor is presumably unused by the invoked function, so fall back to a null i8* descriptor value.
2221+
Value* block_descriptor_val = ConstantPointerNull::get(builder.getInt8PtrTy());
2222+
if (_captureStructType) {
2223+
block_descriptor_val = builder.CreateAlloca(_captureStructType, nullptr, ".block_struct");
2224+
auto dl = getFunction()->getParent()->getDataLayout();
2225+
auto blockStructAlign = getPrefStructAlignment(_captureStructType, &dl);
2226+
cast<AllocaInst>(block_descriptor_val)->setAlignment(blockStructAlign);
2227+
//IRBuilder: store arguments to structure
2228+
StoreInstBuilder storeBuilder(builder);
2229+
for (unsigned argIdx = 0; argIdx < getCaptureIndicies().size(); ++argIdx)
2230+
{
2231+
auto srcArg = captures[argIdx];
2232+
storeBuilder.Store(block_descriptor_val, srcArg, getCaptureIndicies()[argIdx]);
2233+
}
21912234
}
21922235

21932236
//IRBuilder: call block_invoke
@@ -2533,14 +2576,14 @@ namespace //Anonymous
25332576
if (_deviceExecCall->hasLocals())
25342577
{
25352578
auto int32ptrty = Type::getInt32PtrTy(context);
2536-
auto localsBuf = AllocateBuffer(int32ty, _deviceExecCall->getLocaSizes().size(), "local_size_buf");
2579+
auto localsBuf = AllocateBuffer(int32ty, _deviceExecCall->getLocalSizes().size(), "local_size_buf");
25372580
uint64_t localSizeOffset = 0;
2538-
for (auto localSizeValue : _deviceExecCall->getLocaSizes())
2581+
for (auto localSizeValue : _deviceExecCall->getLocalSizes())
25392582
{
25402583
auto storedSize = storeBuilder.Store(localsBuf, localSizeValue, localSizeOffset);
25412584
localSizeOffset += sizeInBlocks(storedSize, int32ty);
25422585
}
2543-
assert(_deviceExecCall->getLocaSizes().size() == localSizeOffset);
2586+
assert(_deviceExecCall->getLocalSizes().size() == localSizeOffset);
25442587

25452588
localSizesBuf = builder.CreatePointerCast(localsBuf, int32ptrty);
25462589
localSizesNumValue = llvm::ConstantInt::get(int32ty, localSizeOffset);

0 commit comments

Comments
 (0)