Skip to content

[SYCL][PI][L0] Fixes to make sure kernels and programs cannot be destroyed before they have finished execution. #2710

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Nov 3, 2020
81 changes: 63 additions & 18 deletions sycl/plugins/level_zero/pi_level_zero.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -572,8 +572,9 @@ bool _pi_queue::isBatchingAllowed() {
}

pi_result _pi_queue::batchCommandList(ze_command_list_handle_t ZeCommandList,
ze_fence_handle_t ZeFence) {
if (this->isBatchingAllowed()) {
ze_fence_handle_t ZeFence,
bool OKToBatchKernel) {
if (OKToBatchKernel && this->isBatchingAllowed()) {
assert(this->ZeOpenCommandList == nullptr ||
this->ZeOpenCommandList == ZeCommandList);

Expand Down Expand Up @@ -2759,12 +2760,16 @@ pi_result piextProgramCreateWithNativeHandle(pi_native_handle NativeHandle,
}

_pi_program::~_pi_program() {
  // According to the Level Zero Specification, all kernels and build logs
  // must be destroyed before the Module can be destroyed. So, be sure
  // to destroy the build log first, and destroy the module exactly once,
  // last.
  if (ZeBuildLog) {
    ZE_CALL_NOCHECK(zeModuleBuildLogDestroy(ZeBuildLog));
  }

  if (ZeModule) {
    ZE_CALL_NOCHECK(zeModuleDestroy(ZeModule));
  }
}

_pi_program::LinkedReleaser::~LinkedReleaser() {
Expand Down Expand Up @@ -2902,6 +2907,10 @@ pi_result piKernelCreate(pi_program Program, const char *KernelName,
} catch (...) {
return PI_ERROR_UNKNOWN;
}

// Update the refcount of the program to show its use by this kernel.
piProgramRetain(Program);

return PI_SUCCESS;
}

Expand Down Expand Up @@ -3091,16 +3100,24 @@ pi_result piKernelRetain(pi_kernel Kernel) {

assert(Kernel);
++(Kernel->RefCount);
// When retaining a kernel, you are also retaining the program it is part of.
piProgramRetain(Kernel->Program);
return PI_SUCCESS;
}

// Decrement the kernel's reference count, destroying it when the count
// reaches zero, and release the reference this kernel holds on its
// program (pairing the piProgramRetain done at kernel creation/retain).
pi_result piKernelRelease(pi_kernel Kernel) {

  assert(Kernel);
  // Save the program handle first: once the refcount below hits zero the
  // Kernel object is deleted and must not be dereferenced afterwards.
  auto KernelProgram = Kernel->Program;

  if (--(Kernel->RefCount) == 0) {
    zeKernelDestroy(Kernel->ZeKernel);
    delete Kernel;
  }

  // do a release on the program this kernel was part of
  piProgramRelease(KernelProgram);

  return PI_SUCCESS;
}

Expand Down Expand Up @@ -3186,11 +3203,19 @@ piEnqueueKernelLaunch(pi_queue Queue, pi_kernel Kernel, pi_uint32 WorkDim,
// Lock automatically releases when this goes out of scope.
std::lock_guard<std::mutex> lock(Queue->PiQueueMutex);

// It is only OK to batch this kernel if there is an event that can be used
// to release the kernel once it has been executed. So we will not allow
// these to be batched, only executed immediately.
// Should this also force a synchronize immediately? That would also
// ensure that the kernel had completed execution before it could be
// released, but that may really hurt performance.
bool BatchingThisKernelOK = (Event);

// Get a new command list to be used on this call
ze_command_list_handle_t ZeCommandList = nullptr;
ze_fence_handle_t ZeFence = nullptr;
if (auto Res = Queue->Device->getAvailableCommandList(Queue, &ZeCommandList,
&ZeFence, true))
if (auto Res = Queue->Device->getAvailableCommandList(
Queue, &ZeCommandList, &ZeFence, BatchingThisKernelOK))
return Res;

ze_event_handle_t ZeEvent = nullptr;
Expand All @@ -3202,6 +3227,13 @@ piEnqueueKernelLaunch(pi_queue Queue, pi_kernel Kernel, pi_uint32 WorkDim,
(*Event)->Queue = Queue;
(*Event)->CommandType = PI_COMMAND_TYPE_NDRANGE_KERNEL;
(*Event)->ZeCommandList = ZeCommandList;
(*Event)->CommandData = (void *)Kernel;

// Use piKernelRetain to increment the reference count and indicate
// that the Kernel is in use. Once the event has been signaled, the
// code in cleanupAfterEvent will do a piReleaseKernel to update
// the reference count on the kernel.
piKernelRetain(Kernel);

ZeEvent = (*Event)->ZeEvent;
}
Expand All @@ -3227,7 +3259,8 @@ piEnqueueKernelLaunch(pi_queue Queue, pi_kernel Kernel, pi_uint32 WorkDim,

// Execute command list asynchronously, as the event will be used
// to track down its completion.
if (auto Res = Queue->batchCommandList(ZeCommandList, ZeFence))
if (auto Res =
Queue->batchCommandList(ZeCommandList, ZeFence, BatchingThisKernelOK))
return Res;

_pi_event::deleteZeEventList(ZeEventWaitList);
Expand Down Expand Up @@ -3356,21 +3389,26 @@ pi_result piEventGetProfilingInfo(pi_event Event, pi_profiling_info ParamName,
return PI_SUCCESS;
}

// Recycle the command list associated with this event.
static void recycleEventCommandList(pi_event Event) {
// Perform any necessary cleanup after an event has been signalled.
// This currently recycles the associated command list, and also makes
// sure to release any kernel that may have been used by the event.
static void cleanupAfterEvent(pi_event Event) {
// The implementation of this is slightly tricky. The same event
// can be referred to by multiple threads, so it is possible to
// have a race condition between the read of fields of the event,
// and resetting those fields in some other thread.
// But, since the event is uniquely associated with the queue
// for the event, we use the locking that we already have to do on the
// queue to also serve as the thread safety mechanism for the
// any of the Event's data members that need to be read/reset as
// part of the cleanup operations.
auto Queue = Event->Queue;

// Lock automatically releases when this goes out of scope.
std::lock_guard<std::mutex> lock(Queue->PiQueueMutex);

// Cleanup the command list associated with the event if it hasn't
// been cleaned up already.
auto EventCommandList = Event->ZeCommandList;

if (EventCommandList) {
Expand All @@ -3386,6 +3424,13 @@ static void recycleEventCommandList(pi_event Event) {
}
}
}

// Release the kernel associated with this event if there is one.
if (Event->CommandType == PI_COMMAND_TYPE_NDRANGE_KERNEL &&
Event->CommandData) {
piKernelRelease((pi_kernel)(Event->CommandData));
Event->CommandData = nullptr;
}
}

pi_result piEventsWait(pi_uint32 NumEvents, const pi_event *EventList) {
Expand All @@ -3412,9 +3457,9 @@ pi_result piEventsWait(pi_uint32 NumEvents, const pi_event *EventList) {
zePrint("ZeEvent = %lx\n", pi_cast<std::uintptr_t>(ZeEvent));
ZE_CALL(zeEventHostSynchronize(ZeEvent, UINT32_MAX));

// NOTE: we are destroying associated command lists here to free
// resources sooner in case RT is not calling piEventRelease soon enough.
recycleEventCommandList(EventList[I]);
// NOTE: we are cleaning up after the event here to free resources
// sooner in case run-time is not calling piEventRelease soon enough.
cleanupAfterEvent(EventList[I]);
}
return PI_SUCCESS;
}
Expand All @@ -3441,7 +3486,7 @@ pi_result piEventRetain(pi_event Event) {
pi_result piEventRelease(pi_event Event) {
assert(Event);
if (--(Event->RefCount) == 0) {
recycleEventCommandList(Event);
cleanupAfterEvent(Event);

if (Event->CommandType == PI_COMMAND_TYPE_MEM_BUFFER_UNMAP &&
Event->CommandData) {
Expand Down
4 changes: 3 additions & 1 deletion sycl/plugins/level_zero/pi_level_zero.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -328,8 +328,10 @@ struct _pi_queue : _pi_object {
// Attach a command list to this queue and allow it to remain open
// and used for further batching. It may be executed immediately,
// or it may be left open for other future command to be batched into.
// OKToBatchKernel indicates whether batching is allowed for this
// particular kernel in the command list.
pi_result batchCommandList(ze_command_list_handle_t ZeCommandList,
ze_fence_handle_t ZeFence);
ze_fence_handle_t ZeFence, bool OKToBatchKernel);

// Attach a command list to this queue, close, and execute it.
// Note that this command list cannot be appended to after this.
Expand Down