diff --git a/sycl/include/CL/sycl/detail/pi.h b/sycl/include/CL/sycl/detail/pi.h index e0f16833e0eb7..938f2ab198497 100644 --- a/sycl/include/CL/sycl/detail/pi.h +++ b/sycl/include/CL/sycl/detail/pi.h @@ -82,6 +82,9 @@ typedef enum { PI_INVALID_QUEUE = CL_INVALID_COMMAND_QUEUE, PI_OUT_OF_HOST_MEMORY = CL_OUT_OF_HOST_MEMORY, PI_INVALID_PROGRAM = CL_INVALID_PROGRAM, + PI_INVALID_PROGRAM_EXECUTABLE = CL_INVALID_PROGRAM_EXECUTABLE, + PI_INVALID_SAMPLER = CL_INVALID_SAMPLER, + PI_INVALID_BUFFER_SIZE = CL_INVALID_BUFFER_SIZE, PI_INVALID_MEM_OBJECT = CL_INVALID_MEM_OBJECT, PI_OUT_OF_RESOURCES = CL_OUT_OF_RESOURCES, PI_INVALID_EVENT = CL_INVALID_EVENT, @@ -93,8 +96,10 @@ typedef enum { PI_PROFILING_INFO_NOT_AVAILABLE = CL_PROFILING_INFO_NOT_AVAILABLE, PI_DEVICE_NOT_FOUND = CL_DEVICE_NOT_FOUND, PI_INVALID_WORK_ITEM_SIZE = CL_INVALID_WORK_ITEM_SIZE, + PI_INVALID_WORK_DIMENSION = CL_INVALID_WORK_DIMENSION, PI_INVALID_KERNEL_ARGS = CL_INVALID_KERNEL_ARGS, PI_INVALID_IMAGE_SIZE = CL_INVALID_IMAGE_SIZE, + PI_INVALID_IMAGE_FORMAT_DESCRIPTOR = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR, PI_IMAGE_FORMAT_NOT_SUPPORTED = CL_IMAGE_FORMAT_NOT_SUPPORTED, PI_MEM_OBJECT_ALLOCATION_FAILURE = CL_MEM_OBJECT_ALLOCATION_FAILURE, PI_ERROR_UNKNOWN = -999 diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index 8fbccb21197ed..12a0449267200 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -384,6 +384,9 @@ ze_result_t ZeCall::doCall(ze_result_t ZeResult, const char *CallStr, return ZeResult; } +#define PI_ASSERT(condition, error) \ + if (!(condition)) \ + return error; #define ZE_CALL(Call) \ if (auto Result = ZeCall().doCall(Call, #Call, true)) \ return mapError(Result); @@ -624,8 +627,10 @@ pi_result _pi_queue::executeCommandList(ze_command_list_handle_t ZeCommandList, bool IsBlocking, bool OKToBatchCommand) { if (OKToBatchCommand && this->isBatchingAllowed()) { - assert(this->ZeOpenCommandList == nullptr || - this->ZeOpenCommandList == ZeCommandList); + if (this->ZeOpenCommandList != nullptr && + this->ZeOpenCommandList != ZeCommandList) + die("executeCommandList: ZeOpenCommandList should be equal to" + "null or ZeCommandList"); if (this->ZeOpenCommandListSize + 1 < QueueBatchSize) { this->ZeOpenCommandList = ZeCommandList; @@ -791,7 +796,7 @@ pi_result piPlatformsGet(pi_uint32 NumEntries, pi_platform *Platforms, // Absorb the ZE_RESULT_ERROR_UNINITIALIZED and just return 0 Platforms. if (ZeResult == ZE_RESULT_ERROR_UNINITIALIZED) { - assert(NumPlatforms != 0); + PI_ASSERT(NumPlatforms != 0, PI_INVALID_VALUE); *NumPlatforms = 0; return PI_SUCCESS; } @@ -840,7 +845,8 @@ pi_result piPlatformsGet(pi_uint32 NumEntries, pi_platform *Platforms, PiPlatformCachePopulated = true; } else { ze_driver_handle_t ZeDriver; - assert(ZeDriverCount == 1); + PI_ASSERT(ZeDriverCount == 1, PI_INVALID_VALUE); + ZE_CALL(zeDriverGet(&ZeDriverCount, &ZeDriver)); pi_platform Platform = new _pi_platform(ZeDriver); @@ -899,7 +905,8 @@ pi_result piPlatformGetInfo(pi_platform Platform, pi_platform_info ParamName, size_t ParamValueSize, void *ParamValue, size_t *ParamValueSizeRet) { - assert(Platform); + PI_ASSERT(Platform, PI_INVALID_PLATFORM); + zePrint("==========================\n"); zePrint("SYCL over Level-Zero %s\n", Platform->ZeDriverVersion.c_str()); zePrint("==========================\n"); @@ -946,8 +953,8 @@ pi_result piPlatformGetInfo(pi_platform Platform, pi_platform_info ParamName, pi_result piextPlatformGetNativeHandle(pi_platform Platform, pi_native_handle *NativeHandle) { - assert(Platform); - assert(NativeHandle); + PI_ASSERT(Platform, PI_INVALID_PLATFORM); + PI_ASSERT(NativeHandle, PI_INVALID_VALUE); auto ZeDriver = pi_cast(NativeHandle); // Extract the Level Zero driver handle from the given PI platform @@ -957,8 +964,8 @@ pi_result piextPlatformGetNativeHandle(pi_platform Platform, pi_result piextPlatformCreateWithNativeHandle(pi_native_handle NativeHandle, pi_platform *Platform) { - assert(NativeHandle); - assert(Platform); + PI_ASSERT(Platform, PI_INVALID_PLATFORM); + PI_ASSERT(NativeHandle, PI_INVALID_VALUE); auto ZeDriver = pi_cast(NativeHandle); @@ -1010,7 +1017,7 @@ pi_result piDevicesGet(pi_platform Platform, pi_device_type DeviceType, pi_uint32 NumEntries, pi_device *Devices, pi_uint32 *NumDevices) { - assert(Platform); + PI_ASSERT(Platform, PI_INVALID_PLATFORM); // Get number of devices supporting Level Zero uint32_t ZeDeviceCount = 0; @@ -1035,8 +1042,8 @@ pi_result piDevicesGet(pi_platform Platform, pi_device_type DeviceType, *NumDevices = ZeDeviceCount; if (NumEntries == 0) { - assert(Devices == nullptr && - "Devices should be nullptr when querying the number of devices"); + // Devices should be nullptr when querying the number of devices + PI_ASSERT(Devices == nullptr, PI_INVALID_VALUE); return PI_SUCCESS; } @@ -1091,7 +1098,8 @@ static pi_result populateDeviceCacheIfNeeded(pi_platform Platform) { } pi_result piDeviceRetain(pi_device Device) { - assert(Device); + PI_ASSERT(Device, PI_INVALID_DEVICE); + // The root-device ref-count remains unchanged (always 1). if (Device->IsSubDevice) { ++(Device->RefCount); @@ -1100,8 +1108,12 @@ pi_result piDeviceRetain(pi_device Device) { } pi_result piDeviceRelease(pi_device Device) { - assert(Device); - assert(Device->RefCount > 0 && "Device is already released."); + PI_ASSERT(Device, PI_INVALID_DEVICE); + + // Check if the device is already released + if (Device->RefCount <= 0) + die("piDeviceRelease: the device has been already released"); + // TODO: OpenCL says root-device ref-count remains unchanged (1), // but when would we free the device's data? if (Device->IsSubDevice) { @@ -1124,14 +1136,15 @@ pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName, size_t ParamValueSize, void *ParamValue, size_t *ParamValueSizeRet) { - assert(Device != nullptr); + PI_ASSERT(Device, PI_INVALID_DEVICE); ze_device_handle_t ZeDevice = Device->ZeDevice; uint32_t ZeAvailMemCount = 0; ZE_CALL(zeDeviceGetMemoryProperties(ZeDevice, &ZeAvailMemCount, nullptr)); + // Confirm at least one memory is available in the device - assert(ZeAvailMemCount > 0); + PI_ASSERT(ZeAvailMemCount > 0, PI_INVALID_VALUE); std::vector ZeDeviceMemoryProperties; try { @@ -1584,7 +1597,8 @@ pi_result piDevicePartition(pi_device Device, return PI_INVALID_VALUE; } - assert(Device); + PI_ASSERT(Device, PI_INVALID_DEVICE); + // Get the number of subdevices available. // TODO: maybe add interface to create the specified # of subdevices. uint32_t Count = 0; @@ -1635,9 +1649,9 @@ piextDeviceSelectBinary(pi_device Device, // TODO: does this need to be context? pi_device_binary *Binaries, pi_uint32 NumBinaries, pi_uint32 *SelectedBinaryInd) { - assert(Device); - assert(SelectedBinaryInd); - assert(NumBinaries == 0 || Binaries); + PI_ASSERT(Device, PI_INVALID_DEVICE); + PI_ASSERT(SelectedBinaryInd, PI_INVALID_VALUE); + PI_ASSERT(NumBinaries == 0 || Binaries, PI_INVALID_VALUE); // TODO: this is a bare-bones implementation for choosing a device image // that would be compatible with the targeted device. An AOT-compiled @@ -1679,8 +1693,8 @@ piextDeviceSelectBinary(pi_device Device, // TODO: does this need to be context? pi_result piextDeviceGetNativeHandle(pi_device Device, pi_native_handle *NativeHandle) { - assert(Device); - assert(NativeHandle); + PI_ASSERT(Device, PI_INVALID_DEVICE); + PI_ASSERT(NativeHandle, PI_INVALID_VALUE); auto ZeDevice = pi_cast(NativeHandle); // Extract the Level Zero module handle from the given PI device @@ -1691,9 +1705,9 @@ pi_result piextDeviceGetNativeHandle(pi_device Device, pi_result piextDeviceCreateWithNativeHandle(pi_native_handle NativeHandle, pi_platform Platform, pi_device *Device) { - assert(NativeHandle); - assert(Device); - assert(Platform); + PI_ASSERT(Device, PI_INVALID_DEVICE); + PI_ASSERT(NativeHandle, PI_INVALID_VALUE); + PI_ASSERT(Platform, PI_INVALID_PLATFORM); std::lock_guard Lock(Platform->PiDevicesCacheMutex); pi_result Res = populateDeviceCacheIfNeeded(Platform); @@ -1725,11 +1739,8 @@ pi_result piContextCreate(const pi_context_properties *Properties, const void *PrivateInfo, size_t CB, void *UserData), void *UserData, pi_context *RetContext) { - if (!Devices) { - return PI_INVALID_VALUE; - } - - assert(RetContext); + PI_ASSERT(Devices, PI_INVALID_DEVICE); + PI_ASSERT(RetContext, PI_INVALID_VALUE); ze_context_desc_t ContextDesc = {ZE_STRUCTURE_TYPE_CONTEXT_DESC, nullptr, 0}; ze_context_handle_t ZeContext; @@ -1751,7 +1762,7 @@ pi_result piContextGetInfo(pi_context Context, pi_context_info ParamName, size_t ParamValueSize, void *ParamValue, size_t *ParamValueSizeRet) { - assert(Context); + PI_ASSERT(Context, PI_INVALID_CONTEXT); ReturnHelper ReturnValue(ParamValueSize, ParamValue, ParamValueSizeRet); switch (ParamName) { @@ -1780,8 +1791,8 @@ pi_result piextContextSetExtendedDeleter(pi_context Context, pi_result piextContextGetNativeHandle(pi_context Context, pi_native_handle *NativeHandle) { - assert(Context); - assert(NativeHandle); + PI_ASSERT(Context, PI_INVALID_CONTEXT); + PI_ASSERT(NativeHandle, PI_INVALID_VALUE); auto ZeContext = pi_cast(NativeHandle); // Extract the Level Zero queue handle from the given PI queue @@ -1793,12 +1804,10 @@ pi_result piextContextCreateWithNativeHandle(pi_native_handle NativeHandle, pi_uint32 NumDevices, const pi_device *Devices, pi_context *RetContext) { - assert(NativeHandle); - assert(RetContext); - - if (!Devices || !NumDevices) { - return PI_INVALID_VALUE; - } + PI_ASSERT(NativeHandle, PI_INVALID_VALUE); + PI_ASSERT(Devices, PI_INVALID_DEVICE); + PI_ASSERT(RetContext, PI_INVALID_VALUE); + PI_ASSERT(NumDevices, PI_INVALID_VALUE); try { *RetContext = new _pi_context(pi_cast(NativeHandle), @@ -1815,14 +1824,16 @@ pi_result piextContextCreateWithNativeHandle(pi_native_handle NativeHandle, pi_result piContextRetain(pi_context Context) { - assert(Context); + PI_ASSERT(Context, PI_INVALID_CONTEXT); + ++(Context->RefCount); return PI_SUCCESS; } pi_result piContextRelease(pi_context Context) { - assert(Context); + PI_ASSERT(Context, PI_INVALID_CONTEXT); + if (--(Context->RefCount) == 0) { auto ZeContext = Context->ZeContext; @@ -1849,22 +1860,23 @@ pi_result piQueueCreate(pi_context Context, pi_device Device, pi_queue_properties Properties, pi_queue *Queue) { // Check that unexpected bits are not set. - assert(!(Properties & ~(PI_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | - PI_QUEUE_PROFILING_ENABLE | PI_QUEUE_ON_DEVICE | - PI_QUEUE_ON_DEVICE_DEFAULT))); + PI_ASSERT(!(Properties & ~(PI_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | + PI_QUEUE_PROFILING_ENABLE | PI_QUEUE_ON_DEVICE | + PI_QUEUE_ON_DEVICE_DEFAULT)), + PI_INVALID_VALUE); ze_device_handle_t ZeDevice; ze_command_queue_handle_t ZeCommandQueue; - if (!Context) { - return PI_INVALID_CONTEXT; - } + PI_ASSERT(Context, PI_INVALID_CONTEXT); + if (std::find(Context->Devices.begin(), Context->Devices.end(), Device) == Context->Devices.end()) { return PI_INVALID_DEVICE; } - assert(Device); + PI_ASSERT(Device, PI_INVALID_DEVICE); + ZeDevice = Device->ZeDevice; ze_command_queue_desc_t ZeCommandQueueDesc = {}; ZeCommandQueueDesc.ordinal = Device->ZeComputeQueueGroupIndex; @@ -1876,7 +1888,8 @@ pi_result piQueueCreate(pi_context Context, pi_device Device, &ZeCommandQueueDesc, // TODO: translate properties &ZeCommandQueue)); - assert(Queue); + PI_ASSERT(Queue, PI_INVALID_QUEUE); + try { *Queue = new _pi_queue(ZeCommandQueue, Context, Device, ZeCommandListBatchSize); @@ -1892,7 +1905,7 @@ pi_result piQueueGetInfo(pi_queue Queue, pi_queue_info ParamName, size_t ParamValueSize, void *ParamValue, size_t *ParamValueSizeRet) { - assert(Queue); + PI_ASSERT(Queue, PI_INVALID_QUEUE); ReturnHelper ReturnValue(ParamValueSize, ParamValue, ParamValueSizeRet); // TODO: consider support for queue properties and size @@ -1930,7 +1943,8 @@ pi_result piQueueRetain(pi_queue Queue) { } pi_result piQueueRelease(pi_queue Queue) { - assert(Queue); + PI_ASSERT(Queue, PI_INVALID_QUEUE); + // Lock automatically releases when this goes out of scope. std::lock_guard lock(Queue->PiQueueMutex); @@ -1957,7 +1971,7 @@ pi_result piQueueRelease(pi_queue Queue) { pi_result piQueueFinish(pi_queue Queue) { // Wait until command lists attached to the command queue are executed. - assert(Queue); + PI_ASSERT(Queue, PI_INVALID_QUEUE); // Lock automatically releases when this goes out of scope. std::lock_guard lock(Queue->PiQueueMutex); @@ -1972,8 +1986,8 @@ pi_result piQueueFinish(pi_queue Queue) { pi_result piextQueueGetNativeHandle(pi_queue Queue, pi_native_handle *NativeHandle) { - assert(Queue); - assert(NativeHandle); + PI_ASSERT(Queue, PI_INVALID_QUEUE); + PI_ASSERT(NativeHandle, PI_INVALID_VALUE); // Lock automatically releases when this goes out of scope. std::lock_guard lock(Queue->PiQueueMutex); @@ -1987,9 +2001,9 @@ pi_result piextQueueGetNativeHandle(pi_queue Queue, pi_result piextQueueCreateWithNativeHandle(pi_native_handle NativeHandle, pi_context Context, pi_queue *Queue) { - assert(NativeHandle); - assert(Context); - assert(Queue); + PI_ASSERT(Context, PI_INVALID_CONTEXT); + PI_ASSERT(NativeHandle, PI_INVALID_VALUE); + PI_ASSERT(Queue, PI_INVALID_QUEUE); auto ZeQueue = pi_cast(NativeHandle); @@ -2005,11 +2019,16 @@ pi_result piMemBufferCreate(pi_context Context, pi_mem_flags Flags, size_t Size, const pi_mem_properties *properties) { // TODO: implement read-only, write-only - assert((Flags & PI_MEM_FLAGS_ACCESS_RW) != 0); - assert(Context); - assert(RetMem); - assert(properties == nullptr && - "no mem properties goes to Level-Zero RT yet"); + if ((Flags & PI_MEM_FLAGS_ACCESS_RW) == 0) { + die("piMemBufferCreate: Level-Zero implements only read-write buffer," + "no read-only or write-only yet."); + } + PI_ASSERT(Context, PI_INVALID_CONTEXT); + PI_ASSERT(RetMem, PI_INVALID_VALUE); + + if (properties != nullptr) { + die("piMemBufferCreate: no mem properties goes to Level-Zero RT yet"); + } void *Ptr; ze_device_handle_t ZeDevice = Context->Devices[0]->ZeDevice; @@ -2083,13 +2102,15 @@ pi_result piMemGetInfo(pi_mem Mem, } pi_result piMemRetain(pi_mem Mem) { - assert(Mem); + PI_ASSERT(Mem, PI_INVALID_MEM_OBJECT); + ++(Mem->RefCount); return PI_SUCCESS; } pi_result piMemRelease(pi_mem Mem) { - assert(Mem); + PI_ASSERT(Mem, PI_INVALID_MEM_OBJECT); + if (--(Mem->RefCount) == 0) { if (Mem->isImage()) { ZE_CALL(zeImageDestroy(pi_cast(Mem->getZeHandle()))); @@ -2110,10 +2131,13 @@ pi_result piMemImageCreate(pi_context Context, pi_mem_flags Flags, pi_mem *RetImage) { // TODO: implement read-only, write-only - assert((Flags & PI_MEM_FLAGS_ACCESS_RW) != 0); - assert(ImageFormat); - assert(Context); - assert(RetImage); + if ((Flags & PI_MEM_FLAGS_ACCESS_RW) == 0) { + die("piMemImageCreate: Level-Zero implements only read-write buffer," + "no read-only or write-only yet."); + } + PI_ASSERT(Context, PI_INVALID_CONTEXT); + PI_ASSERT(RetImage, PI_INVALID_VALUE); + PI_ASSERT(ImageFormat, PI_INVALID_IMAGE_FORMAT_DESCRIPTOR); ze_image_format_type_t ZeImageFormatType; size_t ZeImageFormatTypeSize; @@ -2288,12 +2312,9 @@ pi_result piextMemCreateWithNativeHandle(pi_native_handle NativeHandle, pi_result piProgramCreate(pi_context Context, const void *ILBytes, size_t Length, pi_program *Program) { - if (!Context) - return PI_INVALID_CONTEXT; - if (!ILBytes || !Length) - return PI_INVALID_VALUE; - if (!Program) - return PI_INVALID_VALUE; + PI_ASSERT(Context, PI_INVALID_CONTEXT); + PI_ASSERT(ILBytes && Length, PI_INVALID_VALUE); + PI_ASSERT(Program, PI_INVALID_PROGRAM); // NOTE: the Level Zero module creation is also building the program, so we // are deferring it until the program is ready to be built in piProgramBuild @@ -2316,17 +2337,14 @@ pi_result piProgramCreateWithBinary(pi_context Context, pi_uint32 NumDevices, pi_int32 *BinaryStatus, pi_program *Program) { - if (!Context) - return PI_INVALID_CONTEXT; - if (!DeviceList || !NumDevices) - return PI_INVALID_VALUE; - if (!Binaries || !Lengths) - return PI_INVALID_VALUE; - if (!Program) - return PI_INVALID_VALUE; + PI_ASSERT(Context, PI_INVALID_CONTEXT); + PI_ASSERT(DeviceList && NumDevices, PI_INVALID_VALUE); + PI_ASSERT(Binaries && Lengths, PI_INVALID_VALUE); + PI_ASSERT(Program, PI_INVALID_PROGRAM); // For now we support only one device. - assert(NumDevices == 1); + if (NumDevices != 1) + die("piProgramCreateWithBinary: level_zero supports only one device."); if (!Binaries[0] || !Lengths[0]) { if (BinaryStatus) *BinaryStatus = PI_INVALID_VALUE; @@ -2375,7 +2393,8 @@ pi_result piProgramGetInfo(pi_program Program, pi_program_info ParamName, size_t ParamValueSize, void *ParamValue, size_t *ParamValueSizeRet) { - assert(Program); + PI_ASSERT(Program, PI_INVALID_PROGRAM); + ReturnHelper ReturnValue(ParamValueSize, ParamValue, ParamValueSizeRet); switch (ParamName) { case PI_PROGRAM_INFO_REFERENCE_COUNT: @@ -2392,9 +2411,10 @@ pi_result piProgramGetInfo(pi_program Program, pi_program_info ParamName, Program->State == _pi_program::Native) { SzBinary = Program->CodeLength; } else { - assert(Program->State == _pi_program::Object || - Program->State == _pi_program::Exe || - Program->State == _pi_program::LinkedExe); + PI_ASSERT(Program->State == _pi_program::Object || + Program->State == _pi_program::Exe || + Program->State == _pi_program::LinkedExe, + PI_INVALID_OPERATION); // If the program is in LinkedExe state it may contain several modules. // We cannot handle this case because each module's contents is in its @@ -2404,7 +2424,9 @@ pi_result piProgramGetInfo(pi_program Program, pi_program_info ParamName, // in Level Zero. Therefore, this API is unimplemented when the Program // has more than one module. _pi_program::ModuleIterator ModIt(Program); - assert(!ModIt.Done()); + + PI_ASSERT(!ModIt.Done(), PI_INVALID_VALUE); + if (ModIt.Count() > 1) { die("piProgramGetInfo: PI_PROGRAM_INFO_BINARY_SIZES not implemented " "for linked programs"); @@ -2426,12 +2448,15 @@ pi_result piProgramGetInfo(pi_program Program, pi_program_info ParamName, Program->State == _pi_program::Native) { std::memcpy(PBinary[0], Program->Code.get(), Program->CodeLength); } else { - assert(Program->State == _pi_program::Object || - Program->State == _pi_program::Exe || - Program->State == _pi_program::LinkedExe); + PI_ASSERT(Program->State == _pi_program::Object || + Program->State == _pi_program::Exe || + Program->State == _pi_program::LinkedExe, + PI_INVALID_OPERATION); _pi_program::ModuleIterator ModIt(Program); - assert(!ModIt.Done()); + + PI_ASSERT(!ModIt.Done(), PI_INVALID_VALUE); + if (ModIt.Count() > 1) { die("piProgramGetInfo: PI_PROGRAM_INFO_BINARIES not implemented for " "linked programs"); @@ -2446,12 +2471,11 @@ pi_result piProgramGetInfo(pi_program Program, pi_program_info ParamName, if (Program->State == _pi_program::IL || Program->State == _pi_program::Native || Program->State == _pi_program::Object) { - // The OpenCL spec says to return CL_INVALID_PROGRAM_EXECUTABLE in this - // case, but there is no corresponding PI error code. - return PI_INVALID_OPERATION; + return PI_INVALID_PROGRAM_EXECUTABLE; } else { - assert(Program->State == _pi_program::Exe || - Program->State == _pi_program::LinkedExe); + PI_ASSERT(Program->State == _pi_program::Exe || + Program->State == _pi_program::LinkedExe, + PI_INVALID_OPERATION); NumKernels = 0; _pi_program::ModuleIterator ModIt(Program); @@ -2470,12 +2494,11 @@ pi_result piProgramGetInfo(pi_program Program, pi_program_info ParamName, if (Program->State == _pi_program::IL || Program->State == _pi_program::Native || Program->State == _pi_program::Object) { - // The OpenCL spec says to return CL_INVALID_PROGRAM_EXECUTABLE in this - // case, but there is no corresponding PI error code. - return PI_INVALID_OPERATION; + return PI_INVALID_PROGRAM_EXECUTABLE; } else { - assert(Program->State == _pi_program::Exe || - Program->State == _pi_program::LinkedExe); + PI_ASSERT(Program->State == _pi_program::Exe || + Program->State == _pi_program::LinkedExe, + PI_INVALID_PROGRAM_EXECUTABLE); bool First = true; _pi_program::ModuleIterator ModIt(Program); @@ -2514,9 +2537,11 @@ pi_result piProgramLink(pi_context Context, pi_uint32 NumDevices, // We only support one device with Level Zero currently. pi_device Device = Context->Devices[0]; - assert(NumDevices == 1); - assert(DeviceList && DeviceList[0] == Device); - assert(!PFnNotify && !UserData); + if (NumDevices != 1) + die("piProgramLink: level_zero supports only one device."); + + PI_ASSERT(DeviceList && DeviceList[0] == Device, PI_INVALID_DEVICE); + PI_ASSERT(!PFnNotify && !UserData, PI_INVALID_VALUE); // Validate input parameters. if (NumInputPrograms == 0 || InputPrograms == nullptr) @@ -2525,7 +2550,7 @@ pi_result piProgramLink(pi_context Context, pi_uint32 NumDevices, if (InputPrograms[I]->State != _pi_program::Object) { return PI_INVALID_OPERATION; } - assert(InputPrograms[I]->ZeModule); + PI_ASSERT(InputPrograms[I]->ZeModule, PI_INVALID_VALUE); } // Linking modules on Level Zero is different from OpenCL. With Level Zero, @@ -2638,7 +2663,7 @@ pi_result piProgramCompile( return PI_INVALID_OPERATION; // These aren't supported. - assert(!PFnNotify && !UserData); + PI_ASSERT(!PFnNotify && !UserData, PI_INVALID_VALUE); pi_result res = compileOrBuild(Program, NumDevices, DeviceList, Options); if (res != PI_SUCCESS) @@ -2655,8 +2680,7 @@ pi_result piProgramBuild(pi_program Program, pi_uint32 NumDevices, // The OpenCL spec says this should return CL_INVALID_PROGRAM, but there is // no corresponding PI error code. - if (!Program) - return PI_INVALID_OPERATION; + PI_ASSERT(Program, PI_INVALID_PROGRAM); // It is legal to build a program created from either IL or from native // device code. @@ -2665,7 +2689,7 @@ pi_result piProgramBuild(pi_program Program, pi_uint32 NumDevices, return PI_INVALID_OPERATION; // These aren't supported. - assert(!PFnNotify && !UserData); + PI_ASSERT(!PFnNotify && !UserData, PI_INVALID_VALUE); pi_result res = compileOrBuild(Program, NumDevices, DeviceList, Options); if (res != PI_SUCCESS) @@ -2686,10 +2710,13 @@ static pi_result compileOrBuild(pi_program Program, pi_uint32 NumDevices, // We only support build to one device with Level Zero now. // TODO: we should eventually build to the possibly multiple root // devices in the context. - assert(NumDevices == 1 && DeviceList); + if (NumDevices != 1) + die("compileOrBuild: level_zero supports only one device."); + + PI_ASSERT(DeviceList, PI_INVALID_DEVICE); // We should have either IL or native device code. - assert(Program->Code); + PI_ASSERT(Program->Code, PI_INVALID_PROGRAM); // Specialization constants are used only if the program was created from // IL. Translate them to the Level Zero format. @@ -2784,14 +2811,15 @@ pi_result piProgramGetBuildInfo(pi_program Program, pi_device Device, } pi_result piProgramRetain(pi_program Program) { - assert(Program); + PI_ASSERT(Program, PI_INVALID_PROGRAM); ++(Program->RefCount); return PI_SUCCESS; } pi_result piProgramRelease(pi_program Program) { - assert(Program); - assert((Program->RefCount > 0) && "Program is already released."); + PI_ASSERT(Program, PI_INVALID_PROGRAM); + // Check if the program is already released + PI_ASSERT(Program->RefCount > 0, PI_INVALID_VALUE); if (--(Program->RefCount) == 0) { delete Program; } @@ -2800,8 +2828,8 @@ pi_result piProgramRelease(pi_program Program) { pi_result piextProgramGetNativeHandle(pi_program Program, pi_native_handle *NativeHandle) { - assert(Program); - assert(NativeHandle); + PI_ASSERT(Program, PI_INVALID_PROGRAM); + PI_ASSERT(NativeHandle, PI_INVALID_VALUE); auto ZeModule = pi_cast(NativeHandle); @@ -2810,7 +2838,7 @@ pi_result piextProgramGetNativeHandle(pi_program Program, case _pi_program::Exe: case _pi_program::LinkedExe: { _pi_program::ModuleIterator ModIt(Program); - assert(!ModIt.Done()); + PI_ASSERT(!ModIt.Done(), PI_INVALID_VALUE); if (ModIt.Count() > 1) { // Programs in LinkedExe state could have several corresponding // Level Zero modules, so there is no right answer in this case. @@ -2832,9 +2860,9 @@ pi_result piextProgramGetNativeHandle(pi_program Program, pi_result piextProgramCreateWithNativeHandle(pi_native_handle NativeHandle, pi_context Context, pi_program *Program) { - assert(NativeHandle); - assert(Context); - assert(Program); + PI_ASSERT(Program, PI_INVALID_PROGRAM); + PI_ASSERT(NativeHandle, PI_INVALID_VALUE); + PI_ASSERT(Context, PI_INVALID_CONTEXT); auto ZeModule = pi_cast(NativeHandle); @@ -2963,16 +2991,13 @@ static bool isOnlineLinkEnabled() { pi_result piKernelCreate(pi_program Program, const char *KernelName, pi_kernel *RetKernel) { - assert(Program); - assert(RetKernel); - assert(KernelName); + PI_ASSERT(Program, PI_INVALID_PROGRAM); + PI_ASSERT(RetKernel, PI_INVALID_VALUE); + PI_ASSERT(KernelName, PI_INVALID_VALUE); - // The OpenCL spec actually says this should return - // CL_INVALID_PROGRAM_EXECUTABLE, but there is no - // corresponding PI error code. if (Program->State != _pi_program::Exe && Program->State != _pi_program::LinkedExe) { - return PI_INVALID_OPERATION; + return PI_INVALID_PROGRAM_EXECUTABLE; } ze_kernel_desc_t ZeKernelDesc = {}; @@ -3022,7 +3047,8 @@ pi_result piKernelSetArg(pi_kernel Kernel, pi_uint32 ArgIndex, size_t ArgSize, ArgValue = nullptr; } - assert(Kernel); + PI_ASSERT(Kernel, PI_INVALID_KERNEL); + ZE_CALL(zeKernelSetArgumentValue( pi_cast(Kernel->ZeKernel), pi_cast(ArgIndex), pi_cast(ArgSize), @@ -3039,7 +3065,8 @@ pi_result piextKernelSetArgMemObj(pi_kernel Kernel, pi_uint32 ArgIndex, // RT pass that directly to the regular piKernelSetArg (and // then remove this piextKernelSetArgMemObj). - assert(Kernel); + PI_ASSERT(Kernel, PI_INVALID_KERNEL); + ZE_CALL( zeKernelSetArgumentValue(pi_cast(Kernel->ZeKernel), pi_cast(ArgIndex), sizeof(void *), @@ -3051,7 +3078,8 @@ pi_result piextKernelSetArgMemObj(pi_kernel Kernel, pi_uint32 ArgIndex, // Special version of piKernelSetArg to accept pi_sampler. pi_result piextKernelSetArgSampler(pi_kernel Kernel, pi_uint32 ArgIndex, const pi_sampler *ArgValue) { - assert(Kernel); + PI_ASSERT(Kernel, PI_INVALID_KERNEL); + ZE_CALL(zeKernelSetArgumentValue( pi_cast(Kernel->ZeKernel), pi_cast(ArgIndex), sizeof(void *), &(*ArgValue)->ZeSampler)); @@ -3062,7 +3090,8 @@ pi_result piextKernelSetArgSampler(pi_kernel Kernel, pi_uint32 ArgIndex, pi_result piKernelGetInfo(pi_kernel Kernel, pi_kernel_info ParamName, size_t ParamValueSize, void *ParamValue, size_t *ParamValueSizeRet) { - assert(Kernel); + PI_ASSERT(Kernel, PI_INVALID_KERNEL); + ze_kernel_properties_t ZeKernelProperties = {}; ZE_CALL(zeKernelGetProperties(Kernel->ZeKernel, &ZeKernelProperties)); @@ -3118,8 +3147,9 @@ pi_result piKernelGetGroupInfo(pi_kernel Kernel, pi_device Device, pi_kernel_group_info ParamName, size_t ParamValueSize, void *ParamValue, size_t *ParamValueSizeRet) { - assert(Kernel); - assert(Device); + PI_ASSERT(Kernel, PI_INVALID_KERNEL); + PI_ASSERT(Device, PI_INVALID_DEVICE); + ze_device_handle_t ZeDevice = Device->ZeDevice; ze_device_compute_properties_t ZeDeviceComputeProperties = {}; ZE_CALL(zeDeviceGetComputeProperties(ZeDevice, &ZeDeviceComputeProperties)); @@ -3198,7 +3228,8 @@ pi_result piKernelGetSubGroupInfo(pi_kernel Kernel, pi_device Device, pi_result piKernelRetain(pi_kernel Kernel) { - assert(Kernel); + PI_ASSERT(Kernel, PI_INVALID_KERNEL); + ++(Kernel->RefCount); // When retaining a kernel, you are also retaining the program it is part of. piProgramRetain(Kernel->Program); @@ -3207,7 +3238,8 @@ pi_result piKernelRetain(pi_kernel Kernel) { pi_result piKernelRelease(pi_kernel Kernel) { - assert(Kernel); + PI_ASSERT(Kernel, PI_INVALID_KERNEL); + auto KernelProgram = Kernel->Program; if (--(Kernel->RefCount) == 0) { @@ -3227,13 +3259,16 @@ piEnqueueKernelLaunch(pi_queue Queue, pi_kernel Kernel, pi_uint32 WorkDim, const size_t *GlobalWorkSize, const size_t *LocalWorkSize, pi_uint32 NumEventsInWaitList, const pi_event *EventWaitList, pi_event *Event) { - assert(Kernel); - assert(Queue); - assert(Event); - assert((WorkDim > 0) && (WorkDim < 4)); + PI_ASSERT(Kernel, PI_INVALID_KERNEL); + PI_ASSERT(Queue, PI_INVALID_QUEUE); + PI_ASSERT(Event, PI_INVALID_EVENT); + PI_ASSERT((WorkDim > 0) && (WorkDim < 4), PI_INVALID_WORK_DIMENSION); + if (GlobalWorkOffset != NULL) { for (pi_uint32 i = 0; i < WorkDim; i++) { - assert(GlobalWorkOffset[i] == 0); + if (GlobalWorkOffset[i] != 0) { + return PI_INVALID_VALUE; + } } } @@ -3241,8 +3276,8 @@ piEnqueueKernelLaunch(pi_queue Queue, pi_kernel Kernel, pi_uint32 WorkDim, uint32_t WG[3]; // global_work_size of unused dimensions must be set to 1 - assert(WorkDim == 3 || GlobalWorkSize[2] == 1); - assert(WorkDim >= 2 || GlobalWorkSize[1] == 1); + PI_ASSERT(WorkDim == 3 || GlobalWorkSize[2] == 1, PI_INVALID_VALUE); + PI_ASSERT(WorkDim >= 2 || GlobalWorkSize[1] == 1, PI_INVALID_VALUE); if (LocalWorkSize) { WG[0] = pi_cast(LocalWorkSize[0]); @@ -3381,7 +3416,8 @@ pi_result piEventCreate(pi_context Context, pi_event *RetEvent) { ZE_CALL(zeEventCreate(ZeEventPool, &ZeEventDesc, &ZeEvent)); try { - assert(RetEvent); + PI_ASSERT(RetEvent, PI_INVALID_VALUE); + *RetEvent = new _pi_event(ZeEvent, ZeEventPool, Context, PI_COMMAND_TYPE_USER); } catch (const std::bad_alloc &) { @@ -3396,7 +3432,8 @@ pi_result piEventGetInfo(pi_event Event, pi_event_info ParamName, size_t ParamValueSize, void *ParamValue, size_t *ParamValueSizeRet) { - assert(Event); + PI_ASSERT(Event, PI_INVALID_EVENT); + ReturnHelper ReturnValue(ParamValueSize, ParamValue, ParamValueSizeRet); switch (ParamName) { case PI_EVENT_INFO_COMMAND_QUEUE: @@ -3433,7 +3470,8 @@ pi_result piEventGetProfilingInfo(pi_event Event, pi_profiling_info ParamName, size_t ParamValueSize, void *ParamValue, size_t *ParamValueSizeRet) { - assert(Event); + PI_ASSERT(Event, PI_INVALID_EVENT); + uint64_t ZeTimerResolution = Event->Queue->Device->ZeDeviceProperties.timerResolution; @@ -3578,7 +3616,8 @@ pi_result piEventRetain(pi_event Event) { } pi_result piEventRelease(pi_event Event) { - assert(Event); + PI_ASSERT(Event, PI_INVALID_EVENT); + if (--(Event->RefCount) == 0) { cleanupAfterEvent(Event); @@ -3617,8 +3656,8 @@ pi_result piSamplerCreate(pi_context Context, const pi_sampler_properties *SamplerProperties, pi_sampler *RetSampler) { - assert(Context); - assert(RetSampler); + PI_ASSERT(Context, PI_INVALID_CONTEXT); + PI_ASSERT(RetSampler, PI_INVALID_VALUE); // Have the "0" device in context to own the sampler. Rely on Level-Zero // drivers to perform migration as necessary for sharing it across multiple @@ -3738,13 +3777,15 @@ pi_result piSamplerGetInfo(pi_sampler Sampler, pi_sampler_info ParamName, } pi_result piSamplerRetain(pi_sampler Sampler) { - assert(Sampler); + PI_ASSERT(Sampler, PI_INVALID_SAMPLER); + ++(Sampler->RefCount); return PI_SUCCESS; } pi_result piSamplerRelease(pi_sampler Sampler) { - assert(Sampler); + PI_ASSERT(Sampler, PI_INVALID_SAMPLER); + if (--(Sampler->RefCount) == 0) { ZE_CALL(zeSamplerDestroy(Sampler->ZeSampler)); delete Sampler; @@ -3766,7 +3807,7 @@ pi_result piEnqueueEventsWaitWithBarrier(pi_queue Queue, pi_uint32 NumEventsInWaitList, const pi_event *EventWaitList, pi_event *Event) { - assert(Queue); + PI_ASSERT(Queue, PI_INVALID_QUEUE); // Lock automatically releases when this goes out of scope. std::lock_guard lock(Queue->PiQueueMutex); @@ -3812,8 +3853,8 @@ pi_result piEnqueueMemBufferRead(pi_queue Queue, pi_mem Src, pi_uint32 NumEventsInWaitList, const pi_event *EventWaitList, pi_event *Event) { - assert(Src); - assert(Queue); + PI_ASSERT(Src, PI_INVALID_MEM_OBJECT); + PI_ASSERT(Queue, PI_INVALID_QUEUE); // Lock automatically releases when this goes out of scope. std::lock_guard lock(Queue->PiQueueMutex); @@ -3832,8 +3873,8 @@ pi_result piEnqueueMemBufferReadRect( pi_uint32 NumEventsInWaitList, const pi_event *EventWaitList, pi_event *Event) { - assert(Buffer); - assert(Queue); + PI_ASSERT(Buffer, PI_INVALID_MEM_OBJECT); + PI_ASSERT(Queue, PI_INVALID_QUEUE); // Lock automatically releases when this goes out of scope. std::lock_guard lock(Queue->PiQueueMutex); @@ -3914,9 +3955,7 @@ static pi_result enqueueMemCopyRectHelper( size_t DstSlicePitch, pi_bool Blocking, pi_uint32 NumEventsInWaitList, const pi_event *EventWaitList, pi_event *Event) { - assert(Region); - assert(SrcOrigin); - assert(DstOrigin); + PI_ASSERT(Region && SrcOrigin && DstOrigin, PI_INVALID_VALUE); // Get a new command list to be used on this call ze_command_list_handle_t ZeCommandList = nullptr; @@ -4013,8 +4052,8 @@ pi_result piEnqueueMemBufferWrite(pi_queue Queue, pi_mem Buffer, const pi_event *EventWaitList, pi_event *Event) { - assert(Buffer); - assert(Queue); + PI_ASSERT(Buffer, PI_INVALID_MEM_OBJECT); + PI_ASSERT(Queue, PI_INVALID_QUEUE); // Lock automatically releases when this goes out of scope. std::lock_guard lock(Queue->PiQueueMutex); @@ -4035,8 +4074,8 @@ pi_result piEnqueueMemBufferWriteRect( pi_uint32 NumEventsInWaitList, const pi_event *EventWaitList, pi_event *Event) { - assert(Buffer); - assert(Queue); + PI_ASSERT(Buffer, PI_INVALID_MEM_OBJECT); + PI_ASSERT(Queue, PI_INVALID_QUEUE); // Lock automatically releases when this goes out of scope. std::lock_guard lock(Queue->PiQueueMutex); @@ -4055,9 +4094,8 @@ pi_result piEnqueueMemBufferCopy(pi_queue Queue, pi_mem SrcBuffer, pi_uint32 NumEventsInWaitList, const pi_event *EventWaitList, pi_event *Event) { - assert(SrcBuffer); - assert(DstBuffer); - assert(Queue); + PI_ASSERT(SrcBuffer && DstBuffer, PI_INVALID_MEM_OBJECT); + PI_ASSERT(Queue, PI_INVALID_QUEUE); // Lock automatically releases when this goes out of scope. std::lock_guard lock(Queue->PiQueueMutex); @@ -4076,9 +4114,8 @@ pi_result piEnqueueMemBufferCopyRect( pi_buff_rect_region Region, size_t SrcRowPitch, size_t SrcSlicePitch, size_t DstRowPitch, size_t DstSlicePitch, pi_uint32 NumEventsInWaitList, const pi_event *EventWaitList, pi_event *Event) { - assert(SrcBuffer); - assert(DstBuffer); - assert(Queue); + PI_ASSERT(SrcBuffer && DstBuffer, PI_INVALID_MEM_OBJECT); + PI_ASSERT(Queue, PI_INVALID_QUEUE); // Lock automatically releases when this goes out of scope. std::lock_guard lock(Queue->PiQueueMutex); @@ -4131,7 +4168,8 @@ enqueueMemFillHelper(pi_command_type CommandType, pi_queue Queue, void *Ptr, ZeEventWaitList)); // Pattern size must be a power of two - assert((PatternSize > 0) && ((PatternSize & (PatternSize - 1)) == 0)); + PI_ASSERT((PatternSize > 0) && ((PatternSize & (PatternSize - 1)) == 0), + PI_INVALID_VALUE); ZE_CALL(zeCommandListAppendMemoryFill( ZeCommandList, Ptr, Pattern, PatternSize, Size, ZeEvent, 0, nullptr)); @@ -4164,8 +4202,8 @@ pi_result piEnqueueMemBufferFill(pi_queue Queue, pi_mem Buffer, const pi_event *EventWaitList, pi_event *Event) { - assert(Buffer); - assert(Queue); + PI_ASSERT(Buffer, PI_INVALID_MEM_OBJECT); + PI_ASSERT(Queue, PI_INVALID_QUEUE); // Lock automatically releases when this goes out of scope. std::lock_guard lock(Queue->PiQueueMutex); @@ -4184,10 +4222,10 @@ pi_result piEnqueueMemBufferMap(pi_queue Queue, pi_mem Buffer, void **RetMap) { // TODO: we don't implement read-only or write-only, always read-write. - // assert((map_flags & PI_MAP_READ) != 0); - // assert((map_flags & PI_MAP_WRITE) != 0); - assert(Buffer); - assert(Queue); + // assert((map_flags & CL_MAP_READ) != 0); + // assert((map_flags & CL_MAP_WRITE) != 0); + PI_ASSERT(Buffer, PI_INVALID_MEM_OBJECT); + PI_ASSERT(Queue, PI_INVALID_QUEUE); // For discrete devices we don't need a commandlist ze_command_list_handle_t ZeCommandList = nullptr; @@ -4280,7 +4318,7 @@ pi_result piEnqueueMemBufferMap(pi_queue Queue, pi_mem Buffer, pi_result piEnqueueMemUnmap(pi_queue Queue, pi_mem MemObj, void *MappedPtr, pi_uint32 NumEventsInWaitList, const pi_event *EventWaitList, pi_event *Event) { - assert(Queue); + PI_ASSERT(Queue, PI_INVALID_QUEUE); // Integrated devices don't need a command list. // If discrete we will get a commandlist later. @@ -4289,7 +4327,7 @@ pi_result piEnqueueMemUnmap(pi_queue Queue, pi_mem MemObj, void *MappedPtr, // TODO: handle the case when user does not care to follow the event // of unmap completion. - assert(Event); + PI_ASSERT(Event, PI_INVALID_EVENT); ze_event_handle_t ZeEvent = nullptr; if (Event) { @@ -4380,41 +4418,44 @@ pi_result piMemImageGetInfo(pi_mem Image, pi_image_info ParamName, } // extern "C" -static ze_image_region_t getImageRegionHelper(pi_mem Mem, - pi_image_offset Origin, - pi_image_region Region) { +static pi_result getImageRegionHelper(pi_mem Mem, pi_image_offset Origin, + pi_image_region Region, + ze_image_region_t &ZeRegion) { - assert(Mem && Origin); + PI_ASSERT(Mem && Mem->isImage(), PI_INVALID_MEM_OBJECT); + PI_ASSERT(Origin, PI_INVALID_VALUE); #ifndef NDEBUG - assert(Mem->isImage()); auto Image = static_cast<_pi_image *>(Mem); ze_image_desc_t ZeImageDesc = Image->ZeImageDesc; #endif // !NDEBUG - assert((ZeImageDesc.type == ZE_IMAGE_TYPE_1D && Origin->y == 0 && - Origin->z == 0) || - (ZeImageDesc.type == ZE_IMAGE_TYPE_1DARRAY && Origin->z == 0) || - (ZeImageDesc.type == ZE_IMAGE_TYPE_2D && Origin->z == 0) || - (ZeImageDesc.type == ZE_IMAGE_TYPE_3D)); + PI_ASSERT((ZeImageDesc.type == ZE_IMAGE_TYPE_1D && Origin->y == 0 && + Origin->z == 0) || + (ZeImageDesc.type == ZE_IMAGE_TYPE_1DARRAY && Origin->z == 0) || + (ZeImageDesc.type == ZE_IMAGE_TYPE_2D && Origin->z == 0) || + (ZeImageDesc.type == ZE_IMAGE_TYPE_3D), + PI_INVALID_VALUE); uint32_t OriginX = pi_cast(Origin->x); uint32_t OriginY = pi_cast(Origin->y); uint32_t OriginZ = pi_cast(Origin->z); - assert(Region->width && Region->height && Region->depth); - assert((ZeImageDesc.type == ZE_IMAGE_TYPE_1D && Region->height == 1 && - Region->depth == 1) || - (ZeImageDesc.type == ZE_IMAGE_TYPE_1DARRAY && Region->depth == 1) || - (ZeImageDesc.type == ZE_IMAGE_TYPE_2D && Region->depth == 1) || - (ZeImageDesc.type == ZE_IMAGE_TYPE_3D)); + PI_ASSERT(Region->width && Region->height && Region->depth, PI_INVALID_VALUE); + PI_ASSERT( + (ZeImageDesc.type == ZE_IMAGE_TYPE_1D && Region->height == 1 && + Region->depth == 1) || + (ZeImageDesc.type == ZE_IMAGE_TYPE_1DARRAY && Region->depth == 1) || + (ZeImageDesc.type == ZE_IMAGE_TYPE_2D && Region->depth == 1) || + (ZeImageDesc.type == ZE_IMAGE_TYPE_3D), + PI_INVALID_VALUE); uint32_t Width = pi_cast(Region->width); uint32_t Height = pi_cast(Region->height); uint32_t Depth = pi_cast(Region->depth); - const ze_image_region_t ZeRegion = {OriginX, OriginY, OriginZ, - Width, Height, Depth}; - return ZeRegion; + ZeRegion = {OriginX, OriginY, OriginZ, Width, Height, Depth}; + + return PI_SUCCESS; } // Helper function to implement image read/write/copy. @@ -4458,24 +4499,30 @@ enqueueMemImageCommandHelper(pi_command_type CommandType, pi_queue Queue, if (CommandType == PI_COMMAND_TYPE_IMAGE_READ) { pi_mem SrcMem = pi_cast(const_cast(Src)); - const ze_image_region_t ZeSrcRegion = - getImageRegionHelper(SrcMem, SrcOrigin, Region); + ze_image_region_t ZeSrcRegion; + auto Result = getImageRegionHelper(SrcMem, SrcOrigin, Region, ZeSrcRegion); + if (Result != PI_SUCCESS) + return Result; - // TODO: Level Zero does not support row_pitch/slice_pitch for images yet. - // Check that SYCL RT did not want pitch larger than default. + // TODO: Level Zero does not support row_pitch/slice_pitch for images yet. + // Check that SYCL RT did not want pitch larger than default. #ifndef NDEBUG - assert(SrcMem->isImage()); + PI_ASSERT(SrcMem->isImage(), PI_INVALID_MEM_OBJECT); + auto SrcImage = static_cast<_pi_image *>(SrcMem); const ze_image_desc_t &ZeImageDesc = SrcImage->ZeImageDesc; - assert(RowPitch == 0 || - // special case RGBA image pitch equal to region's width - (ZeImageDesc.format.layout == ZE_IMAGE_FORMAT_LAYOUT_32_32_32_32 && - RowPitch == 4 * 4 * ZeSrcRegion.width) || - (ZeImageDesc.format.layout == ZE_IMAGE_FORMAT_LAYOUT_16_16_16_16 && - RowPitch == 4 * 2 * ZeSrcRegion.width) || - (ZeImageDesc.format.layout == ZE_IMAGE_FORMAT_LAYOUT_8_8_8_8 && - RowPitch == 4 * ZeSrcRegion.width)); - assert(SlicePitch == 0 || SlicePitch == RowPitch * ZeSrcRegion.height); + PI_ASSERT( + RowPitch == 0 || + // special case RGBA image pitch equal to region's width + (ZeImageDesc.format.layout == ZE_IMAGE_FORMAT_LAYOUT_32_32_32_32 && + RowPitch == 4 * 4 * ZeSrcRegion.width) || + (ZeImageDesc.format.layout == ZE_IMAGE_FORMAT_LAYOUT_16_16_16_16 && + RowPitch == 4 * 2 * ZeSrcRegion.width) || + (ZeImageDesc.format.layout == ZE_IMAGE_FORMAT_LAYOUT_8_8_8_8 && + RowPitch == 4 * ZeSrcRegion.width), + PI_INVALID_IMAGE_SIZE); + PI_ASSERT(SlicePitch == 0 || SlicePitch == RowPitch * ZeSrcRegion.height, + PI_INVALID_IMAGE_SIZE); #endif // !NDEBUG ZE_CALL(zeCommandListAppendImageCopyToMemory( @@ -4483,24 +4530,30 @@ enqueueMemImageCommandHelper(pi_command_type CommandType, pi_queue Queue, &ZeSrcRegion, ZeEvent, 0, nullptr)); } else if (CommandType == PI_COMMAND_TYPE_IMAGE_WRITE) { pi_mem DstMem = pi_cast(Dst); - const ze_image_region_t ZeDstRegion = - getImageRegionHelper(DstMem, DstOrigin, Region); + ze_image_region_t ZeDstRegion; + auto Result = getImageRegionHelper(DstMem, DstOrigin, Region, ZeDstRegion); + if (Result != PI_SUCCESS) + return Result; - // TODO: Level Zero does not support row_pitch/slice_pitch for images yet. - // Check that SYCL RT did not want pitch larger than default. + // TODO: Level Zero does not support row_pitch/slice_pitch for images yet. + // Check that SYCL RT did not want pitch larger than default. #ifndef NDEBUG - assert(DstMem->isImage()); + PI_ASSERT(DstMem->isImage(), PI_INVALID_MEM_OBJECT); + auto DstImage = static_cast<_pi_image *>(DstMem); const ze_image_desc_t &ZeImageDesc = DstImage->ZeImageDesc; - assert(RowPitch == 0 || - // special case RGBA image pitch equal to region's width - (ZeImageDesc.format.layout == ZE_IMAGE_FORMAT_LAYOUT_32_32_32_32 && - RowPitch == 4 * 4 * ZeDstRegion.width) || - (ZeImageDesc.format.layout == ZE_IMAGE_FORMAT_LAYOUT_16_16_16_16 && - RowPitch == 4 * 2 * ZeDstRegion.width) || - (ZeImageDesc.format.layout == ZE_IMAGE_FORMAT_LAYOUT_8_8_8_8 && - RowPitch == 4 * ZeDstRegion.width)); - assert(SlicePitch == 0 || SlicePitch == RowPitch * ZeDstRegion.height); + PI_ASSERT( + RowPitch == 0 || + // special case RGBA image pitch equal to region's width + (ZeImageDesc.format.layout == ZE_IMAGE_FORMAT_LAYOUT_32_32_32_32 && + RowPitch == 4 * 4 * ZeDstRegion.width) || + (ZeImageDesc.format.layout == ZE_IMAGE_FORMAT_LAYOUT_16_16_16_16 && + RowPitch == 4 * 2 * ZeDstRegion.width) || + (ZeImageDesc.format.layout == ZE_IMAGE_FORMAT_LAYOUT_8_8_8_8 && + RowPitch == 4 * ZeDstRegion.width), + PI_INVALID_IMAGE_SIZE); + PI_ASSERT(SlicePitch == 0 || SlicePitch == RowPitch * ZeDstRegion.height, + PI_INVALID_IMAGE_SIZE); #endif // !NDEBUG ZE_CALL(zeCommandListAppendImageCopyFromMemory( @@ -4510,10 +4563,15 @@ enqueueMemImageCommandHelper(pi_command_type CommandType, pi_queue Queue, pi_mem SrcImage = pi_cast(const_cast(Src)); pi_mem DstImage = pi_cast(Dst); - const ze_image_region_t ZeSrcRegion = - getImageRegionHelper(SrcImage, SrcOrigin, Region); - const ze_image_region_t ZeDstRegion = - getImageRegionHelper(DstImage, DstOrigin, Region); + ze_image_region_t ZeSrcRegion; + auto Result = + getImageRegionHelper(SrcImage, SrcOrigin, Region, ZeSrcRegion); + if (Result != PI_SUCCESS) + return Result; + ze_image_region_t ZeDstRegion; + Result = getImageRegionHelper(DstImage, DstOrigin, Region, ZeDstRegion); + if (Result != PI_SUCCESS) + return Result; ZE_CALL(zeCommandListAppendImageCopyRegion( ZeCommandList, pi_cast(DstImage->getZeHandle()), @@ -4541,7 +4599,7 @@ pi_result piEnqueueMemImageRead(pi_queue Queue, pi_mem Image, pi_uint32 NumEventsInWaitList, const pi_event *EventWaitList, pi_event *Event) { - assert(Queue); + PI_ASSERT(Queue, PI_INVALID_QUEUE); // Lock automatically releases when this goes out of scope. std::lock_guard lock(Queue->PiQueueMutex); @@ -4564,7 +4622,7 @@ pi_result piEnqueueMemImageWrite(pi_queue Queue, pi_mem Image, const pi_event *EventWaitList, pi_event *Event) { - assert(Queue); + PI_ASSERT(Queue, PI_INVALID_QUEUE); // Lock automatically releases when this goes out of scope. std::lock_guard lock(Queue->PiQueueMutex); @@ -4586,7 +4644,7 @@ piEnqueueMemImageCopy(pi_queue Queue, pi_mem SrcImage, pi_mem DstImage, pi_image_region Region, pi_uint32 NumEventsInWaitList, const pi_event *EventWaitList, pi_event *Event) { - assert(Queue); + PI_ASSERT(Queue, PI_INVALID_QUEUE); // Lock automatically releases when this goes out of scope. std::lock_guard lock(Queue->PiQueueMutex); @@ -4607,7 +4665,7 @@ pi_result piEnqueueMemImageFill(pi_queue Queue, pi_mem Image, const pi_event *EventWaitList, pi_event *Event) { - assert(Queue); + PI_ASSERT(Queue, PI_INVALID_QUEUE); // Lock automatically releases when this goes out of scope. std::lock_guard lock(Queue->PiQueueMutex); @@ -4620,17 +4678,25 @@ pi_result piMemBufferPartition(pi_mem Buffer, pi_mem_flags Flags, pi_buffer_create_type BufferCreateType, void *BufferCreateInfo, pi_mem *RetMem) { - assert(Buffer && !Buffer->isImage()); - assert(Flags == PI_MEM_FLAGS_ACCESS_RW); - assert(BufferCreateType == PI_BUFFER_CREATE_TYPE_REGION); - assert(!(static_cast<_pi_buffer *>(Buffer))->isSubBuffer() && - "Sub-buffer cannot be partitioned"); - assert(BufferCreateInfo); - assert(RetMem); + PI_ASSERT(Buffer && !Buffer->isImage() && + !(static_cast<_pi_buffer *>(Buffer))->isSubBuffer(), + PI_INVALID_MEM_OBJECT); + + PI_ASSERT(BufferCreateType == PI_BUFFER_CREATE_TYPE_REGION && + BufferCreateInfo && RetMem, + PI_INVALID_VALUE); + + if (Flags != PI_MEM_FLAGS_ACCESS_RW) { + die("piMemBufferPartition: Level-Zero implements only read-write buffer," + "no read-only or write-only yet."); + } auto Region = (pi_buffer_region)BufferCreateInfo; - assert(Region->size != 0u && "Invalid size"); - assert(Region->origin <= (Region->origin + Region->size) && "Overflow"); + + PI_ASSERT(Region->size != 0u, PI_INVALID_BUFFER_SIZE); + PI_ASSERT(Region->origin <= (Region->origin + Region->size), + PI_INVALID_VALUE); + try { *RetMem = new _pi_buffer(Buffer->Context, @@ -4656,7 +4722,7 @@ pi_result piEnqueueNativeKernel(pi_queue Queue, void (*UserFunc)(void *), const pi_event *EventWaitList, pi_event *Event) { - assert(Queue); + PI_ASSERT(Queue, PI_INVALID_QUEUE); // Lock automatically releases when this goes out of scope. std::lock_guard lock(Queue->PiQueueMutex); @@ -4669,11 +4735,11 @@ pi_result piEnqueueNativeKernel(pi_queue Queue, void (*UserFunc)(void *), pi_result piextGetDeviceFunctionPointer(pi_device Device, pi_program Program, const char *FunctionName, pi_uint64 *FunctionPointerRet) { - assert(Program != nullptr); + PI_ASSERT(Program, PI_INVALID_PROGRAM); if (Program->State != _pi_program::Exe && Program->State != _pi_program::LinkedExe) { - return PI_INVALID_OPERATION; + return PI_INVALID_PROGRAM_EXECUTABLE; } // Search for the function name in each module. @@ -4693,9 +4759,11 @@ pi_result piextGetDeviceFunctionPointer(pi_device Device, pi_program Program, pi_result piextUSMHostAlloc(void **ResultPtr, pi_context Context, pi_usm_mem_properties *Properties, size_t Size, pi_uint32 Alignment) { - assert(Context); + PI_ASSERT(Context, PI_INVALID_CONTEXT); + // Check that incorrect bits are not set in the properties. - assert(!Properties || (Properties && !(*Properties & ~PI_MEM_ALLOC_FLAGS))); + PI_ASSERT(!Properties || (Properties && !(*Properties & ~PI_MEM_ALLOC_FLAGS)), + PI_INVALID_VALUE); ze_host_mem_alloc_desc_t ZeDesc = {}; ZeDesc.flags = 0; @@ -4703,8 +4771,10 @@ pi_result piextUSMHostAlloc(void **ResultPtr, pi_context Context, ZE_CALL( zeMemAllocHost(Context->ZeContext, &ZeDesc, Size, Alignment, ResultPtr)); - assert(Alignment == 0 || - reinterpret_cast(*ResultPtr) % Alignment == 0); + PI_ASSERT(Alignment == 0 || + reinterpret_cast(*ResultPtr) % Alignment == 0, + PI_INVALID_VALUE); + return PI_SUCCESS; } @@ -4719,10 +4789,12 @@ pi_result USMDeviceAllocImpl(void **ResultPtr, pi_context Context, pi_device Device, pi_usm_mem_properties *Properties, size_t Size, pi_uint32 Alignment) { - assert(Context); - assert(Device); + PI_ASSERT(Context, PI_INVALID_CONTEXT); + PI_ASSERT(Device, PI_INVALID_DEVICE); + // Check that incorrect bits are not set in the properties. - assert(!Properties || (Properties && !(*Properties & ~PI_MEM_ALLOC_FLAGS))); + PI_ASSERT(!Properties || (Properties && !(*Properties & ~PI_MEM_ALLOC_FLAGS)), + PI_INVALID_VALUE); // TODO: translate PI properties to Level Zero flags ze_device_mem_alloc_desc_t ZeDesc = {}; @@ -4731,8 +4803,10 @@ pi_result USMDeviceAllocImpl(void **ResultPtr, pi_context Context, ZE_CALL(zeMemAllocDevice(Context->ZeContext, &ZeDesc, Size, Alignment, Device->ZeDevice, ResultPtr)); - assert(Alignment == 0 || - reinterpret_cast(*ResultPtr) % Alignment == 0); + PI_ASSERT(Alignment == 0 || + reinterpret_cast(*ResultPtr) % Alignment == 0, + PI_INVALID_VALUE); + return PI_SUCCESS; } @@ -4740,10 +4814,12 @@ pi_result USMSharedAllocImpl(void **ResultPtr, pi_context Context, pi_device Device, pi_usm_mem_properties *Properties, size_t Size, pi_uint32 Alignment) { - assert(Context); - assert(Device); + PI_ASSERT(Context, PI_INVALID_CONTEXT); + PI_ASSERT(Device, PI_INVALID_DEVICE); + // Check that incorrect bits are not set in the properties. - assert(!Properties || (Properties && !(*Properties & ~PI_MEM_ALLOC_FLAGS))); + PI_ASSERT(!Properties || (Properties && !(*Properties & ~PI_MEM_ALLOC_FLAGS)), + PI_INVALID_VALUE); // TODO: translate PI properties to Level Zero flags ze_host_mem_alloc_desc_t ZeHostDesc = {}; @@ -4754,8 +4830,10 @@ pi_result USMSharedAllocImpl(void **ResultPtr, pi_context Context, ZE_CALL(zeMemAllocShared(Context->ZeContext, &ZeDevDesc, &ZeHostDesc, Size, Alignment, Device->ZeDevice, ResultPtr)); - assert(Alignment == 0 || - reinterpret_cast(*ResultPtr) % Alignment == 0); + PI_ASSERT(Alignment == 0 || + reinterpret_cast(*ResultPtr) % Alignment == 0, + PI_INVALID_VALUE); + return PI_SUCCESS; } @@ -4885,7 +4963,8 @@ pi_result piextUSMFree(pi_context Context, void *Ptr) { // All devices in the context are of the same platform. auto Platform = Context->Devices[0]->Platform; auto Device = Platform->getDeviceFromNativeHandle(ZeDeviceHandle); - assert(Device); + + PI_ASSERT(Device, PI_INVALID_DEVICE); auto DeallocationHelper = [Context, Device, @@ -4943,7 +5022,7 @@ pi_result piextUSMEnqueueMemset(pi_queue Queue, void *Ptr, pi_int32 Value, return PI_INVALID_VALUE; } - assert(Queue); + PI_ASSERT(Queue, PI_INVALID_QUEUE); // Lock automatically releases when this goes out of scope. std::lock_guard lock(Queue->PiQueueMutex); @@ -4966,7 +5045,7 @@ pi_result piextUSMEnqueueMemcpy(pi_queue Queue, pi_bool Blocking, void *DstPtr, return PI_INVALID_VALUE; } - assert(Queue); + PI_ASSERT(Queue, PI_INVALID_QUEUE); // Lock automatically releases when this goes out of scope. std::lock_guard lock(Queue->PiQueueMutex); @@ -4991,8 +5070,8 @@ pi_result piextUSMEnqueuePrefetch(pi_queue Queue, const void *Ptr, size_t Size, pi_uint32 NumEventsInWaitlist, const pi_event *EventsWaitlist, pi_event *Event) { - assert(!(Flags & ~PI_USM_MIGRATION_TBD0)); - assert(Queue); + PI_ASSERT(!(Flags & ~PI_USM_MIGRATION_TBD0), PI_INVALID_VALUE); + PI_ASSERT(Queue, PI_INVALID_QUEUE); // Lock automatically releases when this goes out of scope. std::lock_guard lock(Queue->PiQueueMutex); @@ -5050,7 +5129,7 @@ pi_result piextUSMEnqueuePrefetch(pi_queue Queue, const void *Ptr, size_t Size, pi_result piextUSMEnqueueMemAdvise(pi_queue Queue, const void *Ptr, size_t Length, pi_mem_advice Advice, pi_event *Event) { - assert(Queue); + PI_ASSERT(Queue, PI_INVALID_QUEUE); // Lock automatically releases when this goes out of scope. std::lock_guard lock(Queue->PiQueueMutex); @@ -5108,7 +5187,8 @@ pi_result piextUSMEnqueueMemAdvise(pi_queue Queue, const void *Ptr, pi_result piextUSMGetMemAllocInfo(pi_context Context, const void *Ptr, pi_mem_info ParamName, size_t ParamValueSize, void *ParamValue, size_t *ParamValueSizeRet) { - assert(Context); + PI_ASSERT(Context, PI_INVALID_CONTEXT); + ze_device_handle_t ZeDeviceHandle; ze_memory_allocation_properties_t ZeMemoryAllocationProperties = {}; @@ -5166,8 +5246,9 @@ pi_result piextUSMGetMemAllocInfo(pi_context Context, const void *Ptr, pi_result piKernelSetExecInfo(pi_kernel Kernel, pi_kernel_exec_info ParamName, size_t ParamValueSize, const void *ParamValue) { - assert(Kernel); - assert(ParamValue); + PI_ASSERT(Kernel, PI_INVALID_KERNEL); + PI_ASSERT(ParamValue, PI_INVALID_VALUE); + if (ParamName == PI_USM_INDIRECT_ACCESS && *(static_cast(ParamValue)) == PI_TRUE) { // The whole point for users really was to not need to know anything @@ -5204,10 +5285,13 @@ pi_result piextProgramSetSpecializationConstant(pi_program Prog, } pi_result piPluginInit(pi_plugin *PluginInit) { - assert(PluginInit); + PI_ASSERT(PluginInit, PI_INVALID_VALUE); + // TODO: handle versioning/targets properly. size_t PluginVersionSize = sizeof(PluginInit->PluginVersion); - assert(strlen(_PI_H_VERSION_STRING) < PluginVersionSize); + + PI_ASSERT(strlen(_PI_H_VERSION_STRING) < PluginVersionSize, PI_INVALID_VALUE); + strncpy(PluginInit->PluginVersion, _PI_H_VERSION_STRING, PluginVersionSize); #define _PI_API(api) \