From 89f989fe8c25449f0f2dcee860fe602ce86d46f0 Mon Sep 17 00:00:00 2001 From: "Lamzed-Short, Andrew" Date: Tue, 28 Feb 2023 07:06:32 -0800 Subject: [PATCH 01/20] Initial implementation of atomic_memory_order_capabilities query This satsifies llvm-test-suite/SYCL/AtomicRef/atomic-memory-order.cpp if you remove the "UNSUPPORTED OpenCL" test line, but doesn't implement the other capabilities this could return. Further discussion is required for further work to continue. --- sycl/plugins/opencl/pi_opencl.cpp | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index 8c30389285c83..4be076551e697 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -282,9 +282,15 @@ pi_result piDeviceGetInfo(pi_device device, pi_device_info paramName, // For details about Intel UUID extension, see // sycl/doc/extensions/supported/sycl_ext_intel_device_info.md case PI_DEVICE_INFO_UUID: - case PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: case PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: return PI_ERROR_INVALID_VALUE; + case PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: { + // Guaranteed to return at least relaxed memory order + cl_int result = 1; + // TODO: Check for support for the rest of the capabilities + std::memcpy(paramValue, &result, sizeof(cl_int)); + return PI_SUCCESS; + } case PI_DEVICE_INFO_ATOMIC_64: { cl_int ret_err = CL_SUCCESS; cl_bool result = CL_FALSE; @@ -849,6 +855,13 @@ pi_result piContextGetInfo(pi_context context, pi_context_info paramName, std::memcpy(paramValue, &result, sizeof(cl_bool)); return PI_SUCCESS; } + case PI_CONTEXT_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: { + // Guaranteed to return at least relaxed memory order + cl_int result = 1; + // TODO: Check for support for the rest of the capabilities + std::memcpy(paramValue, &result, sizeof(cl_int)); + return PI_SUCCESS; + } default: cl_int result = clGetContextInfo( cast(context), cast(paramName), From 4518322a507f35fd364673c24bb8751cebc99b3c Mon Sep 17 00:00:00 2001 From: "Lamzed-Short, Andrew" Date: Thu, 2 Mar 2023 07:50:37 -0800 Subject: [PATCH 02/20] Implementation of context atomic_memory_order_capabilities query Minimal implementaton for now that calls similar query for each device and combines them. Currently checks for memory_order::relaxed, others coming soon. --- sycl/plugins/opencl/pi_opencl.cpp | 81 +++++++++++++++++++++++++++++-- 1 file changed, 77 insertions(+), 4 deletions(-) diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index 4be076551e697..9486063ef883f 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -285,6 +285,35 @@ pi_result piDeviceGetInfo(pi_device device, pi_device_info paramName, case PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: return PI_ERROR_INVALID_VALUE; case PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: { + // This query is missing beore OpenCL 3.0 + // Check version and handle appropriately + OCLV::OpenCLVersion devVer, platVer; + cl_platform_id platform; + cl_device_id deviceID = cast(device); + + auto ret_err = clGetDeviceInfo(deviceID, CL_DEVICE_PLATFORM, + sizeof(cl_platform_id), &platform, nullptr); + if (ret_err != CL_SUCCESS) { + return cast(ret_err); + } + + ret_err = getDeviceVersion(deviceID, devVer); + if (ret_err != CL_SUCCESS) { + return cast(ret_err); + } + + ret_err = getPlatformVersion(platform, platVer); + if (ret_err != CL_SUCCESS) { + return cast(ret_err); + } + + if (platVer < OCLV::V3_0 || devVer < OCLV::V3_0) { + setErrorMessage( + "OpenCL version for device and/or platform is less than 3.0", + PI_ERROR_INVALID_OPERATION); + return PI_ERROR_INVALID_OPERATION; + } + // Guaranteed to return at least relaxed memory order cl_int result = 1; // TODO: Check for support for the rest of the capabilities @@ -856,10 +885,54 @@ pi_result piContextGetInfo(pi_context context, pi_context_info paramName, return PI_SUCCESS; } case PI_CONTEXT_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: { - // Guaranteed to return at least relaxed memory order - cl_int result = 1; - // TODO: Check for support for the rest of the capabilities - std::memcpy(paramValue, &result, sizeof(cl_int)); + // Get all devices in context + cl_uint deviceCount; + cl_int ret_err = + clGetContextInfo(cast(context), CL_CONTEXT_NUM_DEVICES, + sizeof(cl_uint), &deviceCount, nullptr); + if (ret_err != CL_SUCCESS || deviceCount < 1) + return PI_ERROR_INVALID_CONTEXT; + + std::vector devicesInCtx(deviceCount); + ret_err = clGetContextInfo(cast(context), CL_CONTEXT_DEVICES, + deviceCount * sizeof(cl_device_id), + devicesInCtx.data(), nullptr); + if (ret_err != CL_SUCCESS) + return PI_ERROR_INVALID_CONTEXT; + + // Check for valid platform OpenCL version (>=3.0) + cl_platform_id platform; + ret_err = clGetDeviceInfo(devicesInCtx[0], CL_DEVICE_PLATFORM, + sizeof(cl_platform_id), &platform, nullptr); + if (ret_err != CL_SUCCESS) + return PI_ERROR_INVALID_CONTEXT; + + OCLV::OpenCLVersion platVer; + ret_err = getPlatformVersion(platform, platVer); + if (ret_err != CL_SUCCESS || platVer < OCLV::V3_0) + return PI_ERROR_INVALID_CONTEXT; + + // Get device memory order capabilities for each device + // Combine to get all supported capabilities on each device (as per 4.6.3.2) + // TODO: Check for support for the rest of the capabilities other than + // "relaxed" (set other 0's to 1's here) + cl_int commonCapabilities = 0b00001; + + for (cl_device_id device : devicesInCtx) { + // Device version is checked by the call to piDeviceGetInfo + // FIXME: Catch and ignore all <3.0 devices and use all 3.0 and newer? Or + // exit? + cl_int deviceCapabilities; + pi_result pi_ret_err = + piDeviceGetInfo(cast(device), PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES, + sizeof(cl_int), &deviceCapabilities, nullptr); + if (pi_ret_err != PI_SUCCESS) + return pi_ret_err; + + commonCapabilities &= deviceCapabilities; + } + + std::memcpy(paramValue, &commonCapabilities, sizeof(cl_int)); return PI_SUCCESS; } default: From 275dc691ec4aece58a4f6e579b115a634aee3782 Mon Sep 17 00:00:00 2001 From: "Lamzed-Short, Andrew" Date: Wed, 8 Mar 2023 08:45:33 -0800 Subject: [PATCH 03/20] Device info atomic memory order query impl Covering all current OpenCL versions --- sycl/plugins/opencl/pi_opencl.cpp | 97 +++++++------------------------ 1 file changed, 20 insertions(+), 77 deletions(-) diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index 8796e0d0c4e1a..6fd165155aa24 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -287,38 +287,32 @@ pi_result piDeviceGetInfo(pi_device device, pi_device_info paramName, case PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: { // This query is missing beore OpenCL 3.0 // Check version and handle appropriately - OCLV::OpenCLVersion devVer, platVer; - cl_platform_id platform; + OCLV::OpenCLVersion devVer; cl_device_id deviceID = cast(device); - - auto ret_err = clGetDeviceInfo(deviceID, CL_DEVICE_PLATFORM, - sizeof(cl_platform_id), &platform, nullptr); - if (ret_err != CL_SUCCESS) { - return cast(ret_err); - } - - ret_err = getDeviceVersion(deviceID, devVer); + cl_int ret_err = getDeviceVersion(deviceID, devVer); if (ret_err != CL_SUCCESS) { return cast(ret_err); } - ret_err = getPlatformVersion(platform, platVer); - if (ret_err != CL_SUCCESS) { - return cast(ret_err); - } - - if (platVer < OCLV::V3_0 || devVer < OCLV::V3_0) { - setErrorMessage( - "OpenCL version for device and/or platform is less than 3.0", - PI_ERROR_INVALID_OPERATION); - return PI_ERROR_INVALID_OPERATION; + if (devVer < OCLV::V2_0) { + // For OpenCL 1.2, return the minimum required values + cl_int result = PI_MEMORY_ORDER_RELAXED; + std::memcpy(paramValue, &result, sizeof(cl_int)); + return PI_SUCCESS; + } else if (devVer < OCLV::V3_0) { + // For OpenCL 2.x, return all capabilities + // (https://registry.khronos.org/OpenCL/specs/3.0-unified/html/OpenCL_API.html#_memory_consistency_model) + cl_int result = PI_MEMORY_ORDER_RELAXED | PI_MEMORY_ORDER_ACQ_REL | + PI_MEMORY_ORDER_SEQ_CST; + std::memcpy(paramValue, &result, sizeof(cl_int)); + return PI_SUCCESS; + } else { + // For OpenCL >=3.0, the query should be implemented + cl_int result = clGetDeviceInfo( + cast(device), cast(paramName), + paramValueSize, paramValue, paramValueSizeRet); + return static_cast(result); } - - // Guaranteed to return at least relaxed memory order - cl_int result = 1; - // TODO: Check for support for the rest of the capabilities - std::memcpy(paramValue, &result, sizeof(cl_int)); - return PI_SUCCESS; } case PI_DEVICE_INFO_ATOMIC_64: { cl_int ret_err = CL_SUCCESS; @@ -923,57 +917,6 @@ pi_result piContextGetInfo(pi_context context, pi_context_info paramName, std::memcpy(paramValue, &result, sizeof(cl_bool)); return PI_SUCCESS; } - case PI_CONTEXT_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: { - // Get all devices in context - cl_uint deviceCount; - cl_int ret_err = - clGetContextInfo(cast(context), CL_CONTEXT_NUM_DEVICES, - sizeof(cl_uint), &deviceCount, nullptr); - if (ret_err != CL_SUCCESS || deviceCount < 1) - return PI_ERROR_INVALID_CONTEXT; - - std::vector devicesInCtx(deviceCount); - ret_err = clGetContextInfo(cast(context), CL_CONTEXT_DEVICES, - deviceCount * sizeof(cl_device_id), - devicesInCtx.data(), nullptr); - if (ret_err != CL_SUCCESS) - return PI_ERROR_INVALID_CONTEXT; - - // Check for valid platform OpenCL version (>=3.0) - cl_platform_id platform; - ret_err = clGetDeviceInfo(devicesInCtx[0], CL_DEVICE_PLATFORM, - sizeof(cl_platform_id), &platform, nullptr); - if (ret_err != CL_SUCCESS) - return PI_ERROR_INVALID_CONTEXT; - - OCLV::OpenCLVersion platVer; - ret_err = getPlatformVersion(platform, platVer); - if (ret_err != CL_SUCCESS || platVer < OCLV::V3_0) - return PI_ERROR_INVALID_CONTEXT; - - // Get device memory order capabilities for each device - // Combine to get all supported capabilities on each device (as per 4.6.3.2) - // TODO: Check for support for the rest of the capabilities other than - // "relaxed" (set other 0's to 1's here) - cl_int commonCapabilities = 0b00001; - - for (cl_device_id device : devicesInCtx) { - // Device version is checked by the call to piDeviceGetInfo - // FIXME: Catch and ignore all <3.0 devices and use all 3.0 and newer? Or - // exit? - cl_int deviceCapabilities; - pi_result pi_ret_err = - piDeviceGetInfo(cast(device), PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES, - sizeof(cl_int), &deviceCapabilities, nullptr); - if (pi_ret_err != PI_SUCCESS) - return pi_ret_err; - - commonCapabilities &= deviceCapabilities; - } - - std::memcpy(paramValue, &commonCapabilities, sizeof(cl_int)); - return PI_SUCCESS; - } default: cl_int result = clGetContextInfo( cast(context), cast(paramName), From 39cdff0134d891cf95d3f6b86b1b3fb5635927c1 Mon Sep 17 00:00:00 2001 From: "Lamzed-Short, Andrew" Date: Thu, 9 Mar 2023 04:23:41 -0800 Subject: [PATCH 04/20] level_zero implementation of atomic_memory_order_caps Return full set of them as in OCL 2.x --- sycl/plugins/unified_runtime/pi2ur.hpp | 2 ++ .../ur/adapters/level_zero/ur_level_zero.cpp | 7 +++++++ 2 files changed, 9 insertions(+) diff --git a/sycl/plugins/unified_runtime/pi2ur.hpp b/sycl/plugins/unified_runtime/pi2ur.hpp index 224589f482578..4057e91562da5 100644 --- a/sycl/plugins/unified_runtime/pi2ur.hpp +++ b/sycl/plugins/unified_runtime/pi2ur.hpp @@ -488,6 +488,8 @@ inline pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName, (zer_device_info_t)ZER_EXT_DEVICE_INFO_BFLOAT16_MATH_FUNCTIONS}, {PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES, (zer_device_info_t)ZER_EXT_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES}, + {PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES, + (zer_device_info_t)ZER_EXT_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES}, }; auto InfoType = InfoMapping.find(ParamName); diff --git a/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero.cpp b/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero.cpp index 450a5aff1a4d8..c1f32be5c8b93 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero.cpp @@ -10,6 +10,7 @@ #include #include +#include #include "ur_level_zero.hpp" #include @@ -1161,6 +1162,12 @@ ZER_APIEXPORT zer_result_t ZER_APICALL zerDeviceGetInfo( return ReturnValue(bool{false}); } + case ZER_EXT_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: { + return ReturnValue(uint32_t{ + PI_MEMORY_ORDER_RELAXED | PI_MEMORY_ORDER_ACQ_REL | PI_MEMORY_ORDER_SEQ_CST + }); + } + // TODO: Implement. case ZER_EXT_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: default: From 9579a0388b3b6200fab4c19bb2047828199256ee Mon Sep 17 00:00:00 2001 From: "Lamzed-Short, Andrew" Date: Thu, 9 Mar 2023 04:35:50 -0800 Subject: [PATCH 05/20] Implement context atomic_mem_order query higher up Instead of resolving it as its own plugin call with PI code, just call the related one for each of its devices and intersect the results together. --- sycl/source/detail/context_impl.cpp | 29 ++++++++++++++++++++--------- sycl/source/detail/context_info.hpp | 11 ----------- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/sycl/source/detail/context_impl.cpp b/sycl/source/detail/context_impl.cpp index 22818eeed302c..198b4f1cc2d30 100644 --- a/sycl/source/detail/context_impl.cpp +++ b/sycl/source/detail/context_impl.cpp @@ -23,6 +23,8 @@ #include #include +#include + namespace sycl { __SYCL_INLINE_VER_NAMESPACE(_V1) { namespace detail { @@ -166,17 +168,26 @@ template <> std::vector context_impl::get_info() const { + std::vector CapabilityList{ + sycl::memory_order::relaxed, sycl::memory_order::acquire, + sycl::memory_order::release, sycl::memory_order::acq_rel, + sycl::memory_order::seq_cst}; if (is_host()) - return {sycl::memory_order::relaxed, sycl::memory_order::acquire, - sycl::memory_order::release, sycl::memory_order::acq_rel, - sycl::memory_order::seq_cst}; + return CapabilityList; + + for (const sycl::device &Device : MDevices) { + std::vector NewCapabilityList(CapabilityList.size()); + std::vector DeviceCapabilities = + Device.get_info(); + std::set_intersection( + CapabilityList.begin(), CapabilityList.end(), + DeviceCapabilities.begin(), DeviceCapabilities.end(), + std::inserter(NewCapabilityList, NewCapabilityList.begin())); + CapabilityList = NewCapabilityList; + } + CapabilityList.shrink_to_fit(); - pi_memory_order_capabilities Result; - getPlugin().call( - MContext, - PiInfoCode::value, - sizeof(Result), &Result, nullptr); - return readMemoryOrderBitfield(Result); + return CapabilityList; } template <> std::vector diff --git a/sycl/source/detail/context_info.hpp b/sycl/source/detail/context_info.hpp index 1056557ec2bad..d30113dc6d08e 100644 --- a/sycl/source/detail/context_info.hpp +++ b/sycl/source/detail/context_info.hpp @@ -29,17 +29,6 @@ typename Param::return_type get_context_info(RT::PiContext Ctx, return Result; } -// Specialization for atomic_memory_order_capabilities, PI returns a bitfield -template <> -std::vector -get_context_info( - RT::PiContext Ctx, const plugin &Plugin) { - pi_memory_order_capabilities Result; - Plugin.call( - Ctx, PiInfoCode::value, - sizeof(Result), &Result, nullptr); - return readMemoryOrderBitfield(Result); -} } // namespace detail } // __SYCL_INLINE_VER_NAMESPACE(_V1) } // namespace sycl From 2f99c5a4b66d3f3118d6d8d8477d58ab300c5566 Mon Sep 17 00:00:00 2001 From: "Lamzed-Short, Andrew" Date: Thu, 9 Mar 2023 04:36:10 -0800 Subject: [PATCH 06/20] Clang format fixes to ur_level_zero atomic_mem_order impl --- .../ur/adapters/level_zero/ur_level_zero.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero.cpp b/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero.cpp index c1f32be5c8b93..c09755a6d7b78 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero.cpp @@ -10,8 +10,8 @@ #include #include -#include #include "ur_level_zero.hpp" +#include #include // Define the static class field @@ -1163,9 +1163,9 @@ ZER_APIEXPORT zer_result_t ZER_APICALL zerDeviceGetInfo( } case ZER_EXT_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: { - return ReturnValue(uint32_t{ - PI_MEMORY_ORDER_RELAXED | PI_MEMORY_ORDER_ACQ_REL | PI_MEMORY_ORDER_SEQ_CST - }); + return ReturnValue(uint32_t{PI_MEMORY_ORDER_RELAXED | + PI_MEMORY_ORDER_ACQ_REL | + PI_MEMORY_ORDER_SEQ_CST}); } // TODO: Implement. From a508c498d5c36c9f5c7cc008088cc1ff473b07dc Mon Sep 17 00:00:00 2001 From: "Lamzed-Short, Andrew" Date: Thu, 9 Mar 2023 05:13:14 -0800 Subject: [PATCH 07/20] Fixed level_zero atomic_mem_order_caps value --- sycl/plugins/unified_runtime/pi2ur.hpp | 2 +- .../unified_runtime/ur/adapters/level_zero/ur_level_zero.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/plugins/unified_runtime/pi2ur.hpp b/sycl/plugins/unified_runtime/pi2ur.hpp index 4057e91562da5..4dca2202b0231 100644 --- a/sycl/plugins/unified_runtime/pi2ur.hpp +++ b/sycl/plugins/unified_runtime/pi2ur.hpp @@ -489,7 +489,7 @@ inline pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName, {PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES, (zer_device_info_t)ZER_EXT_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES}, {PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES, - (zer_device_info_t)ZER_EXT_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES}, + (zer_device_info_t)ZER_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES}, }; auto InfoType = InfoMapping.find(ParamName); diff --git a/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero.cpp b/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero.cpp index c09755a6d7b78..4aa5dac9a981a 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero.cpp @@ -1162,7 +1162,7 @@ ZER_APIEXPORT zer_result_t ZER_APICALL zerDeviceGetInfo( return ReturnValue(bool{false}); } - case ZER_EXT_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: { + case ZER_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: { return ReturnValue(uint32_t{PI_MEMORY_ORDER_RELAXED | PI_MEMORY_ORDER_ACQ_REL | PI_MEMORY_ORDER_SEQ_CST}); From d9c49ff91c773d161eadef4ec8a50827a80dc3eb Mon Sep 17 00:00:00 2001 From: "Lamzed-Short, Andrew" Date: Thu, 9 Mar 2023 05:14:49 -0800 Subject: [PATCH 08/20] Added bitmask for returned value from OpenCL 3.0 runtime Unsure whether ACQUIRE and RELEASE need removing as current pi.h expects 5 bits for memory order info but the OCL headers say only 3 are necessary in 3.0 as the values are RELAXED, ACQ_REL, and SEQ_CST. --- sycl/include/sycl/detail/pi.h | 2 ++ sycl/plugins/opencl/pi_opencl.cpp | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/sycl/include/sycl/detail/pi.h b/sycl/include/sycl/detail/pi.h index 56b8b33fae583..df1a19fffb089 100644 --- a/sycl/include/sycl/detail/pi.h +++ b/sycl/include/sycl/detail/pi.h @@ -554,6 +554,8 @@ constexpr pi_memory_order_capabilities PI_MEMORY_ORDER_RELEASE = 0x04; constexpr pi_memory_order_capabilities PI_MEMORY_ORDER_ACQ_REL = 0x08; constexpr pi_memory_order_capabilities PI_MEMORY_ORDER_SEQ_CST = 0x10; +constexpr pi_memory_order_capabilities PI_MEMORY_ORDER_BITMASK = 0x07; + using pi_memory_scope_capabilities = pi_bitfield; constexpr pi_memory_scope_capabilities PI_MEMORY_SCOPE_WORK_ITEM = 0x01; constexpr pi_memory_scope_capabilities PI_MEMORY_SCOPE_SUB_GROUP = 0x02; diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index 6fd165155aa24..1453b53c8c856 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -311,7 +311,7 @@ pi_result piDeviceGetInfo(pi_device device, pi_device_info paramName, cl_int result = clGetDeviceInfo( cast(device), cast(paramName), paramValueSize, paramValue, paramValueSizeRet); - return static_cast(result); + return static_cast(result & PI_MEMORY_ORDER_BITMASK); } } case PI_DEVICE_INFO_ATOMIC_64: { From 657da82ae9df2c73fa276e2421d0dca468f04e80 Mon Sep 17 00:00:00 2001 From: "Lamzed-Short, Andrew" Date: Fri, 10 Mar 2023 06:39:57 -0800 Subject: [PATCH 09/20] Added missing PI mem order enum values to level_zero api ACQ_REL infers ACQUIRE and RELEASE, so adding them to the return value. --- .../unified_runtime/ur/adapters/level_zero/ur_level_zero.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero.cpp b/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero.cpp index 4aa5dac9a981a..a94b000b13dde 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero.cpp @@ -1164,6 +1164,8 @@ ZER_APIEXPORT zer_result_t ZER_APICALL zerDeviceGetInfo( case ZER_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: { return ReturnValue(uint32_t{PI_MEMORY_ORDER_RELAXED | + PI_MEMORY_ORDER_ACQUIRE | + PI_MEMORY_ORDER_RELEASE | PI_MEMORY_ORDER_ACQ_REL | PI_MEMORY_ORDER_SEQ_CST}); } From 1e6d67151efa6f4749764ecb2170c410e5233701 Mon Sep 17 00:00:00 2001 From: "Lamzed-Short, Andrew" Date: Fri, 10 Mar 2023 08:00:42 -0800 Subject: [PATCH 10/20] More robust handling of out param values in piDeviceGetInfo Moved mask location, added bitwise operations to convert CL to PI enum values, masked off relevant bits, and improved handling of out parameters. --- sycl/include/sycl/detail/pi.h | 2 - sycl/plugins/opencl/pi_opencl.cpp | 66 ++++++++++++++++++++++++++----- 2 files changed, 56 insertions(+), 12 deletions(-) diff --git a/sycl/include/sycl/detail/pi.h b/sycl/include/sycl/detail/pi.h index df1a19fffb089..56b8b33fae583 100644 --- a/sycl/include/sycl/detail/pi.h +++ b/sycl/include/sycl/detail/pi.h @@ -554,8 +554,6 @@ constexpr pi_memory_order_capabilities PI_MEMORY_ORDER_RELEASE = 0x04; constexpr pi_memory_order_capabilities PI_MEMORY_ORDER_ACQ_REL = 0x08; constexpr pi_memory_order_capabilities PI_MEMORY_ORDER_SEQ_CST = 0x10; -constexpr pi_memory_order_capabilities PI_MEMORY_ORDER_BITMASK = 0x07; - using pi_memory_scope_capabilities = pi_bitfield; constexpr pi_memory_scope_capabilities PI_MEMORY_SCOPE_WORK_ITEM = 0x01; constexpr pi_memory_scope_capabilities PI_MEMORY_SCOPE_SUB_GROUP = 0x02; diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index 1453b53c8c856..e30acf11836cb 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -296,23 +296,69 @@ pi_result piDeviceGetInfo(pi_device device, pi_device_info paramName, if (devVer < OCLV::V2_0) { // For OpenCL 1.2, return the minimum required values - cl_int result = PI_MEMORY_ORDER_RELAXED; - std::memcpy(paramValue, &result, sizeof(cl_int)); - return PI_SUCCESS; + if (paramValue && paramValueSize < sizeof(cl_int)) + return static_cast(CL_INVALID_VALUE); + if (paramValueSizeRet) + *paramValueSizeRet = sizeof(cl_int); + + if (paramValue) { + cl_int capabilities = PI_MEMORY_ORDER_RELAXED; + std::memcpy(paramValue, &capabilities, sizeof(cl_int)); + } + return static_cast(CL_SUCCESS); } else if (devVer < OCLV::V3_0) { // For OpenCL 2.x, return all capabilities // (https://registry.khronos.org/OpenCL/specs/3.0-unified/html/OpenCL_API.html#_memory_consistency_model) - cl_int result = PI_MEMORY_ORDER_RELAXED | PI_MEMORY_ORDER_ACQ_REL | + if (paramValue && paramValueSize < sizeof(cl_int)) + return static_cast(CL_INVALID_VALUE); + if (paramValueSizeRet) + *paramValueSizeRet = sizeof(cl_int); + + if (paramValue) { + cl_int capabilities = PI_MEMORY_ORDER_RELAXED | PI_MEMORY_ORDER_ACQUIRE | PI_MEMORY_ORDER_RELEASE | PI_MEMORY_ORDER_ACQ_REL | PI_MEMORY_ORDER_SEQ_CST; - std::memcpy(paramValue, &result, sizeof(cl_int)); - return PI_SUCCESS; - } else { + std::memcpy(paramValue, &capabilities, sizeof(cl_int)); + } + return static_cast(CL_SUCCESS); + } +#ifdef CL_VERSION_3_0 + if (devVer >= OCLV::V3_0) { // For OpenCL >=3.0, the query should be implemented - cl_int result = clGetDeviceInfo( + cl_int capabilities = CL_DEVICE_ATOMIC_ORDER_RELAXED; + cl_int ret_err = clGetDeviceInfo( cast(device), cast(paramName), - paramValueSize, paramValue, paramValueSizeRet); - return static_cast(result & PI_MEMORY_ORDER_BITMASK); + paramValueSize, &result, paramValueSizeRet); + if (ret_err != CL_SUCCESS) + return cast(ret_err); + + if (paramValue && paramValueSize < sizeof(cl_int)) + return static_cast(CL_INVALID_VALUE); + if (paramValueSizeRet) + *paramValueSizeRet = sizeof(cl_int); + + if (paramValue) { + // Mask operation to only consider atomic_memory_order* capabilities + cl_int mask = CL_DEVICE_ATOMIC_ORDER_RELAXED | CL_DEVICE_ATOMIC_ORDER_ACQ_REL | CL_DEVICE_ATOMIC_ORDER_SEQ_CST; + capabilities &= mask; + + // Convert from OCL bitfield to SYCL PI bitfield + // OCL could return (masked) 00000111 for all capabilities + // PI would want that to be ...11111 for all capabilities as well as ACQUIRE and RELEASE + // So need to bitshift and fill in result + if (capabilities & CL_DEVICE_ATOMIC_ORDER_SEQ_CST) { + capabilities &= ~CL_DEVICE_ATOMIC_ORDER_SEQ_CST; + capabilities |= PI_MEMORY_ORDER_SEQ_CST; + } + if (capabilities & CL_DEVICE_ATOMIC_ORDER_ACQ_REL) { + capabilities &= ~CL_DEVICE_ATOMIC_ORDER_ACQ_REL; + capabilities |= (PI_MEMORY_ORDER_ACQ_REL | PI_MEMORY_ORDER_ACQUIRE | PI_MEMORY_ORDER_RELEASE); + } + + std::memcpy(paramValue, &capabilities, sizeof(cl_int)); + } } +#endif + return static_cast(CL_SUCCESS); } case PI_DEVICE_INFO_ATOMIC_64: { cl_int ret_err = CL_SUCCESS; From 58c53ad954c9a456d56db956d9601b1e6c1e5346 Mon Sep 17 00:00:00 2001 From: "Lamzed-Short, Andrew" Date: Fri, 10 Mar 2023 08:01:46 -0800 Subject: [PATCH 11/20] Resolved clang formatting fixes --- sycl/plugins/opencl/pi_opencl.cpp | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index e30acf11836cb..e82ae7c2f2999 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -315,8 +315,10 @@ pi_result piDeviceGetInfo(pi_device device, pi_device_info paramName, *paramValueSizeRet = sizeof(cl_int); if (paramValue) { - cl_int capabilities = PI_MEMORY_ORDER_RELAXED | PI_MEMORY_ORDER_ACQUIRE | PI_MEMORY_ORDER_RELEASE | PI_MEMORY_ORDER_ACQ_REL | - PI_MEMORY_ORDER_SEQ_CST; + cl_int capabilities = PI_MEMORY_ORDER_RELAXED | + PI_MEMORY_ORDER_ACQUIRE | + PI_MEMORY_ORDER_RELEASE | + PI_MEMORY_ORDER_ACQ_REL | PI_MEMORY_ORDER_SEQ_CST; std::memcpy(paramValue, &capabilities, sizeof(cl_int)); } return static_cast(CL_SUCCESS); @@ -335,23 +337,26 @@ pi_result piDeviceGetInfo(pi_device device, pi_device_info paramName, return static_cast(CL_INVALID_VALUE); if (paramValueSizeRet) *paramValueSizeRet = sizeof(cl_int); - + if (paramValue) { // Mask operation to only consider atomic_memory_order* capabilities - cl_int mask = CL_DEVICE_ATOMIC_ORDER_RELAXED | CL_DEVICE_ATOMIC_ORDER_ACQ_REL | CL_DEVICE_ATOMIC_ORDER_SEQ_CST; + cl_int mask = CL_DEVICE_ATOMIC_ORDER_RELAXED | + CL_DEVICE_ATOMIC_ORDER_ACQ_REL | + CL_DEVICE_ATOMIC_ORDER_SEQ_CST; capabilities &= mask; // Convert from OCL bitfield to SYCL PI bitfield // OCL could return (masked) 00000111 for all capabilities - // PI would want that to be ...11111 for all capabilities as well as ACQUIRE and RELEASE - // So need to bitshift and fill in result + // PI would want that to be ...11111 for all capabilities as well as + // ACQUIRE and RELEASE So need to bitshift and fill in result if (capabilities & CL_DEVICE_ATOMIC_ORDER_SEQ_CST) { capabilities &= ~CL_DEVICE_ATOMIC_ORDER_SEQ_CST; capabilities |= PI_MEMORY_ORDER_SEQ_CST; } if (capabilities & CL_DEVICE_ATOMIC_ORDER_ACQ_REL) { capabilities &= ~CL_DEVICE_ATOMIC_ORDER_ACQ_REL; - capabilities |= (PI_MEMORY_ORDER_ACQ_REL | PI_MEMORY_ORDER_ACQUIRE | PI_MEMORY_ORDER_RELEASE); + capabilities |= (PI_MEMORY_ORDER_ACQ_REL | PI_MEMORY_ORDER_ACQUIRE | + PI_MEMORY_ORDER_RELEASE); } std::memcpy(paramValue, &capabilities, sizeof(cl_int)); From d18bcca5bf01b9f22b1298f4601a7dd8a352db0b Mon Sep 17 00:00:00 2001 From: "Lamzed-Short, Andrew" Date: Fri, 10 Mar 2023 08:54:01 -0800 Subject: [PATCH 12/20] Small fix to UR code for atomic_mem_order_caps case --- .../unified_runtime/ur/adapters/level_zero/ur_level_zero.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero.cpp b/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero.cpp index 191319fd736b1..02169d0c62abc 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero.cpp @@ -1166,7 +1166,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo( return ReturnValue(bool{false}); } - case ZER_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: { + case UR_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: { return ReturnValue(uint32_t{PI_MEMORY_ORDER_RELAXED | PI_MEMORY_ORDER_ACQUIRE | PI_MEMORY_ORDER_RELEASE | From be8ff59dfbcfae401689ca3c333f27f114da5802 Mon Sep 17 00:00:00 2001 From: "Lamzed-Short, Andrew" Date: Fri, 10 Mar 2023 09:04:41 -0800 Subject: [PATCH 13/20] Formatting fix in UR --- .../ur/adapters/level_zero/ur_level_zero.cpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero.cpp b/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero.cpp index 02169d0c62abc..40dcb476412b1 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero.cpp @@ -1167,11 +1167,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo( } case UR_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: { - return ReturnValue(uint32_t{PI_MEMORY_ORDER_RELAXED | - PI_MEMORY_ORDER_ACQUIRE | - PI_MEMORY_ORDER_RELEASE | - PI_MEMORY_ORDER_ACQ_REL | - PI_MEMORY_ORDER_SEQ_CST}); + return ReturnValue( + uint32_t{PI_MEMORY_ORDER_RELAXED | PI_MEMORY_ORDER_ACQUIRE | + PI_MEMORY_ORDER_RELEASE | PI_MEMORY_ORDER_ACQ_REL | + PI_MEMORY_ORDER_SEQ_CST}); } // TODO: Implement. From 90e60ac88ef281cb5a5acb8fd309099d64035e13 Mon Sep 17 00:00:00 2001 From: "Lamzed-Short, Andrew" Date: Tue, 14 Mar 2023 08:04:12 -0700 Subject: [PATCH 14/20] Context memory_order impl for level_zero --- sycl/plugins/level_zero/pi_level_zero.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index 959579aa5320d..913708329f614 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -2297,6 +2298,10 @@ pi_result piContextGetInfo(pi_context Context, pi_context_info ParamName, case PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMSET2D_SUPPORT: // 2D USM fill and memset is not supported. return ReturnValue(pi_bool{false}); + case PI_CONTEXT_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: + return ReturnValue(pi_uint32{PI_MEMORY_ORDER_RELAXED | + PI_MEMORY_ORDER_ACQUIRE | PI_MEMORY_ORDER_RELEASE | + PI_MEMORY_ORDER_ACQ_REL | PI_MEMORY_ORDER_SEQ_CST }); case PI_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: default: // TODO: implement other parameters From 64e7f27ea2a8ed84e2b38806fb903d5401c36aca Mon Sep 17 00:00:00 2001 From: "Lamzed-Short, Andrew" Date: Thu, 16 Mar 2023 03:54:58 -0700 Subject: [PATCH 15/20] Formatting issue resolved --- sycl/plugins/level_zero/pi_level_zero.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index 57b263252a298..577e91bf90e15 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -2299,9 +2299,10 @@ pi_result piContextGetInfo(pi_context Context, pi_context_info ParamName, // 2D USM fill and memset is not supported. return ReturnValue(pi_bool{false}); case PI_CONTEXT_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: - return ReturnValue(pi_uint32{PI_MEMORY_ORDER_RELAXED | - PI_MEMORY_ORDER_ACQUIRE | PI_MEMORY_ORDER_RELEASE | - PI_MEMORY_ORDER_ACQ_REL | PI_MEMORY_ORDER_SEQ_CST }); + return ReturnValue( + pi_uint32{PI_MEMORY_ORDER_RELAXED | PI_MEMORY_ORDER_ACQUIRE | + PI_MEMORY_ORDER_RELEASE | PI_MEMORY_ORDER_ACQ_REL | + PI_MEMORY_ORDER_SEQ_CST}); case PI_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: default: // TODO: implement other parameters From 904fcc5dd98b9f37a25b9fa76429e15484b8465c Mon Sep 17 00:00:00 2001 From: "Lamzed-Short, Andrew" Date: Thu, 16 Mar 2023 05:07:55 -0700 Subject: [PATCH 16/20] Used actual type to return. --- sycl/plugins/level_zero/pi_level_zero.cpp | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index 577e91bf90e15..fc12b29ee1167 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -2298,11 +2298,13 @@ pi_result piContextGetInfo(pi_context Context, pi_context_info ParamName, case PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMSET2D_SUPPORT: // 2D USM fill and memset is not supported. return ReturnValue(pi_bool{false}); - case PI_CONTEXT_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: - return ReturnValue( - pi_uint32{PI_MEMORY_ORDER_RELAXED | PI_MEMORY_ORDER_ACQUIRE | - PI_MEMORY_ORDER_RELEASE | PI_MEMORY_ORDER_ACQ_REL | - PI_MEMORY_ORDER_SEQ_CST}); + case PI_CONTEXT_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: { + pi_memory_order_capabilities capabilities = + PI_MEMORY_ORDER_RELAXED | PI_MEMORY_ORDER_ACQUIRE | + PI_MEMORY_ORDER_RELEASE | PI_MEMORY_ORDER_ACQ_REL | + PI_MEMORY_ORDER_SEQ_CST; + return ReturnValue(capabilities); + } case PI_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: default: // TODO: implement other parameters From 9b92fbeb7b5a543d495d975c5e6a0b1a88b49e6c Mon Sep 17 00:00:00 2001 From: "Lamzed-Short, Andrew" Date: Fri, 17 Mar 2023 11:06:09 -0700 Subject: [PATCH 17/20] Add unit test for memory_order device query --- .../AtomicMemoryOrderCapabilities.cpp | 65 +++++++++++++++++++ sycl/unittests/SYCL2020/CMakeLists.txt | 1 + 2 files changed, 66 insertions(+) create mode 100644 sycl/unittests/SYCL2020/AtomicMemoryOrderCapabilities.cpp diff --git a/sycl/unittests/SYCL2020/AtomicMemoryOrderCapabilities.cpp b/sycl/unittests/SYCL2020/AtomicMemoryOrderCapabilities.cpp new file mode 100644 index 0000000000000..6ed291c0da3e8 --- /dev/null +++ b/sycl/unittests/SYCL2020/AtomicMemoryOrderCapabilities.cpp @@ -0,0 +1,65 @@ +//==---- AtomicMemoryOrderCapabilities.cpp --- memory order query test -----==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include +#include +#include + +using namespace sycl; + +namespace { + +static constexpr size_t expectedCapabilityVecSize = 5; +static thread_local bool deviceGetInfoCalled = false; + +static bool has_capability(const std::vector &deviceCapabilities, + memory_order capabilityToFind) { + return std::find(deviceCapabilities.begin(), deviceCapabilities.end(), + capabilityToFind) != deviceCapabilities.end(); +} + +pi_result redefinedDeviceGetInfo(pi_device device, pi_device_info param_name, + size_t param_value_size, void *param_value, + size_t *param_value_size_ret) { + if (param_name == PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES) { + deviceGetInfoCalled = true; + if (param_value) { + pi_memory_order_capabilities *Capabilities = + reinterpret_cast(param_value); + *Capabilities = PI_MEMORY_ORDER_RELAXED | PI_MEMORY_ORDER_ACQUIRE | + PI_MEMORY_ORDER_RELEASE | PI_MEMORY_ORDER_ACQ_REL | + PI_MEMORY_ORDER_SEQ_CST; + } + } + return PI_SUCCESS; +} + +TEST(AtomicMemoryOrderCapabilities, DeviceQueryReturnsCorrectCapabilities) { + unittest::PiMock Mock; + platform Plt = Mock.getPlatform(); + + Mock.redefineAfter( + redefinedDeviceGetInfo); + + const device Dev = Plt.get_devices()[0]; + context Ctx{Dev}; + + auto Capabilities = + Dev.get_info(); + EXPECT_TRUE(deviceGetInfoCalled); + EXPECT_EQ(Capabilities.size(), expectedCapabilityVecSize); + + EXPECT_TRUE(has_capability(Capabilities, memory_order::relaxed)); + EXPECT_TRUE(has_capability(Capabilities, memory_order::acquire)); + EXPECT_TRUE(has_capability(Capabilities, memory_order::release)); + EXPECT_TRUE(has_capability(Capabilities, memory_order::acq_rel)); + EXPECT_TRUE(has_capability(Capabilities, memory_order::seq_cst)); +} + +} // namespace diff --git a/sycl/unittests/SYCL2020/CMakeLists.txt b/sycl/unittests/SYCL2020/CMakeLists.txt index 9e22f73abfa00..04d0816c40cdc 100644 --- a/sycl/unittests/SYCL2020/CMakeLists.txt +++ b/sycl/unittests/SYCL2020/CMakeLists.txt @@ -9,5 +9,6 @@ add_sycl_unittest(SYCL2020Tests OBJECT IsCompatible.cpp DeviceGetInfoAspects.cpp DeviceAspectTraits.cpp + AtomicMemoryOrderCapabilities.cpp ) From 5b87263809ceeb326c43e91369281b6d2363495d Mon Sep 17 00:00:00 2001 From: "Lamzed-Short, Andrew" Date: Fri, 17 Mar 2023 11:07:19 -0700 Subject: [PATCH 18/20] Refined query implementation with fixes and less code duplication --- opencl/CMakeLists.txt | 2 +- sycl/include/sycl/detail/cl.h | 4 +- sycl/plugins/opencl/pi_opencl.cpp | 97 +++++++------------ .../ur/adapters/level_zero/ur_level_zero.cpp | 9 +- 4 files changed, 45 insertions(+), 67 deletions(-) diff --git a/opencl/CMakeLists.txt b/opencl/CMakeLists.txt index 1442a1ac43075..4ed26bd5e719d 100644 --- a/opencl/CMakeLists.txt +++ b/opencl/CMakeLists.txt @@ -44,7 +44,7 @@ FetchContent_GetProperties(ocl-headers) set(OpenCL_INCLUDE_DIR ${ocl-headers_SOURCE_DIR} CACHE PATH "Path to OpenCL Headers") -target_compile_definitions(Headers INTERFACE -DCL_TARGET_OPENCL_VERSION=220) +target_compile_definitions(Headers INTERFACE -DCL_TARGET_OPENCL_VERSION=300) add_library(OpenCL-Headers ALIAS Headers) # OpenCL Library (ICD Loader) diff --git a/sycl/include/sycl/detail/cl.h b/sycl/include/sycl/detail/cl.h index 7e90fe126e40d..20d640bcff59f 100644 --- a/sycl/include/sycl/detail/cl.h +++ b/sycl/include/sycl/detail/cl.h @@ -9,9 +9,9 @@ #pragma once // Suppress a compiler message about undefined CL_TARGET_OPENCL_VERSION -// and define all symbols up to OpenCL 2.2 +// and define all symbols up to OpenCL 3.0 #ifndef CL_TARGET_OPENCL_VERSION -#define CL_TARGET_OPENCL_VERSION 220 +#define CL_TARGET_OPENCL_VERSION 300 #endif #include diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index e82ae7c2f2999..c61f902138787 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -294,75 +294,52 @@ pi_result piDeviceGetInfo(pi_device device, pi_device_info paramName, return cast(ret_err); } - if (devVer < OCLV::V2_0) { - // For OpenCL 1.2, return the minimum required values - if (paramValue && paramValueSize < sizeof(cl_int)) - return static_cast(CL_INVALID_VALUE); - if (paramValueSizeRet) - *paramValueSizeRet = sizeof(cl_int); + // Minimum required capability to be returned + // For OpenCL 1.2, this is all that is required + pi_memory_order_capabilities capabilities = PI_MEMORY_ORDER_RELAXED; - if (paramValue) { - cl_int capabilities = PI_MEMORY_ORDER_RELAXED; - std::memcpy(paramValue, &capabilities, sizeof(cl_int)); - } - return static_cast(CL_SUCCESS); - } else if (devVer < OCLV::V3_0) { - // For OpenCL 2.x, return all capabilities - // (https://registry.khronos.org/OpenCL/specs/3.0-unified/html/OpenCL_API.html#_memory_consistency_model) - if (paramValue && paramValueSize < sizeof(cl_int)) - return static_cast(CL_INVALID_VALUE); - if (paramValueSizeRet) - *paramValueSizeRet = sizeof(cl_int); - - if (paramValue) { - cl_int capabilities = PI_MEMORY_ORDER_RELAXED | - PI_MEMORY_ORDER_ACQUIRE | - PI_MEMORY_ORDER_RELEASE | - PI_MEMORY_ORDER_ACQ_REL | PI_MEMORY_ORDER_SEQ_CST; - std::memcpy(paramValue, &capabilities, sizeof(cl_int)); - } - return static_cast(CL_SUCCESS); - } -#ifdef CL_VERSION_3_0 if (devVer >= OCLV::V3_0) { // For OpenCL >=3.0, the query should be implemented - cl_int capabilities = CL_DEVICE_ATOMIC_ORDER_RELAXED; + cl_device_atomic_capabilities cl_capabilities = 0; cl_int ret_err = clGetDeviceInfo( - cast(device), cast(paramName), - paramValueSize, &result, paramValueSizeRet); + deviceID, CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES, + sizeof(cl_device_atomic_capabilities), &cl_capabilities, nullptr); if (ret_err != CL_SUCCESS) return cast(ret_err); - if (paramValue && paramValueSize < sizeof(cl_int)) - return static_cast(CL_INVALID_VALUE); - if (paramValueSizeRet) - *paramValueSizeRet = sizeof(cl_int); - - if (paramValue) { - // Mask operation to only consider atomic_memory_order* capabilities - cl_int mask = CL_DEVICE_ATOMIC_ORDER_RELAXED | - CL_DEVICE_ATOMIC_ORDER_ACQ_REL | - CL_DEVICE_ATOMIC_ORDER_SEQ_CST; - capabilities &= mask; - - // Convert from OCL bitfield to SYCL PI bitfield - // OCL could return (masked) 00000111 for all capabilities - // PI would want that to be ...11111 for all capabilities as well as - // ACQUIRE and RELEASE So need to bitshift and fill in result - if (capabilities & CL_DEVICE_ATOMIC_ORDER_SEQ_CST) { - capabilities &= ~CL_DEVICE_ATOMIC_ORDER_SEQ_CST; - capabilities |= PI_MEMORY_ORDER_SEQ_CST; - } - if (capabilities & CL_DEVICE_ATOMIC_ORDER_ACQ_REL) { - capabilities &= ~CL_DEVICE_ATOMIC_ORDER_ACQ_REL; - capabilities |= (PI_MEMORY_ORDER_ACQ_REL | PI_MEMORY_ORDER_ACQUIRE | - PI_MEMORY_ORDER_RELEASE); - } - - std::memcpy(paramValue, &capabilities, sizeof(cl_int)); + // Mask operation to only consider atomic_memory_order* capabilities + cl_int mask = CL_DEVICE_ATOMIC_ORDER_RELAXED | + CL_DEVICE_ATOMIC_ORDER_ACQ_REL | + CL_DEVICE_ATOMIC_ORDER_SEQ_CST; + cl_capabilities &= mask; + + // The memory order capabilities are hierarchical, if one is implied, all + // preceding capbilities are implied as well. Especially in the case of + // ACQ_REL. + if (cl_capabilities & CL_DEVICE_ATOMIC_ORDER_SEQ_CST) { + capabilities |= PI_MEMORY_ORDER_SEQ_CST; + } + if (cl_capabilities & CL_DEVICE_ATOMIC_ORDER_ACQ_REL) { + capabilities |= PI_MEMORY_ORDER_ACQ_REL | PI_MEMORY_ORDER_ACQUIRE | + PI_MEMORY_ORDER_RELEASE; } + } else if (devVer >= OCLV::V2_0) { + // For OpenCL 2.x, return all capabilities + // (https://registry.khronos.org/OpenCL/specs/3.0-unified/html/OpenCL_API.html#_memory_consistency_model) + capabilities |= PI_MEMORY_ORDER_ACQUIRE | PI_MEMORY_ORDER_RELEASE | + PI_MEMORY_ORDER_ACQ_REL | PI_MEMORY_ORDER_SEQ_CST; } -#endif + + if (paramValue) { + if (paramValueSize < sizeof(pi_memory_order_capabilities)) + return static_cast(CL_INVALID_VALUE); + + std::memcpy(paramValue, &capabilities, sizeof(capabilities)); + } + + if (paramValueSizeRet) + *paramValueSizeRet = sizeof(capabilities); + return static_cast(CL_SUCCESS); } case PI_DEVICE_INFO_ATOMIC_64: { diff --git a/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero.cpp b/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero.cpp index 50b47eb54d5b2..0478a014c3525 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero.cpp @@ -1176,10 +1176,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo( } case UR_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: { - return ReturnValue( - uint32_t{PI_MEMORY_ORDER_RELAXED | PI_MEMORY_ORDER_ACQUIRE | - PI_MEMORY_ORDER_RELEASE | PI_MEMORY_ORDER_ACQ_REL | - PI_MEMORY_ORDER_SEQ_CST}); + pi_memory_order_capabilities capabilities = + PI_MEMORY_ORDER_RELAXED | PI_MEMORY_ORDER_ACQUIRE | + PI_MEMORY_ORDER_RELEASE | PI_MEMORY_ORDER_ACQ_REL | + PI_MEMORY_ORDER_SEQ_CST; + return ReturnValue(capabilities); } // TODO: Implement. From 9652f61ce914268705606a94308595856bbe9f25 Mon Sep 17 00:00:00 2001 From: "Lamzed-Short, Andrew" Date: Fri, 17 Mar 2023 11:19:16 -0700 Subject: [PATCH 19/20] Formatting fix on pi_level_zero that was missed --- sycl/plugins/level_zero/pi_level_zero.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index ffb2cbc691e68..268619d6419f9 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -2311,9 +2311,9 @@ pi_result piContextGetInfo(pi_context Context, pi_context_info ParamName, return ReturnValue(pi_bool{false}); case PI_CONTEXT_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: { pi_memory_order_capabilities capabilities = - PI_MEMORY_ORDER_RELAXED | PI_MEMORY_ORDER_ACQUIRE | - PI_MEMORY_ORDER_RELEASE | PI_MEMORY_ORDER_ACQ_REL | - PI_MEMORY_ORDER_SEQ_CST; + PI_MEMORY_ORDER_RELAXED | PI_MEMORY_ORDER_ACQUIRE | + PI_MEMORY_ORDER_RELEASE | PI_MEMORY_ORDER_ACQ_REL | + PI_MEMORY_ORDER_SEQ_CST; return ReturnValue(capabilities); } case PI_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: From 6f4a2f7a301c6ba1aa0461183d2793a31a128f52 Mon Sep 17 00:00:00 2001 From: "Lamzed-Short, Andrew" Date: Wed, 22 Mar 2023 12:25:25 -0700 Subject: [PATCH 20/20] Swapped PI enum use for UR --- .../ur/adapters/level_zero/ur_level_zero.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero.cpp b/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero.cpp index 0478a014c3525..f2f44c048bec6 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero.cpp @@ -11,7 +11,6 @@ #include #include "ur_level_zero.hpp" -#include #include // Define the static class field @@ -1176,10 +1175,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo( } case UR_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: { - pi_memory_order_capabilities capabilities = - PI_MEMORY_ORDER_RELAXED | PI_MEMORY_ORDER_ACQUIRE | - PI_MEMORY_ORDER_RELEASE | PI_MEMORY_ORDER_ACQ_REL | - PI_MEMORY_ORDER_SEQ_CST; + ur_memory_order_capability_flags_t capabilities = + UR_MEMORY_ORDER_CAPABILITY_FLAG_RELAXED | + UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQUIRE | + UR_MEMORY_ORDER_CAPABILITY_FLAG_RELEASE | + UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQ_REL | + UR_MEMORY_ORDER_CAPABILITY_FLAG_SEQ_CST; return ReturnValue(capabilities); }