diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index c6658245cc03e..7fbffe6d804d4 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -2308,6 +2309,13 @@ pi_result piContextGetInfo(pi_context Context, pi_context_info ParamName, case PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMSET2D_SUPPORT: // 2D USM fill and memset is not supported. return ReturnValue(pi_bool{false}); + case PI_CONTEXT_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: { + pi_memory_order_capabilities capabilities = + PI_MEMORY_ORDER_RELAXED | PI_MEMORY_ORDER_ACQUIRE | + PI_MEMORY_ORDER_RELEASE | PI_MEMORY_ORDER_ACQ_REL | + PI_MEMORY_ORDER_SEQ_CST; + return ReturnValue(capabilities); + } case PI_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: default: // TODO: implement other parameters diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index 0d4c6a8870dcd..f2ea816b023f1 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -282,8 +282,65 @@ pi_result piDeviceGetInfo(pi_device device, pi_device_info paramName, // For details about Intel UUID extension, see // sycl/doc/extensions/supported/sycl_ext_intel_device_info.md case PI_DEVICE_INFO_UUID: - case PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: return PI_ERROR_INVALID_VALUE; + case PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: { + // This query is missing beore OpenCL 3.0 + // Check version and handle appropriately + OCLV::OpenCLVersion devVer; + cl_device_id deviceID = cast(device); + cl_int ret_err = getDeviceVersion(deviceID, devVer); + if (ret_err != CL_SUCCESS) { + return cast(ret_err); + } + + // Minimum required capability to be returned + // For OpenCL 1.2, this is all that is required + pi_memory_order_capabilities capabilities = PI_MEMORY_ORDER_RELAXED; + + if (devVer >= OCLV::V3_0) { + // For OpenCL >=3.0, the query should be implemented + cl_device_atomic_capabilities cl_capabilities = 0; + cl_int ret_err = clGetDeviceInfo( + deviceID, CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES, + sizeof(cl_device_atomic_capabilities), &cl_capabilities, nullptr); + if (ret_err != CL_SUCCESS) + return cast(ret_err); + + // Mask operation to only consider atomic_memory_order* capabilities + cl_int mask = CL_DEVICE_ATOMIC_ORDER_RELAXED | + CL_DEVICE_ATOMIC_ORDER_ACQ_REL | + CL_DEVICE_ATOMIC_ORDER_SEQ_CST; + cl_capabilities &= mask; + + // The memory order capabilities are hierarchical, if one is implied, all + // preceding capbilities are implied as well. Especially in the case of + // ACQ_REL. + if (cl_capabilities & CL_DEVICE_ATOMIC_ORDER_SEQ_CST) { + capabilities |= PI_MEMORY_ORDER_SEQ_CST; + } + if (cl_capabilities & CL_DEVICE_ATOMIC_ORDER_ACQ_REL) { + capabilities |= PI_MEMORY_ORDER_ACQ_REL | PI_MEMORY_ORDER_ACQUIRE | + PI_MEMORY_ORDER_RELEASE; + } + } else if (devVer >= OCLV::V2_0) { + // For OpenCL 2.x, return all capabilities + // (https://registry.khronos.org/OpenCL/specs/3.0-unified/html/OpenCL_API.html#_memory_consistency_model) + capabilities |= PI_MEMORY_ORDER_ACQUIRE | PI_MEMORY_ORDER_RELEASE | + PI_MEMORY_ORDER_ACQ_REL | PI_MEMORY_ORDER_SEQ_CST; + } + + if (paramValue) { + if (paramValueSize < sizeof(pi_memory_order_capabilities)) + return static_cast(CL_INVALID_VALUE); + + std::memcpy(paramValue, &capabilities, sizeof(capabilities)); + } + + if (paramValueSizeRet) + *paramValueSizeRet = sizeof(capabilities); + + return static_cast(CL_SUCCESS); + } case PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: { // Initialize result to minimum mandated capabilities according to // SYCL2020 4.6.3.2 diff --git a/sycl/plugins/unified_runtime/pi2ur.hpp b/sycl/plugins/unified_runtime/pi2ur.hpp index ffe3bb6555e06..fef607f3ef185 100644 --- a/sycl/plugins/unified_runtime/pi2ur.hpp +++ b/sycl/plugins/unified_runtime/pi2ur.hpp @@ -485,6 +485,8 @@ inline pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName, (ur_device_info_t)UR_DEVICE_INFO_BFLOAT16}, {PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES, (ur_device_info_t)UR_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES}, + {PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES, + (ur_device_info_t)UR_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES}, }; auto InfoType = InfoMapping.find(ParamName); diff --git a/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero.cpp b/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero.cpp index 7e8fbfc117afc..c859c166c7735 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero.cpp @@ -1186,6 +1186,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo( return ReturnValue(result); } + case UR_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: { + ur_memory_order_capability_flags_t capabilities = + UR_MEMORY_ORDER_CAPABILITY_FLAG_RELAXED | + UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQUIRE | + UR_MEMORY_ORDER_CAPABILITY_FLAG_RELEASE | + UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQ_REL | + UR_MEMORY_ORDER_CAPABILITY_FLAG_SEQ_CST; + return ReturnValue(capabilities); + } + + // TODO: Implement. default: zePrint("Unsupported ParamName in piGetDeviceInfo\n"); zePrint("ParamName=%d(0x%x)\n", ParamName, ParamName); @@ -1716,7 +1727,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDevicePartition( // Currently supported partitioning (by affinity domain/numa) would always // partition to all sub-devices. // - if (NumDevices !=0) + if (NumDevices != 0) PI_ASSERT(NumDevices == EffectiveNumDevices, UR_RESULT_ERROR_INVALID_VALUE); for (uint32_t I = 0; I < NumDevices; I++) { diff --git a/sycl/source/detail/context_impl.cpp b/sycl/source/detail/context_impl.cpp index 22818eeed302c..198b4f1cc2d30 100644 --- a/sycl/source/detail/context_impl.cpp +++ b/sycl/source/detail/context_impl.cpp @@ -23,6 +23,8 @@ #include #include +#include + namespace sycl { __SYCL_INLINE_VER_NAMESPACE(_V1) { namespace detail { @@ -166,17 +168,26 @@ template <> std::vector context_impl::get_info() const { + std::vector CapabilityList{ + sycl::memory_order::relaxed, sycl::memory_order::acquire, + sycl::memory_order::release, sycl::memory_order::acq_rel, + sycl::memory_order::seq_cst}; if (is_host()) - return {sycl::memory_order::relaxed, sycl::memory_order::acquire, - sycl::memory_order::release, sycl::memory_order::acq_rel, - sycl::memory_order::seq_cst}; + return CapabilityList; + + for (const sycl::device &Device : MDevices) { + std::vector NewCapabilityList(CapabilityList.size()); + std::vector DeviceCapabilities = + Device.get_info(); + std::set_intersection( + CapabilityList.begin(), CapabilityList.end(), + DeviceCapabilities.begin(), DeviceCapabilities.end(), + std::inserter(NewCapabilityList, NewCapabilityList.begin())); + CapabilityList = NewCapabilityList; + } + CapabilityList.shrink_to_fit(); - pi_memory_order_capabilities Result; - getPlugin().call( - MContext, - PiInfoCode::value, - sizeof(Result), &Result, nullptr); - return readMemoryOrderBitfield(Result); + return CapabilityList; } template <> std::vector diff --git a/sycl/source/detail/context_info.hpp b/sycl/source/detail/context_info.hpp index 1056557ec2bad..d30113dc6d08e 100644 --- a/sycl/source/detail/context_info.hpp +++ b/sycl/source/detail/context_info.hpp @@ -29,17 +29,6 @@ typename Param::return_type get_context_info(RT::PiContext Ctx, return Result; } -// Specialization for atomic_memory_order_capabilities, PI returns a bitfield -template <> -std::vector -get_context_info( - RT::PiContext Ctx, const plugin &Plugin) { - pi_memory_order_capabilities Result; - Plugin.call( - Ctx, PiInfoCode::value, - sizeof(Result), &Result, nullptr); - return readMemoryOrderBitfield(Result); -} } // namespace detail } // __SYCL_INLINE_VER_NAMESPACE(_V1) } // namespace sycl diff --git a/sycl/unittests/SYCL2020/AtomicMemoryOrderCapabilities.cpp b/sycl/unittests/SYCL2020/AtomicMemoryOrderCapabilities.cpp new file mode 100644 index 0000000000000..6ed291c0da3e8 --- /dev/null +++ b/sycl/unittests/SYCL2020/AtomicMemoryOrderCapabilities.cpp @@ -0,0 +1,65 @@ +//==---- AtomicMemoryOrderCapabilities.cpp --- memory order query test -----==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include +#include +#include + +using namespace sycl; + +namespace { + +static constexpr size_t expectedCapabilityVecSize = 5; +static thread_local bool deviceGetInfoCalled = false; + +static bool has_capability(const std::vector &deviceCapabilities, + memory_order capabilityToFind) { + return std::find(deviceCapabilities.begin(), deviceCapabilities.end(), + capabilityToFind) != deviceCapabilities.end(); +} + +pi_result redefinedDeviceGetInfo(pi_device device, pi_device_info param_name, + size_t param_value_size, void *param_value, + size_t *param_value_size_ret) { + if (param_name == PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES) { + deviceGetInfoCalled = true; + if (param_value) { + pi_memory_order_capabilities *Capabilities = + reinterpret_cast(param_value); + *Capabilities = PI_MEMORY_ORDER_RELAXED | PI_MEMORY_ORDER_ACQUIRE | + PI_MEMORY_ORDER_RELEASE | PI_MEMORY_ORDER_ACQ_REL | + PI_MEMORY_ORDER_SEQ_CST; + } + } + return PI_SUCCESS; +} + +TEST(AtomicMemoryOrderCapabilities, DeviceQueryReturnsCorrectCapabilities) { + unittest::PiMock Mock; + platform Plt = Mock.getPlatform(); + + Mock.redefineAfter( + redefinedDeviceGetInfo); + + const device Dev = Plt.get_devices()[0]; + context Ctx{Dev}; + + auto Capabilities = + Dev.get_info(); + EXPECT_TRUE(deviceGetInfoCalled); + EXPECT_EQ(Capabilities.size(), expectedCapabilityVecSize); + + EXPECT_TRUE(has_capability(Capabilities, memory_order::relaxed)); + EXPECT_TRUE(has_capability(Capabilities, memory_order::acquire)); + EXPECT_TRUE(has_capability(Capabilities, memory_order::release)); + EXPECT_TRUE(has_capability(Capabilities, memory_order::acq_rel)); + EXPECT_TRUE(has_capability(Capabilities, memory_order::seq_cst)); +} + +} // namespace diff --git a/sycl/unittests/SYCL2020/CMakeLists.txt b/sycl/unittests/SYCL2020/CMakeLists.txt index d27cdc667b29b..4e30050ffd088 100644 --- a/sycl/unittests/SYCL2020/CMakeLists.txt +++ b/sycl/unittests/SYCL2020/CMakeLists.txt @@ -10,6 +10,7 @@ add_sycl_unittest(SYCL2020Tests OBJECT IsCompatible.cpp DeviceGetInfoAspects.cpp DeviceAspectTraits.cpp + AtomicMemoryOrderCapabilities.cpp AtomicMemoryScopeCapabilities.cpp )