diff --git a/sycl/include/sycl/ext/oneapi/memcpy2d.hpp b/sycl/include/sycl/ext/oneapi/memcpy2d.hpp index cfbe0a36ab0b4..fa0b1299fe767 100644 --- a/sycl/include/sycl/ext/oneapi/memcpy2d.hpp +++ b/sycl/include/sycl/ext/oneapi/memcpy2d.hpp @@ -34,7 +34,7 @@ void handler::ext_oneapi_memcpy2d(void *Dest, size_t DestPitch, const void *Src, #endif // Get the type of the pointers. - context Ctx = detail::createSyclObjFromImpl(getContextImplPtr()); + detail::context_impl &Ctx = getContextImpl(); usm::alloc SrcAllocType = get_pointer_type(Src, Ctx); usm::alloc DestAllocType = get_pointer_type(Dest, Ctx); bool SrcIsHost = @@ -71,7 +71,7 @@ void handler::ext_oneapi_copy2d(const T *Src, size_t SrcPitch, T *Dest, "to the width specified in 'ext_oneapi_copy2d'"); // Get the type of the pointers. - context Ctx = detail::createSyclObjFromImpl(getContextImplPtr()); + detail::context_impl &Ctx = getContextImpl(); usm::alloc SrcAllocType = get_pointer_type(Src, Ctx); usm::alloc DestAllocType = get_pointer_type(Dest, Ctx); bool SrcIsHost = @@ -106,7 +106,7 @@ void handler::ext_oneapi_memset2d(void *Dest, size_t DestPitch, int Value, "to the width specified in 'ext_oneapi_memset2d'"); T CharVal = static_cast(Value); - context Ctx = detail::createSyclObjFromImpl(getContextImplPtr()); + detail::context_impl &Ctx = getContextImpl(); usm::alloc DestAllocType = get_pointer_type(Dest, Ctx); // If the backends supports 2D fill we use that. Otherwise we use a fallback @@ -130,7 +130,7 @@ void handler::ext_oneapi_fill2d(void *Dest, size_t DestPitch, const T &Pattern, "Destination pitch must be greater than or equal " "to the width specified in 'ext_oneapi_fill2d'"); - context Ctx = detail::createSyclObjFromImpl(getContextImplPtr()); + detail::context_impl &Ctx = getContextImpl(); usm::alloc DestAllocType = get_pointer_type(Dest, Ctx); // If the backends supports 2D fill we use that. Otherwise we use a fallback diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index 3cf26b5432298..27ca25350f920 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -3530,7 +3530,10 @@ class __SYCL_EXPORT handler { UserRange, KernelFunc}; } +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES const std::shared_ptr &getContextImplPtr() const; +#endif + detail::context_impl &getContextImpl() const; // Checks if 2D memory operations are supported by the underlying platform. bool supportsUSMMemcpy2D(); diff --git a/sycl/include/sycl/interop_handle.hpp b/sycl/include/sycl/interop_handle.hpp index cf372bef03914..c865648048a5f 100644 --- a/sycl/include/sycl/interop_handle.hpp +++ b/sycl/include/sycl/interop_handle.hpp @@ -205,6 +205,12 @@ class interop_handle { friend class detail::DispatchHostTask; using ReqToMem = std::pair; +#ifdef __INTEL_PREVIEW_BREAKING_CHANGES + // Clean this up (no shared pointers). Not doing it right now because I expect + // there will be several iterations of simplifications possible and it would + // be hard to track which of them made their way into a minor public release + // and which didn't. Let's just clean it up once during ABI breaking window. +#endif interop_handle(std::vector MemObjs, const std::shared_ptr &Queue, const std::shared_ptr &Device, diff --git a/sycl/include/sycl/usm/usm_pointer_info.hpp b/sycl/include/sycl/usm/usm_pointer_info.hpp index a00e125c019b1..b16c183e71578 100644 --- a/sycl/include/sycl/usm/usm_pointer_info.hpp +++ b/sycl/include/sycl/usm/usm_pointer_info.hpp @@ -16,12 +16,23 @@ inline namespace _V1 { class device; class context; +namespace detail { +class context_impl; +__SYCL_EXPORT usm::alloc get_pointer_type(const void *ptr, context_impl &ctxt); +} // namespace detail + // Pointer queries /// Query the allocation type from a USM pointer /// /// \param ptr is the USM pointer to query /// \param ctxt is the sycl context the ptr was allocated in +#ifdef __INTEL_PREVIEW_BREAKING_CHANGES +inline usm::alloc get_pointer_type(const void *ptr, const context &ctxt) { + return get_pointer_type(ptr, *getSyclObjImpl(ctxt)); +} +#else __SYCL_EXPORT usm::alloc get_pointer_type(const void *ptr, const context &ctxt); +#endif /// Queries the device against which the pointer was allocated /// Throws an exception with errc::invalid error code if ptr is a host diff --git a/sycl/source/detail/async_alloc.cpp b/sycl/source/detail/async_alloc.cpp index 8ed468e8dae7a..6a4a3125d6d34 100644 --- a/sycl/source/detail/async_alloc.cpp +++ b/sycl/source/detail/async_alloc.cpp @@ -67,7 +67,7 @@ void *async_malloc(sycl::handler &h, sycl::usm::alloc kind, size_t size) { sycl::make_error_code(sycl::errc::feature_not_supported), "Only device backed asynchronous allocations are supported!"); - auto &Adapter = h.getContextImplPtr()->getAdapter(); + auto &Adapter = h.getContextImpl().getAdapter(); // Get CG event dependencies for this allocation. const auto &DepEvents = h.impl->CGData.MEvents; @@ -117,7 +117,7 @@ __SYCL_EXPORT void *async_malloc(const sycl::queue &q, sycl::usm::alloc kind, __SYCL_EXPORT void *async_malloc_from_pool(sycl::handler &h, size_t size, const memory_pool &pool) { - auto &Adapter = h.getContextImplPtr()->getAdapter(); + auto &Adapter = h.getContextImpl().getAdapter(); auto &memPoolImpl = sycl::detail::getSyclObjImpl(pool); // Get CG event dependencies for this allocation. diff --git a/sycl/source/detail/backend_impl.hpp b/sycl/source/detail/backend_impl.hpp index 0c160ed1920c4..6ec21faf4e6fc 100644 --- a/sycl/source/detail/backend_impl.hpp +++ b/sycl/source/detail/backend_impl.hpp @@ -15,7 +15,7 @@ inline namespace _V1 { namespace detail { template backend getImplBackend(const T &Impl) { - return Impl->getContextImplPtr()->getBackend(); + return Impl->getContextImpl().getBackend(); } } // namespace detail diff --git a/sycl/source/detail/bindless_images.cpp b/sycl/source/detail/bindless_images.cpp index 4de1d0d2e41f3..8cc9400fe145d 100644 --- a/sycl/source/detail/bindless_images.cpp +++ b/sycl/source/detail/bindless_images.cpp @@ -813,9 +813,9 @@ get_image_memory_support(const image_descriptor &imageDescriptor, const sycl::context &syclContext) { std::shared_ptr DevImpl = sycl::detail::getSyclObjImpl(syclDevice); - std::shared_ptr CtxImpl = - sycl::detail::getSyclObjImpl(syclContext); - const sycl::detail::AdapterPtr &Adapter = CtxImpl->getAdapter(); + sycl::detail::context_impl &CtxImpl = + *sycl::detail::getSyclObjImpl(syclContext); + const sycl::detail::AdapterPtr &Adapter = CtxImpl.getAdapter(); ur_image_desc_t urDesc; ur_image_format_t urFormat; @@ -825,7 +825,7 @@ get_image_memory_support(const image_descriptor &imageDescriptor, Adapter->call( - CtxImpl->getHandleRef(), DevImpl->getHandleRef(), &urDesc, &urFormat, + CtxImpl.getHandleRef(), DevImpl->getHandleRef(), &urDesc, &urFormat, ur_exp_image_mem_type_t::UR_EXP_IMAGE_MEM_TYPE_USM_POINTER, &supportsPointerAllocation); @@ -833,7 +833,7 @@ get_image_memory_support(const image_descriptor &imageDescriptor, Adapter->call( - CtxImpl->getHandleRef(), DevImpl->getHandleRef(), &urDesc, &urFormat, + CtxImpl.getHandleRef(), DevImpl->getHandleRef(), &urDesc, &urFormat, ur_exp_image_mem_type_t::UR_EXP_IMAGE_MEM_TYPE_OPAQUE_HANDLE, &supportsOpaqueAllocation); @@ -864,9 +864,9 @@ __SYCL_EXPORT bool is_image_handle_supported( const sycl::device &syclDevice, const sycl::context &syclContext) { std::shared_ptr DevImpl = sycl::detail::getSyclObjImpl(syclDevice); - std::shared_ptr CtxImpl = - sycl::detail::getSyclObjImpl(syclContext); - const sycl::detail::AdapterPtr &Adapter = CtxImpl->getAdapter(); + sycl::detail::context_impl &CtxImpl = + *sycl::detail::getSyclObjImpl(syclContext); + const sycl::detail::AdapterPtr &Adapter = CtxImpl.getAdapter(); ur_image_desc_t urDesc; ur_image_format_t urFormat; @@ -881,7 +881,7 @@ __SYCL_EXPORT bool is_image_handle_supported( Adapter->call( - CtxImpl->getHandleRef(), DevImpl->getHandleRef(), &urDesc, &urFormat, + CtxImpl.getHandleRef(), DevImpl->getHandleRef(), &urDesc, &urFormat, memHandleType, &supportsUnsampledHandle); return supportsUnsampledHandle; @@ -904,9 +904,9 @@ __SYCL_EXPORT bool is_image_handle_supported( const sycl::device &syclDevice, const sycl::context &syclContext) { std::shared_ptr DevImpl = sycl::detail::getSyclObjImpl(syclDevice); - std::shared_ptr CtxImpl = - sycl::detail::getSyclObjImpl(syclContext); - const sycl::detail::AdapterPtr &Adapter = CtxImpl->getAdapter(); + sycl::detail::context_impl &CtxImpl = + *sycl::detail::getSyclObjImpl(syclContext); + const sycl::detail::AdapterPtr &Adapter = CtxImpl.getAdapter(); ur_image_desc_t urDesc; ur_image_format_t urFormat; @@ -921,7 +921,7 @@ __SYCL_EXPORT bool is_image_handle_supported( Adapter->call< sycl::errc::runtime, sycl::detail::UrApiKind::urBindlessImagesGetImageSampledHandleSupportExp>( - CtxImpl->getHandleRef(), DevImpl->getHandleRef(), &urDesc, &urFormat, + CtxImpl.getHandleRef(), DevImpl->getHandleRef(), &urDesc, &urFormat, memHandleType, &supportsSampledHandle); return supportsSampledHandle; diff --git a/sycl/source/detail/context_impl.hpp b/sycl/source/detail/context_impl.hpp index 3f93b1b03721b..ba320c90d8598 100644 --- a/sycl/source/detail/context_impl.hpp +++ b/sycl/source/detail/context_impl.hpp @@ -364,12 +364,14 @@ void GetCapabilitiesIntersectionSet(const std::vector &Devices, // We're under sycl/source and these won't be exported but it's way more // convenient to be able to reference them without extra `detail::`. -inline auto get_ur_handles(const sycl::context &syclContext) { - sycl::detail::context_impl &Ctx = *sycl::detail::getSyclObjImpl(syclContext); +inline auto get_ur_handles(sycl::detail::context_impl &Ctx) { ur_context_handle_t urCtx = Ctx.getHandleRef(); const sycl::detail::Adapter *Adapter = Ctx.getAdapter().get(); return std::tuple{urCtx, Adapter}; } +inline auto get_ur_handles(const sycl::context &syclContext) { + return get_ur_handles(*sycl::detail::getSyclObjImpl(syclContext)); +} inline auto get_ur_handles(const sycl::device &syclDevice, const sycl::context &syclContext) { auto [urCtx, Adapter] = get_ur_handles(syclContext); diff --git a/sycl/source/detail/device_image_impl.hpp b/sycl/source/detail/device_image_impl.hpp index 4d7885a315456..4955abd376610 100644 --- a/sycl/source/detail/device_image_impl.hpp +++ b/sycl/source/detail/device_image_impl.hpp @@ -570,13 +570,13 @@ class device_image_impl { ur_native_handle_t getNative() const { assert(MProgram); - const auto &ContextImplPtr = detail::getSyclObjImpl(MContext); - const AdapterPtr &Adapter = ContextImplPtr->getAdapter(); + context_impl &ContextImpl = *detail::getSyclObjImpl(MContext); + const AdapterPtr &Adapter = ContextImpl.getAdapter(); ur_native_handle_t NativeProgram = 0; Adapter->call(MProgram, &NativeProgram); - if (ContextImplPtr->getBackend() == backend::opencl) + if (ContextImpl.getBackend() == backend::opencl) __SYCL_OCL_CALL(clRetainProgram, ur::cast(NativeProgram)); return NativeProgram; diff --git a/sycl/source/detail/handler_impl.hpp b/sycl/source/detail/handler_impl.hpp index 72fe112375a64..580902782939a 100644 --- a/sycl/source/detail/handler_impl.hpp +++ b/sycl/source/detail/handler_impl.hpp @@ -199,7 +199,7 @@ class handler_impl { template context_impl &get_context() { Self *self = this; if (auto *Queue = self->get_queue_or_null()) - return *Queue->getContextImplPtr(); + return Queue->getContextImpl(); else return *self->get_graph().getContextImplPtr(); } diff --git a/sycl/source/detail/helpers.cpp b/sycl/source/detail/helpers.cpp index 894b8b8063178..47667bd1e05f2 100644 --- a/sycl/source/detail/helpers.cpp +++ b/sycl/source/detail/helpers.cpp @@ -24,7 +24,6 @@ namespace sycl { inline namespace _V1 { -using ContextImplPtr = std::shared_ptr; namespace detail { void waitEvents(std::vector DepEvents) { for (auto SyclEvent : DepEvents) { @@ -59,10 +58,10 @@ retrieveKernelBinary(queue_impl &Queue, KernelNameStrRefT KernelName, if (DeviceImage == DeviceImages.end()) { return {nullptr, nullptr}; } - auto ContextImpl = Queue.getContextImplPtr(); + context_impl &ContextImpl = Queue.getContextImpl(); ur_program_handle_t Program = detail::ProgramManager::getInstance().createURProgram( - **DeviceImage, *ContextImpl, {createSyclObjFromImpl(Dev)}); + **DeviceImage, ContextImpl, {createSyclObjFromImpl(Dev)}); return {*DeviceImage, Program}; } @@ -80,11 +79,11 @@ retrieveKernelBinary(queue_impl &Queue, KernelNameStrRefT KernelName, DeviceImage = SyclKernelImpl->getDeviceImage()->get_bin_image_ref(); Program = SyclKernelImpl->getDeviceImage()->get_ur_program_ref(); } else { - auto ContextImpl = Queue.getContextImplPtr(); + context_impl &ContextImpl = Queue.getContextImpl(); DeviceImage = &detail::ProgramManager::getInstance().getDeviceImage( - KernelName, *ContextImpl, Dev); + KernelName, ContextImpl, Dev); Program = detail::ProgramManager::getInstance().createURProgram( - *DeviceImage, *ContextImpl, {createSyclObjFromImpl(Dev)}); + *DeviceImage, ContextImpl, {createSyclObjFromImpl(Dev)}); } return {DeviceImage, Program}; } diff --git a/sycl/source/detail/kernel_impl.hpp b/sycl/source/detail/kernel_impl.hpp index 3dc45ae8e8602..eb1f92d3ca49a 100644 --- a/sycl/source/detail/kernel_impl.hpp +++ b/sycl/source/detail/kernel_impl.hpp @@ -232,7 +232,7 @@ class kernel_impl { bool isInterop() const { return MIsInterop; } ur_program_handle_t getProgramRef() const { return MProgram; } - ContextImplPtr getContextImplPtr() const { return MContext; } + context_impl &getContextImpl() const { return *MContext; } std::mutex &getNoncacheableEnqueueMutex() const { return MNoncacheableEnqueueMutex; diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index f9a9f41450ec4..f007225038856 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -80,7 +80,7 @@ template <> device queue_impl::get_info() const { template <> typename info::platform::version::return_type queue_impl::get_backend_info() const { - if (getContextImplPtr()->getBackend() != backend::opencl) { + if (getContextImpl().getBackend() != backend::opencl) { throw sycl::exception(errc::backend_mismatch, "the info::platform::version info descriptor can " "only be queried with an OpenCL backend"); @@ -93,7 +93,7 @@ queue_impl::get_backend_info() const { template <> typename info::device::version::return_type queue_impl::get_backend_info() const { - if (getContextImplPtr()->getBackend() != backend::opencl) { + if (getContextImpl().getBackend() != backend::opencl) { throw sycl::exception(errc::backend_mismatch, "the info::device::version info descriptor can only " "be queried with an OpenCL backend"); @@ -106,7 +106,7 @@ queue_impl::get_backend_info() const { template <> typename info::device::backend_version::return_type queue_impl::get_backend_info() const { - if (getContextImplPtr()->getBackend() != backend::ext_oneapi_level_zero) { + if (getContextImpl().getBackend() != backend::ext_oneapi_level_zero) { throw sycl::exception(errc::backend_mismatch, "the info::device::backend_version info descriptor " "can only be queried with a Level Zero backend"); @@ -731,7 +731,7 @@ ur_native_handle_t queue_impl::getNative(int32_t &NativeHandleDesc) const { Adapter->call(MQueue, &UrNativeDesc, &Handle); - if (getContextImplPtr()->getBackend() == backend::opencl) + if (getContextImpl().getBackend() == backend::opencl) __SYCL_OCL_CALL(clRetainCommandQueue, ur::cast(Handle)); return Handle; diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index ec7aa71196d8d..43badc3421a5e 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -293,6 +293,7 @@ class queue_impl : public std::enable_shared_from_this { const AdapterPtr &getAdapter() const { return MContext->getAdapter(); } + // TODO: stop using it in existing code. New code must NOT use this! const ContextImplPtr &getContextImplPtr() const { return MContext; } context_impl &getContextImpl() const { return *MContext; } @@ -651,7 +652,7 @@ class queue_impl : public std::enable_shared_from_this { void revisitUnenqueuedCommandsState(const EventImplPtr &CompletedHostTask); static ContextImplPtr getContext(queue_impl *Queue) { - return Queue ? Queue->getContextImplPtr() : nullptr; + return Queue ? Queue->getContextImpl().shared_from_this() : nullptr; } static ContextImplPtr getContext(const QueueImplPtr &Queue) { return getContext(Queue.get()); @@ -984,7 +985,7 @@ class queue_impl : public std::enable_shared_from_this { mutable std::mutex MMutex; device_impl &MDevice; - const ContextImplPtr MContext; + const std::shared_ptr MContext; /// These events are tracked, but not owned, by the queue. std::vector> MEventsWeak; diff --git a/sycl/source/detail/sampler_impl.cpp b/sycl/source/detail/sampler_impl.cpp index 108d83b46cefb..5ce55f3fbdac7 100644 --- a/sycl/source/detail/sampler_impl.cpp +++ b/sycl/source/detail/sampler_impl.cpp @@ -95,10 +95,14 @@ sampler_impl::~sampler_impl() { } ur_sampler_handle_t -sampler_impl::getOrCreateSampler(const ContextImplPtr &ContextImpl) { +sampler_impl::getOrCreateSampler(context_impl &ContextImpl) { + // Just for the `MContextToSampler` lookups. Probably the type of it should be + // changed. + std::shared_ptr ContextImplPtr = ContextImpl.shared_from_this(); + { std::lock_guard Lock(MMutex); - auto It = MContextToSampler.find(ContextImpl); + auto It = MContextToSampler.find(ContextImplPtr); if (It != MContextToSampler.end()) return It->second; } @@ -135,10 +139,10 @@ sampler_impl::getOrCreateSampler(const ContextImplPtr &ContextImpl) { ur_result_t errcode_ret = UR_RESULT_SUCCESS; ur_sampler_handle_t resultSampler = nullptr; - const AdapterPtr &Adapter = ContextImpl->getAdapter(); + const AdapterPtr &Adapter = ContextImpl.getAdapter(); errcode_ret = Adapter->call_nocheck( - ContextImpl->getHandleRef(), &desc, &resultSampler); + ContextImpl.getHandleRef(), &desc, &resultSampler); if (errcode_ret == UR_RESULT_ERROR_UNSUPPORTED_FEATURE) throw sycl::exception(sycl::errc::feature_not_supported, @@ -146,7 +150,7 @@ sampler_impl::getOrCreateSampler(const ContextImplPtr &ContextImpl) { Adapter->checkUrResult(errcode_ret); std::lock_guard Lock(MMutex); - MContextToSampler[ContextImpl] = resultSampler; + MContextToSampler[ContextImplPtr] = resultSampler; return resultSampler; } diff --git a/sycl/source/detail/sampler_impl.hpp b/sycl/source/detail/sampler_impl.hpp index 0abc6a7ad273b..a0d5f704dbf94 100644 --- a/sycl/source/detail/sampler_impl.hpp +++ b/sycl/source/detail/sampler_impl.hpp @@ -46,7 +46,7 @@ class sampler_impl { coordinate_normalization_mode get_coordinate_normalization_mode() const; - ur_sampler_handle_t getOrCreateSampler(const ContextImplPtr &ContextImpl); + ur_sampler_handle_t getOrCreateSampler(context_impl &ContextImpl); ~sampler_impl(); diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 41b660cf081e3..09a81462053f6 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -429,7 +429,7 @@ class DispatchHostTask { "Host task submissions should have an associated queue"); interop_handle IH{MReqToMem, HostTask.MQueue, HostTask.MQueue->getDeviceImpl().shared_from_this(), - HostTask.MQueue->getContextImplPtr()}; + HostTask.MQueue->getContextImpl().shared_from_this()}; // TODO: should all the backends that support this entry point use this // for host task? auto &Queue = HostTask.MQueue; @@ -793,7 +793,7 @@ Command *Command::processDepEvent(EventImplPtr DepEvent, const DepDesc &Dep, ContextImplPtr Command::getWorkerContext() const { if (!MQueue) return nullptr; - return MQueue->getContextImplPtr(); + return MQueue->getContextImpl().shared_from_this(); } bool Command::producesPiEvent() const { return true; } @@ -1550,7 +1550,7 @@ void MemCpyCommand::emitInstrumentationData() { ContextImplPtr MemCpyCommand::getWorkerContext() const { if (!MWorkerQueue) return nullptr; - return MWorkerQueue->getContextImplPtr(); + return MWorkerQueue->getContextImpl().shared_from_this(); } bool MemCpyCommand::producesPiEvent() const { @@ -1723,7 +1723,7 @@ void MemCpyCommandHost::emitInstrumentationData() { ContextImplPtr MemCpyCommandHost::getWorkerContext() const { if (!MWorkerQueue) return nullptr; - return MWorkerQueue->getContextImplPtr(); + return MWorkerQueue->getContextImpl().shared_from_this(); } ur_result_t MemCpyCommandHost::enqueueImp() { @@ -2011,7 +2011,7 @@ void instrumentationAddExtraKernelMetadata( // by graph API, when a modifiable graph is finalized. FastKernelCacheValPtr FastKernelCacheVal = detail::ProgramManager::getInstance().getOrCreateKernel( - *Queue->getContextImplPtr(), Queue->getDeviceImpl(), KernelName, + Queue->getContextImpl(), Queue->getDeviceImpl(), KernelName, KernelNameBasedCachePtr); EliminatedArgMask = FastKernelCacheVal->MKernelArgMask; } @@ -2307,8 +2307,7 @@ void SetArgBasedOnType( const AdapterPtr &Adapter, ur_kernel_handle_t Kernel, const std::shared_ptr &DeviceImageImpl, const std::function &getMemAllocationFunc, - const ContextImplPtr &ContextImpl, detail::ArgDesc &Arg, - size_t NextTrueIndex) { + context_impl &ContextImpl, detail::ArgDesc &Arg, size_t NextTrueIndex) { switch (Arg.MType) { case kernel_param_kind_t::kind_dynamic_work_group_memory: break; @@ -2436,7 +2435,7 @@ static ur_result_t SetKernelParamsAndLaunch( auto setFunc = [&Adapter, Kernel, &DeviceImageImpl, &getMemAllocationFunc, &Queue](detail::ArgDesc &Arg, size_t NextTrueIndex) { SetArgBasedOnType(Adapter, Kernel, DeviceImageImpl, getMemAllocationFunc, - Queue.getContextImplPtr(), Arg, NextTrueIndex); + Queue.getContextImpl(), Arg, NextTrueIndex); }; applyFuncOnFilteredArgs(EliminatedArgMask, Args, setFunc); } @@ -2599,7 +2598,7 @@ ur_result_t enqueueImpCommandBufferKernel( &getMemAllocationFunc](sycl::detail::ArgDesc &Arg, size_t NextTrueIndex) { sycl::detail::SetArgBasedOnType(Adapter, UrKernel, DeviceImageImpl, - getMemAllocationFunc, ContextImpl, Arg, + getMemAllocationFunc, *ContextImpl, Arg, NextTrueIndex); }; // Copy args for modification @@ -2672,7 +2671,7 @@ void enqueueImpKernel( detail::kernel_param_desc_t (*KernelParamDescGetter)(int), bool KernelHasSpecialCaptures) { // Run OpenCL kernel - auto &ContextImpl = Queue.getContextImplPtr(); + context_impl &ContextImpl = Queue.getContextImpl(); device_impl &DeviceImpl = Queue.getDeviceImpl(); ur_kernel_handle_t Kernel = nullptr; std::mutex *KernelMutex = nullptr; @@ -2710,7 +2709,7 @@ void enqueueImpKernel( KernelMutex = SyclKernelImpl->getCacheMutex(); } else { KernelCacheVal = detail::ProgramManager::getInstance().getOrCreateKernel( - *ContextImpl, DeviceImpl, KernelName, KernelNameBasedCachePtr, NDRDesc); + ContextImpl, DeviceImpl, KernelName, KernelNameBasedCachePtr, NDRDesc); Kernel = KernelCacheVal->MKernelHandle; KernelMutex = KernelCacheVal->MMutex; Program = KernelCacheVal->MProgramHandle; @@ -2722,7 +2721,7 @@ void enqueueImpKernel( // Initialize device globals associated with this. std::vector DeviceGlobalInitEvents = - ContextImpl->initializeDeviceGlobals(Program, Queue); + ContextImpl.initializeDeviceGlobals(Program, Queue); if (!DeviceGlobalInitEvents.empty()) { std::vector EventsWithDeviceGlobalInits; EventsWithDeviceGlobalInits.reserve(RawEvents.size() + @@ -2779,9 +2778,9 @@ ur_result_t enqueueReadWriteHostPipe(queue_impl &Queue, ur_program_handle_t Program = nullptr; device Device = Queue.get_device(); - ContextImplPtr ContextImpl = Queue.getContextImplPtr(); + context_impl &ContextImpl = Queue.getContextImpl(); std::optional CachedProgram = - ContextImpl->getProgramForHostPipe(Device, hostPipeEntry); + ContextImpl.getProgramForHostPipe(Device, hostPipeEntry); if (CachedProgram) Program = *CachedProgram; else { @@ -2999,7 +2998,7 @@ ur_result_t ExecCGCommand::enqueueImpCommandBuffer() { // Queue is created by graph_impl before creating command to submit to // scheduler. const AdapterPtr &Adapter = MQueue->getAdapter(); - auto ContextImpl = MQueue->getContextImplPtr(); + context_impl &ContextImpl = MQueue->getContextImpl(); device_impl &DeviceImpl = MQueue->getDeviceImpl(); // The CUDA & HIP backends don't have the equivalent of barrier @@ -3028,7 +3027,7 @@ ur_result_t ExecCGCommand::enqueueImpCommandBuffer() { false /* profilable*/ }; Adapter->call( - ContextImpl->getHandleRef(), DeviceImpl.getHandleRef(), &Desc, + ContextImpl.getHandleRef(), DeviceImpl.getHandleRef(), &Desc, &ChildCommandBuffer); } @@ -3038,12 +3037,12 @@ ur_result_t ExecCGCommand::enqueueImpCommandBuffer() { // available if a user asks for them inside the interop task scope std::vector ReqToMem; const std::vector &HandlerReq = HostTask->getRequirements(); - auto ReqToMemConv = [&ReqToMem, ContextImpl](Requirement *Req) { + auto ReqToMemConv = [&ReqToMem, &ContextImpl](Requirement *Req) { const std::vector &AllocaCmds = Req->MSYCLMemObj->MRecord->MAllocaCommands; for (AllocaCommandBase *AllocaCmd : AllocaCmds) - if (ContextImpl.get() == getContext(AllocaCmd->getQueue())) { + if (&ContextImpl == getContext(AllocaCmd->getQueue())) { auto MemArg = reinterpret_cast(AllocaCmd->getMemAllocation()); ReqToMem.emplace_back(std::make_pair(Req, MemArg)); @@ -3063,8 +3062,8 @@ ur_result_t ExecCGCommand::enqueueImpCommandBuffer() { ur_exp_command_buffer_handle_t InteropCommandBuffer = ChildCommandBuffer ? ChildCommandBuffer : MCommandBuffer; interop_handle IH{std::move(ReqToMem), MQueue, - DeviceImpl.shared_from_this(), ContextImpl, - InteropCommandBuffer}; + DeviceImpl.shared_from_this(), + ContextImpl.shared_from_this(), InteropCommandBuffer}; CommandBufferNativeCommandData CustomOpData{ std::move(IH), HostTask->MHostTask->MInteropTask}; @@ -3465,7 +3464,7 @@ ur_result_t ExecCGCommand::enqueueImpQueue() { EnqueueNativeCommandData CustomOpData{ interop_handle{std::move(ReqToMem), HostTask->MQueue, HostTask->MQueue->getDeviceImpl().shared_from_this(), - HostTask->MQueue->getContextImplPtr()}, + HostTask->MQueue->getContextImpl().shared_from_this()}, HostTask->MHostTask->MInteropTask}; ur_bool_t NativeCommandSupport = false; diff --git a/sycl/source/detail/scheduler/commands.hpp b/sycl/source/detail/scheduler/commands.hpp index 54610e0720859..eb15dfd6a59f1 100644 --- a/sycl/source/detail/scheduler/commands.hpp +++ b/sycl/source/detail/scheduler/commands.hpp @@ -750,8 +750,7 @@ void SetArgBasedOnType( const detail::AdapterPtr &Adapter, ur_kernel_handle_t Kernel, const std::shared_ptr &DeviceImageImpl, const std::function &getMemAllocationFunc, - const ContextImplPtr &ContextImpl, detail::ArgDesc &Arg, - size_t NextTrueIndex); + context_impl &ContextImpl, detail::ArgDesc &Arg, size_t NextTrueIndex); template void applyFuncOnFilteredArgs(const KernelArgMask *EliminatedArgMask, diff --git a/sycl/source/detail/usm/usm_impl.cpp b/sycl/source/detail/usm/usm_impl.cpp index 1ea02f73b3846..aa62c4756dbd2 100644 --- a/sycl/source/detail/usm/usm_impl.cpp +++ b/sycl/source/detail/usm/usm_impl.cpp @@ -518,7 +518,8 @@ void *aligned_alloc(size_t Alignment, size_t Size, const queue &Q, alloc Kind, /// /// \param Ptr is the USM pointer to query /// \param Ctxt is the sycl context the ptr was allocated in -alloc get_pointer_type(const void *Ptr, const context &Ctxt) { +namespace detail { +alloc get_pointer_type(const void *Ptr, context_impl &Ctxt) { if (!Ptr) return alloc::unknown; @@ -559,6 +560,12 @@ alloc get_pointer_type(const void *Ptr, const context &Ctxt) { return ResultAlloc; } +} // namespace detail +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES +__SYCL_EXPORT alloc get_pointer_type(const void *Ptr, const context &Ctxt) { + return get_pointer_type(Ptr, *getSyclObjImpl(Ctxt)); +} +#endif /// Queries the device against which the pointer was allocated /// diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index a48f36a960427..03288724c4efc 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -2176,6 +2176,7 @@ void handler::memcpyFromHostOnlyDeviceGlobal(void *Dest, }); } +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES const std::shared_ptr & handler::getContextImplPtr() const { if (auto *Graph = impl->get_graph_or_null()) { @@ -2183,6 +2184,14 @@ handler::getContextImplPtr() const { } return impl->get_queue().getContextImplPtr(); } +#endif + +detail::context_impl &handler::getContextImpl() const { + if (auto *Graph = impl->get_graph_or_null()) { + return *Graph->getContextImplPtr(); + } + return impl->get_queue().getContextImpl(); +} void handler::setKernelCacheConfig(handler::StableKernelCacheConfig Config) { switch (Config) { diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index 6d3abbb03b0f1..32d1339b45c0c 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3298,6 +3298,7 @@ _ZN4sycl3_V16detail16AccessorBaseHostC1ENS0_2idILi3EEENS0_5rangeILi3EEES6_NS0_6a _ZN4sycl3_V16detail16AccessorBaseHostC1ENS0_2idILi3EEENS0_5rangeILi3EEES6_NS0_6access4modeEPviimbRKNS0_13property_listE _ZN4sycl3_V16detail16AccessorBaseHostC2ENS0_2idILi3EEENS0_5rangeILi3EEES6_NS0_6access4modeEPviibmbRKNS0_13property_listE _ZN4sycl3_V16detail16AccessorBaseHostC2ENS0_2idILi3EEENS0_5rangeILi3EEES6_NS0_6access4modeEPviimbRKNS0_13property_listE +_ZN4sycl3_V16detail16get_pointer_typeEPKvRNS1_12context_implE _ZN4sycl3_V16detail16reduGetMaxWGSizeERNS0_7handlerEm _ZN4sycl3_V16detail16reduGetMaxWGSizeESt10shared_ptrINS1_10queue_implEEm _ZN4sycl3_V16detail17HostProfilingInfo3endEv @@ -4082,6 +4083,7 @@ _ZNK4sycl3_V17context8get_infoINS0_4info7context7devicesEEENS0_6detail20is_conte _ZNK4sycl3_V17context8get_infoINS0_4info7context8platformEEENS0_6detail20is_context_info_descIT_E11return_typeEv _ZNK4sycl3_V17context9getNativeEv _ZNK4sycl3_V17handler11eventNeededEv +_ZNK4sycl3_V17handler14getContextImplEv _ZNK4sycl3_V17handler15getCommandGraphEv _ZNK4sycl3_V17handler16getDeviceBackendEv _ZNK4sycl3_V17handler17getContextImplPtrEv diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index ac99be10319ae..cf290718520c7 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -4054,6 +4054,7 @@ ?getChannelType@UnsampledImageAccessorBaseHost@detail@_V1@sycl@@QEBA?AW4image_channel_type@34@XZ ?getChannelType@image_plain@detail@_V1@sycl@@IEBA?AW4image_channel_type@34@XZ ?getCommandGraph@handler@_V1@sycl@@AEBA?AV?$shared_ptr@Vgraph_impl@detail@experimental@oneapi@ext@_V1@sycl@@@std@@XZ +?getContextImpl@handler@_V1@sycl@@AEBAAEAVcontext_impl@detail@23@XZ ?getContextImplPtr@handler@_V1@sycl@@AEBAAEBV?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@std@@XZ ?getCurrentDSODir@OSUtil@detail@_V1@sycl@@SA?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@XZ ?getDeviceBackend@handler@_V1@sycl@@AEBA?AW4backend@23@XZ @@ -4205,6 +4206,7 @@ ?get_platforms@platform@_V1@sycl@@SA?AV?$vector@Vplatform@_V1@sycl@@V?$allocator@Vplatform@_V1@sycl@@@std@@@std@@XZ ?get_pointer_device@_V1@sycl@@YA?AVdevice@12@PEBXAEBVcontext@12@@Z ?get_pointer_type@_V1@sycl@@YA?AW4alloc@usm@12@PEBXAEBVcontext@12@@Z +?get_pointer_type@detail@_V1@sycl@@YA?AW4alloc@usm@23@PEBXAEAVcontext_impl@123@@Z ?get_precision@stream@_V1@sycl@@QEBA_KXZ ?get_predecessors@node@experimental@oneapi@ext@_V1@sycl@@QEBA?AV?$vector@Vnode@experimental@oneapi@ext@_V1@sycl@@V?$allocator@Vnode@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ ?get_queue@fusion_wrapper@experimental@codeplay@ext@_V1@sycl@@QEBA?AVqueue@56@XZ