diff --git a/.github/workflows/sycl-linux-precommit.yml b/.github/workflows/sycl-linux-precommit.yml index e14649f904521..92eaf0ed7625e 100644 --- a/.github/workflows/sycl-linux-precommit.yml +++ b/.github/workflows/sycl-linux-precommit.yml @@ -81,20 +81,6 @@ jobs: fail-fast: false matrix: include: - - name: GEN 12 Integrated - runner: '["Linux", "gen12"]' - image_options: -u 1001 --device=/dev/dri -v /dev/dri/by-path:/dev/dri/by-path --privileged --cap-add SYS_ADMIN - target_devices: level_zero:gpu;opencl:gpu;opencl:cpu - extra_lit_opts: --param gpu-intel-gen12=True - - name: NVIDIA/CUDA - runner: '["Linux", "cuda"]' - image_options: -u 1001 --gpus all --cap-add SYS_ADMIN - target_devices: cuda:gpu - - name: AMD/HIP - runner: '["Linux", "amdgpu"]' - image_options: -u 1001 --device=/dev/dri --device=/dev/kfd - target_devices: hip:gpu - extra_lit_opts: -j 1 - name: Intel Arc A-Series Graphics runner: '["Linux", "arc"]' image_options: -u 1001 --device=/dev/dri -v /dev/dri/by-path:/dev/dri/by-path --privileged --cap-add SYS_ADMIN @@ -107,18 +93,6 @@ jobs: target_devices: level_zero:gpu;opencl:gpu extra_lit_opts: --param matrix-xmx8=True use_igc_dev: true - - name: E2E tests on Intel Ponte Vecchio GPU - runner: '["Linux", "pvc"]' - image_options: -u 1001 --device=/dev/dri -v /dev/dri/by-path:/dev/dri/by-path --privileged --cap-add SYS_ADMIN - target_devices: level_zero:gpu;opencl:gpu - extra_lit_opts: -j 50 - - name: Dev IGC on Intel Ponte Vecchio GPU - runner: '["Linux", "pvc"]' - image: ghcr.io/intel/llvm/ubuntu2404_intel_drivers:devigc - image_options: -u 1001 --device=/dev/dri -v /dev/dri/by-path:/dev/dri/by-path --privileged --cap-add SYS_ADMIN - target_devices: level_zero:gpu;opencl:gpu - use_igc_dev: true - extra_lit_opts: -j 50 - name: Intel Battlemage Graphics runner: '["Linux", "bmg"]' image_options: -u 1001 --device=/dev/dri -v /dev/dri/by-path:/dev/dri/by-path --privileged --cap-add SYS_ADMIN diff --git a/.github/workflows/sycl-windows-precommit.yml 
b/.github/workflows/sycl-windows-precommit.yml index 40f25e873764c..79f9d69012123 100644 --- a/.github/workflows/sycl-windows-precommit.yml +++ b/.github/workflows/sycl-windows-precommit.yml @@ -77,8 +77,6 @@ jobs: fail-fast: false matrix: include: - - name: Intel GEN12 Graphics with Level Zero - runner: '["Windows","gen12"]' - name: Intel Battlemage Graphics with Level Zero runner: '["Windows","bmg"]' uses: ./.github/workflows/sycl-windows-run-tests.yml diff --git a/.github/workflows/ur-precommit.yml b/.github/workflows/ur-precommit.yml index 09af48933f046..2329a74b35402 100644 --- a/.github/workflows/ur-precommit.yml +++ b/.github/workflows/ur-precommit.yml @@ -40,15 +40,9 @@ jobs: name: Detect Changes uses: ./.github/workflows/sycl-detect-changes.yml - source_checks: - name: Source Checks - needs: [detect_changes] - if: ${{ always() && !cancelled() && contains(needs.detect_changes.outputs.filters, 'ur') }} - uses: ./.github/workflows/ur-source-checks.yml - adapters: name: Adapters - needs: [detect_changes, source_checks] + needs: [detect_changes] if: ${{ always() && !cancelled() && contains(needs.detect_changes.outputs.filters, 'ur') }} strategy: matrix: diff --git a/unified-runtime/scripts/generate_code.py b/unified-runtime/scripts/generate_code.py index 62f6ab3de5065..c9e282c32d263 100644 --- a/unified-runtime/scripts/generate_code.py +++ b/unified-runtime/scripts/generate_code.py @@ -223,25 +223,6 @@ def _mako_loader_cpp(path, namespace, tags, version, specs, meta): "make_loader_cpp path %s namespace %s version %s\n" % (path, namespace, version) ) loc = 0 - template = "ldrddi.hpp.mako" - fin = os.path.join(templates_dir, template) - - name = "%s_ldrddi" % (namespace) - filename = "%s.hpp" % (name) - fout = os.path.join(path, filename) - - print("Generating %s..." 
% fout) - loc += util.makoWrite( - fin, - fout, - name=name, - ver=version, - namespace=namespace, - tags=tags, - specs=specs, - meta=meta, - ) - template = "ldrddi.cpp.mako" fin = os.path.join(templates_dir, template) diff --git a/unified-runtime/scripts/templates/helper.py b/unified-runtime/scripts/templates/helper.py index f6a0eec09af73..13b5ef25b2d78 100644 --- a/unified-runtime/scripts/templates/helper.py +++ b/unified-runtime/scripts/templates/helper.py @@ -761,6 +761,7 @@ def get_adapter_manifests(specs): objs.append(obj) return objs + """ Public: returns a list of all loader API functions' names @@ -1510,39 +1511,6 @@ def get_initial_null_set(obj): return "" -""" -Public: - returns true if the function always wraps output pointers in loader handles -""" - - -def always_wrap_outputs(obj): - cname = obj_traits.class_name(obj) - return (cname, obj["name"]) in [ - ("$xProgram", "Link"), - ("$xProgram", "LinkExp"), - ] - - -""" -Private: - returns the list of parameters, filtering based on desc tags -""" - - -def _filter_param_list(params, filters1=["[in]", "[in,out]", "[out]"], filters2=[""]): - lst = [] - for p in params: - for f1 in filters1: - if f1 in p["desc"]: - for f2 in filters2: - if f2 in p["desc"]: - lst.append(p) - break - break - return lst - - """ Public: returns a list of dict of each pfntables needed @@ -1560,131 +1528,6 @@ def get_pfncbtables(specs, meta, namespace, tags): return tables -""" -Public: - returns a list of dict for converting loader input parameters -""" - - -def get_loader_prologue(namespace, tags, obj, meta): - prologue = [] - - params = _filter_param_list(obj["params"], ["[in]"]) - for item in params: - if param_traits.is_mbz(item): - continue - if type_traits.is_class_handle(item["type"], meta): - name = subt(namespace, tags, item["name"]) - tname = _remove_const_ptr(subt(namespace, tags, item["type"])) - - # e.g., "xe_device_handle_t" -> "xe_device_object_t" - obj_name = re.sub(r"(\w+)_handle_t", r"\1_object_t", tname) - 
fty_name = re.sub(r"(\w+)_handle_t", r"\1_factory", tname) - - if type_traits.is_pointer(item["type"]): - range_start = param_traits.range_start(item) - range_end = param_traits.range_end(item) - prologue.append( - { - "name": name, - "obj": obj_name, - "range": (range_start, range_end), - "type": tname, - "factory": fty_name, - "pointer": "*", - } - ) - else: - prologue.append( - { - "name": name, - "obj": obj_name, - "optional": param_traits.is_optional(item), - "pointer": "", - } - ) - - return prologue - - -""" -Private: - Takes a list of struct members and recursively searches for class handles. - Returns a list of class handles with access chains to reach them (e.g. - "struct_a->struct_b.handle"). Also handles ranges of class handles and - ranges of structs with class handle members, although the latter only works - to one level of recursion i.e. a range of structs with a range of structs - with a handle member will not work. -""" - - -def get_struct_handle_members( - namespace, tags, meta, members, parent="", is_struct_range=False -): - handle_members = [] - for m in members: - if type_traits.is_class_handle(m["type"], meta): - m_tname = _remove_const_ptr(subt(namespace, tags, m["type"])) - m_objname = re.sub(r"(\w+)_handle_t", r"\1_object_t", m_tname) - # We can deal with a range of handles, but not if it's in a range of structs - if param_traits.is_range(m) and not is_struct_range: - handle_members.append( - { - "parent": parent, - "name": m["name"], - "obj_name": m_objname, - "type": m_tname, - "range_start": param_traits.range_start(m), - "range_end": param_traits.range_end(m), - } - ) - else: - handle_members.append( - { - "parent": parent, - "name": m["name"], - "obj_name": m_objname, - "optional": param_traits.is_optional(m), - } - ) - elif type_traits.is_struct(m["type"], meta): - member_struct_members = type_traits.get_struct_members(m["type"], meta) - if param_traits.is_range(m): - # If we've hit a range of structs we need to start a new recursion 
looking - # for handle members. We do not support range within range, so skip that - if is_struct_range: - continue - range_handle_members = get_struct_handle_members( - namespace, tags, meta, member_struct_members, "", True - ) - if range_handle_members: - handle_members.append( - { - "parent": parent, - "name": m["name"], - "type": subt(namespace, tags, _remove_const_ptr(m["type"])), - "range_start": param_traits.range_start(m), - "range_end": param_traits.range_end(m), - "handle_members": range_handle_members, - } - ) - else: - # If it's just a struct we can keep recursing in search of handles - m_is_pointer = type_traits.is_pointer(m["type"]) - new_parent_deref = "->" if m_is_pointer else "." - new_parent = m["name"] + new_parent_deref - handle_members += get_struct_handle_members( - namespace, - tags, - meta, - member_struct_members, - new_parent, - is_struct_range, - ) - - return handle_members - - """ Public: Strips a string of all dereferences. @@ -1702,37 +1545,6 @@ def strip_deref(string_to_strip): return string_to_strip.replace("->", "") -""" -Public: - Takes a function object and recurses through its struct parameters to return - a list of structs that have handle object members the loader will need to - convert. 
"""
Public:
    Selects the parameter expression used to look up a function's DDI table:
    the first parameter that is not optional, is not a native handle, and
    whose type is a handle (or, for a range, a pointer to handles).

    Range parameters are returned as `name[0]`, plain handles as `name`.
    Raises RuntimeError if no parameter of the function qualifies.
"""


def get_dditable_field(obj):
    for param in obj["params"]:
        # Optional handles may be null at the call site, so they are skipped.
        if param_traits.is_optional(param):
            continue

        ptype = param["type"]
        if "native_handle_t" in ptype:
            continue

        # A range must be a pointer to handles and is indexed at element 0;
        # anything else must be a handle passed by value.
        if param_traits.is_range(param):
            wanted_suffix, subscript = "_handle_t*", "[0]"
        else:
            wanted_suffix, subscript = "_handle_t", ""

        if ptype.endswith(wanted_suffix):
            return param["name"] + subscript

    obj_class = obj["class"]
    name = obj["name"]
    raise RuntimeError(
        f"Function {obj_class}::{name} does not have a non-optional handle argument"
    )
*userData; - }; - - void event_callback_wrapper([[maybe_unused]] ${x}_event_handle_t hEvent, - ${x}_execution_info_t execStatus, void *pUserData) { - auto *wrapper = - reinterpret_cast(pUserData); - (wrapper->fn)(wrapper->event, execStatus, wrapper->userData); - delete wrapper; - } - } - - %endif /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for ${func_name} %if 'condition' in obj: @@ -62,13 +44,10 @@ namespace ur_loader %endfor ) { - ${x}_result_t result = ${X}_RESULT_SUCCESS;<% - add_local = False - %>${th.get_initial_null_set(obj)} - - [[maybe_unused]] auto context = getContext(); + ${th.get_initial_null_set(obj)} %if func_basename == "AdapterGet": - + auto context = getContext(); + size_t adapterIndex = 0; if( nullptr != ${obj['params'][1]['name']} && ${obj['params'][0]['name']} !=0) { @@ -76,18 +55,7 @@ namespace ur_loader { if(platform.initStatus != ${X}_RESULT_SUCCESS) continue; - platform.dditable.${n}.${th.get_table_name(n, tags, obj)}.${th.make_pfn_name(n, tags, obj)}( 1, &${obj['params'][1]['name']}[adapterIndex], nullptr ); - try - { - ${obj['params'][1]['name']}[adapterIndex] = reinterpret_cast<${n}_adapter_handle_t>(context->factories.${n}_adapter_factory.getInstance( - ${obj['params'][1]['name']}[adapterIndex], &platform.dditable - )); - } - catch( std::bad_alloc &) - { - result = ${X}_RESULT_ERROR_OUT_OF_HOST_MEMORY; - break; - } + platform.dditable.${th.get_table_name(n, tags, obj)}.${th.make_pfn_name(n, tags, obj)}( 1, &${obj['params'][1]['name']}[adapterIndex], nullptr ); adapterIndex++; if (adapterIndex == NumEntries) { break; @@ -100,286 +68,17 @@ namespace ur_loader *${obj['params'][2]['name']} = static_cast(context->platforms.size()); } - %elif func_basename == "PlatformGet": - - // extract adapter's function pointer table - auto dditable = - reinterpret_cast<${n}_platform_object_t *>( ${obj['params'][0]['name']})->dditable; - - uint32_t library_platform_handle_count = 0; - - 
result = dditable->${n}.${th.get_table_name(n, tags, obj)}.${th.make_pfn_name(n, tags, obj)}( ${obj['params'][0]['name']}, 0, nullptr, &library_platform_handle_count ); - if( ${X}_RESULT_SUCCESS != result ) return result; - - if( nullptr != ${obj['params'][2]['name']} && ${obj['params'][1]['name']} !=0) - { - if( library_platform_handle_count > ${obj['params'][1]['name']}) { - library_platform_handle_count = ${obj['params'][1]['name']}; - } - result = dditable->${n}.${th.get_table_name(n, tags, obj)}.${th.make_pfn_name(n, tags, obj)}( ${obj['params'][0]['name']}, library_platform_handle_count, ${obj['params'][2]['name']}, nullptr ); - if( ${X}_RESULT_SUCCESS != result ) return result; - - try - { - for( uint32_t i = 0; i < library_platform_handle_count; ++i ) { - ${obj['params'][2]['name']}[ i ] = reinterpret_cast<${n}_platform_handle_t>( - context->factories.${n}_platform_factory.getInstance( ${obj['params'][2]['name']}[ i ], dditable ) ); - } - } - catch( std::bad_alloc& ) - { - result = ${X}_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - } - - if( ${X}_RESULT_SUCCESS == result && ${obj['params'][3]['name']} != nullptr ) - *${obj['params'][3]['name']} = library_platform_handle_count; - + return ${X}_RESULT_SUCCESS; %else: - <%param_replacements={}%> - %for i, item in enumerate(th.get_loader_prologue(n, tags, obj, meta)): - %if not '_native_object_' in item['obj']: - // extract platform's function pointer table - auto dditable = reinterpret_cast<${item['obj']}*>( ${item['pointer']}${item['name']} )->dditable; - auto ${th.make_pfn_name(n, tags, obj)} = dditable->${n}.${th.get_table_name(n, tags, obj)}.${th.make_pfn_name(n, tags, obj)}; + auto *dditable = *reinterpret_cast<${x}_dditable_t **>(${th.get_dditable_field(obj)}); + + auto *${th.make_pfn_name(n, tags, obj)} = dditable->${th.get_table_name(n, tags, obj)}.${th.make_pfn_name(n, tags, obj)}; if( nullptr == ${th.make_pfn_name(n, tags, obj)} ) return ${X}_RESULT_ERROR_UNINITIALIZED; - <%break%> - %endif - %endfor - %if 
func_basename == "EventSetCallback": - - // Replace the callback with a wrapper function that gives the callback the loader event rather than a - // backend-specific event - auto *wrapper_data = - new event_callback_wrapper_data_t{pfnNotify, hEvent, pUserData}; - pUserData = wrapper_data; - pfnNotify = event_callback_wrapper; - - %endif - %for i, item in enumerate(th.get_loader_prologue(n, tags, obj, meta)): - %if 'range' in item: - <% - add_local = True - param_replacements[item['name']] = item['name'] + 'Local.data()'%>// convert loader handles to platform handles - auto ${item['name']}Local = std::vector<${item['type']}>(${item['range'][1]}); - for( size_t i = ${item['range'][0]}; i < ${item['range'][1]}; ++i ) - ${item['name']}Local[ i ] = reinterpret_cast<${item['obj']}*>( ${item['name']}[ i ] )->handle; - %else: - %if not '_native_object_' in item['obj']: - // convert loader handle to platform handle - %if item['optional']: - ${item['name']} = ( ${item['name']} ) ? reinterpret_cast<${item['obj']}*>( ${item['name']} )->handle : nullptr; - %else: - ${item['name']} = reinterpret_cast<${item['obj']}*>( ${item['name']} )->handle; - %endif - %endif - %endif - - %endfor - - <% - epilogue = th.get_loader_epilogue(specs, n, tags, obj, meta) - has_typename = False - for item in epilogue: - if 'typename' in item: - has_typename = True - break - %> - - %if has_typename: - // this value is needed for converting adapter handles to loader handles - size_t sizeret = 0; - if (pPropSizeRet == NULL) - pPropSizeRet = &sizeret; - %endif - - ## Here we deal with handles buried inside struct type parameters. First - ## we create a local copy of the struct, then we convert all the handles - ## in that local copy and set the parameter to point to it before forwarding - ## it to the final API call. - <% handle_structs = th.get_object_handle_structs_to_convert(n, tags, obj, meta) %> - %if handle_structs: - // Deal with any struct parameters that have handle members we need to convert. 
- %if func_basename == "CommandBufferUpdateKernelLaunchExp": - ## CommandBufferUpdateKernelLaunchExp entry-point takes a list of structs with - ## handle members, as well as members defining a nested list of structs - ## containing handles. This usage is not supported yet, so special case as - ## a temporary measure. - std::vector pUpdateKernelLaunchVector = {}; - std::vector> - ppUpdateKernelLaunchpNewMemObjArgList(numKernelUpdates); - for (size_t Offset = 0; Offset < numKernelUpdates; Offset ++) { - %endif - %for struct in handle_structs: - %if struct['optional']: - ${struct['type']} ${struct['name']}Local = {}; - if(${struct['name']}) - ${struct['name']}Local = *${struct['name']}; - %else: - auto ${struct['name']}Local = *${struct['name']}; - %endif - %endfor - - %for struct in handle_structs: - %for member in struct['members']: - ## If this member has a handle_members field that means it's a range of - ## structs which each contain a handle to convert. - %if 'handle_members' in member: - ## we use the parent info stripped of derefs for a unique variable name - <% - parent_no_deref = th.strip_deref(member['parent']) - range_vector_name = struct['name'] + parent_no_deref + member['name'] - ## we need to check if range bounds are literals or variables: variables - ## need the full reference chain prepended to them - range_start = member['range_start'] - if not re.match(r"[0-9]+$", range_start): - range_start = struct['name'] + "->" + member['parent'] + range_start - range_end = member['range_end'] - if not re.match(r"[0-9]+$", range_end): - range_end = struct['name'] + "->" + member['parent'] + range_end %> - - %if func_basename == "CommandBufferUpdateKernelLaunchExp": - std::vector& - pUpdateKernelLaunchpNewMemObjArgList = ppUpdateKernelLaunchpNewMemObjArgList[Offset]; - %else: - std::vector<${member['type']}> ${range_vector_name}; - %endif - for(uint32_t i = ${range_start}; i < ${range_end}; i++) { - ${member['type']} NewRangeStruct = 
${struct['name']}Local.${member['parent']}${member['name']}[i]; - %for handle_member in member['handle_members']: - %if handle_member['optional']: - if(NewRangeStruct.${handle_member['parent']}${handle_member['name']}) - %endif - NewRangeStruct.${handle_member['parent']}${handle_member['name']} = - reinterpret_cast<${handle_member['obj_name']}*>( - NewRangeStruct.${handle_member['parent']}${handle_member['name']}) - ->handle; - %endfor - - ${range_vector_name}.push_back(NewRangeStruct); - } - ${struct['name']}Local.${member['parent']}${member['name']} = ${range_vector_name}.data(); - ## If the member has range_start then its a range of handles - %elif 'range_start' in member: - ## we use the parent info stripped of derefs for a unique variable name - <% - parent_no_deref = th.strip_deref(member['parent']) - range_vector_name = struct['name'] + parent_no_deref + member['name'] %> - std::vector<${member['type']}> ${range_vector_name}; - for(uint32_t i = 0;i < ${struct['name']}->${member['parent']}${member['range_end']};i++) { - ${range_vector_name}.push_back(reinterpret_cast<${member['obj_name']}*>(${struct['name']}->${member['parent']}${member['name']}[i])->handle); - } - ${struct['name']}Local.${member['parent']}${member['name']} = ${range_vector_name}.data(); - %else: - %if member['optional']: - if(${struct['name']}Local.${member['parent']}${member['name']}) - %endif - ${struct['name']}Local.${member['parent']}${member['name']} = - reinterpret_cast<${member['obj_name']}*>( - ${struct['name']}Local.${member['parent']}${member['name']})->handle; - %endif - %endfor - %endfor - - %if func_basename == "CommandBufferUpdateKernelLaunchExp": - pUpdateKernelLaunchVector.push_back(pUpdateKernelLaunchLocal); - pUpdateKernelLaunch++; - } - pUpdateKernelLaunch = pUpdateKernelLaunchVector.data(); - %else: - // Now that we've converted all the members update the param pointers - %for struct in handle_structs: - %if struct['optional']: - if(${struct['name']}) - %endif - 
${struct['name']} = &${struct['name']}Local; - %endfor - %endif - %endif - // forward to device-platform - %if add_local: - result = ${th.make_pfn_name(n, tags, obj)}( ${", ".join(th.make_param_lines(n, tags, obj, format=["name", "local"], replacements=param_replacements))} ); - %else: - result = ${th.make_pfn_name(n, tags, obj)}( ${", ".join(th.make_param_lines(n, tags, obj, format=["name"]))} ); - %endif -<% - del param_replacements - del add_local - %> - %for i, item in enumerate(epilogue): - %if 0 == i and not item['release'] and not item['retain'] and not th.always_wrap_outputs(obj): - ## TODO: Remove once we have a concrete way for submitting warnings in place. - %if re.match(r"Enqueue\w+", func_basename): - // In the event of ERROR_ADAPTER_SPECIFIC we should still attempt to wrap any output handles below. - if( ${X}_RESULT_SUCCESS != result && ${X}_RESULT_ERROR_ADAPTER_SPECIFIC != result ) - return result; - %else: - if( ${X}_RESULT_SUCCESS != result) - return result; - - %endif - %endif - ## Possibly handle release/retain ref counting - there are no ur_exp-image factories - %if 'factory' in item and '_exp_image_' not in item['factory']: - %if item['release']: - // release loader handle - context->factories.${item['factory']}.release( ${item['name']} ); - %endif - %if item['retain']: - // increment refcount of handle - context->factories.${item['factory']}.retain( ${item['name']} ); - %endif - %endif - %if not item['release'] and not item['retain'] and not '_native_object_' in item['obj'] or th.make_func_name(n, tags, obj) == 'urPlatformCreateWithNativeHandle': - try - { - %if 'typename' in item: - if (${item['name']} != nullptr) { - switch (${item['typename']}) { - %for etor in item['etors']: - case ${etor['name']}: { - ${etor['type']} *handles = reinterpret_cast<${etor['type']} *>(${item['name']}); - size_t nelements = *pPropSizeRet / sizeof(${etor['type']}); - for (size_t i = 0; i < nelements; ++i) { - if (handles[i] != nullptr) { - handles[i] = 
reinterpret_cast<${etor['type']}>( - context->factories.${etor['factory']}.getInstance( handles[i], dditable ) ); - } - } - } break; - %endfor - default: {} break; - } - } - %elif 'range' in item: - // convert platform handles to loader handles - for( size_t i = ${item['range'][0]}; ( nullptr != ${item['name']} ) && ( i < ${item['range'][1]} ); ++i ) - ${item['name']}[ i ] = reinterpret_cast<${item['type']}>( - context->factories.${item['factory']}.getInstance( ${item['name']}[ i ], dditable ) ); - %else: - // convert platform handle to loader handle - %if item['optional'] or th.always_wrap_outputs(obj): - if( nullptr != ${item['name']} ) - *${item['name']} = reinterpret_cast<${item['type']}>( - context->factories.${item['factory']}.getInstance( *${item['name']}, dditable ) ); - %else: - *${item['name']} = reinterpret_cast<${item['type']}>( - context->factories.${item['factory']}.getInstance( *${item['name']}, dditable ) ); - %endif - %endif - } - catch( std::bad_alloc& ) - { - result = ${X}_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - %endif - - %endfor + return ${th.make_pfn_name(n, tags, obj)}( ${", ".join(th.make_param_lines(n, tags, obj, format=["name"]))} ); %endif - return result; } %if 'condition' in obj: #endif // ${th.subt(n, tags, obj['condition'])} @@ -430,7 +129,7 @@ ${tbl['export']['name']}( ur_loader::LibLoader::getFunctionPtr(platform.handle.get(), "${tbl['export']['name']}")); if(!getTable) continue; - platform.initStatus = getTable( version, &platform.dditable.${n}.${tbl['name']}); + platform.initStatus = getTable( version, &platform.dditable.${tbl['name']}); } if( ${X}_RESULT_SUCCESS == result ) @@ -453,7 +152,7 @@ ${tbl['export']['name']}( else { // return pointers directly to platform's DDIs - *pDdiTable = ur_loader::getContext()->platforms.front().dditable.${n}.${tbl['name']}; + *pDdiTable = ur_loader::getContext()->platforms.front().dditable.${tbl['name']}; } } diff --git a/unified-runtime/scripts/templates/ldrddi.hpp.mako 
b/unified-runtime/scripts/templates/ldrddi.hpp.mako deleted file mode 100644 index 6502504af221d..0000000000000 --- a/unified-runtime/scripts/templates/ldrddi.hpp.mako +++ /dev/null @@ -1,57 +0,0 @@ -<%! -import re -from templates import helper as th -%><% - n=namespace - N=n.upper() - - x=tags['$x'] - X=x.upper() -%>/* - * - * Copyright (C) 2022-2023 Intel Corporation - * - * Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM - * Exceptions. - * See LICENSE.TXT - * - * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - * - * @file ${name}.hpp - * - */ -#ifndef UR_LOADER_LDRDDI_H -#define UR_LOADER_LDRDDI_H 1 - -#include "${x}_object.hpp" -#include "${x}_singleton.hpp" - -namespace ur_loader -{ - /////////////////////////////////////////////////////////////////////////////// - <% - factories = [] - %> - %for obj in th.get_adapter_handles(specs): - %if 'class' in obj: - <% - _handle_t = th.subt(n, tags, obj['name']) - _object_t = re.sub(r"(\w+)_handle_t", r"\1_object_t", _handle_t) - _factory_t = re.sub(r"(\w+)_handle_t", r"\1_factory_t", _handle_t) - _factory = re.sub(r"(\w+)_handle_t", r"\1_factory", _handle_t) - factories.append((_factory_t, _factory)) - %>using ${th.append_ws(_object_t, 35)} = object_t < ${_handle_t} >; - using ${th.append_ws(_factory_t, 35)} = singleton_factory_t < ${_object_t}, ${_handle_t} >; - - %endif - %endfor - - struct handle_factories { - %for (f_t, f) in factories: - ${f_t} ${f}; - %endfor - }; - -} - -#endif /* UR_LOADER_LDRDDI_H */ diff --git a/unified-runtime/scripts/templates/ur_interface_loader.cpp.mako b/unified-runtime/scripts/templates/ur_interface_loader.cpp.mako index 125460be789c1..830e0eb6154c1 100644 --- a/unified-runtime/scripts/templates/ur_interface_loader.cpp.mako +++ b/unified-runtime/scripts/templates/ur_interface_loader.cpp.mako @@ -20,6 +20,7 @@ from templates import helper as th //===----------------------------------------------------------------------===// #include <${n}_api.h> 
#include <${n}_ddi.h> +#include #include "ur_interface_loader.hpp" @@ -68,22 +69,45 @@ ${X}_APIEXPORT ${x}_result_t ${X}_APICALL ${tbl['export']['name']}( } // extern "C" #endif -#ifdef UR_STATIC_ADAPTER_${Adapter} -namespace ur::${adapter} { -ur_result_t urAdapterGetDdiTables(ur_dditable_t *ddi) { - if (ddi == nullptr) { +namespace { +ur_result_t populateDdiTable(ur_dditable_t *ddi) { + if (ddi == nullptr) { return UR_RESULT_ERROR_INVALID_NULL_POINTER; } ur_result_t result; +#ifdef UR_STATIC_ADAPTER_${Adapter} +#define NAMESPACE_ ::ur::${adapter} +#else +#define NAMESPACE_ +#endif + %for tbl in th.get_pfntables(specs, meta, n, tags): - result = ${n}::${adapter}::${tbl['export']['name']}( ${X}_API_VERSION_CURRENT, &ddi->${tbl['name']} ); + result = NAMESPACE_::${tbl['export']['name']}( ${X}_API_VERSION_CURRENT, &ddi->${tbl['name']} ); if (result != UR_RESULT_SUCCESS) return result; %endfor +#undef NAMESPACE_ + return result; } } + + +namespace ur::${adapter} { +const ${x}_dditable_t *ddi_getter::value() { + static std::once_flag flag; + static ${x}_dditable_t table; + + std::call_once(flag, []() { populateDdiTable(&table); }); + return &table; +} + +#ifdef UR_STATIC_ADAPTER_${Adapter} +ur_result_t urAdapterGetDdiTables(${x}_dditable_t *ddi) { + return populateDdiTable(ddi); +} #endif +} diff --git a/unified-runtime/scripts/templates/ur_interface_loader.hpp.mako b/unified-runtime/scripts/templates/ur_interface_loader.hpp.mako index 48bcec4794038..2ebc2949f44dd 100644 --- a/unified-runtime/scripts/templates/ur_interface_loader.hpp.mako +++ b/unified-runtime/scripts/templates/ur_interface_loader.hpp.mako @@ -18,6 +18,8 @@ from templates import helper as th // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// +#pragma once + #include <${n}_api.h> #include <${n}_ddi.h> @@ -36,4 +38,8 @@ ${x}_result_t ${th.make_func_name(n, tags, obj)}( #ifdef UR_STATIC_ADAPTER_LEVEL_ZERO 
ur_result_t urAdapterGetDdiTables(ur_dditable_t *ddi); #endif + +struct ddi_getter { + const static ${x}_dditable_t *value(); +}; } diff --git a/unified-runtime/source/adapters/cuda/adapter.cpp b/unified-runtime/source/adapters/cuda/adapter.cpp index 2983e98de48b7..6fe11cc5f9061 100644 --- a/unified-runtime/source/adapters/cuda/adapter.cpp +++ b/unified-runtime/source/adapters/cuda/adapter.cpp @@ -38,7 +38,8 @@ class ur_legacy_sink : public logger::Sink { // through UR entry points. // https://github.com/oneapi-src/unified-runtime/issues/1330 ur_adapter_handle_t_::ur_adapter_handle_t_() - : logger(logger::get_logger("cuda", + : handle_base(), + logger(logger::get_logger("cuda", /*default_log_level*/ UR_LOGGER_LEVEL_ERROR)) { Platform = std::make_unique(); diff --git a/unified-runtime/source/adapters/cuda/adapter.hpp b/unified-runtime/source/adapters/cuda/adapter.hpp index 2f6ff157ae9d1..7bb5988943437 100644 --- a/unified-runtime/source/adapters/cuda/adapter.hpp +++ b/unified-runtime/source/adapters/cuda/adapter.hpp @@ -19,7 +19,7 @@ #include #include -struct ur_adapter_handle_t_ { +struct ur_adapter_handle_t_ : public ur::cuda::handle_base { std::atomic RefCount = 0; struct cuda_tracing_context_t_ *TracingCtx = nullptr; logger::Logger &logger; diff --git a/unified-runtime/source/adapters/cuda/command_buffer.cpp b/unified-runtime/source/adapters/cuda/command_buffer.cpp index 59e8e35291060..8aa68cc2269ae 100644 --- a/unified-runtime/source/adapters/cuda/command_buffer.cpp +++ b/unified-runtime/source/adapters/cuda/command_buffer.cpp @@ -100,16 +100,12 @@ ur_result_t ur_exp_command_buffer_handle_t_::addWaitNodes( return Err; } -kernel_command_handle::kernel_command_handle( - ur_exp_command_buffer_handle_t CommandBuffer, ur_kernel_handle_t Kernel, - CUgraphNode Node, CUDA_KERNEL_NODE_PARAMS Params, uint32_t WorkDim, +kernel_command_data::kernel_command_data( + ur_kernel_handle_t Kernel, CUDA_KERNEL_NODE_PARAMS Params, uint32_t WorkDim, const size_t 
*GlobalWorkOffsetPtr, const size_t *GlobalWorkSizePtr, const size_t *LocalWorkSizePtr, uint32_t NumKernelAlternatives, - ur_kernel_handle_t *KernelAlternatives, CUgraphNode SignalNode, - const std::vector &WaitNodes) - : ur_exp_command_buffer_command_handle_t_(CommandBuffer, Node, SignalNode, - WaitNodes), - Kernel(Kernel), Params(Params), WorkDim(WorkDim) { + ur_kernel_handle_t *KernelAlternatives) + : Kernel(Kernel), Params(Params), WorkDim(WorkDim) { const size_t CopySize = sizeof(size_t) * WorkDim; std::memcpy(GlobalWorkOffset, GlobalWorkOffsetPtr, CopySize); std::memcpy(GlobalWorkSize, GlobalWorkSizePtr, CopySize); @@ -191,8 +187,8 @@ static void setCopyParams(const void *SrcPtr, const CUmemorytype_enum SrcType, } // Helper function for enqueuing memory fills. Templated on the CommandType -// enum class for the type of fill being created. -template +// variant for the type of fill being created. +template static ur_result_t enqueueCommandBufferFillHelper( ur_exp_command_buffer_handle_t CommandBuffer, void *DstDevice, const CUmemorytype_enum DstType, const void *Pattern, size_t PatternSize, @@ -331,8 +327,9 @@ static ur_result_t enqueueCommandBufferFillHelper( std::vector WaitNodes = NumEventsInWaitList ? std::move(DepsList) : std::vector(); - auto NewCommand = std::make_unique(CommandBuffer, GraphNode, SignalNode, - WaitNodes, std::move(DecomposedNodes)); + auto NewCommand = std::make_unique( + CT, CommandBuffer, GraphNode, SignalNode, WaitNodes, + fill_command_data{std::move(DecomposedNodes)}); if (RetCommand) { *RetCommand = NewCommand.get(); } @@ -528,10 +525,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( std::vector WaitNodes = numEventsInWaitList ? 
std::move(DepsList) : std::vector(); - auto NewCommand = std::make_unique( - hCommandBuffer, hKernel, GraphNode, NodeParams, workDim, - pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize, - numKernelAlternatives, phKernelAlternatives, SignalNode, WaitNodes); + auto KernelData = kernel_command_data{hKernel, + NodeParams, + workDim, + pGlobalWorkOffset, + pGlobalWorkSize, + pLocalWorkSize, + numKernelAlternatives, + phKernelAlternatives}; + auto NewCommand = std::make_unique( + CommandType::Kernel, hCommandBuffer, GraphNode, SignalNode, WaitNodes, + KernelData); if (phCommand) { *phCommand = NewCommand.get(); @@ -585,8 +589,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( std::vector WaitNodes = numEventsInWaitList ? std::move(DepsList) : std::vector(); - auto NewCommand = std::make_unique( - hCommandBuffer, GraphNode, SignalNode, WaitNodes); + auto NewCommand = std::make_unique( + CommandType::USMMemcpy, hCommandBuffer, GraphNode, SignalNode, WaitNodes); if (phCommand) { *phCommand = NewCommand.get(); } @@ -650,8 +654,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( std::vector WaitNodes = numEventsInWaitList ? std::move(DepsList) : std::vector(); - auto NewCommand = std::make_unique( - hCommandBuffer, GraphNode, SignalNode, WaitNodes); + auto NewCommand = std::make_unique( + CommandType::MemBufferCopy, hCommandBuffer, GraphNode, SignalNode, + WaitNodes); if (phCommand) { *phCommand = NewCommand.get(); @@ -713,8 +718,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( std::vector WaitNodes = numEventsInWaitList ? 
std::move(DepsList) : std::vector(); - auto NewCommand = std::make_unique( - hCommandBuffer, GraphNode, SignalNode, WaitNodes); + auto NewCommand = std::make_unique( + CommandType::MemBufferCopyRect, hCommandBuffer, GraphNode, SignalNode, + WaitNodes); if (phCommand) { *phCommand = NewCommand.get(); @@ -772,8 +778,9 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( std::vector WaitNodes = numEventsInWaitList ? std::move(DepsList) : std::vector(); - auto NewCommand = std::make_unique( - hCommandBuffer, GraphNode, SignalNode, WaitNodes); + auto NewCommand = std::make_unique( + CommandType::MemBufferWrite, hCommandBuffer, GraphNode, SignalNode, + WaitNodes); if (phCommand) { *phCommand = NewCommand.get(); } @@ -829,8 +836,9 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( std::vector WaitNodes = numEventsInWaitList ? std::move(DepsList) : std::vector(); - auto NewCommand = std::make_unique( - hCommandBuffer, GraphNode, SignalNode, WaitNodes); + auto NewCommand = std::make_unique( + CommandType::MemBufferRead, hCommandBuffer, GraphNode, SignalNode, + WaitNodes); if (phCommand) { *phCommand = NewCommand.get(); } @@ -890,8 +898,9 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( std::vector WaitNodes = numEventsInWaitList ? std::move(DepsList) : std::vector(); - auto NewCommand = std::make_unique( - hCommandBuffer, GraphNode, SignalNode, WaitNodes); + auto NewCommand = std::make_unique( + CommandType::MemBufferWriteRect, hCommandBuffer, GraphNode, SignalNode, + WaitNodes); if (phCommand) { *phCommand = NewCommand.get(); @@ -952,8 +961,9 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( std::vector WaitNodes = numEventsInWaitList ? 
std::move(DepsList) : std::vector(); - auto NewCommand = std::make_unique( - hCommandBuffer, GraphNode, SignalNode, WaitNodes); + auto NewCommand = std::make_unique( + CommandType::MemBufferReadRect, hCommandBuffer, GraphNode, SignalNode, + WaitNodes); if (phCommand) { *phCommand = NewCommand.get(); @@ -1006,8 +1016,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( std::vector WaitNodes = numEventsInWaitList ? std::move(DepsList) : std::vector(); - auto NewCommand = std::make_unique( - hCommandBuffer, GraphNode, SignalNode, WaitNodes); + auto NewCommand = std::make_unique( + CommandType::USMPrefetch, hCommandBuffer, GraphNode, SignalNode, + WaitNodes); if (phCommand) { *phCommand = NewCommand.get(); @@ -1060,8 +1071,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( std::vector WaitNodes = numEventsInWaitList ? std::move(DepsList) : std::vector(); - auto NewCommand = std::make_unique( - hCommandBuffer, GraphNode, SignalNode, WaitNodes); + auto NewCommand = std::make_unique( + CommandType::USMAdvise, hCommandBuffer, GraphNode, SignalNode, WaitNodes); if (phCommand) { *phCommand = NewCommand.get(); @@ -1096,7 +1107,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( auto DstDevice = std::get(hBuffer->Mem) .getPtrWithOffset(hCommandBuffer->Device, offset); - return enqueueCommandBufferFillHelper( + return enqueueCommandBufferFillHelper( hCommandBuffer, &DstDevice, CU_MEMORYTYPE_DEVICE, pPattern, patternSize, size, numSyncPointsInWaitList, pSyncPointWaitList, numEventsInWaitList, phEventWaitList, pSyncPoint, phEvent, phCommand); @@ -1116,7 +1127,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( (patternSize > 0); // is a positive power of two UR_ASSERT(PatternIsValid && PatternSizeIsValid, UR_RESULT_ERROR_INVALID_SIZE); - return enqueueCommandBufferFillHelper( + return enqueueCommandBufferFillHelper( hCommandBuffer, pPtr, CU_MEMORYTYPE_UNIFIED, pPattern, 
patternSize, size, numSyncPointsInWaitList, pSyncPointWaitList, numEventsInWaitList, phEventWaitList, pSyncPoint, phEvent, phCommand); @@ -1165,12 +1176,12 @@ ur_result_t validateCommandDesc(ur_exp_command_buffer_handle_t CommandBuffer, const ur_exp_command_buffer_update_kernel_launch_desc_t &UpdateCommandDesc) { - if (UpdateCommandDesc.hCommand->getCommandType() != CommandType::Kernel) { + if (UpdateCommandDesc.hCommand->Type != CommandType::Kernel) { return UR_RESULT_ERROR_INVALID_VALUE; } - auto Command = - static_cast(UpdateCommandDesc.hCommand); + auto *Command = UpdateCommandDesc.hCommand; + auto &KernelData = std::get(Command->CommandData); if (CommandBuffer != Command->CommandBuffer) { return UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP; } @@ -1180,14 +1191,14 @@ validateCommandDesc(ur_exp_command_buffer_handle_t CommandBuffer, return UR_RESULT_ERROR_INVALID_OPERATION; } - if (UpdateCommandDesc.newWorkDim != Command->WorkDim && + if (UpdateCommandDesc.newWorkDim != KernelData.WorkDim && (!UpdateCommandDesc.pNewGlobalWorkOffset || !UpdateCommandDesc.pNewGlobalWorkSize)) { return UR_RESULT_ERROR_INVALID_VALUE; } if (UpdateCommandDesc.hNewKernel && - !Command->ValidKernelHandles.count(UpdateCommandDesc.hNewKernel)) { + !KernelData.ValidKernelHandles.count(UpdateCommandDesc.hNewKernel)) { return UR_RESULT_ERROR_INVALID_VALUE; } return UR_RESULT_SUCCESS; @@ -1202,9 +1213,9 @@ validateCommandDesc(ur_exp_command_buffer_handle_t CommandBuffer, ur_result_t updateKernelArguments(const ur_exp_command_buffer_update_kernel_launch_desc_t &UpdateCommandDesc) { - auto Command = - static_cast(UpdateCommandDesc.hCommand); - ur_kernel_handle_t Kernel = Command->Kernel; + auto *Command = UpdateCommandDesc.hCommand; + auto &KernelData = std::get(Command->CommandData); + ur_kernel_handle_t Kernel = KernelData.Kernel; ur_device_handle_t Device = Command->CommandBuffer->Device; // Update pointer arguments to the kernel @@ -1284,29 +1295,29 @@ 
updateKernelArguments(const ur_exp_command_buffer_update_kernel_launch_desc_t ur_result_t updateCommand(const ur_exp_command_buffer_update_kernel_launch_desc_t &UpdateCommandDesc) { - auto Command = - static_cast(UpdateCommandDesc.hCommand); + auto *Command = UpdateCommandDesc.hCommand; + auto &KernelData = std::get(Command->CommandData); if (UpdateCommandDesc.hNewKernel) { - Command->Kernel = UpdateCommandDesc.hNewKernel; + KernelData.Kernel = UpdateCommandDesc.hNewKernel; } if (UpdateCommandDesc.newWorkDim) { - Command->WorkDim = UpdateCommandDesc.newWorkDim; + KernelData.WorkDim = UpdateCommandDesc.newWorkDim; } if (UpdateCommandDesc.pNewGlobalWorkOffset) { - Command->setGlobalOffset(UpdateCommandDesc.pNewGlobalWorkOffset); + KernelData.setGlobalOffset(UpdateCommandDesc.pNewGlobalWorkOffset); } if (UpdateCommandDesc.pNewGlobalWorkSize) { - Command->setGlobalSize(UpdateCommandDesc.pNewGlobalWorkSize); + KernelData.setGlobalSize(UpdateCommandDesc.pNewGlobalWorkSize); if (!UpdateCommandDesc.pNewLocalWorkSize) { - Command->setNullLocalSize(); + KernelData.setNullLocalSize(); } } if (UpdateCommandDesc.pNewLocalWorkSize) { - Command->setLocalSize(UpdateCommandDesc.pNewLocalWorkSize); + KernelData.setLocalSize(UpdateCommandDesc.pNewLocalWorkSize); } return UR_RESULT_SUCCESS; @@ -1334,27 +1345,27 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( // If no work-size is provided make sure we pass nullptr to setKernelParams // so it can guess the local work size. - auto KernelCommandHandle = - static_cast(UpdateCommandDesc.hCommand); - const bool ProvidedLocalSize = !KernelCommandHandle->isNullLocalSize(); + auto *KernelCommandHandle = UpdateCommandDesc.hCommand; + auto &KernelData = + std::get(KernelCommandHandle->CommandData); + const bool ProvidedLocalSize = !KernelData.isNullLocalSize(); size_t *LocalWorkSize = - ProvidedLocalSize ? KernelCommandHandle->LocalWorkSize : nullptr; + ProvidedLocalSize ? 
KernelData.LocalWorkSize : nullptr; // Set the number of threads per block to the number of threads per warp // by default unless user has provided a better number. size_t ThreadsPerBlock[3] = {32u, 1u, 1u}; size_t BlocksPerGrid[3] = {1u, 1u, 1u}; - CUfunction CuFunc = KernelCommandHandle->Kernel->get(); + CUfunction CuFunc = KernelData.Kernel->get(); auto Result = setKernelParams( - hCommandBuffer->Context, hCommandBuffer->Device, - KernelCommandHandle->WorkDim, KernelCommandHandle->GlobalWorkOffset, - KernelCommandHandle->GlobalWorkSize, LocalWorkSize, - KernelCommandHandle->Kernel, CuFunc, ThreadsPerBlock, BlocksPerGrid); + hCommandBuffer->Context, hCommandBuffer->Device, KernelData.WorkDim, + KernelData.GlobalWorkOffset, KernelData.GlobalWorkSize, LocalWorkSize, + KernelData.Kernel, CuFunc, ThreadsPerBlock, BlocksPerGrid); if (Result != UR_RESULT_SUCCESS) { return Result; } - CUDA_KERNEL_NODE_PARAMS &Params = KernelCommandHandle->Params; + CUDA_KERNEL_NODE_PARAMS &Params = KernelData.Params; Params.func = CuFunc; Params.gridDimX = BlocksPerGrid[0]; @@ -1363,9 +1374,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( Params.blockDimX = ThreadsPerBlock[0]; Params.blockDimY = ThreadsPerBlock[1]; Params.blockDimZ = ThreadsPerBlock[2]; - Params.sharedMemBytes = KernelCommandHandle->Kernel->getLocalSize(); - Params.kernelParams = const_cast( - KernelCommandHandle->Kernel->getArgPointers().data()); + Params.sharedMemBytes = KernelData.Kernel->getLocalSize(); + Params.kernelParams = + const_cast(KernelData.Kernel->getArgPointers().data()); CUgraphNode Node = KernelCommandHandle->Node; CUgraphExec CudaGraphExec = hCommandBuffer->CudaGraphExec; diff --git a/unified-runtime/source/adapters/cuda/command_buffer.hpp b/unified-runtime/source/adapters/cuda/command_buffer.hpp index 3a5c3aed361e7..d6273c325235a 100644 --- a/unified-runtime/source/adapters/cuda/command_buffer.hpp +++ b/unified-runtime/source/adapters/cuda/command_buffer.hpp @@ -33,44 
+33,15 @@ enum class CommandType { USMAdvise }; -// Command handle that can be returned from command append entry-points. -// Implemented as an abstract base class that handles for the specific -// command types derive from. -struct ur_exp_command_buffer_command_handle_t_ { - ur_exp_command_buffer_command_handle_t_( - ur_exp_command_buffer_handle_t CommandBuffer, CUgraphNode Node, - CUgraphNode SignalNode, const std::vector &WaitNodes) - : CommandBuffer(CommandBuffer), Node(Node), SignalNode(SignalNode), - WaitNodes(WaitNodes) {} - - virtual ~ur_exp_command_buffer_command_handle_t_() {} - - virtual CommandType getCommandType() const noexcept = 0; - - // Parent UR command-buffer. - ur_exp_command_buffer_handle_t CommandBuffer; - // Node created in graph for the command. - CUgraphNode Node; - // An optional EventRecordNode that's a successor of Node to signal - // dependent commands outwith the command-buffer. - CUgraphNode SignalNode; - // Optional list of EventWait Nodes to wait on commands from outside of the - // command-buffer. 
- std::vector WaitNodes; -}; - -struct kernel_command_handle : ur_exp_command_buffer_command_handle_t_ { - kernel_command_handle( - ur_exp_command_buffer_handle_t CommandBuffer, ur_kernel_handle_t Kernel, - CUgraphNode Node, CUDA_KERNEL_NODE_PARAMS Params, uint32_t WorkDim, - const size_t *GlobalWorkOffsetPtr, const size_t *GlobalWorkSizePtr, - const size_t *LocalWorkSizePtr, uint32_t NumKernelAlternatives, - ur_kernel_handle_t *KernelAlternatives, CUgraphNode SignalNode, - const std::vector &WaitNodes); +struct null_command_data {}; - CommandType getCommandType() const noexcept override { - return CommandType::Kernel; - } +struct kernel_command_data { + kernel_command_data(ur_kernel_handle_t Kernel, CUDA_KERNEL_NODE_PARAMS Params, + uint32_t WorkDim, const size_t *GlobalWorkOffsetPtr, + const size_t *GlobalWorkSizePtr, + const size_t *LocalWorkSizePtr, + uint32_t NumKernelAlternatives, + ur_kernel_handle_t *KernelAlternatives); void setGlobalOffset(const size_t *GlobalWorkOffsetPtr) { const size_t CopySize = sizeof(size_t) * WorkDim; @@ -122,142 +93,39 @@ struct kernel_command_handle : ur_exp_command_buffer_command_handle_t_ { size_t LocalWorkSize[3]; }; -struct usm_memcpy_command_handle : ur_exp_command_buffer_command_handle_t_ { - usm_memcpy_command_handle(ur_exp_command_buffer_handle_t CommandBuffer, - CUgraphNode Node, CUgraphNode SignalNode, - const std::vector &WaitNodes) - : ur_exp_command_buffer_command_handle_t_(CommandBuffer, Node, SignalNode, - WaitNodes) {} - CommandType getCommandType() const noexcept override { - return CommandType::USMMemcpy; - } -}; - -struct usm_fill_command_handle : ur_exp_command_buffer_command_handle_t_ { - usm_fill_command_handle(ur_exp_command_buffer_handle_t CommandBuffer, - CUgraphNode Node, CUgraphNode SignalNode, - const std::vector &WaitNodes, - const std::vector &DecomposedNodes = {}) - : ur_exp_command_buffer_command_handle_t_(CommandBuffer, Node, SignalNode, - WaitNodes), - DecomposedNodes(std::move(DecomposedNodes)) 
{} - CommandType getCommandType() const noexcept override { - return CommandType::USMFill; - } - - // If this fill command was decomposed into multiple nodes, this vector - // contains all of those nodes in the order they were added to the graph. - // Currently unused but will be required for updating in future. +struct fill_command_data { std::vector DecomposedNodes; }; -struct buffer_copy_command_handle : ur_exp_command_buffer_command_handle_t_ { - buffer_copy_command_handle(ur_exp_command_buffer_handle_t CommandBuffer, - CUgraphNode Node, CUgraphNode SignalNode, - const std::vector &WaitNodes) - : ur_exp_command_buffer_command_handle_t_(CommandBuffer, Node, SignalNode, - WaitNodes) {} - CommandType getCommandType() const noexcept override { - return CommandType::MemBufferCopy; - } -}; - -struct buffer_copy_rect_command_handle - : ur_exp_command_buffer_command_handle_t_ { - buffer_copy_rect_command_handle(ur_exp_command_buffer_handle_t CommandBuffer, - CUgraphNode Node, CUgraphNode SignalNode, - const std::vector &WaitNodes) - : ur_exp_command_buffer_command_handle_t_(CommandBuffer, Node, SignalNode, - WaitNodes) {} - CommandType getCommandType() const noexcept override { - return CommandType::MemBufferCopyRect; - } -}; - -struct buffer_read_command_handle : ur_exp_command_buffer_command_handle_t_ { - buffer_read_command_handle(ur_exp_command_buffer_handle_t CommandBuffer, - CUgraphNode Node, CUgraphNode SignalNode, - const std::vector &WaitNodes) - : ur_exp_command_buffer_command_handle_t_(CommandBuffer, Node, SignalNode, - WaitNodes) {} - CommandType getCommandType() const noexcept override { - return CommandType::MemBufferRead; - } -}; - -struct buffer_read_rect_command_handle - : ur_exp_command_buffer_command_handle_t_ { - buffer_read_rect_command_handle(ur_exp_command_buffer_handle_t CommandBuffer, - CUgraphNode Node, CUgraphNode SignalNode, - const std::vector &WaitNodes) - : ur_exp_command_buffer_command_handle_t_(CommandBuffer, Node, SignalNode, - 
WaitNodes) {} - CommandType getCommandType() const noexcept override { - return CommandType::MemBufferReadRect; - } -}; - -struct buffer_write_command_handle : ur_exp_command_buffer_command_handle_t_ { - buffer_write_command_handle(ur_exp_command_buffer_handle_t CommandBuffer, - CUgraphNode Node, CUgraphNode SignalNode, - const std::vector &WaitNodes) - : ur_exp_command_buffer_command_handle_t_(CommandBuffer, Node, SignalNode, - WaitNodes) {} - CommandType getCommandType() const noexcept override { - return CommandType::MemBufferWrite; - } -}; - -struct buffer_write_rect_command_handle - : ur_exp_command_buffer_command_handle_t_ { - buffer_write_rect_command_handle(ur_exp_command_buffer_handle_t CommandBuffer, - CUgraphNode Node, CUgraphNode SignalNode, - const std::vector &WaitNodes) - : ur_exp_command_buffer_command_handle_t_(CommandBuffer, Node, SignalNode, - WaitNodes) {} - CommandType getCommandType() const noexcept override { - return CommandType::MemBufferWriteRect; - } -}; - -struct buffer_fill_command_handle : ur_exp_command_buffer_command_handle_t_ { - buffer_fill_command_handle( - ur_exp_command_buffer_handle_t CommandBuffer, CUgraphNode Node, - CUgraphNode SignalNode, const std::vector &WaitNodes, - const std::vector &DecomposedNodes = {}) - : ur_exp_command_buffer_command_handle_t_(CommandBuffer, Node, SignalNode, - WaitNodes), - DecomposedNodes(std::move(DecomposedNodes)) {} - CommandType getCommandType() const noexcept override { - return CommandType::MemBufferFill; - } +// Command handle that can be returned from command append entry-points. +// The type of the command is specified by a CommandType field, with +// additional command-type-specific data stored in the CommandData enum. +struct ur_exp_command_buffer_command_handle_t_ : ur::cuda::handle_base { + using command_data_type_t = + std::variant; - // If this fill command was decomposed into multiple nodes, this vector - // contains all of those nodes in the order they were added to the graph. 
- // Currently unused but will be required for updating in future. - std::vector DecomposedNodes; -}; + ur_exp_command_buffer_command_handle_t_( + CommandType Type, ur_exp_command_buffer_handle_t CommandBuffer, + CUgraphNode Node, CUgraphNode SignalNode, + const std::vector &WaitNodes, + command_data_type_t Data = null_command_data{}) + : handle_base(), CommandBuffer(CommandBuffer), Node(Node), + SignalNode(SignalNode), WaitNodes(WaitNodes), Type(Type), + CommandData(Data) {} -struct usm_prefetch_command_handle : ur_exp_command_buffer_command_handle_t_ { - usm_prefetch_command_handle(ur_exp_command_buffer_handle_t CommandBuffer, - CUgraphNode Node, CUgraphNode SignalNode, - const std::vector &WaitNodes) - : ur_exp_command_buffer_command_handle_t_(CommandBuffer, Node, SignalNode, - WaitNodes) {} - CommandType getCommandType() const noexcept override { - return CommandType::USMPrefetch; - } -}; + // Parent UR command-buffer. + ur_exp_command_buffer_handle_t CommandBuffer; + // Node created in graph for the command. + CUgraphNode Node; + // An optional EventRecordNode that's a successor of Node to signal + // dependent commands outwith the command-buffer. + CUgraphNode SignalNode; + // Optional list of EventWait Nodes to wait on commands from outside of the + // command-buffer. 
+ std::vector WaitNodes; -struct usm_advise_command_handle : ur_exp_command_buffer_command_handle_t_ { - usm_advise_command_handle(ur_exp_command_buffer_handle_t CommandBuffer, - CUgraphNode Node, CUgraphNode SignalNode, - const std::vector &WaitNodes) - : ur_exp_command_buffer_command_handle_t_(CommandBuffer, Node, SignalNode, - WaitNodes) {} - CommandType getCommandType() const noexcept override { - return CommandType::USMAdvise; - } + CommandType Type; + command_data_type_t CommandData; }; struct ur_exp_command_buffer_handle_t_ { diff --git a/unified-runtime/source/adapters/cuda/common.hpp b/unified-runtime/source/adapters/cuda/common.hpp index 6838024f6be34..cc44e71229123 100644 --- a/unified-runtime/source/adapters/cuda/common.hpp +++ b/unified-runtime/source/adapters/cuda/common.hpp @@ -89,3 +89,10 @@ inline umf_result_t setCUMemoryProviderParams( } } // namespace umf + +namespace ur::cuda { +struct ddi_getter { + const static ur_dditable_t *value(); +}; +using handle_base = ur::handle_base; +} // namespace ur::cuda diff --git a/unified-runtime/source/adapters/cuda/context.hpp b/unified-runtime/source/adapters/cuda/context.hpp index d22b2b5442201..39075309f5cf8 100644 --- a/unified-runtime/source/adapters/cuda/context.hpp +++ b/unified-runtime/source/adapters/cuda/context.hpp @@ -110,7 +110,7 @@ CreateHostMemoryProviderPool(ur_device_handle_t_ *DeviceHandle, return UR_RESULT_SUCCESS; } -struct ur_context_handle_t_ { +struct ur_context_handle_t_ : ur::cuda::handle_base { struct deleter_data { ur_context_extended_deleter_t Function; @@ -128,7 +128,7 @@ struct ur_context_handle_t_ { umf_memory_pool_handle_t MemoryPoolHost = nullptr; ur_context_handle_t_(const ur_device_handle_t *Devs, uint32_t NumDevices) - : Devices{Devs, Devs + NumDevices}, RefCount{1} { + : handle_base(), Devices{Devs, Devs + NumDevices}, RefCount{1} { // Create UMF CUDA memory provider for the host memory // (UMF_MEMORY_TYPE_HOST) from any device (Devices[0] is used here, because // it is 
guaranteed to exist). diff --git a/unified-runtime/source/adapters/cuda/device.hpp b/unified-runtime/source/adapters/cuda/device.hpp index d5716a839b9d6..5796028b833a6 100644 --- a/unified-runtime/source/adapters/cuda/device.hpp +++ b/unified-runtime/source/adapters/cuda/device.hpp @@ -16,7 +16,7 @@ #include "common.hpp" -struct ur_device_handle_t_ { +struct ur_device_handle_t_ : ur::cuda::handle_base { private: using native_type = CUdevice; @@ -42,8 +42,8 @@ struct ur_device_handle_t_ { public: ur_device_handle_t_(native_type cuDevice, CUcontext cuContext, CUevent evBase, ur_platform_handle_t platform, uint32_t DevIndex) - : CuDevice(cuDevice), CuContext(cuContext), EvBase(evBase), RefCount{1}, - Platform(platform), DeviceIndex{DevIndex} { + : handle_base(), CuDevice(cuDevice), CuContext(cuContext), EvBase(evBase), + RefCount{1}, Platform(platform), DeviceIndex{DevIndex} { UR_CHECK_ERROR(cuDeviceGetAttribute( &MaxRegsPerBlock, CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK, cuDevice)); diff --git a/unified-runtime/source/adapters/cuda/event.cpp b/unified-runtime/source/adapters/cuda/event.cpp index dd7e961db79a2..fb419ee64b9b8 100644 --- a/unified-runtime/source/adapters/cuda/event.cpp +++ b/unified-runtime/source/adapters/cuda/event.cpp @@ -24,7 +24,7 @@ ur_event_handle_t_::ur_event_handle_t_(ur_command_t Type, native_type EvEnd, native_type EvQueued, native_type EvStart, CUstream Stream, uint32_t StreamToken) - : CommandType{Type}, RefCount{1}, HasOwnership{true}, + : handle_base(), CommandType{Type}, RefCount{1}, HasOwnership{true}, HasBeenWaitedOn{false}, IsRecorded{false}, IsStarted{false}, StreamToken{StreamToken}, EventID{0}, EvEnd{EvEnd}, EvStart{EvStart}, EvQueued{EvQueued}, Queue{Queue}, Stream{Stream}, Context{Context} { @@ -34,11 +34,12 @@ ur_event_handle_t_::ur_event_handle_t_(ur_command_t Type, ur_event_handle_t_::ur_event_handle_t_(ur_context_handle_t Context, CUevent EventNative) - : CommandType{UR_COMMAND_EVENTS_WAIT}, RefCount{1}, 
HasOwnership{false}, - HasBeenWaitedOn{false}, IsRecorded{false}, IsStarted{false}, - IsInterop{true}, StreamToken{std::numeric_limits::max()}, - EventID{0}, EvEnd{EventNative}, EvStart{nullptr}, EvQueued{nullptr}, - Queue{nullptr}, Stream{nullptr}, Context{Context} { + : handle_base(), CommandType{UR_COMMAND_EVENTS_WAIT}, RefCount{1}, + HasOwnership{false}, HasBeenWaitedOn{false}, IsRecorded{false}, + IsStarted{false}, IsInterop{true}, + StreamToken{std::numeric_limits::max()}, EventID{0}, + EvEnd{EventNative}, EvStart{nullptr}, EvQueued{nullptr}, Queue{nullptr}, + Stream{nullptr}, Context{Context} { urContextRetain(Context); } diff --git a/unified-runtime/source/adapters/cuda/event.hpp b/unified-runtime/source/adapters/cuda/event.hpp index 10b52799a90f3..1cd8579bf4374 100644 --- a/unified-runtime/source/adapters/cuda/event.hpp +++ b/unified-runtime/source/adapters/cuda/event.hpp @@ -17,7 +17,7 @@ /// UR Event mapping to CUevent /// -struct ur_event_handle_t_ { +struct ur_event_handle_t_ : ur::cuda::handle_base { public: using native_type = CUevent; diff --git a/unified-runtime/source/adapters/cuda/kernel.hpp b/unified-runtime/source/adapters/cuda/kernel.hpp index 36ae261ed72bd..e7b9a4e016abf 100644 --- a/unified-runtime/source/adapters/cuda/kernel.hpp +++ b/unified-runtime/source/adapters/cuda/kernel.hpp @@ -34,7 +34,7 @@ /// A compiler pass converts the UR API local memory model into the /// CUDA shared model. This object simply calculates the total of /// shared memory, and the initial offsets of each parameter. 
-struct ur_kernel_handle_t_ { +struct ur_kernel_handle_t_ : ur::cuda::handle_base { using native_type = CUfunction; native_type Function; @@ -253,8 +253,9 @@ struct ur_kernel_handle_t_ { ur_kernel_handle_t_(CUfunction Func, CUfunction FuncWithOffsetParam, const char *Name, ur_program_handle_t Program, ur_context_handle_t Context) - : Function{Func}, FunctionWithOffsetParam{FuncWithOffsetParam}, - Name{Name}, Context{Context}, Program{Program}, RefCount{1} { + : handle_base(), Function{Func}, + FunctionWithOffsetParam{FuncWithOffsetParam}, Name{Name}, + Context{Context}, Program{Program}, RefCount{1} { urProgramRetain(Program); urContextRetain(Context); /// Note: this code assumes that there is only one device per context diff --git a/unified-runtime/source/adapters/cuda/memory.hpp b/unified-runtime/source/adapters/cuda/memory.hpp index 65f065b1ec3c4..92aeb5878b952 100644 --- a/unified-runtime/source/adapters/cuda/memory.hpp +++ b/unified-runtime/source/adapters/cuda/memory.hpp @@ -310,7 +310,7 @@ struct SurfaceMem { /// is on a different device, marked by /// LastQueueWritingToMemObj->getDevice() /// -struct ur_mem_handle_t_ { +struct ur_mem_handle_t_ : ur::cuda::handle_base { // Context where the memory object is accessible ur_context_handle_t Context; @@ -345,7 +345,7 @@ struct ur_mem_handle_t_ { /// Constructs the UR mem handler for a non-typed allocation ("buffer") ur_mem_handle_t_(ur_context_handle_t Ctxt, ur_mem_flags_t MemFlags, BufferMem::AllocMode Mode, void *HostPtr, size_t Size) - : Context{Ctxt}, RefCount{1}, MemFlags{MemFlags}, + : handle_base(), Context{Ctxt}, RefCount{1}, MemFlags{MemFlags}, HaveMigratedToDeviceSinceLastWrite(Context->Devices.size(), false), Mem{std::in_place_type, Ctxt, this, Mode, HostPtr, Size} { urContextRetain(Context); @@ -353,9 +353,9 @@ struct ur_mem_handle_t_ { // Subbuffer constructor ur_mem_handle_t_(ur_mem_handle_t Parent, size_t SubBufferOffset) - : Context{Parent->Context}, RefCount{1}, MemFlags{Parent->MemFlags}, - 
HaveMigratedToDeviceSinceLastWrite(Parent->Context->Devices.size(), - false), + : handle_base(), Context{Parent->Context}, RefCount{1}, + MemFlags{Parent->MemFlags}, HaveMigratedToDeviceSinceLastWrite( + Parent->Context->Devices.size(), false), Mem{BufferMem{std::get(Parent->Mem)}} { auto &SubBuffer = std::get(Mem); SubBuffer.Parent = Parent; @@ -376,7 +376,7 @@ struct ur_mem_handle_t_ { ur_mem_handle_t_(ur_context_handle_t Ctxt, ur_mem_flags_t MemFlags, ur_image_format_t ImageFormat, ur_image_desc_t ImageDesc, void *HostPtr) - : Context{Ctxt}, RefCount{1}, MemFlags{MemFlags}, + : handle_base(), Context{Ctxt}, RefCount{1}, MemFlags{MemFlags}, HaveMigratedToDeviceSinceLastWrite(Context->Devices.size(), false), Mem{std::in_place_type, Ctxt, diff --git a/unified-runtime/source/adapters/cuda/physical_mem.hpp b/unified-runtime/source/adapters/cuda/physical_mem.hpp index 2c7efabae35dc..d9abe587b1b5f 100644 --- a/unified-runtime/source/adapters/cuda/physical_mem.hpp +++ b/unified-runtime/source/adapters/cuda/physical_mem.hpp @@ -20,7 +20,7 @@ /// UR queue mapping on physical memory allocations used in virtual memory /// management. 
/// -struct ur_physical_mem_handle_t_ { +struct ur_physical_mem_handle_t_ : ur::cuda::handle_base { using native_type = CUmemGenericAllocationHandle; std::atomic_uint32_t RefCount; @@ -33,8 +33,8 @@ struct ur_physical_mem_handle_t_ { ur_physical_mem_handle_t_(native_type PhysMem, ur_context_handle_t_ *Ctx, ur_device_handle_t Device, size_t Size, ur_physical_mem_properties_t Properties) - : RefCount(1), PhysicalMem(PhysMem), Context(Ctx), Device(Device), - Size(Size), Properties(Properties) { + : handle_base(), RefCount(1), PhysicalMem(PhysMem), Context(Ctx), + Device(Device), Size(Size), Properties(Properties) { urContextRetain(Context); } diff --git a/unified-runtime/source/adapters/cuda/platform.hpp b/unified-runtime/source/adapters/cuda/platform.hpp index 8ecc19c3e9f61..13ef6ca24ba10 100644 --- a/unified-runtime/source/adapters/cuda/platform.hpp +++ b/unified-runtime/source/adapters/cuda/platform.hpp @@ -11,13 +11,14 @@ #ifndef UR_CUDA_PLATFORM_HPP_INCLUDED #define UR_CUDA_PLATFORM_HPP_INCLUDED +#include "common.hpp" #include "device.hpp" #include #include #include -struct ur_platform_handle_t_ { +struct ur_platform_handle_t_ : ur::cuda::handle_base { std::vector> Devices; }; diff --git a/unified-runtime/source/adapters/cuda/program.hpp b/unified-runtime/source/adapters/cuda/program.hpp index df93b6375bf08..b6478a4973d58 100644 --- a/unified-runtime/source/adapters/cuda/program.hpp +++ b/unified-runtime/source/adapters/cuda/program.hpp @@ -17,7 +17,7 @@ #include "context.hpp" -struct ur_program_handle_t_ { +struct ur_program_handle_t_ : ur::cuda::handle_base { using native_type = CUmodule; native_type Module; const char *Binary; @@ -48,10 +48,10 @@ struct ur_program_handle_t_ { ur_program_build_status_t BuildStatus = UR_PROGRAM_BUILD_STATUS_NONE; ur_program_handle_t_(ur_context_handle_t Context, ur_device_handle_t Device) - : Module{nullptr}, Binary{}, BinarySizeInBytes{0}, RefCount{1}, - Context{Context}, Device{Device}, KernelReqdWorkGroupSizeMD{}, - 
KernelMaxWorkGroupSizeMD{}, KernelMaxLinearWorkGroupSizeMD{}, - KernelReqdSubGroupSizeMD{} { + : handle_base(), Module{nullptr}, Binary{}, BinarySizeInBytes{0}, + RefCount{1}, Context{Context}, Device{Device}, + KernelReqdWorkGroupSizeMD{}, KernelMaxWorkGroupSizeMD{}, + KernelMaxLinearWorkGroupSizeMD{}, KernelReqdSubGroupSizeMD{} { urContextRetain(Context); } diff --git a/unified-runtime/source/adapters/cuda/queue.cpp b/unified-runtime/source/adapters/cuda/queue.cpp index 9d606583b182e..a06a06b9982e1 100644 --- a/unified-runtime/source/adapters/cuda/queue.cpp +++ b/unified-runtime/source/adapters/cuda/queue.cpp @@ -92,7 +92,7 @@ urQueueCreate(ur_context_handle_t hContext, ur_device_handle_t hDevice, } Queue = std::unique_ptr(new ur_queue_handle_t_{ - {IsOutOfOrder, hContext, hDevice, Flags, URFlags, Priority}}); + {}, {IsOutOfOrder, hContext, hDevice, Flags, URFlags, Priority}}); *phQueue = Queue.release(); @@ -211,7 +211,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreateWithNativeHandle( // Create queue from a native stream *phQueue = new ur_queue_handle_t_{ - {CuStream, hContext, hDevice, CuFlags, Flags, isNativeHandleOwned}}; + {}, {CuStream, hContext, hDevice, CuFlags, Flags, isNativeHandleOwned}}; return UR_RESULT_SUCCESS; } diff --git a/unified-runtime/source/adapters/cuda/queue.hpp b/unified-runtime/source/adapters/cuda/queue.hpp index 430b74cf192dd..01e88731619b6 100644 --- a/unified-runtime/source/adapters/cuda/queue.hpp +++ b/unified-runtime/source/adapters/cuda/queue.hpp @@ -20,7 +20,8 @@ #include using cuda_stream_queue = stream_queue_t; -struct ur_queue_handle_t_ : public cuda_stream_queue {}; +struct ur_queue_handle_t_ : public ur::cuda::handle_base, + public cuda_stream_queue {}; using InteropGuard = cuda_stream_queue::interop_guard; diff --git a/unified-runtime/source/adapters/cuda/sampler.hpp b/unified-runtime/source/adapters/cuda/sampler.hpp index 4823541c735de..e429439848e06 100644 --- a/unified-runtime/source/adapters/cuda/sampler.hpp +++ 
b/unified-runtime/source/adapters/cuda/sampler.hpp @@ -8,6 +8,7 @@ // //===----------------------------------------------------------------------===// +#include "common.hpp" #include /// Implementation of samplers for CUDA @@ -23,7 +24,7 @@ /// | 4 3 2 | addressing mode 1 /// | 1 | filter mode /// | 0 | normalize coords -struct ur_sampler_handle_t_ { +struct ur_sampler_handle_t_ : ur::cuda::handle_base { std::atomic_uint32_t RefCount; uint32_t Props; float MinMipmapLevelClamp; @@ -32,7 +33,7 @@ struct ur_sampler_handle_t_ { ur_context_handle_t Context; ur_sampler_handle_t_(ur_context_handle_t Context) - : RefCount(1), Props(0), MinMipmapLevelClamp(0.0f), + : handle_base(), RefCount(1), Props(0), MinMipmapLevelClamp(0.0f), MaxMipmapLevelClamp(0.0f), MaxAnisotropy(0.0f), Context(Context) {} uint32_t incrementReferenceCount() noexcept { return ++RefCount; } diff --git a/unified-runtime/source/adapters/cuda/ur_interface_loader.cpp b/unified-runtime/source/adapters/cuda/ur_interface_loader.cpp index 32efb2a10aad6..188292f4bc228 100644 --- a/unified-runtime/source/adapters/cuda/ur_interface_loader.cpp +++ b/unified-runtime/source/adapters/cuda/ur_interface_loader.cpp @@ -8,6 +8,7 @@ // //===----------------------------------------------------------------------===// +#include "common.hpp" #include #include @@ -472,6 +473,43 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetAdapterProcAddrTable( return UR_RESULT_SUCCESS; } +UR_DLLEXPORT ur_result_t UR_APICALL urAllAddrTable(ur_api_version_t version, + ur_dditable_t *pDdiTable) { + urGetGlobalProcAddrTable(version, &pDdiTable->Global); + urGetBindlessImagesExpProcAddrTable(version, &pDdiTable->BindlessImagesExp); + urGetCommandBufferExpProcAddrTable(version, &pDdiTable->CommandBufferExp); + urGetContextProcAddrTable(version, &pDdiTable->Context); + urGetEnqueueProcAddrTable(version, &pDdiTable->Enqueue); + urGetEnqueueExpProcAddrTable(version, &pDdiTable->EnqueueExp); + urGetEventProcAddrTable(version, &pDdiTable->Event); + 
urGetKernelProcAddrTable(version, &pDdiTable->Kernel); + urGetKernelExpProcAddrTable(version, &pDdiTable->KernelExp); + urGetMemProcAddrTable(version, &pDdiTable->Mem); + urGetPhysicalMemProcAddrTable(version, &pDdiTable->PhysicalMem); + urGetPlatformProcAddrTable(version, &pDdiTable->Platform); + urGetProgramProcAddrTable(version, &pDdiTable->Program); + urGetProgramExpProcAddrTable(version, &pDdiTable->ProgramExp); + urGetQueueProcAddrTable(version, &pDdiTable->Queue); + urGetSamplerProcAddrTable(version, &pDdiTable->Sampler); + urGetUSMProcAddrTable(version, &pDdiTable->USM); + urGetUSMExpProcAddrTable(version, &pDdiTable->USMExp); + urGetUsmP2PExpProcAddrTable(version, &pDdiTable->UsmP2PExp); + urGetVirtualMemProcAddrTable(version, &pDdiTable->VirtualMem); + urGetDeviceProcAddrTable(version, &pDdiTable->Device); + urGetAdapterProcAddrTable(version, &pDdiTable->Adapter); + + return UR_RESULT_SUCCESS; +} + #if defined(__cplusplus) } // extern "C" #endif + +const ur_dditable_t *ur::cuda::ddi_getter::value() { + static std::once_flag flag; + static ur_dditable_t table; + + std::call_once(flag, + []() { urAllAddrTable(UR_API_VERSION_CURRENT, &table); }); + return &table; +} diff --git a/unified-runtime/source/adapters/cuda/usm.cpp b/unified-runtime/source/adapters/cuda/usm.cpp index 240ef55792a20..d140f5d39e411 100644 --- a/unified-runtime/source/adapters/cuda/usm.cpp +++ b/unified-runtime/source/adapters/cuda/usm.cpp @@ -210,7 +210,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMReleaseExp(ur_context_handle_t, ur_usm_pool_handle_t_::ur_usm_pool_handle_t_(ur_context_handle_t Context, ur_usm_pool_desc_t *PoolDesc) - : Context{Context} { + : handle_base(), Context{Context} { const void *pNext = PoolDesc->pNext; while (pNext != nullptr) { const ur_base_desc_t *BaseDesc = static_cast(pNext); diff --git a/unified-runtime/source/adapters/cuda/usm.hpp b/unified-runtime/source/adapters/cuda/usm.hpp index 9ddb53adc667b..27e1beb6b606c 100644 --- 
a/unified-runtime/source/adapters/cuda/usm.hpp +++ b/unified-runtime/source/adapters/cuda/usm.hpp @@ -17,7 +17,7 @@ usm::DisjointPoolAllConfigs InitializeDisjointPoolConfig(); // A ur_usm_pool_handle_t can represent different types of memory pools. It may // sit on top of a UMF pool or a CUmemoryPool, but not both. -struct ur_usm_pool_handle_t_ { +struct ur_usm_pool_handle_t_ : ur::cuda::handle_base { std::atomic_uint32_t RefCount = 1; ur_context_handle_t Context = nullptr; diff --git a/unified-runtime/source/adapters/hip/adapter.cpp b/unified-runtime/source/adapters/hip/adapter.cpp index 180510bc2826e..dc10100fa1036 100644 --- a/unified-runtime/source/adapters/hip/adapter.cpp +++ b/unified-runtime/source/adapters/hip/adapter.cpp @@ -38,7 +38,8 @@ class ur_legacy_sink : public logger::Sink { // through UR entry points. // https://github.com/oneapi-src/unified-runtime/issues/1330 ur_adapter_handle_t_::ur_adapter_handle_t_() - : logger(logger::get_logger("hip", + : handle_base(), + logger(logger::get_logger("hip", /*default_log_level*/ UR_LOGGER_LEVEL_ERROR)) { Platform = std::make_unique(); if (std::getenv("UR_LOG_HIP") != nullptr) diff --git a/unified-runtime/source/adapters/hip/adapter.hpp b/unified-runtime/source/adapters/hip/adapter.hpp index c4a750eee2389..5b132df0a9a5f 100644 --- a/unified-runtime/source/adapters/hip/adapter.hpp +++ b/unified-runtime/source/adapters/hip/adapter.hpp @@ -17,7 +17,7 @@ #include #include -struct ur_adapter_handle_t_ { +struct ur_adapter_handle_t_ : public ur::hip::handle_base { std::atomic RefCount = 0; logger::Logger &logger; std::unique_ptr Platform; diff --git a/unified-runtime/source/adapters/hip/command_buffer.cpp b/unified-runtime/source/adapters/hip/command_buffer.cpp index 9d7cb2346637a..2330e84ef8eed 100644 --- a/unified-runtime/source/adapters/hip/command_buffer.cpp +++ b/unified-runtime/source/adapters/hip/command_buffer.cpp @@ -23,8 +23,9 @@ ur_exp_command_buffer_handle_t_::ur_exp_command_buffer_handle_t_( 
ur_context_handle_t hContext, ur_device_handle_t hDevice, bool IsUpdatable) - : Context(hContext), Device(hDevice), IsUpdatable(IsUpdatable), - HIPGraph{nullptr}, HIPGraphExec{nullptr}, RefCount{1}, NextSyncPoint{0} { + : handle_base(), Context(hContext), Device(hDevice), + IsUpdatable(IsUpdatable), HIPGraph{nullptr}, HIPGraphExec{nullptr}, + RefCount{1}, NextSyncPoint{0} { urContextRetain(hContext); } @@ -50,8 +51,8 @@ ur_exp_command_buffer_command_handle_t_:: const size_t *GlobalWorkOffsetPtr, const size_t *GlobalWorkSizePtr, const size_t *LocalWorkSizePtr, uint32_t NumKernelAlternatives, ur_kernel_handle_t *KernelAlternatives) - : CommandBuffer(CommandBuffer), Kernel(Kernel), Node(Node), Params(Params), - WorkDim(WorkDim) { + : handle_base(), CommandBuffer(CommandBuffer), Kernel(Kernel), Node(Node), + Params(Params), WorkDim(WorkDim) { const size_t CopySize = sizeof(size_t) * WorkDim; std::memcpy(GlobalWorkOffset, GlobalWorkOffsetPtr, CopySize); std::memcpy(GlobalWorkSize, GlobalWorkSizePtr, CopySize); diff --git a/unified-runtime/source/adapters/hip/command_buffer.hpp b/unified-runtime/source/adapters/hip/command_buffer.hpp index bc5011003c118..ba2c797111aee 100644 --- a/unified-runtime/source/adapters/hip/command_buffer.hpp +++ b/unified-runtime/source/adapters/hip/command_buffer.hpp @@ -22,7 +22,7 @@ // Struct that stores all the information related to a kernel command in a // command-buffer, such that the command can be recreated. When handles can // be returned from other command types this struct will need refactored. 
-struct ur_exp_command_buffer_command_handle_t_ { +struct ur_exp_command_buffer_command_handle_t_ : ur::hip::handle_base { ur_exp_command_buffer_command_handle_t_( ur_exp_command_buffer_handle_t CommandBuffer, ur_kernel_handle_t Kernel, hipGraphNode_t Node, hipKernelNodeParams Params, uint32_t WorkDim, @@ -83,7 +83,7 @@ struct ur_exp_command_buffer_command_handle_t_ { size_t LocalWorkSize[3]; }; -struct ur_exp_command_buffer_handle_t_ { +struct ur_exp_command_buffer_handle_t_ : ur::hip::handle_base { ur_exp_command_buffer_handle_t_(ur_context_handle_t hContext, ur_device_handle_t hDevice, bool IsUpdatable); diff --git a/unified-runtime/source/adapters/hip/common.hpp b/unified-runtime/source/adapters/hip/common.hpp index edf9fbf08313d..386411cb0de33 100644 --- a/unified-runtime/source/adapters/hip/common.hpp +++ b/unified-runtime/source/adapters/hip/common.hpp @@ -157,3 +157,10 @@ inline static unsigned getMemoryType(hipPointerAttribute_t hipPointerAttrs) { return hipPointerAttrs.memoryType; #endif } + +namespace ur::hip { +struct ddi_getter { + static const ur_dditable_t *value(); +}; +using handle_base = ur::handle_base; +} // namespace ur::hip diff --git a/unified-runtime/source/adapters/hip/context.hpp b/unified-runtime/source/adapters/hip/context.hpp index 1d2b94562622b..3c011cec43a1b 100644 --- a/unified-runtime/source/adapters/hip/context.hpp +++ b/unified-runtime/source/adapters/hip/context.hpp @@ -77,7 +77,7 @@ typedef void (*ur_context_extended_deleter_t)(void *UserData); /// between native allocations for devices in the same \c ur_context_handle_t_ /// if necessary. 
/// -struct ur_context_handle_t_ { +struct ur_context_handle_t_ : ur::hip::handle_base { struct deleter_data { ur_context_extended_deleter_t Function; @@ -91,7 +91,7 @@ struct ur_context_handle_t_ { std::atomic_uint32_t RefCount; ur_context_handle_t_(const ur_device_handle_t *Devs, uint32_t NumDevices) - : Devices{Devs, Devs + NumDevices}, RefCount{1} { + : handle_base(), Devices{Devs, Devs + NumDevices}, RefCount{1} { UR_CHECK_ERROR(urAdapterRetain(ur::hip::adapter)); }; diff --git a/unified-runtime/source/adapters/hip/device.hpp b/unified-runtime/source/adapters/hip/device.hpp index c8d36ff50c116..b1a4f2a5c3737 100644 --- a/unified-runtime/source/adapters/hip/device.hpp +++ b/unified-runtime/source/adapters/hip/device.hpp @@ -17,7 +17,7 @@ /// Includes an observer pointer to the platform, /// and implements the reference counting semantics since /// HIP objects are not refcounted. -struct ur_device_handle_t_ { +struct ur_device_handle_t_ : ur::hip::handle_base { private: using native_type = hipDevice_t; @@ -39,8 +39,8 @@ struct ur_device_handle_t_ { public: ur_device_handle_t_(native_type HipDevice, hipEvent_t EvBase, ur_platform_handle_t Platform, uint32_t DeviceIndex) - : HIPDevice(HipDevice), RefCount{1}, Platform(Platform), EvBase(EvBase), - DeviceIndex(DeviceIndex) { + : handle_base(), HIPDevice(HipDevice), RefCount{1}, Platform(Platform), + EvBase(EvBase), DeviceIndex(DeviceIndex) { UR_CHECK_ERROR(hipDeviceGetAttribute( &MaxWorkGroupSize, hipDeviceAttributeMaxThreadsPerBlock, HIPDevice)); diff --git a/unified-runtime/source/adapters/hip/event.cpp b/unified-runtime/source/adapters/hip/event.cpp index 5162df971cfe9..a8ef15bee64d8 100644 --- a/unified-runtime/source/adapters/hip/event.cpp +++ b/unified-runtime/source/adapters/hip/event.cpp @@ -19,7 +19,7 @@ ur_event_handle_t_::ur_event_handle_t_(ur_command_t Type, hipEvent_t EvEnd, hipEvent_t EvQueued, hipEvent_t EvStart, hipStream_t Stream, uint32_t StreamToken) - : CommandType{Type}, RefCount{1}, 
HasOwnership{true}, + : handle_base(), CommandType{Type}, RefCount{1}, HasOwnership{true}, HasBeenWaitedOn{false}, IsRecorded{false}, IsStarted{false}, StreamToken{StreamToken}, EventId{0}, EvEnd{EvEnd}, EvStart{EvStart}, EvQueued{EvQueued}, Queue{Queue}, Stream{Stream}, Context{Context} { @@ -29,11 +29,12 @@ ur_event_handle_t_::ur_event_handle_t_(ur_command_t Type, ur_event_handle_t_::ur_event_handle_t_(ur_context_handle_t Context, hipEvent_t EventNative) - : CommandType{UR_COMMAND_EVENTS_WAIT}, RefCount{1}, HasOwnership{false}, - HasBeenWaitedOn{false}, IsRecorded{false}, IsStarted{false}, - IsInterop{true}, StreamToken{std::numeric_limits::max()}, - EventId{0}, EvEnd{EventNative}, EvStart{nullptr}, EvQueued{nullptr}, - Queue{nullptr}, Stream{nullptr}, Context{Context} { + : handle_base(), CommandType{UR_COMMAND_EVENTS_WAIT}, RefCount{1}, + HasOwnership{false}, HasBeenWaitedOn{false}, IsRecorded{false}, + IsStarted{false}, IsInterop{true}, + StreamToken{std::numeric_limits::max()}, EventId{0}, + EvEnd{EventNative}, EvStart{nullptr}, EvQueued{nullptr}, Queue{nullptr}, + Stream{nullptr}, Context{Context} { urContextRetain(Context); } diff --git a/unified-runtime/source/adapters/hip/event.hpp b/unified-runtime/source/adapters/hip/event.hpp index ab9544a552ace..8b513d6e9c2c0 100644 --- a/unified-runtime/source/adapters/hip/event.hpp +++ b/unified-runtime/source/adapters/hip/event.hpp @@ -14,7 +14,7 @@ /// UR Event mapping to hipEvent_t /// -struct ur_event_handle_t_ { +struct ur_event_handle_t_ : ur::hip::handle_base { public: using native_type = hipEvent_t; diff --git a/unified-runtime/source/adapters/hip/kernel.hpp b/unified-runtime/source/adapters/hip/kernel.hpp index 97c80c32709ee..230541dd3cdad 100644 --- a/unified-runtime/source/adapters/hip/kernel.hpp +++ b/unified-runtime/source/adapters/hip/kernel.hpp @@ -33,7 +33,7 @@ /// A compiler pass converts the UR API local memory model into the /// HIP shared model. 
This object simply calculates the total of /// shared memory, and the initial offsets of each parameter. -struct ur_kernel_handle_t_ { +struct ur_kernel_handle_t_ : ur::hip::handle_base { using native_type = hipFunction_t; native_type Function; @@ -236,8 +236,9 @@ struct ur_kernel_handle_t_ { ur_kernel_handle_t_(hipFunction_t Func, hipFunction_t FuncWithOffsetParam, const char *Name, ur_program_handle_t Program, ur_context_handle_t Ctxt) - : Function{Func}, FunctionWithOffsetParam{FuncWithOffsetParam}, - Name{Name}, Context{Ctxt}, Program{Program}, RefCount{1} { + : handle_base(), Function{Func}, + FunctionWithOffsetParam{FuncWithOffsetParam}, Name{Name}, Context{Ctxt}, + Program{Program}, RefCount{1} { assert(Program->getDevice()); UR_CHECK_ERROR(urKernelGetGroupInfo( this, Program->getDevice(), diff --git a/unified-runtime/source/adapters/hip/memory.hpp b/unified-runtime/source/adapters/hip/memory.hpp index 3fc6352ff5aec..b2367edf8f4b7 100644 --- a/unified-runtime/source/adapters/hip/memory.hpp +++ b/unified-runtime/source/adapters/hip/memory.hpp @@ -307,7 +307,7 @@ struct SurfaceMem { /// Migrations will occur in both cases if the most recent version of data /// is on a different device, marked by LastQueueWritingToMemObj->getDevice(). 
/// -struct ur_mem_handle_t_ { +struct ur_mem_handle_t_ : ur::hip::handle_base { // TODO: Move as much shared data up as possible using ur_context = ur_context_handle_t_ *; @@ -355,9 +355,9 @@ struct ur_mem_handle_t_ { // Subbuffer constructor ur_mem_handle_t_(ur_mem Parent, size_t SubBufferOffset) - : Context{Parent->Context}, RefCount{1}, MemFlags{Parent->MemFlags}, - HaveMigratedToDeviceSinceLastWrite(Parent->Context->Devices.size(), - false), + : handle_base(), Context{Parent->Context}, RefCount{1}, + MemFlags{Parent->MemFlags}, HaveMigratedToDeviceSinceLastWrite( + Parent->Context->Devices.size(), false), Mem{BufferMem{std::get(Parent->Mem)}} { auto &SubBuffer = std::get(Mem); SubBuffer.Parent = Parent; diff --git a/unified-runtime/source/adapters/hip/physical_mem.hpp b/unified-runtime/source/adapters/hip/physical_mem.hpp index fc50836f62ea3..47342ae206510 100644 --- a/unified-runtime/source/adapters/hip/physical_mem.hpp +++ b/unified-runtime/source/adapters/hip/physical_mem.hpp @@ -17,10 +17,10 @@ /// management. /// TODO: Implement. /// -struct ur_physical_mem_handle_t_ { +struct ur_physical_mem_handle_t_ : ur::hip::handle_base { std::atomic_uint32_t RefCount; - ur_physical_mem_handle_t_() : RefCount(1) {} + ur_physical_mem_handle_t_() : handle_base(), RefCount(1) {} uint32_t incrementReferenceCount() noexcept { return ++RefCount; } diff --git a/unified-runtime/source/adapters/hip/platform.hpp b/unified-runtime/source/adapters/hip/platform.hpp index 7b96de64732f5..446858b5d9fb6 100644 --- a/unified-runtime/source/adapters/hip/platform.hpp +++ b/unified-runtime/source/adapters/hip/platform.hpp @@ -19,6 +19,6 @@ /// available devices since initialization is done /// when devices are used. 
/// -struct ur_platform_handle_t_ { +struct ur_platform_handle_t_ : ur::hip::handle_base { std::vector> Devices; }; diff --git a/unified-runtime/source/adapters/hip/program.hpp b/unified-runtime/source/adapters/hip/program.hpp index 40a98d1104b8c..38857f635bcfd 100644 --- a/unified-runtime/source/adapters/hip/program.hpp +++ b/unified-runtime/source/adapters/hip/program.hpp @@ -17,7 +17,7 @@ #include "context.hpp" /// Implementation of UR Program on HIP Module object -struct ur_program_handle_t_ { +struct ur_program_handle_t_ : ur::hip::handle_base { using native_type = hipModule_t; native_type Module; const char *Binary; @@ -48,8 +48,8 @@ struct ur_program_handle_t_ { ur_program_build_status_t BuildStatus = UR_PROGRAM_BUILD_STATUS_NONE; ur_program_handle_t_(ur_context_handle_t Ctxt, ur_device_handle_t Device) - : Module{nullptr}, Binary{}, BinarySizeInBytes{0}, RefCount{1}, - Context{Ctxt}, Device{Device}, KernelReqdWorkGroupSizeMD{}, + : handle_base(), Module{nullptr}, Binary{}, BinarySizeInBytes{0}, + RefCount{1}, Context{Ctxt}, Device{Device}, KernelReqdWorkGroupSizeMD{}, KernelReqdSubGroupSizeMD{} { urContextRetain(Context); } diff --git a/unified-runtime/source/adapters/hip/queue.cpp b/unified-runtime/source/adapters/hip/queue.cpp index 77de9f9c8efd0..5d449c4e3e854 100644 --- a/unified-runtime/source/adapters/hip/queue.cpp +++ b/unified-runtime/source/adapters/hip/queue.cpp @@ -81,7 +81,7 @@ urQueueCreate(ur_context_handle_t hContext, ur_device_handle_t hDevice, : false; QueueImpl = std::unique_ptr(new ur_queue_handle_t_{ - {IsOutOfOrder, hContext, hDevice, Flags, URFlags, Priority}}); + {}, {IsOutOfOrder, hContext, hDevice, Flags, URFlags, Priority}}); *phQueue = QueueImpl.release(); @@ -248,7 +248,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreateWithNativeHandle( // Create queue and set num_compute_streams to 1, as computeHIPStreams has // valid stream *phQueue = new ur_queue_handle_t_{ - {HIPStream, hContext, hDevice, HIPFlags, Flags, 
isNativeHandleOwned}}; + {}, {HIPStream, hContext, hDevice, HIPFlags, Flags, isNativeHandleOwned}}; return UR_RESULT_SUCCESS; } diff --git a/unified-runtime/source/adapters/hip/queue.hpp b/unified-runtime/source/adapters/hip/queue.hpp index de82c4bc87021..f7478f88883d6 100644 --- a/unified-runtime/source/adapters/hip/queue.hpp +++ b/unified-runtime/source/adapters/hip/queue.hpp @@ -17,7 +17,8 @@ #include using hip_stream_queue = stream_queue_t; -struct ur_queue_handle_t_ : public hip_stream_queue {}; +struct ur_queue_handle_t_ : public ur::hip::handle_base, + public hip_stream_queue {}; using InteropGuard = hip_stream_queue::interop_guard; diff --git a/unified-runtime/source/adapters/hip/sampler.hpp b/unified-runtime/source/adapters/hip/sampler.hpp index 54421e9a68de9..1a1defea851ed 100644 --- a/unified-runtime/source/adapters/hip/sampler.hpp +++ b/unified-runtime/source/adapters/hip/sampler.hpp @@ -25,7 +25,7 @@ /// | 4 3 2 | addressing mode 1 /// | 1 | filter mode /// | 0 | normalize coords -struct ur_sampler_handle_t_ { +struct ur_sampler_handle_t_ : ur::hip::handle_base { std::atomic_uint32_t RefCount; uint32_t Props; float MinMipmapLevelClamp; @@ -34,7 +34,7 @@ struct ur_sampler_handle_t_ { ur_context_handle_t Context; ur_sampler_handle_t_(ur_context_handle_t Context) - : RefCount(1), Props(0), Context(Context) {} + : handle_base(), RefCount(1), Props(0), Context(Context) {} uint32_t incrementReferenceCount() noexcept { return ++RefCount; } diff --git a/unified-runtime/source/adapters/hip/ur_interface_loader.cpp b/unified-runtime/source/adapters/hip/ur_interface_loader.cpp index b7df579c68ece..fb22a927dda00 100644 --- a/unified-runtime/source/adapters/hip/ur_interface_loader.cpp +++ b/unified-runtime/source/adapters/hip/ur_interface_loader.cpp @@ -8,6 +8,7 @@ // //===----------------------------------------------------------------------===// +#include "common.hpp" #include #include @@ -464,6 +465,43 @@ UR_DLLEXPORT ur_result_t UR_APICALL 
urGetAdapterProcAddrTable( return UR_RESULT_SUCCESS; } +UR_DLLEXPORT ur_result_t UR_APICALL urAllAddrTable(ur_api_version_t version, + ur_dditable_t *pDdiTable) { + urGetGlobalProcAddrTable(version, &pDdiTable->Global); + urGetBindlessImagesExpProcAddrTable(version, &pDdiTable->BindlessImagesExp); + urGetCommandBufferExpProcAddrTable(version, &pDdiTable->CommandBufferExp); + urGetContextProcAddrTable(version, &pDdiTable->Context); + urGetEnqueueProcAddrTable(version, &pDdiTable->Enqueue); + urGetEnqueueExpProcAddrTable(version, &pDdiTable->EnqueueExp); + urGetEventProcAddrTable(version, &pDdiTable->Event); + urGetKernelProcAddrTable(version, &pDdiTable->Kernel); + urGetKernelExpProcAddrTable(version, &pDdiTable->KernelExp); + urGetMemProcAddrTable(version, &pDdiTable->Mem); + urGetPhysicalMemProcAddrTable(version, &pDdiTable->PhysicalMem); + urGetPlatformProcAddrTable(version, &pDdiTable->Platform); + urGetProgramProcAddrTable(version, &pDdiTable->Program); + urGetProgramExpProcAddrTable(version, &pDdiTable->ProgramExp); + urGetQueueProcAddrTable(version, &pDdiTable->Queue); + urGetSamplerProcAddrTable(version, &pDdiTable->Sampler); + urGetUSMProcAddrTable(version, &pDdiTable->USM); + urGetUSMExpProcAddrTable(version, &pDdiTable->USMExp); + urGetUsmP2PExpProcAddrTable(version, &pDdiTable->UsmP2PExp); + urGetVirtualMemProcAddrTable(version, &pDdiTable->VirtualMem); + urGetDeviceProcAddrTable(version, &pDdiTable->Device); + urGetAdapterProcAddrTable(version, &pDdiTable->Adapter); + + return UR_RESULT_SUCCESS; +} + #if defined(__cplusplus) } // extern "C" #endif + +const ur_dditable_t *ur::hip::ddi_getter::value() { + static std::once_flag flag; + static ur_dditable_t table; + + std::call_once(flag, + []() { urAllAddrTable(UR_API_VERSION_CURRENT, &table); }); + return &table; +} diff --git a/unified-runtime/source/adapters/hip/usm.cpp b/unified-runtime/source/adapters/hip/usm.cpp index 0e0fdc1721a35..e61d14bfb6a84 100644 --- 
a/unified-runtime/source/adapters/hip/usm.cpp +++ b/unified-runtime/source/adapters/hip/usm.cpp @@ -348,7 +348,7 @@ ur_result_t USMHostMemoryProvider::allocateImpl(void **ResultPtr, size_t Size, ur_usm_pool_handle_t_::ur_usm_pool_handle_t_(ur_context_handle_t Context, ur_usm_pool_desc_t *PoolDesc) - : Context(Context) { + : handle_base(), Context(Context) { const void *pNext = PoolDesc->pNext; while (pNext != nullptr) { diff --git a/unified-runtime/source/adapters/hip/usm.hpp b/unified-runtime/source/adapters/hip/usm.hpp index a116090d5d3b1..79408570f9500 100644 --- a/unified-runtime/source/adapters/hip/usm.hpp +++ b/unified-runtime/source/adapters/hip/usm.hpp @@ -15,7 +15,7 @@ usm::DisjointPoolAllConfigs InitializeDisjointPoolConfig(); -struct ur_usm_pool_handle_t_ { +struct ur_usm_pool_handle_t_ : ur::hip::handle_base { std::atomic_uint32_t RefCount = 1; ur_context_handle_t Context = nullptr; diff --git a/unified-runtime/source/adapters/level_zero/adapter.cpp b/unified-runtime/source/adapters/level_zero/adapter.cpp index 978439ecb9c89..9c40fc01343f0 100644 --- a/unified-runtime/source/adapters/level_zero/adapter.cpp +++ b/unified-runtime/source/adapters/level_zero/adapter.cpp @@ -295,7 +295,7 @@ Behavior Summary: SysMan initialization is skipped. 
*/ ur_adapter_handle_t_::ur_adapter_handle_t_() - : logger(logger::get_logger("level_zero")) { + : handle_base(), logger(logger::get_logger("level_zero")) { ZeInitDriversResult = ZE_RESULT_ERROR_UNINITIALIZED; ZeInitResult = ZE_RESULT_ERROR_UNINITIALIZED; ZesResult = ZE_RESULT_ERROR_UNINITIALIZED; diff --git a/unified-runtime/source/adapters/level_zero/adapter.hpp b/unified-runtime/source/adapters/level_zero/adapter.hpp index c41f671d9b9bb..cf96e672c56e4 100644 --- a/unified-runtime/source/adapters/level_zero/adapter.hpp +++ b/unified-runtime/source/adapters/level_zero/adapter.hpp @@ -10,6 +10,7 @@ #pragma once #include "logger/ur_logger.hpp" +#include "ur_interface_loader.hpp" #include #include #include @@ -24,7 +25,7 @@ using PlatformVec = std::vector>; class ur_legacy_sink; -struct ur_adapter_handle_t_ { +struct ur_adapter_handle_t_ : ur::handle_base { ur_adapter_handle_t_(); std::atomic RefCount = 0; std::mutex Mutex; diff --git a/unified-runtime/source/adapters/level_zero/common.hpp b/unified-runtime/source/adapters/level_zero/common.hpp index c09dce82c55b5..536325132b675 100644 --- a/unified-runtime/source/adapters/level_zero/common.hpp +++ b/unified-runtime/source/adapters/level_zero/common.hpp @@ -35,6 +35,7 @@ #include #include "logger/ur_logger.hpp" +#include "ur_interface_loader.hpp" struct _ur_platform_handle_t; @@ -256,7 +257,7 @@ struct ReferenceCounter { }; // Base class to store common data -struct _ur_object { +struct _ur_object : ur::handle_base { _ur_object() : RefCount{} {} // Must be atomic to prevent data race when incrementing/decrementing. 
@@ -380,4 +381,4 @@ extern thread_local int32_t ErrorAdapterNativeCode; // Utility function for setting a message and warning [[maybe_unused]] void setErrorMessage(const char *pMessage, ur_result_t ErrorCode, - int32_t AdapterErrorCode); \ No newline at end of file + int32_t AdapterErrorCode); diff --git a/unified-runtime/source/adapters/level_zero/platform.hpp b/unified-runtime/source/adapters/level_zero/platform.hpp index ed9c4d38f8fdd..7ae17968e8144 100644 --- a/unified-runtime/source/adapters/level_zero/platform.hpp +++ b/unified-runtime/source/adapters/level_zero/platform.hpp @@ -25,9 +25,10 @@ struct ur_zes_device_handle_data_t { ze_bool_t SubDevice = false; }; -struct ur_platform_handle_t_ : public _ur_platform { +struct ur_platform_handle_t_ : ur::handle_base, + public _ur_platform { ur_platform_handle_t_(ze_driver_handle_t Driver) - : ZeDriver{Driver}, ZeApiVersion{ZE_API_VERSION_CURRENT} {} + : handle_base(), ZeDriver{Driver}, ZeApiVersion{ZE_API_VERSION_CURRENT} {} // Performs initialization of a newly constructed PI platform. 
ur_result_t initialize(); diff --git a/unified-runtime/source/adapters/level_zero/ur_interface_loader.cpp b/unified-runtime/source/adapters/level_zero/ur_interface_loader.cpp index 28a1301d7254f..56d47decccc8f 100644 --- a/unified-runtime/source/adapters/level_zero/ur_interface_loader.cpp +++ b/unified-runtime/source/adapters/level_zero/ur_interface_loader.cpp @@ -8,6 +8,7 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// +#include #include #include @@ -550,105 +551,125 @@ UR_APIEXPORT ur_result_t UR_APICALL urGetDeviceProcAddrTable( } // extern "C" #endif -#ifdef UR_STATIC_ADAPTER_LEVEL_ZERO -namespace ur::level_zero { -ur_result_t urAdapterGetDdiTables(ur_dditable_t *ddi) { +namespace { +ur_result_t populateDdiTable(ur_dditable_t *ddi) { if (ddi == nullptr) { return UR_RESULT_ERROR_INVALID_NULL_POINTER; } ur_result_t result; - result = ur::level_zero::urGetGlobalProcAddrTable(UR_API_VERSION_CURRENT, - &ddi->Global); +#ifdef UR_STATIC_ADAPTER_LEVEL_ZERO +#define NAMESPACE_ ::ur::level_zero +#else +#define NAMESPACE_ +#endif + + result = NAMESPACE_::urGetGlobalProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->Global); if (result != UR_RESULT_SUCCESS) return result; - result = ur::level_zero::urGetAdapterProcAddrTable(UR_API_VERSION_CURRENT, - &ddi->Adapter); + result = NAMESPACE_::urGetAdapterProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->Adapter); if (result != UR_RESULT_SUCCESS) return result; - result = ur::level_zero::urGetBindlessImagesExpProcAddrTable( + result = NAMESPACE_::urGetBindlessImagesExpProcAddrTable( UR_API_VERSION_CURRENT, &ddi->BindlessImagesExp); if (result != UR_RESULT_SUCCESS) return result; - result = ur::level_zero::urGetCommandBufferExpProcAddrTable( + result = NAMESPACE_::urGetCommandBufferExpProcAddrTable( UR_API_VERSION_CURRENT, &ddi->CommandBufferExp); if (result != UR_RESULT_SUCCESS) return result; - result = 
ur::level_zero::urGetContextProcAddrTable(UR_API_VERSION_CURRENT, - &ddi->Context); + result = NAMESPACE_::urGetContextProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->Context); if (result != UR_RESULT_SUCCESS) return result; - result = ur::level_zero::urGetEnqueueProcAddrTable(UR_API_VERSION_CURRENT, - &ddi->Enqueue); + result = NAMESPACE_::urGetEnqueueProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->Enqueue); if (result != UR_RESULT_SUCCESS) return result; - result = ur::level_zero::urGetEnqueueExpProcAddrTable(UR_API_VERSION_CURRENT, - &ddi->EnqueueExp); + result = NAMESPACE_::urGetEnqueueExpProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->EnqueueExp); if (result != UR_RESULT_SUCCESS) return result; - result = ur::level_zero::urGetEventProcAddrTable(UR_API_VERSION_CURRENT, - &ddi->Event); + result = + NAMESPACE_::urGetEventProcAddrTable(UR_API_VERSION_CURRENT, &ddi->Event); if (result != UR_RESULT_SUCCESS) return result; - result = ur::level_zero::urGetKernelProcAddrTable(UR_API_VERSION_CURRENT, - &ddi->Kernel); + result = NAMESPACE_::urGetKernelProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->Kernel); if (result != UR_RESULT_SUCCESS) return result; - result = ur::level_zero::urGetKernelExpProcAddrTable(UR_API_VERSION_CURRENT, - &ddi->KernelExp); + result = NAMESPACE_::urGetKernelExpProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->KernelExp); if (result != UR_RESULT_SUCCESS) return result; - result = - ur::level_zero::urGetMemProcAddrTable(UR_API_VERSION_CURRENT, &ddi->Mem); + result = NAMESPACE_::urGetMemProcAddrTable(UR_API_VERSION_CURRENT, &ddi->Mem); if (result != UR_RESULT_SUCCESS) return result; - result = ur::level_zero::urGetPhysicalMemProcAddrTable(UR_API_VERSION_CURRENT, - &ddi->PhysicalMem); + result = NAMESPACE_::urGetPhysicalMemProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->PhysicalMem); if (result != UR_RESULT_SUCCESS) return result; - result = ur::level_zero::urGetPlatformProcAddrTable(UR_API_VERSION_CURRENT, - &ddi->Platform); + result = 
NAMESPACE_::urGetPlatformProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->Platform); if (result != UR_RESULT_SUCCESS) return result; - result = ur::level_zero::urGetProgramProcAddrTable(UR_API_VERSION_CURRENT, - &ddi->Program); + result = NAMESPACE_::urGetProgramProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->Program); if (result != UR_RESULT_SUCCESS) return result; - result = ur::level_zero::urGetProgramExpProcAddrTable(UR_API_VERSION_CURRENT, - &ddi->ProgramExp); + result = NAMESPACE_::urGetProgramExpProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->ProgramExp); if (result != UR_RESULT_SUCCESS) return result; - result = ur::level_zero::urGetQueueProcAddrTable(UR_API_VERSION_CURRENT, - &ddi->Queue); + result = + NAMESPACE_::urGetQueueProcAddrTable(UR_API_VERSION_CURRENT, &ddi->Queue); if (result != UR_RESULT_SUCCESS) return result; - result = ur::level_zero::urGetSamplerProcAddrTable(UR_API_VERSION_CURRENT, - &ddi->Sampler); + result = NAMESPACE_::urGetSamplerProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->Sampler); if (result != UR_RESULT_SUCCESS) return result; - result = - ur::level_zero::urGetUSMProcAddrTable(UR_API_VERSION_CURRENT, &ddi->USM); + result = NAMESPACE_::urGetUSMProcAddrTable(UR_API_VERSION_CURRENT, &ddi->USM); if (result != UR_RESULT_SUCCESS) return result; - result = ur::level_zero::urGetUSMExpProcAddrTable(UR_API_VERSION_CURRENT, - &ddi->USMExp); + result = NAMESPACE_::urGetUSMExpProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->USMExp); if (result != UR_RESULT_SUCCESS) return result; - result = ur::level_zero::urGetUsmP2PExpProcAddrTable(UR_API_VERSION_CURRENT, - &ddi->UsmP2PExp); + result = NAMESPACE_::urGetUsmP2PExpProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->UsmP2PExp); if (result != UR_RESULT_SUCCESS) return result; - result = ur::level_zero::urGetVirtualMemProcAddrTable(UR_API_VERSION_CURRENT, - &ddi->VirtualMem); + result = NAMESPACE_::urGetVirtualMemProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->VirtualMem); if (result != UR_RESULT_SUCCESS) return 
result; - result = ur::level_zero::urGetDeviceProcAddrTable(UR_API_VERSION_CURRENT, - &ddi->Device); + result = NAMESPACE_::urGetDeviceProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->Device); if (result != UR_RESULT_SUCCESS) return result; +#undef NAMESPACE_ + return result; } -} // namespace ur::level_zero +} // namespace + +namespace ur::level_zero { +const ur_dditable_t *ddi_getter::value() { + static std::once_flag flag; + static ur_dditable_t table; + + std::call_once(flag, []() { populateDdiTable(&table); }); + return &table; +} + +#ifdef UR_STATIC_ADAPTER_LEVEL_ZERO +ur_result_t urAdapterGetDdiTables(ur_dditable_t *ddi) { + return populateDdiTable(ddi); +} #endif +} // namespace ur::level_zero diff --git a/unified-runtime/source/adapters/level_zero/ur_interface_loader.hpp b/unified-runtime/source/adapters/level_zero/ur_interface_loader.hpp index 7174fba5757fc..081ab520d7d70 100644 --- a/unified-runtime/source/adapters/level_zero/ur_interface_loader.hpp +++ b/unified-runtime/source/adapters/level_zero/ur_interface_loader.hpp @@ -8,6 +8,8 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// +#pragma once + #include #include @@ -797,4 +799,8 @@ ur_result_t urEnqueueNativeCommandExp( #ifdef UR_STATIC_ADAPTER_LEVEL_ZERO ur_result_t urAdapterGetDdiTables(ur_dditable_t *ddi); #endif + +struct ddi_getter { + const static ur_dditable_t *value(); +}; } // namespace ur::level_zero diff --git a/unified-runtime/source/adapters/mock/ur_mock.cpp b/unified-runtime/source/adapters/mock/ur_mock.cpp index 8af1cbd320927..7f85398abcc47 100644 --- a/unified-runtime/source/adapters/mock/ur_mock.cpp +++ b/unified-runtime/source/adapters/mock/ur_mock.cpp @@ -81,6 +81,15 @@ ur_result_t mock_urDeviceGetInfo(void *pParams) { **params.ppPropSizeRet = sizeof(deviceName); } } break; + case UR_DEVICE_INFO_PLATFORM: + if (*params.ppPropValue != nullptr) { + *reinterpret_cast(*params.ppPropValue) = + 
driver::d_context.platform; + } + if (*params.ppPropSizeRet != nullptr) { + **params.ppPropSizeRet = sizeof(ur_platform_handle_t); + } + break; default: return UR_RESULT_SUCCESS; } @@ -89,6 +98,28 @@ ur_result_t mock_urDeviceGetInfo(void *pParams) { ////////////////////////////////////////////////////////////////////////// context_t::context_t() { + urGetGlobalProcAddrTable(version, &urDdiTable.Global); + urGetBindlessImagesExpProcAddrTable(version, &urDdiTable.BindlessImagesExp); + urGetCommandBufferExpProcAddrTable(version, &urDdiTable.CommandBufferExp); + urGetContextProcAddrTable(version, &urDdiTable.Context); + urGetEnqueueProcAddrTable(version, &urDdiTable.Enqueue); + urGetEnqueueExpProcAddrTable(version, &urDdiTable.EnqueueExp); + urGetEventProcAddrTable(version, &urDdiTable.Event); + urGetKernelProcAddrTable(version, &urDdiTable.Kernel); + urGetKernelExpProcAddrTable(version, &urDdiTable.KernelExp); + urGetMemProcAddrTable(version, &urDdiTable.Mem); + urGetPhysicalMemProcAddrTable(version, &urDdiTable.PhysicalMem); + urGetPlatformProcAddrTable(version, &urDdiTable.Platform); + urGetProgramProcAddrTable(version, &urDdiTable.Program); + urGetProgramExpProcAddrTable(version, &urDdiTable.ProgramExp); + urGetQueueProcAddrTable(version, &urDdiTable.Queue); + urGetSamplerProcAddrTable(version, &urDdiTable.Sampler); + urGetUSMProcAddrTable(version, &urDdiTable.USM); + urGetUSMExpProcAddrTable(version, &urDdiTable.USMExp); + urGetUsmP2PExpProcAddrTable(version, &urDdiTable.UsmP2PExp); + urGetVirtualMemProcAddrTable(version, &urDdiTable.VirtualMem); + urGetDeviceProcAddrTable(version, &urDdiTable.Device); + mock::getCallbacks().set_replace_callback("urPlatformGetApiVersion", &mock_urPlatformGetApiVersion); // Set the default info stuff as before overrides, this way any application diff --git a/unified-runtime/source/adapters/mock/ur_mock.hpp b/unified-runtime/source/adapters/mock/ur_mock.hpp index 6249645facc8c..7aca43eb234e7 100644 --- 
a/unified-runtime/source/adapters/mock/ur_mock.hpp +++ b/unified-runtime/source/adapters/mock/ur_mock.hpp @@ -27,9 +27,17 @@ class __urdlllocal context_t { context_t(); ~context_t() = default; - ur_adapter_handle_t adapter = reinterpret_cast(1); - ur_device_handle_t device = reinterpret_cast(2); - ur_platform_handle_t platform = reinterpret_cast(3); + void *fake_adapter = &urDdiTable; + ur_adapter_handle_t adapter = + reinterpret_cast(&fake_adapter); + + void *fake_device = &urDdiTable; + ur_device_handle_t device = + reinterpret_cast(&fake_device); + + void *fake_platform = &urDdiTable; + ur_platform_handle_t platform = + reinterpret_cast(&fake_platform); }; extern context_t d_context; diff --git a/unified-runtime/source/adapters/native_cpu/adapter.cpp b/unified-runtime/source/adapters/native_cpu/adapter.cpp index 9d2f780c95438..6ed08cf054f4e 100644 --- a/unified-runtime/source/adapters/native_cpu/adapter.cpp +++ b/unified-runtime/source/adapters/native_cpu/adapter.cpp @@ -12,7 +12,7 @@ #include "common.hpp" #include "ur_api.h" -struct ur_adapter_handle_t_ { +struct ur_adapter_handle_t_ : ur::native_cpu::handle_base { std::atomic RefCount = 0; logger::Logger &logger = logger::get_logger("native_cpu"); } Adapter; diff --git a/unified-runtime/source/adapters/native_cpu/common.hpp b/unified-runtime/source/adapters/native_cpu/common.hpp index 08c572aabc886..71ff7b89e1ff5 100644 --- a/unified-runtime/source/adapters/native_cpu/common.hpp +++ b/unified-runtime/source/adapters/native_cpu/common.hpp @@ -40,12 +40,19 @@ extern thread_local char ErrorMessage[MaxMessageSize]; __FUNCTION__, __LINE__, __FILE__); \ return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +namespace ur::native_cpu { +struct ddi_getter { + static const ur_dditable_t *value(); +}; +using handle_base = ur::handle_base; +} // namespace ur::native_cpu + // Todo: replace this with a common helper once it is available -struct RefCounted { +struct RefCounted : ur::native_cpu::handle_base { std::atomic_uint32_t 
_refCount; uint32_t incrementReferenceCount() { return ++_refCount; } uint32_t decrementReferenceCount() { return --_refCount; } - RefCounted() : _refCount{1} {} + RefCounted() : handle_base(), _refCount{1} {} uint32_t getReferenceCount() const { return _refCount; } }; diff --git a/unified-runtime/source/adapters/native_cpu/device.hpp b/unified-runtime/source/adapters/native_cpu/device.hpp index 2308c1a7f4597..78c3ac7e384cf 100644 --- a/unified-runtime/source/adapters/native_cpu/device.hpp +++ b/unified-runtime/source/adapters/native_cpu/device.hpp @@ -10,10 +10,11 @@ #pragma once +#include "common.hpp" #include "threadpool.hpp" #include -struct ur_device_handle_t_ { +struct ur_device_handle_t_ : ur::native_cpu::handle_base { native_cpu::threadpool_t tp; ur_device_handle_t_(ur_platform_handle_t ArgPlt); diff --git a/unified-runtime/source/adapters/native_cpu/platform.hpp b/unified-runtime/source/adapters/native_cpu/platform.hpp index 6791bba7aa63c..9852cdc1428d6 100644 --- a/unified-runtime/source/adapters/native_cpu/platform.hpp +++ b/unified-runtime/source/adapters/native_cpu/platform.hpp @@ -15,6 +15,6 @@ #include "common.hpp" #include "device.hpp" -struct ur_platform_handle_t_ { +struct ur_platform_handle_t_ : ur::native_cpu::handle_base { ur_device_handle_t_ TheDevice{this}; }; diff --git a/unified-runtime/source/adapters/native_cpu/ur_interface_loader.cpp b/unified-runtime/source/adapters/native_cpu/ur_interface_loader.cpp index 8543428b4f314..219d152288e02 100644 --- a/unified-runtime/source/adapters/native_cpu/ur_interface_loader.cpp +++ b/unified-runtime/source/adapters/native_cpu/ur_interface_loader.cpp @@ -8,6 +8,7 @@ // //===----------------------------------------------------------------------===// +#include "common.hpp" #include #include @@ -445,4 +446,40 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetAdapterProcAddrTable( return UR_RESULT_SUCCESS; } +UR_DLLEXPORT ur_result_t UR_APICALL urAllAddrTable(ur_api_version_t version, + ur_dditable_t *pDdiTable) 
{ + urGetGlobalProcAddrTable(version, &pDdiTable->Global); + urGetBindlessImagesExpProcAddrTable(version, &pDdiTable->BindlessImagesExp); + urGetCommandBufferExpProcAddrTable(version, &pDdiTable->CommandBufferExp); + urGetContextProcAddrTable(version, &pDdiTable->Context); + urGetEnqueueProcAddrTable(version, &pDdiTable->Enqueue); + urGetEnqueueExpProcAddrTable(version, &pDdiTable->EnqueueExp); + urGetEventProcAddrTable(version, &pDdiTable->Event); + urGetKernelProcAddrTable(version, &pDdiTable->Kernel); + urGetKernelExpProcAddrTable(version, &pDdiTable->KernelExp); + urGetMemProcAddrTable(version, &pDdiTable->Mem); + urGetPhysicalMemProcAddrTable(version, &pDdiTable->PhysicalMem); + urGetPlatformProcAddrTable(version, &pDdiTable->Platform); + urGetProgramProcAddrTable(version, &pDdiTable->Program); + urGetProgramExpProcAddrTable(version, &pDdiTable->ProgramExp); + urGetQueueProcAddrTable(version, &pDdiTable->Queue); + urGetSamplerProcAddrTable(version, &pDdiTable->Sampler); + urGetUSMProcAddrTable(version, &pDdiTable->USM); + urGetUSMExpProcAddrTable(version, &pDdiTable->USMExp); + urGetUsmP2PExpProcAddrTable(version, &pDdiTable->UsmP2PExp); + urGetVirtualMemProcAddrTable(version, &pDdiTable->VirtualMem); + urGetDeviceProcAddrTable(version, &pDdiTable->Device); + urGetAdapterProcAddrTable(version, &pDdiTable->Adapter); + + return UR_RESULT_SUCCESS; +} } // extern "C" + +const ur_dditable_t *ur::native_cpu::ddi_getter::value() { + static std::once_flag flag; + static ur_dditable_t table; + + std::call_once(flag, + []() { urAllAddrTable(UR_API_VERSION_CURRENT, &table); }); + return &table; +} diff --git a/unified-runtime/source/adapters/opencl/adapter.cpp b/unified-runtime/source/adapters/opencl/adapter.cpp index a687a7f597d9d..a7f0e3bcf9414 100644 --- a/unified-runtime/source/adapters/opencl/adapter.cpp +++ b/unified-runtime/source/adapters/opencl/adapter.cpp @@ -23,7 +23,7 @@ // it. 
static ur_adapter_handle_t liveAdapter = nullptr; -ur_adapter_handle_t_::ur_adapter_handle_t_() { +ur_adapter_handle_t_::ur_adapter_handle_t_() : handle_base() { #ifdef _MSC_VER // Loading OpenCL.dll increments the libraries internal reference count. diff --git a/unified-runtime/source/adapters/opencl/adapter.hpp b/unified-runtime/source/adapters/opencl/adapter.hpp index 546a803cdc538..939227321b396 100644 --- a/unified-runtime/source/adapters/opencl/adapter.hpp +++ b/unified-runtime/source/adapters/opencl/adapter.hpp @@ -17,7 +17,7 @@ #include "common.hpp" #include "logger/ur_logger.hpp" -struct ur_adapter_handle_t_ { +struct ur_adapter_handle_t_ : ur::opencl::handle_base { ur_adapter_handle_t_(); ~ur_adapter_handle_t_(); diff --git a/unified-runtime/source/adapters/opencl/command_buffer.hpp b/unified-runtime/source/adapters/opencl/command_buffer.hpp index 6fbdc5d259827..bc46d1910ed73 100644 --- a/unified-runtime/source/adapters/opencl/command_buffer.hpp +++ b/unified-runtime/source/adapters/opencl/command_buffer.hpp @@ -8,11 +8,12 @@ // //===----------------------------------------------------------------------===// +#include "common.hpp" #include #include /// Handle to a kernel command. -struct ur_exp_command_buffer_command_handle_t_ { +struct ur_exp_command_buffer_command_handle_t_ : ur::opencl::handle_base { /// Command-buffer this command belongs to. ur_exp_command_buffer_handle_t hCommandBuffer; /// OpenCL command-handle. @@ -28,13 +29,13 @@ struct ur_exp_command_buffer_command_handle_t_ { ur_exp_command_buffer_handle_t hCommandBuffer, cl_mutable_command_khr CLMutableCommand, ur_kernel_handle_t Kernel, cl_uint WorkDim, bool UserDefinedLocalSize) - : hCommandBuffer(hCommandBuffer), CLMutableCommand(CLMutableCommand), - Kernel(Kernel), WorkDim(WorkDim), + : handle_base(), hCommandBuffer(hCommandBuffer), + CLMutableCommand(CLMutableCommand), Kernel(Kernel), WorkDim(WorkDim), UserDefinedLocalSize(UserDefinedLocalSize) {} }; /// Handle to a command-buffer object. 
-struct ur_exp_command_buffer_handle_t_ { +struct ur_exp_command_buffer_handle_t_ : ur::opencl::handle_base { /// UR queue belonging to the command-buffer, required for OpenCL creation. ur_queue_handle_t hInternalQueue; /// Context the command-buffer is created for. @@ -61,9 +62,10 @@ struct ur_exp_command_buffer_handle_t_ { ur_device_handle_t hDevice, cl_command_buffer_khr CLCommandBuffer, bool IsUpdatable, bool IsInOrder) - : hInternalQueue(hQueue), hContext(hContext), hDevice(hDevice), - CLCommandBuffer(CLCommandBuffer), IsUpdatable(IsUpdatable), - IsInOrder(IsInOrder), IsFinalized(false), RefCount(0) {} + : handle_base(), hInternalQueue(hQueue), hContext(hContext), + hDevice(hDevice), CLCommandBuffer(CLCommandBuffer), + IsUpdatable(IsUpdatable), IsInOrder(IsInOrder), IsFinalized(false), + RefCount(0) {} ~ur_exp_command_buffer_handle_t_(); diff --git a/unified-runtime/source/adapters/opencl/common.hpp b/unified-runtime/source/adapters/opencl/common.hpp index 0be4851cc785e..7bb4b7fda74c2 100644 --- a/unified-runtime/source/adapters/opencl/common.hpp +++ b/unified-runtime/source/adapters/opencl/common.hpp @@ -158,6 +158,13 @@ extern thread_local char ErrorMessage[MaxMessageSize]; ur_result_t ErrorCode); } // namespace cl_adapter +namespace ur::opencl { +struct ddi_getter { + const static ur_dditable_t *value(); +}; +using handle_base = ur::handle_base; +} // namespace ur::opencl + namespace cl_ext { // Older versions of GCC don't like "const" here #if defined(__GNUC__) && (__GNUC__ < 7 || (__GNU__C == 7 && __GNUC_MINOR__ < 2)) diff --git a/unified-runtime/source/adapters/opencl/context.hpp b/unified-runtime/source/adapters/opencl/context.hpp index 75282305aa408..6d81abc0fe2ba 100644 --- a/unified-runtime/source/adapters/opencl/context.hpp +++ b/unified-runtime/source/adapters/opencl/context.hpp @@ -15,7 +15,7 @@ #include -struct ur_context_handle_t_ { +struct ur_context_handle_t_ : ur::opencl::handle_base { using native_type = cl_context; native_type CLContext; 
std::vector Devices; @@ -25,7 +25,7 @@ struct ur_context_handle_t_ { ur_context_handle_t_(native_type Ctx, uint32_t DevCount, const ur_device_handle_t *phDevices) - : CLContext(Ctx), DeviceCount(DevCount) { + : handle_base(), CLContext(Ctx), DeviceCount(DevCount) { for (uint32_t i = 0; i < DeviceCount; i++) { Devices.emplace_back(phDevices[i]); urDeviceRetain(phDevices[i]); diff --git a/unified-runtime/source/adapters/opencl/device.hpp b/unified-runtime/source/adapters/opencl/device.hpp index 470599c2eb33c..1c100535c6643 100644 --- a/unified-runtime/source/adapters/opencl/device.hpp +++ b/unified-runtime/source/adapters/opencl/device.hpp @@ -13,7 +13,7 @@ #include "device.hpp" #include "platform.hpp" -struct ur_device_handle_t_ { +struct ur_device_handle_t_ : ur::opencl::handle_base { using native_type = cl_device_id; native_type CLDevice; ur_platform_handle_t Platform; @@ -24,7 +24,7 @@ struct ur_device_handle_t_ { ur_device_handle_t_(native_type Dev, ur_platform_handle_t Plat, ur_device_handle_t Parent) - : CLDevice(Dev), Platform(Plat), ParentDevice(Parent) { + : handle_base(), CLDevice(Dev), Platform(Plat), ParentDevice(Parent) { RefCount = 1; if (Parent) { Type = Parent->Type; diff --git a/unified-runtime/source/adapters/opencl/event.hpp b/unified-runtime/source/adapters/opencl/event.hpp index 87d820a9b110a..0de02a3ef0266 100644 --- a/unified-runtime/source/adapters/opencl/event.hpp +++ b/unified-runtime/source/adapters/opencl/event.hpp @@ -14,7 +14,7 @@ #include -struct ur_event_handle_t_ { +struct ur_event_handle_t_ : ur::opencl::handle_base { using native_type = cl_event; native_type CLEvent; ur_context_handle_t Context; @@ -24,7 +24,7 @@ struct ur_event_handle_t_ { ur_event_handle_t_(native_type Event, ur_context_handle_t Ctx, ur_queue_handle_t Queue) - : CLEvent(Event), Context(Ctx), Queue(Queue) { + : handle_base(), CLEvent(Event), Context(Ctx), Queue(Queue) { RefCount = 1; urContextRetain(Context); if (Queue) { diff --git 
a/unified-runtime/source/adapters/opencl/kernel.hpp b/unified-runtime/source/adapters/opencl/kernel.hpp index 2b3c2dbe8464b..281ac6b5a3680 100644 --- a/unified-runtime/source/adapters/opencl/kernel.hpp +++ b/unified-runtime/source/adapters/opencl/kernel.hpp @@ -15,7 +15,7 @@ #include -struct ur_kernel_handle_t_ { +struct ur_kernel_handle_t_ : ur::opencl::handle_base { using native_type = cl_kernel; native_type CLKernel; ur_program_handle_t Program; @@ -25,7 +25,7 @@ struct ur_kernel_handle_t_ { ur_kernel_handle_t_(native_type Kernel, ur_program_handle_t Program, ur_context_handle_t Context) - : CLKernel(Kernel), Program(Program), Context(Context) { + : handle_base(), CLKernel(Kernel), Program(Program), Context(Context) { RefCount = 1; urProgramRetain(Program); urContextRetain(Context); diff --git a/unified-runtime/source/adapters/opencl/memory.hpp b/unified-runtime/source/adapters/opencl/memory.hpp index cb4493570a7cf..a0f8410e3df03 100644 --- a/unified-runtime/source/adapters/opencl/memory.hpp +++ b/unified-runtime/source/adapters/opencl/memory.hpp @@ -14,7 +14,7 @@ #include -struct ur_mem_handle_t_ { +struct ur_mem_handle_t_ : ur::opencl::handle_base { using native_type = cl_mem; native_type CLMemory; ur_context_handle_t Context; @@ -22,7 +22,7 @@ struct ur_mem_handle_t_ { bool IsNativeHandleOwned = true; ur_mem_handle_t_(native_type Mem, ur_context_handle_t Ctx) - : CLMemory(Mem), Context(Ctx) { + : handle_base(), CLMemory(Mem), Context(Ctx) { RefCount = 1; urContextRetain(Context); } diff --git a/unified-runtime/source/adapters/opencl/physical_mem.hpp b/unified-runtime/source/adapters/opencl/physical_mem.hpp index fc7020e59d0b6..f6a39fe6ab50f 100644 --- a/unified-runtime/source/adapters/opencl/physical_mem.hpp +++ b/unified-runtime/source/adapters/opencl/physical_mem.hpp @@ -9,8 +9,10 @@ //===----------------------------------------------------------------------===// #pragma once +#include "common.hpp" + /// UR queue mapping on physical memory allocations used 
in virtual memory /// management. /// TODO: Implement. /// -struct ur_physical_mem_handle_t_ {}; +struct ur_physical_mem_handle_t_ : ur::opencl::handle_base {}; diff --git a/unified-runtime/source/adapters/opencl/platform.hpp b/unified-runtime/source/adapters/opencl/platform.hpp index f8db01814e1f5..d82ff973de119 100644 --- a/unified-runtime/source/adapters/opencl/platform.hpp +++ b/unified-runtime/source/adapters/opencl/platform.hpp @@ -15,14 +15,14 @@ struct ur_device_handle_t_; -struct ur_platform_handle_t_ { +struct ur_platform_handle_t_ : ur::opencl::handle_base { using native_type = cl_platform_id; native_type CLPlatform = nullptr; std::vector> Devices; std::map SubDevices; std::mutex SubDevicesLock; - ur_platform_handle_t_(native_type Plat) : CLPlatform(Plat) {} + ur_platform_handle_t_(native_type Plat) : handle_base(), CLPlatform(Plat) {} ~ur_platform_handle_t_() { for (auto &Dev : Devices) { diff --git a/unified-runtime/source/adapters/opencl/program.hpp b/unified-runtime/source/adapters/opencl/program.hpp index c8857fb12f9cd..69b3430d2bc3a 100644 --- a/unified-runtime/source/adapters/opencl/program.hpp +++ b/unified-runtime/source/adapters/opencl/program.hpp @@ -14,7 +14,7 @@ #include -struct ur_program_handle_t_ { +struct ur_program_handle_t_ : ur::opencl::handle_base { using native_type = cl_program; native_type CLProgram; ur_context_handle_t Context; @@ -25,7 +25,7 @@ struct ur_program_handle_t_ { ur_program_handle_t_(native_type Prog, ur_context_handle_t Ctx, uint32_t NumDevices, ur_device_handle_t *Devs) - : CLProgram(Prog), Context(Ctx), NumDevices(NumDevices) { + : handle_base(), CLProgram(Prog), Context(Ctx), NumDevices(NumDevices) { RefCount = 1; urContextRetain(Context); for (uint32_t i = 0; i < NumDevices; i++) { diff --git a/unified-runtime/source/adapters/opencl/queue.hpp b/unified-runtime/source/adapters/opencl/queue.hpp index dbfa4a4d8bdb0..239be248a2f93 100644 --- a/unified-runtime/source/adapters/opencl/queue.hpp +++ 
b/unified-runtime/source/adapters/opencl/queue.hpp @@ -15,7 +15,7 @@ #include -struct ur_queue_handle_t_ { +struct ur_queue_handle_t_ : ur::opencl::handle_base { using native_type = cl_command_queue; native_type CLQueue; ur_context_handle_t Context; @@ -27,7 +27,7 @@ struct ur_queue_handle_t_ { ur_queue_handle_t_(native_type Queue, ur_context_handle_t Ctx, ur_device_handle_t Dev) - : CLQueue(Queue), Context(Ctx), Device(Dev) { + : handle_base(), CLQueue(Queue), Context(Ctx), Device(Dev) { RefCount = 1; urDeviceRetain(Device); urContextRetain(Context); diff --git a/unified-runtime/source/adapters/opencl/sampler.hpp b/unified-runtime/source/adapters/opencl/sampler.hpp index 844020ddbea67..b661fe195bcb2 100644 --- a/unified-runtime/source/adapters/opencl/sampler.hpp +++ b/unified-runtime/source/adapters/opencl/sampler.hpp @@ -13,7 +13,7 @@ #include -struct ur_sampler_handle_t_ { +struct ur_sampler_handle_t_ : ur::opencl::handle_base { using native_type = cl_sampler; native_type CLSampler; ur_context_handle_t Context; @@ -21,7 +21,7 @@ struct ur_sampler_handle_t_ { bool IsNativeHandleOwned = false; ur_sampler_handle_t_(native_type Sampler, ur_context_handle_t Ctx) - : CLSampler(Sampler), Context(Ctx) { + : handle_base(), CLSampler(Sampler), Context(Ctx) { RefCount = 1; urContextRetain(Context); } diff --git a/unified-runtime/source/adapters/opencl/ur_interface_loader.cpp b/unified-runtime/source/adapters/opencl/ur_interface_loader.cpp index 9ef88e97f6dcb..52a742e2781c9 100644 --- a/unified-runtime/source/adapters/opencl/ur_interface_loader.cpp +++ b/unified-runtime/source/adapters/opencl/ur_interface_loader.cpp @@ -8,6 +8,7 @@ // //===----------------------------------------------------------------------===// +#include "common.hpp" #include #include @@ -453,6 +454,43 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetAdapterProcAddrTable( return UR_RESULT_SUCCESS; } +UR_DLLEXPORT ur_result_t UR_APICALL urAllAddrTable(ur_api_version_t version, + ur_dditable_t *pDdiTable) { + 
urGetGlobalProcAddrTable(version, &pDdiTable->Global); + urGetBindlessImagesExpProcAddrTable(version, &pDdiTable->BindlessImagesExp); + urGetCommandBufferExpProcAddrTable(version, &pDdiTable->CommandBufferExp); + urGetContextProcAddrTable(version, &pDdiTable->Context); + urGetEnqueueProcAddrTable(version, &pDdiTable->Enqueue); + urGetEnqueueExpProcAddrTable(version, &pDdiTable->EnqueueExp); + urGetEventProcAddrTable(version, &pDdiTable->Event); + urGetKernelProcAddrTable(version, &pDdiTable->Kernel); + urGetKernelExpProcAddrTable(version, &pDdiTable->KernelExp); + urGetMemProcAddrTable(version, &pDdiTable->Mem); + urGetPhysicalMemProcAddrTable(version, &pDdiTable->PhysicalMem); + urGetPlatformProcAddrTable(version, &pDdiTable->Platform); + urGetProgramProcAddrTable(version, &pDdiTable->Program); + urGetProgramExpProcAddrTable(version, &pDdiTable->ProgramExp); + urGetQueueProcAddrTable(version, &pDdiTable->Queue); + urGetSamplerProcAddrTable(version, &pDdiTable->Sampler); + urGetUSMProcAddrTable(version, &pDdiTable->USM); + urGetUSMExpProcAddrTable(version, &pDdiTable->USMExp); + urGetUsmP2PExpProcAddrTable(version, &pDdiTable->UsmP2PExp); + urGetVirtualMemProcAddrTable(version, &pDdiTable->VirtualMem); + urGetDeviceProcAddrTable(version, &pDdiTable->Device); + urGetAdapterProcAddrTable(version, &pDdiTable->Adapter); + + return UR_RESULT_SUCCESS; +} + #if defined(__cplusplus) } // extern "C" #endif + +const ur_dditable_t *ur::opencl::ddi_getter::value() { + static std::once_flag flag; + static ur_dditable_t table; + + std::call_once(flag, + []() { urAllAddrTable(UR_API_VERSION_CURRENT, &table); }); + return &table; +} diff --git a/unified-runtime/source/common/ur_singleton.hpp b/unified-runtime/source/common/ur_singleton.hpp deleted file mode 100644 index 6cfdcb972fdf7..0000000000000 --- a/unified-runtime/source/common/ur_singleton.hpp +++ /dev/null @@ -1,104 +0,0 @@ -/* - * - * Copyright (C) 2022-2023 Intel Corporation - * - * Part of the Unified-Runtime Project, 
under the Apache License v2.0 with LLVM - * Exceptions. See LICENSE.TXT - * - * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - * - */ - -#ifndef UR_SINGLETON_H -#define UR_SINGLETON_H 1 - -#include -#include -#include - -////////////////////////////////////////////////////////////////////////// -/// a abstract factory for creation of singleton objects -template class singleton_factory_t { - struct entry_t { - std::unique_ptr ptr; - size_t ref_count; - }; - -protected: - using singleton_t = singleton_tn; - using key_t = typename std::conditional::value, - size_t, key_tn>::type; - - using ptr_t = std::unique_ptr; - using map_t = std::unordered_map; - - /// lock for thread-safety - std::mutex mut; - /// single instance of singleton for each unique key - map_t map; - ////////////////////////////////////////////////////////////////////////// - /// extract the key from parameter list and if necessary, convert type - template - key_t getKey(key_tn key, [[maybe_unused]] Ts &&...params) { - return reinterpret_cast(key); - } - -public: - ////////////////////////////////////////////////////////////////////////// - /// default ctor/dtor - singleton_factory_t() = default; - ~singleton_factory_t() = default; - - ////////////////////////////////////////////////////////////////////////// - /// gets a pointer to a unique instance of singleton - /// if no instance exists, then creates a new instance - /// the params are forwarded to the ctor of the singleton - /// the first parameter must be the unique identifier of the instance - template singleton_tn *getInstance(Ts &&...params) { - auto key = getKey(params...); - - if (key == 0) { // No zero keys allowed in map - return static_cast(0); - } - - std::lock_guard lk(mut); - auto iter = map.find(key); - - if (map.end() == iter) { - auto ptr = std::make_unique(std::forward(params)...); - iter = map.emplace(key, entry_t{std::move(ptr), 0}).first; - } else { - iter->second.ref_count++; - } - return iter->second.ptr.get(); - } 
- - void retain(key_tn key) { - std::lock_guard lk(mut); - auto iter = map.find(getKey(key)); - if (iter != map.end()) { - iter->second.ref_count++; - } - } - - ////////////////////////////////////////////////////////////////////////// - /// once the key is no longer valid, release the singleton - void release(key_tn key) { - std::lock_guard lk(mut); - auto iter = map.find(getKey(key)); - if (iter != map.end()) { - if (iter->second.ref_count == 0) { - map.erase(iter); - } else { - iter->second.ref_count--; - } - } - } - - void clear() { - std::lock_guard lk(mut); - map.clear(); - } -}; - -#endif /* UR_SINGLETON_H */ diff --git a/unified-runtime/source/loader/CMakeLists.txt b/unified-runtime/source/loader/CMakeLists.txt index cf72ac10029be..84f55fb2d82b6 100644 --- a/unified-runtime/source/loader/CMakeLists.txt +++ b/unified-runtime/source/loader/CMakeLists.txt @@ -104,11 +104,9 @@ endif() target_sources(ur_loader PRIVATE - ${CMAKE_CURRENT_SOURCE_DIR}/ur_object.hpp ${CMAKE_CURRENT_SOURCE_DIR}/ur_loader.hpp ${CMAKE_CURRENT_SOURCE_DIR}/ur_loader.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ur_ldrddi.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/ur_ldrddi.hpp ${CMAKE_CURRENT_SOURCE_DIR}/ur_libapi.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ur_libddi.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ur_lib.hpp diff --git a/unified-runtime/source/loader/ur_ldrddi.cpp b/unified-runtime/source/loader/ur_ldrddi.cpp index 23fa016df8c4a..8defbd54c6a77 100644 --- a/unified-runtime/source/loader/ur_ldrddi.cpp +++ b/unified-runtime/source/loader/ur_ldrddi.cpp @@ -31,25 +31,16 @@ __urdlllocal ur_result_t UR_APICALL urAdapterGet( ur_adapter_handle_t *phAdapters, /// [out][optional] returns the total number of adapters available. 
uint32_t *pNumAdapters) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto context = getContext(); size_t adapterIndex = 0; if (nullptr != phAdapters && NumEntries != 0) { for (auto &platform : context->platforms) { if (platform.initStatus != UR_RESULT_SUCCESS) continue; - platform.dditable.ur.Global.pfnAdapterGet(1, &phAdapters[adapterIndex], - nullptr); - try { - phAdapters[adapterIndex] = reinterpret_cast( - context->factories.ur_adapter_factory.getInstance( - phAdapters[adapterIndex], &platform.dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - break; - } + platform.dditable.Global.pfnAdapterGet(1, &phAdapters[adapterIndex], + nullptr); adapterIndex++; if (adapterIndex == NumEntries) { break; @@ -61,7 +52,7 @@ __urdlllocal ur_result_t UR_APICALL urAdapterGet( *pNumAdapters = static_cast(context->platforms.size()); } - return result; + return UR_RESULT_SUCCESS; } /////////////////////////////////////////////////////////////////////////////// @@ -69,26 +60,15 @@ __urdlllocal ur_result_t UR_APICALL urAdapterGet( __urdlllocal ur_result_t UR_APICALL urAdapterRelease( /// [in][release] Adapter handle to release ur_adapter_handle_t hAdapter) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hAdapter); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hAdapter)->dditable; - auto pfnAdapterRelease = dditable->ur.Global.pfnAdapterRelease; + auto *pfnAdapterRelease = dditable->Global.pfnAdapterRelease; if (nullptr == pfnAdapterRelease) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hAdapter = reinterpret_cast(hAdapter)->handle; - // forward to device-platform - result = pfnAdapterRelease(hAdapter); - - // release loader handle - context->factories.ur_adapter_factory.release(hAdapter); - - return result; + return 
pfnAdapterRelease(hAdapter); } /////////////////////////////////////////////////////////////////////////////// @@ -96,26 +76,15 @@ __urdlllocal ur_result_t UR_APICALL urAdapterRelease( __urdlllocal ur_result_t UR_APICALL urAdapterRetain( /// [in][retain] Adapter handle to retain ur_adapter_handle_t hAdapter) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hAdapter); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hAdapter)->dditable; - auto pfnAdapterRetain = dditable->ur.Global.pfnAdapterRetain; + auto *pfnAdapterRetain = dditable->Global.pfnAdapterRetain; if (nullptr == pfnAdapterRetain) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hAdapter = reinterpret_cast(hAdapter)->handle; - // forward to device-platform - result = pfnAdapterRetain(hAdapter); - - // increment refcount of handle - context->factories.ur_adapter_factory.retain(hAdapter); - - return result; + return pfnAdapterRetain(hAdapter); } /////////////////////////////////////////////////////////////////////////////// @@ -129,23 +98,15 @@ __urdlllocal ur_result_t UR_APICALL urAdapterGetLastError( /// [out] pointer to an integer where the adapter specific error code will /// be stored. 
int32_t *pError) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hAdapter); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hAdapter)->dditable; - auto pfnAdapterGetLastError = dditable->ur.Global.pfnAdapterGetLastError; + auto *pfnAdapterGetLastError = dditable->Global.pfnAdapterGetLastError; if (nullptr == pfnAdapterGetLastError) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hAdapter = reinterpret_cast(hAdapter)->handle; - // forward to device-platform - result = pfnAdapterGetLastError(hAdapter, ppMessage, pError); - - return result; + return pfnAdapterGetLastError(hAdapter, ppMessage, pError); } /////////////////////////////////////////////////////////////////////////////// @@ -166,24 +127,16 @@ __urdlllocal ur_result_t UR_APICALL urAdapterGetInfo( /// [out][optional] pointer to the actual number of bytes being queried by /// pPropValue. 
size_t *pPropSizeRet) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hAdapter); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hAdapter)->dditable; - auto pfnAdapterGetInfo = dditable->ur.Global.pfnAdapterGetInfo; + auto *pfnAdapterGetInfo = dditable->Global.pfnAdapterGetInfo; if (nullptr == pfnAdapterGetInfo) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hAdapter = reinterpret_cast(hAdapter)->handle; - // forward to device-platform - result = - pfnAdapterGetInfo(hAdapter, propName, propSize, pPropValue, pPropSizeRet); - - return result; + return pfnAdapterGetInfo(hAdapter, propName, propSize, pPropValue, + pPropSizeRet); } /////////////////////////////////////////////////////////////////////////////// @@ -197,23 +150,15 @@ __urdlllocal ur_result_t UR_APICALL urAdapterSetLoggerCallback( void *pUserData, /// [in] logging level ur_logger_level_t level) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hAdapter); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hAdapter)->dditable; - auto pfnSetLoggerCallback = dditable->ur.Adapter.pfnSetLoggerCallback; + auto *pfnSetLoggerCallback = dditable->Adapter.pfnSetLoggerCallback; if (nullptr == pfnSetLoggerCallback) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hAdapter = reinterpret_cast(hAdapter)->handle; - // forward to device-platform - result = pfnSetLoggerCallback(hAdapter, pfnLoggerCallback, pUserData, level); - - return result; + return pfnSetLoggerCallback(hAdapter, pfnLoggerCallback, pUserData, level); } /////////////////////////////////////////////////////////////////////////////// @@ -223,24 +168,15 @@ __urdlllocal ur_result_t UR_APICALL urAdapterSetLoggerCallbackLevel( ur_adapter_handle_t hAdapter, /// [in] 
logging level ur_logger_level_t level) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hAdapter); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hAdapter)->dditable; - auto pfnSetLoggerCallbackLevel = - dditable->ur.Adapter.pfnSetLoggerCallbackLevel; + auto *pfnSetLoggerCallbackLevel = dditable->Adapter.pfnSetLoggerCallbackLevel; if (nullptr == pfnSetLoggerCallbackLevel) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hAdapter = reinterpret_cast(hAdapter)->handle; - // forward to device-platform - result = pfnSetLoggerCallbackLevel(hAdapter, level); - - return result; + return pfnSetLoggerCallbackLevel(hAdapter, level); } /////////////////////////////////////////////////////////////////////////////// @@ -259,44 +195,15 @@ __urdlllocal ur_result_t UR_APICALL urPlatformGet( ur_platform_handle_t *phPlatforms, /// [out][optional] returns the total number of platforms available. 
uint32_t *pNumPlatforms) { - ur_result_t result = UR_RESULT_SUCCESS; - - [[maybe_unused]] auto context = getContext(); - // extract adapter's function pointer table - auto dditable = reinterpret_cast(hAdapter)->dditable; + auto *dditable = *reinterpret_cast(hAdapter); - uint32_t library_platform_handle_count = 0; - - result = dditable->ur.Platform.pfnGet(hAdapter, 0, nullptr, - &library_platform_handle_count); - if (UR_RESULT_SUCCESS != result) - return result; - - if (nullptr != phPlatforms && NumEntries != 0) { - if (library_platform_handle_count > NumEntries) { - library_platform_handle_count = NumEntries; - } - result = dditable->ur.Platform.pfnGet( - hAdapter, library_platform_handle_count, phPlatforms, nullptr); - if (UR_RESULT_SUCCESS != result) - return result; - - try { - for (uint32_t i = 0; i < library_platform_handle_count; ++i) { - phPlatforms[i] = reinterpret_cast( - context->factories.ur_platform_factory.getInstance(phPlatforms[i], - dditable)); - } - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - } - - if (UR_RESULT_SUCCESS == result && pNumPlatforms != nullptr) - *pNumPlatforms = library_platform_handle_count; + auto *pfnGet = dditable->Platform.pfnGet; + if (nullptr == pfnGet) + return UR_RESULT_ERROR_UNINITIALIZED; - return result; + // forward to device-platform + return pfnGet(hAdapter, NumEntries, phPlatforms, pNumPlatforms); } /////////////////////////////////////////////////////////////////////////////// @@ -317,54 +224,15 @@ __urdlllocal ur_result_t UR_APICALL urPlatformGetInfo( /// [out][optional] pointer to the actual number of bytes being queried by /// pPlatformInfo. 
size_t *pPropSizeRet) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hPlatform); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hPlatform)->dditable; - auto pfnGetInfo = dditable->ur.Platform.pfnGetInfo; + auto *pfnGetInfo = dditable->Platform.pfnGetInfo; if (nullptr == pfnGetInfo) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hPlatform = reinterpret_cast(hPlatform)->handle; - - // this value is needed for converting adapter handles to loader handles - size_t sizeret = 0; - if (pPropSizeRet == NULL) - pPropSizeRet = &sizeret; - - // forward to device-platform - result = pfnGetInfo(hPlatform, propName, propSize, pPropValue, pPropSizeRet); - - if (UR_RESULT_SUCCESS != result) - return result; - - try { - if (pPropValue != nullptr) { - switch (propName) { - case UR_PLATFORM_INFO_ADAPTER: { - ur_adapter_handle_t *handles = - reinterpret_cast(pPropValue); - size_t nelements = *pPropSizeRet / sizeof(ur_adapter_handle_t); - for (size_t i = 0; i < nelements; ++i) { - if (handles[i] != nullptr) { - handles[i] = reinterpret_cast( - context->factories.ur_adapter_factory.getInstance(handles[i], - dditable)); - } - } - } break; - default: { - } break; - } - } - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + // forward to device-platform + return pfnGetInfo(hPlatform, propName, propSize, pPropValue, pPropSizeRet); } /////////////////////////////////////////////////////////////////////////////// @@ -374,23 +242,15 @@ __urdlllocal ur_result_t UR_APICALL urPlatformGetApiVersion( ur_platform_handle_t hPlatform, /// [out] api version ur_api_version_t *pVersion) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hPlatform); - // extract platform's function pointer table - auto dditable = 
reinterpret_cast(hPlatform)->dditable; - auto pfnGetApiVersion = dditable->ur.Platform.pfnGetApiVersion; + auto *pfnGetApiVersion = dditable->Platform.pfnGetApiVersion; if (nullptr == pfnGetApiVersion) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hPlatform = reinterpret_cast(hPlatform)->handle; - // forward to device-platform - result = pfnGetApiVersion(hPlatform, pVersion); - - return result; + return pfnGetApiVersion(hPlatform, pVersion); } /////////////////////////////////////////////////////////////////////////////// @@ -400,26 +260,15 @@ __urdlllocal ur_result_t UR_APICALL urPlatformGetNativeHandle( ur_platform_handle_t hPlatform, /// [out] a pointer to the native handle of the platform. ur_native_handle_t *phNativePlatform) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hPlatform); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hPlatform)->dditable; - auto pfnGetNativeHandle = dditable->ur.Platform.pfnGetNativeHandle; + auto *pfnGetNativeHandle = dditable->Platform.pfnGetNativeHandle; if (nullptr == pfnGetNativeHandle) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hPlatform = reinterpret_cast(hPlatform)->handle; - // forward to device-platform - result = pfnGetNativeHandle(hPlatform, phNativePlatform); - - if (UR_RESULT_SUCCESS != result) - return result; - - return result; + return pfnGetNativeHandle(hPlatform, phNativePlatform); } /////////////////////////////////////////////////////////////////////////////// @@ -433,37 +282,17 @@ __urdlllocal ur_result_t UR_APICALL urPlatformCreateWithNativeHandle( const ur_platform_native_properties_t *pProperties, /// [out][alloc] pointer to the handle of the platform object created. 
ur_platform_handle_t *phPlatform) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hAdapter); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hAdapter)->dditable; - auto pfnCreateWithNativeHandle = - dditable->ur.Platform.pfnCreateWithNativeHandle; + auto *pfnCreateWithNativeHandle = + dditable->Platform.pfnCreateWithNativeHandle; if (nullptr == pfnCreateWithNativeHandle) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hAdapter = reinterpret_cast(hAdapter)->handle; - // forward to device-platform - result = pfnCreateWithNativeHandle(hNativePlatform, hAdapter, pProperties, - phPlatform); - - if (UR_RESULT_SUCCESS != result) - return result; - - try { - // convert platform handle to loader handle - *phPlatform = reinterpret_cast( - context->factories.ur_platform_factory.getInstance(*phPlatform, - dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + return pfnCreateWithNativeHandle(hNativePlatform, hAdapter, pProperties, + phPlatform); } /////////////////////////////////////////////////////////////////////////////// @@ -476,23 +305,15 @@ __urdlllocal ur_result_t UR_APICALL urPlatformGetBackendOption( /// [out] returns the correct platform specific compiler option based on /// the frontend option. 
const char **ppPlatformOption) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hPlatform); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hPlatform)->dditable; - auto pfnGetBackendOption = dditable->ur.Platform.pfnGetBackendOption; + auto *pfnGetBackendOption = dditable->Platform.pfnGetBackendOption; if (nullptr == pfnGetBackendOption) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hPlatform = reinterpret_cast(hPlatform)->handle; - // forward to device-platform - result = pfnGetBackendOption(hPlatform, pFrontendOption, ppPlatformOption); - - return result; + return pfnGetBackendOption(hPlatform, pFrontendOption, ppPlatformOption); } /////////////////////////////////////////////////////////////////////////////// @@ -514,36 +335,15 @@ __urdlllocal ur_result_t UR_APICALL urDeviceGet( /// [out][optional] pointer to the number of devices. /// pNumDevices will be updated with the total number of devices available. 
uint32_t *pNumDevices) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hPlatform); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hPlatform)->dditable; - auto pfnGet = dditable->ur.Device.pfnGet; + auto *pfnGet = dditable->Device.pfnGet; if (nullptr == pfnGet) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hPlatform = reinterpret_cast(hPlatform)->handle; - // forward to device-platform - result = pfnGet(hPlatform, DeviceType, NumEntries, phDevices, pNumDevices); - - if (UR_RESULT_SUCCESS != result) - return result; - - try { - // convert platform handles to loader handles - for (size_t i = 0; (nullptr != phDevices) && (i < NumEntries); ++i) - phDevices[i] = reinterpret_cast( - context->factories.ur_device_factory.getInstance(phDevices[i], - dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + return pfnGet(hPlatform, DeviceType, NumEntries, phDevices, pNumDevices); } /////////////////////////////////////////////////////////////////////////////// @@ -565,90 +365,15 @@ __urdlllocal ur_result_t UR_APICALL urDeviceGetInfo( /// [out][optional] pointer to the actual size in bytes of the queried /// propName. 
size_t *pPropSizeRet) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hDevice); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hDevice)->dditable; - auto pfnGetInfo = dditable->ur.Device.pfnGetInfo; + auto *pfnGetInfo = dditable->Device.pfnGetInfo; if (nullptr == pfnGetInfo) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hDevice = reinterpret_cast(hDevice)->handle; - - // this value is needed for converting adapter handles to loader handles - size_t sizeret = 0; - if (pPropSizeRet == NULL) - pPropSizeRet = &sizeret; - - // forward to device-platform - result = pfnGetInfo(hDevice, propName, propSize, pPropValue, pPropSizeRet); - - if (UR_RESULT_SUCCESS != result) - return result; - - try { - if (pPropValue != nullptr) { - switch (propName) { - case UR_DEVICE_INFO_PLATFORM: { - ur_platform_handle_t *handles = - reinterpret_cast(pPropValue); - size_t nelements = *pPropSizeRet / sizeof(ur_platform_handle_t); - for (size_t i = 0; i < nelements; ++i) { - if (handles[i] != nullptr) { - handles[i] = reinterpret_cast( - context->factories.ur_platform_factory.getInstance(handles[i], - dditable)); - } - } - } break; - case UR_DEVICE_INFO_PARENT_DEVICE: { - ur_device_handle_t *handles = - reinterpret_cast(pPropValue); - size_t nelements = *pPropSizeRet / sizeof(ur_device_handle_t); - for (size_t i = 0; i < nelements; ++i) { - if (handles[i] != nullptr) { - handles[i] = reinterpret_cast( - context->factories.ur_device_factory.getInstance(handles[i], - dditable)); - } - } - } break; - case UR_DEVICE_INFO_COMPONENT_DEVICES: { - ur_device_handle_t *handles = - reinterpret_cast(pPropValue); - size_t nelements = *pPropSizeRet / sizeof(ur_device_handle_t); - for (size_t i = 0; i < nelements; ++i) { - if (handles[i] != nullptr) { - handles[i] = reinterpret_cast( - context->factories.ur_device_factory.getInstance(handles[i], - 
dditable)); - } - } - } break; - case UR_DEVICE_INFO_COMPOSITE_DEVICE: { - ur_device_handle_t *handles = - reinterpret_cast(pPropValue); - size_t nelements = *pPropSizeRet / sizeof(ur_device_handle_t); - for (size_t i = 0; i < nelements; ++i) { - if (handles[i] != nullptr) { - handles[i] = reinterpret_cast( - context->factories.ur_device_factory.getInstance(handles[i], - dditable)); - } - } - } break; - default: { - } break; - } - } - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + // forward to device-platform + return pfnGetInfo(hDevice, propName, propSize, pPropValue, pPropSizeRet); } /////////////////////////////////////////////////////////////////////////////// @@ -656,26 +381,15 @@ __urdlllocal ur_result_t UR_APICALL urDeviceGetInfo( __urdlllocal ur_result_t UR_APICALL urDeviceRetain( /// [in][retain] handle of the device to get a reference of. ur_device_handle_t hDevice) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hDevice); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hDevice)->dditable; - auto pfnRetain = dditable->ur.Device.pfnRetain; + auto *pfnRetain = dditable->Device.pfnRetain; if (nullptr == pfnRetain) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hDevice = reinterpret_cast(hDevice)->handle; - // forward to device-platform - result = pfnRetain(hDevice); - - // increment refcount of handle - context->factories.ur_device_factory.retain(hDevice); - - return result; + return pfnRetain(hDevice); } /////////////////////////////////////////////////////////////////////////////// @@ -683,26 +397,15 @@ __urdlllocal ur_result_t UR_APICALL urDeviceRetain( __urdlllocal ur_result_t UR_APICALL urDeviceRelease( /// [in][release] handle of the device to release. 
ur_device_handle_t hDevice) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hDevice); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hDevice)->dditable; - auto pfnRelease = dditable->ur.Device.pfnRelease; + auto *pfnRelease = dditable->Device.pfnRelease; if (nullptr == pfnRelease) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hDevice = reinterpret_cast(hDevice)->handle; - // forward to device-platform - result = pfnRelease(hDevice); - - // release loader handle - context->factories.ur_device_factory.release(hDevice); - - return result; + return pfnRelease(hDevice); } /////////////////////////////////////////////////////////////////////////////// @@ -721,37 +424,16 @@ __urdlllocal ur_result_t UR_APICALL urDevicePartition( /// [out][optional] pointer to the number of sub-devices the device can be /// partitioned into according to the partitioning property. 
uint32_t *pNumDevicesRet) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hDevice); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hDevice)->dditable; - auto pfnPartition = dditable->ur.Device.pfnPartition; + auto *pfnPartition = dditable->Device.pfnPartition; if (nullptr == pfnPartition) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hDevice = reinterpret_cast(hDevice)->handle; - // forward to device-platform - result = pfnPartition(hDevice, pProperties, NumDevices, phSubDevices, - pNumDevicesRet); - - if (UR_RESULT_SUCCESS != result) - return result; - - try { - // convert platform handles to loader handles - for (size_t i = 0; (nullptr != phSubDevices) && (i < NumDevices); ++i) - phSubDevices[i] = reinterpret_cast( - context->factories.ur_device_factory.getInstance(phSubDevices[i], - dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + return pfnPartition(hDevice, pProperties, NumDevices, phSubDevices, + pNumDevicesRet); } /////////////////////////////////////////////////////////////////////////////// @@ -769,23 +451,15 @@ __urdlllocal ur_result_t UR_APICALL urDeviceSelectBinary( /// If a suitable binary was not found the function returns /// ::UR_RESULT_ERROR_INVALID_BINARY. 
uint32_t *pSelectedBinary) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hDevice); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hDevice)->dditable; - auto pfnSelectBinary = dditable->ur.Device.pfnSelectBinary; + auto *pfnSelectBinary = dditable->Device.pfnSelectBinary; if (nullptr == pfnSelectBinary) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hDevice = reinterpret_cast(hDevice)->handle; - // forward to device-platform - result = pfnSelectBinary(hDevice, pBinaries, NumBinaries, pSelectedBinary); - - return result; + return pfnSelectBinary(hDevice, pBinaries, NumBinaries, pSelectedBinary); } /////////////////////////////////////////////////////////////////////////////// @@ -795,26 +469,15 @@ __urdlllocal ur_result_t UR_APICALL urDeviceGetNativeHandle( ur_device_handle_t hDevice, /// [out] a pointer to the native handle of the device. 
ur_native_handle_t *phNativeDevice) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hDevice); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hDevice)->dditable; - auto pfnGetNativeHandle = dditable->ur.Device.pfnGetNativeHandle; + auto *pfnGetNativeHandle = dditable->Device.pfnGetNativeHandle; if (nullptr == pfnGetNativeHandle) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hDevice = reinterpret_cast(hDevice)->handle; - // forward to device-platform - result = pfnGetNativeHandle(hDevice, phNativeDevice); - - if (UR_RESULT_SUCCESS != result) - return result; - - return result; + return pfnGetNativeHandle(hDevice, phNativeDevice); } /////////////////////////////////////////////////////////////////////////////// @@ -828,36 +491,16 @@ __urdlllocal ur_result_t UR_APICALL urDeviceCreateWithNativeHandle( const ur_device_native_properties_t *pProperties, /// [out][alloc] pointer to the handle of the device object created. 
ur_device_handle_t *phDevice) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hAdapter); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hAdapter)->dditable; - auto pfnCreateWithNativeHandle = - dditable->ur.Device.pfnCreateWithNativeHandle; + auto *pfnCreateWithNativeHandle = dditable->Device.pfnCreateWithNativeHandle; if (nullptr == pfnCreateWithNativeHandle) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hAdapter = reinterpret_cast(hAdapter)->handle; - // forward to device-platform - result = - pfnCreateWithNativeHandle(hNativeDevice, hAdapter, pProperties, phDevice); - - if (UR_RESULT_SUCCESS != result) - return result; - - try { - // convert platform handle to loader handle - *phDevice = reinterpret_cast( - context->factories.ur_device_factory.getInstance(*phDevice, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + return pfnCreateWithNativeHandle(hNativeDevice, hAdapter, pProperties, + phDevice); } /////////////////////////////////////////////////////////////////////////////// @@ -871,23 +514,15 @@ __urdlllocal ur_result_t UR_APICALL urDeviceGetGlobalTimestamps( /// [out][optional] pointer to the Host's global timestamp that /// correlates with the Device's global timestamp value uint64_t *pHostTimestamp) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hDevice); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hDevice)->dditable; - auto pfnGetGlobalTimestamps = dditable->ur.Device.pfnGetGlobalTimestamps; + auto *pfnGetGlobalTimestamps = dditable->Device.pfnGetGlobalTimestamps; if (nullptr == pfnGetGlobalTimestamps) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hDevice = 
reinterpret_cast(hDevice)->handle; - // forward to device-platform - result = pfnGetGlobalTimestamps(hDevice, pDeviceTimestamp, pHostTimestamp); - - return result; + return pfnGetGlobalTimestamps(hDevice, pDeviceTimestamp, pHostTimestamp); } /////////////////////////////////////////////////////////////////////////////// @@ -901,39 +536,15 @@ __urdlllocal ur_result_t UR_APICALL urContextCreate( const ur_context_properties_t *pProperties, /// [out][alloc] pointer to handle of context object created ur_context_handle_t *phContext) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(phDevices[0]); - // extract platform's function pointer table - auto dditable = reinterpret_cast(*phDevices)->dditable; - auto pfnCreate = dditable->ur.Context.pfnCreate; + auto *pfnCreate = dditable->Context.pfnCreate; if (nullptr == pfnCreate) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handles to platform handles - auto phDevicesLocal = std::vector(DeviceCount); - for (size_t i = 0; i < DeviceCount; ++i) - phDevicesLocal[i] = - reinterpret_cast(phDevices[i])->handle; - // forward to device-platform - result = - pfnCreate(DeviceCount, phDevicesLocal.data(), pProperties, phContext); - - if (UR_RESULT_SUCCESS != result) - return result; - - try { - // convert platform handle to loader handle - *phContext = reinterpret_cast( - context->factories.ur_context_factory.getInstance(*phContext, - dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + return pfnCreate(DeviceCount, phDevices, pProperties, phContext); } /////////////////////////////////////////////////////////////////////////////// @@ -941,26 +552,15 @@ __urdlllocal ur_result_t UR_APICALL urContextCreate( __urdlllocal ur_result_t UR_APICALL urContextRetain( /// [in][retain] handle of the context to get a reference of. 
ur_context_handle_t hContext) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hContext); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnRetain = dditable->ur.Context.pfnRetain; + auto *pfnRetain = dditable->Context.pfnRetain; if (nullptr == pfnRetain) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - // forward to device-platform - result = pfnRetain(hContext); - - // increment refcount of handle - context->factories.ur_context_factory.retain(hContext); - - return result; + return pfnRetain(hContext); } /////////////////////////////////////////////////////////////////////////////// @@ -968,26 +568,15 @@ __urdlllocal ur_result_t UR_APICALL urContextRetain( __urdlllocal ur_result_t UR_APICALL urContextRelease( /// [in][release] handle of the context to release. ur_context_handle_t hContext) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hContext); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnRelease = dditable->ur.Context.pfnRelease; + auto *pfnRelease = dditable->Context.pfnRelease; if (nullptr == pfnRelease) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - // forward to device-platform - result = pfnRelease(hContext); - - // release loader handle - context->factories.ur_context_factory.release(hContext); - - return result; + return pfnRelease(hContext); } /////////////////////////////////////////////////////////////////////////////// @@ -1009,54 +598,15 @@ __urdlllocal ur_result_t UR_APICALL urContextGetInfo( /// [out][optional] pointer to the actual size in bytes of the queried /// propName. 
size_t *pPropSizeRet) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hContext); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnGetInfo = dditable->ur.Context.pfnGetInfo; + auto *pfnGetInfo = dditable->Context.pfnGetInfo; if (nullptr == pfnGetInfo) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - - // this value is needed for converting adapter handles to loader handles - size_t sizeret = 0; - if (pPropSizeRet == NULL) - pPropSizeRet = &sizeret; - - // forward to device-platform - result = pfnGetInfo(hContext, propName, propSize, pPropValue, pPropSizeRet); - - if (UR_RESULT_SUCCESS != result) - return result; - - try { - if (pPropValue != nullptr) { - switch (propName) { - case UR_CONTEXT_INFO_DEVICES: { - ur_device_handle_t *handles = - reinterpret_cast(pPropValue); - size_t nelements = *pPropSizeRet / sizeof(ur_device_handle_t); - for (size_t i = 0; i < nelements; ++i) { - if (handles[i] != nullptr) { - handles[i] = reinterpret_cast( - context->factories.ur_device_factory.getInstance(handles[i], - dditable)); - } - } - } break; - default: { - } break; - } - } - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + // forward to device-platform + return pfnGetInfo(hContext, propName, propSize, pPropValue, pPropSizeRet); } /////////////////////////////////////////////////////////////////////////////// @@ -1066,26 +616,15 @@ __urdlllocal ur_result_t UR_APICALL urContextGetNativeHandle( ur_context_handle_t hContext, /// [out] a pointer to the native handle of the context. 
ur_native_handle_t *phNativeContext) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hContext); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnGetNativeHandle = dditable->ur.Context.pfnGetNativeHandle; + auto *pfnGetNativeHandle = dditable->Context.pfnGetNativeHandle; if (nullptr == pfnGetNativeHandle) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - // forward to device-platform - result = pfnGetNativeHandle(hContext, phNativeContext); - - if (UR_RESULT_SUCCESS != result) - return result; - - return result; + return pfnGetNativeHandle(hContext, phNativeContext); } /////////////////////////////////////////////////////////////////////////////// @@ -1104,44 +643,16 @@ __urdlllocal ur_result_t UR_APICALL urContextCreateWithNativeHandle( const ur_context_native_properties_t *pProperties, /// [out][alloc] pointer to the handle of the context object created. 
ur_context_handle_t *phContext) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hAdapter); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hAdapter)->dditable; - auto pfnCreateWithNativeHandle = - dditable->ur.Context.pfnCreateWithNativeHandle; + auto *pfnCreateWithNativeHandle = dditable->Context.pfnCreateWithNativeHandle; if (nullptr == pfnCreateWithNativeHandle) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hAdapter = reinterpret_cast(hAdapter)->handle; - - // convert loader handles to platform handles - auto phDevicesLocal = std::vector(numDevices); - for (size_t i = 0; i < numDevices; ++i) - phDevicesLocal[i] = - reinterpret_cast(phDevices[i])->handle; - // forward to device-platform - result = - pfnCreateWithNativeHandle(hNativeContext, hAdapter, numDevices, - phDevicesLocal.data(), pProperties, phContext); - - if (UR_RESULT_SUCCESS != result) - return result; - - try { - // convert platform handle to loader handle - *phContext = reinterpret_cast( - context->factories.ur_context_factory.getInstance(*phContext, - dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + return pfnCreateWithNativeHandle(hNativeContext, hAdapter, numDevices, + phDevices, pProperties, phContext); } /////////////////////////////////////////////////////////////////////////////// @@ -1153,23 +664,15 @@ __urdlllocal ur_result_t UR_APICALL urContextSetExtendedDeleter( ur_context_extended_deleter_t pfnDeleter, /// [in][out][optional] pointer to data to be passed to callback. 
void *pUserData) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hContext); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnSetExtendedDeleter = dditable->ur.Context.pfnSetExtendedDeleter; + auto *pfnSetExtendedDeleter = dditable->Context.pfnSetExtendedDeleter; if (nullptr == pfnSetExtendedDeleter) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - // forward to device-platform - result = pfnSetExtendedDeleter(hContext, pfnDeleter, pUserData); - - return result; + return pfnSetExtendedDeleter(hContext, pfnDeleter, pUserData); } /////////////////////////////////////////////////////////////////////////////// @@ -1187,35 +690,16 @@ __urdlllocal ur_result_t UR_APICALL urMemImageCreate( void *pHost, /// [out][alloc] pointer to handle of image object created ur_mem_handle_t *phMem) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hContext); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnImageCreate = dditable->ur.Mem.pfnImageCreate; + auto *pfnImageCreate = dditable->Mem.pfnImageCreate; if (nullptr == pfnImageCreate) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - // forward to device-platform - result = - pfnImageCreate(hContext, flags, pImageFormat, pImageDesc, pHost, phMem); - - if (UR_RESULT_SUCCESS != result) - return result; - - try { - // convert platform handle to loader handle - *phMem = reinterpret_cast( - context->factories.ur_mem_factory.getInstance(*phMem, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + return pfnImageCreate(hContext, 
flags, pImageFormat, pImageDesc, pHost, + phMem); } /////////////////////////////////////////////////////////////////////////////// @@ -1231,34 +715,15 @@ __urdlllocal ur_result_t UR_APICALL urMemBufferCreate( const ur_buffer_properties_t *pProperties, /// [out][alloc] pointer to handle of the memory buffer created ur_mem_handle_t *phBuffer) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hContext); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnBufferCreate = dditable->ur.Mem.pfnBufferCreate; + auto *pfnBufferCreate = dditable->Mem.pfnBufferCreate; if (nullptr == pfnBufferCreate) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - // forward to device-platform - result = pfnBufferCreate(hContext, flags, size, pProperties, phBuffer); - - if (UR_RESULT_SUCCESS != result) - return result; - - try { - // convert platform handle to loader handle - *phBuffer = reinterpret_cast( - context->factories.ur_mem_factory.getInstance(*phBuffer, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + return pfnBufferCreate(hContext, flags, size, pProperties, phBuffer); } /////////////////////////////////////////////////////////////////////////////// @@ -1266,26 +731,15 @@ __urdlllocal ur_result_t UR_APICALL urMemBufferCreate( __urdlllocal ur_result_t UR_APICALL urMemRetain( /// [in][retain] handle of the memory object to get access ur_mem_handle_t hMem) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hMem); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hMem)->dditable; - auto pfnRetain = dditable->ur.Mem.pfnRetain; + auto *pfnRetain = dditable->Mem.pfnRetain; if (nullptr == 
pfnRetain) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hMem = reinterpret_cast(hMem)->handle; - // forward to device-platform - result = pfnRetain(hMem); - - // increment refcount of handle - context->factories.ur_mem_factory.retain(hMem); - - return result; + return pfnRetain(hMem); } /////////////////////////////////////////////////////////////////////////////// @@ -1293,26 +747,15 @@ __urdlllocal ur_result_t UR_APICALL urMemRetain( __urdlllocal ur_result_t UR_APICALL urMemRelease( /// [in][release] handle of the memory object to release ur_mem_handle_t hMem) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hMem); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hMem)->dditable; - auto pfnRelease = dditable->ur.Mem.pfnRelease; + auto *pfnRelease = dditable->Mem.pfnRelease; if (nullptr == pfnRelease) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hMem = reinterpret_cast(hMem)->handle; - // forward to device-platform - result = pfnRelease(hMem); - - // release loader handle - context->factories.ur_mem_factory.release(hMem); - - return result; + return pfnRelease(hMem); } /////////////////////////////////////////////////////////////////////////////// @@ -1328,34 +771,15 @@ __urdlllocal ur_result_t UR_APICALL urMemBufferPartition( const ur_buffer_region_t *pRegion, /// [out] pointer to the handle of sub buffer created ur_mem_handle_t *phMem) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hBuffer); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hBuffer)->dditable; - auto pfnBufferPartition = dditable->ur.Mem.pfnBufferPartition; + auto *pfnBufferPartition = dditable->Mem.pfnBufferPartition; if (nullptr == pfnBufferPartition) return UR_RESULT_ERROR_UNINITIALIZED; - 
// convert loader handle to platform handle - hBuffer = reinterpret_cast(hBuffer)->handle; - // forward to device-platform - result = pfnBufferPartition(hBuffer, flags, bufferCreateType, pRegion, phMem); - - if (UR_RESULT_SUCCESS != result) - return result; - - try { - // convert platform handle to loader handle - *phMem = reinterpret_cast( - context->factories.ur_mem_factory.getInstance(*phMem, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + return pfnBufferPartition(hBuffer, flags, bufferCreateType, pRegion, phMem); } /////////////////////////////////////////////////////////////////////////////// @@ -1368,30 +792,15 @@ __urdlllocal ur_result_t UR_APICALL urMemGetNativeHandle( ur_device_handle_t hDevice, /// [out] a pointer to the native handle of the mem. ur_native_handle_t *phNativeMem) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hMem); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hMem)->dditable; - auto pfnGetNativeHandle = dditable->ur.Mem.pfnGetNativeHandle; + auto *pfnGetNativeHandle = dditable->Mem.pfnGetNativeHandle; if (nullptr == pfnGetNativeHandle) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hMem = reinterpret_cast(hMem)->handle; - - // convert loader handle to platform handle - hDevice = (hDevice) ? 
reinterpret_cast(hDevice)->handle - : nullptr; - // forward to device-platform - result = pfnGetNativeHandle(hMem, hDevice, phNativeMem); - - if (UR_RESULT_SUCCESS != result) - return result; - - return result; + return pfnGetNativeHandle(hMem, hDevice, phNativeMem); } /////////////////////////////////////////////////////////////////////////////// @@ -1405,36 +814,17 @@ __urdlllocal ur_result_t UR_APICALL urMemBufferCreateWithNativeHandle( const ur_mem_native_properties_t *pProperties, /// [out][alloc] pointer to handle of buffer memory object created. ur_mem_handle_t *phMem) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hContext); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnBufferCreateWithNativeHandle = - dditable->ur.Mem.pfnBufferCreateWithNativeHandle; + auto *pfnBufferCreateWithNativeHandle = + dditable->Mem.pfnBufferCreateWithNativeHandle; if (nullptr == pfnBufferCreateWithNativeHandle) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - // forward to device-platform - result = - pfnBufferCreateWithNativeHandle(hNativeMem, hContext, pProperties, phMem); - - if (UR_RESULT_SUCCESS != result) - return result; - - try { - // convert platform handle to loader handle - *phMem = reinterpret_cast( - context->factories.ur_mem_factory.getInstance(*phMem, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + return pfnBufferCreateWithNativeHandle(hNativeMem, hContext, pProperties, + phMem); } /////////////////////////////////////////////////////////////////////////////// @@ -1452,36 +842,17 @@ __urdlllocal ur_result_t UR_APICALL urMemImageCreateWithNativeHandle( const ur_mem_native_properties_t *pProperties, /// [out][alloc pointer to handle of image memory object created. 
ur_mem_handle_t *phMem) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hContext); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnImageCreateWithNativeHandle = - dditable->ur.Mem.pfnImageCreateWithNativeHandle; + auto *pfnImageCreateWithNativeHandle = + dditable->Mem.pfnImageCreateWithNativeHandle; if (nullptr == pfnImageCreateWithNativeHandle) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - // forward to device-platform - result = pfnImageCreateWithNativeHandle(hNativeMem, hContext, pImageFormat, - pImageDesc, pProperties, phMem); - - if (UR_RESULT_SUCCESS != result) - return result; - - try { - // convert platform handle to loader handle - *phMem = reinterpret_cast( - context->factories.ur_mem_factory.getInstance(*phMem, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + return pfnImageCreateWithNativeHandle(hNativeMem, hContext, pImageFormat, + pImageDesc, pProperties, phMem); } /////////////////////////////////////////////////////////////////////////////// @@ -1502,54 +873,15 @@ __urdlllocal ur_result_t UR_APICALL urMemGetInfo( /// [out][optional] pointer to the actual size in bytes of the queried /// propName. 
size_t *pPropSizeRet) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hMemory); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hMemory)->dditable; - auto pfnGetInfo = dditable->ur.Mem.pfnGetInfo; + auto *pfnGetInfo = dditable->Mem.pfnGetInfo; if (nullptr == pfnGetInfo) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hMemory = reinterpret_cast(hMemory)->handle; - - // this value is needed for converting adapter handles to loader handles - size_t sizeret = 0; - if (pPropSizeRet == NULL) - pPropSizeRet = &sizeret; - - // forward to device-platform - result = pfnGetInfo(hMemory, propName, propSize, pPropValue, pPropSizeRet); - - if (UR_RESULT_SUCCESS != result) - return result; - - try { - if (pPropValue != nullptr) { - switch (propName) { - case UR_MEM_INFO_CONTEXT: { - ur_context_handle_t *handles = - reinterpret_cast(pPropValue); - size_t nelements = *pPropSizeRet / sizeof(ur_context_handle_t); - for (size_t i = 0; i < nelements; ++i) { - if (handles[i] != nullptr) { - handles[i] = reinterpret_cast( - context->factories.ur_context_factory.getInstance(handles[i], - dditable)); - } - } - } break; - default: { - } break; - } - } - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + // forward to device-platform + return pfnGetInfo(hMemory, propName, propSize, pPropValue, pPropSizeRet); } /////////////////////////////////////////////////////////////////////////////// @@ -1570,24 +902,15 @@ __urdlllocal ur_result_t UR_APICALL urMemImageGetInfo( /// [out][optional] pointer to the actual size in bytes of the queried /// propName. 
size_t *pPropSizeRet) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hMemory); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hMemory)->dditable; - auto pfnImageGetInfo = dditable->ur.Mem.pfnImageGetInfo; + auto *pfnImageGetInfo = dditable->Mem.pfnImageGetInfo; if (nullptr == pfnImageGetInfo) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hMemory = reinterpret_cast(hMemory)->handle; - // forward to device-platform - result = - pfnImageGetInfo(hMemory, propName, propSize, pPropValue, pPropSizeRet); - - return result; + return pfnImageGetInfo(hMemory, propName, propSize, pPropValue, pPropSizeRet); } /////////////////////////////////////////////////////////////////////////////// @@ -1599,35 +922,15 @@ __urdlllocal ur_result_t UR_APICALL urSamplerCreate( const ur_sampler_desc_t *pDesc, /// [out][alloc] pointer to handle of sampler object created ur_sampler_handle_t *phSampler) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hContext); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnCreate = dditable->ur.Sampler.pfnCreate; + auto *pfnCreate = dditable->Sampler.pfnCreate; if (nullptr == pfnCreate) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - // forward to device-platform - result = pfnCreate(hContext, pDesc, phSampler); - - if (UR_RESULT_SUCCESS != result) - return result; - - try { - // convert platform handle to loader handle - *phSampler = reinterpret_cast( - context->factories.ur_sampler_factory.getInstance(*phSampler, - dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + return pfnCreate(hContext, pDesc, phSampler); } 
/////////////////////////////////////////////////////////////////////////////// @@ -1635,26 +938,15 @@ __urdlllocal ur_result_t UR_APICALL urSamplerCreate( __urdlllocal ur_result_t UR_APICALL urSamplerRetain( /// [in][retain] handle of the sampler object to get access ur_sampler_handle_t hSampler) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hSampler); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hSampler)->dditable; - auto pfnRetain = dditable->ur.Sampler.pfnRetain; + auto *pfnRetain = dditable->Sampler.pfnRetain; if (nullptr == pfnRetain) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hSampler = reinterpret_cast(hSampler)->handle; - // forward to device-platform - result = pfnRetain(hSampler); - - // increment refcount of handle - context->factories.ur_sampler_factory.retain(hSampler); - - return result; + return pfnRetain(hSampler); } /////////////////////////////////////////////////////////////////////////////// @@ -1662,26 +954,15 @@ __urdlllocal ur_result_t UR_APICALL urSamplerRetain( __urdlllocal ur_result_t UR_APICALL urSamplerRelease( /// [in][release] handle of the sampler object to release ur_sampler_handle_t hSampler) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hSampler); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hSampler)->dditable; - auto pfnRelease = dditable->ur.Sampler.pfnRelease; + auto *pfnRelease = dditable->Sampler.pfnRelease; if (nullptr == pfnRelease) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hSampler = reinterpret_cast(hSampler)->handle; - // forward to device-platform - result = pfnRelease(hSampler); - - // release loader handle - context->factories.ur_sampler_factory.release(hSampler); - - return result; + return 
pfnRelease(hSampler); } /////////////////////////////////////////////////////////////////////////////// @@ -1698,54 +979,15 @@ __urdlllocal ur_result_t UR_APICALL urSamplerGetInfo( void *pPropValue, /// [out][optional] size in bytes returned in sampler property value size_t *pPropSizeRet) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hSampler); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hSampler)->dditable; - auto pfnGetInfo = dditable->ur.Sampler.pfnGetInfo; + auto *pfnGetInfo = dditable->Sampler.pfnGetInfo; if (nullptr == pfnGetInfo) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hSampler = reinterpret_cast(hSampler)->handle; - - // this value is needed for converting adapter handles to loader handles - size_t sizeret = 0; - if (pPropSizeRet == NULL) - pPropSizeRet = &sizeret; - - // forward to device-platform - result = pfnGetInfo(hSampler, propName, propSize, pPropValue, pPropSizeRet); - - if (UR_RESULT_SUCCESS != result) - return result; - - try { - if (pPropValue != nullptr) { - switch (propName) { - case UR_SAMPLER_INFO_CONTEXT: { - ur_context_handle_t *handles = - reinterpret_cast(pPropValue); - size_t nelements = *pPropSizeRet / sizeof(ur_context_handle_t); - for (size_t i = 0; i < nelements; ++i) { - if (handles[i] != nullptr) { - handles[i] = reinterpret_cast( - context->factories.ur_context_factory.getInstance(handles[i], - dditable)); - } - } - } break; - default: { - } break; - } - } - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + // forward to device-platform + return pfnGetInfo(hSampler, propName, propSize, pPropValue, pPropSizeRet); } /////////////////////////////////////////////////////////////////////////////// @@ -1755,26 +997,15 @@ __urdlllocal ur_result_t UR_APICALL urSamplerGetNativeHandle( ur_sampler_handle_t hSampler, /// [out] 
a pointer to the native handle of the sampler. ur_native_handle_t *phNativeSampler) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hSampler); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hSampler)->dditable; - auto pfnGetNativeHandle = dditable->ur.Sampler.pfnGetNativeHandle; + auto *pfnGetNativeHandle = dditable->Sampler.pfnGetNativeHandle; if (nullptr == pfnGetNativeHandle) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hSampler = reinterpret_cast(hSampler)->handle; - // forward to device-platform - result = pfnGetNativeHandle(hSampler, phNativeSampler); - - if (UR_RESULT_SUCCESS != result) - return result; - - return result; + return pfnGetNativeHandle(hSampler, phNativeSampler); } /////////////////////////////////////////////////////////////////////////////// @@ -1788,37 +1019,16 @@ __urdlllocal ur_result_t UR_APICALL urSamplerCreateWithNativeHandle( const ur_sampler_native_properties_t *pProperties, /// [out][alloc] pointer to the handle of the sampler object created. 
ur_sampler_handle_t *phSampler) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hContext); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnCreateWithNativeHandle = - dditable->ur.Sampler.pfnCreateWithNativeHandle; + auto *pfnCreateWithNativeHandle = dditable->Sampler.pfnCreateWithNativeHandle; if (nullptr == pfnCreateWithNativeHandle) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - - // forward to device-platform - result = pfnCreateWithNativeHandle(hNativeSampler, hContext, pProperties, - phSampler); - - if (UR_RESULT_SUCCESS != result) - return result; - - try { - // convert platform handle to loader handle - *phSampler = reinterpret_cast( - context->factories.ur_sampler_factory.getInstance(*phSampler, - dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + // forward to device-platform + return pfnCreateWithNativeHandle(hNativeSampler, hContext, pProperties, + phSampler); } /////////////////////////////////////////////////////////////////////////////// @@ -1834,27 +1044,15 @@ __urdlllocal ur_result_t UR_APICALL urUSMHostAlloc( size_t size, /// [out] pointer to USM host memory object void **ppMem) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hContext); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnHostAlloc = dditable->ur.USM.pfnHostAlloc; + auto *pfnHostAlloc = dditable->USM.pfnHostAlloc; if (nullptr == pfnHostAlloc) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - - // convert loader handle to platform handle - pool = - (pool) ? 
reinterpret_cast(pool)->handle : nullptr; - // forward to device-platform - result = pfnHostAlloc(hContext, pUSMDesc, pool, size, ppMem); - - return result; + return pfnHostAlloc(hContext, pUSMDesc, pool, size, ppMem); } /////////////////////////////////////////////////////////////////////////////// @@ -1872,30 +1070,15 @@ __urdlllocal ur_result_t UR_APICALL urUSMDeviceAlloc( size_t size, /// [out] pointer to USM device memory object void **ppMem) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hContext); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnDeviceAlloc = dditable->ur.USM.pfnDeviceAlloc; + auto *pfnDeviceAlloc = dditable->USM.pfnDeviceAlloc; if (nullptr == pfnDeviceAlloc) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - - // convert loader handle to platform handle - hDevice = reinterpret_cast(hDevice)->handle; - - // convert loader handle to platform handle - pool = - (pool) ? 
reinterpret_cast(pool)->handle : nullptr; - // forward to device-platform - result = pfnDeviceAlloc(hContext, hDevice, pUSMDesc, pool, size, ppMem); - - return result; + return pfnDeviceAlloc(hContext, hDevice, pUSMDesc, pool, size, ppMem); } /////////////////////////////////////////////////////////////////////////////// @@ -1913,30 +1096,15 @@ __urdlllocal ur_result_t UR_APICALL urUSMSharedAlloc( size_t size, /// [out] pointer to USM shared memory object void **ppMem) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hContext); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnSharedAlloc = dditable->ur.USM.pfnSharedAlloc; + auto *pfnSharedAlloc = dditable->USM.pfnSharedAlloc; if (nullptr == pfnSharedAlloc) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - - // convert loader handle to platform handle - hDevice = reinterpret_cast(hDevice)->handle; - - // convert loader handle to platform handle - pool = - (pool) ? 
reinterpret_cast(pool)->handle : nullptr; - // forward to device-platform - result = pfnSharedAlloc(hContext, hDevice, pUSMDesc, pool, size, ppMem); - - return result; + return pfnSharedAlloc(hContext, hDevice, pUSMDesc, pool, size, ppMem); } /////////////////////////////////////////////////////////////////////////////// @@ -1946,23 +1114,15 @@ __urdlllocal ur_result_t UR_APICALL urUSMFree( ur_context_handle_t hContext, /// [in] pointer to USM memory object void *pMem) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hContext); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnFree = dditable->ur.USM.pfnFree; + auto *pfnFree = dditable->USM.pfnFree; if (nullptr == pfnFree) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - // forward to device-platform - result = pfnFree(hContext, pMem); - - return result; + return pfnFree(hContext, pMem); } /////////////////////////////////////////////////////////////////////////////// @@ -1981,67 +1141,16 @@ __urdlllocal ur_result_t UR_APICALL urUSMGetMemAllocInfo( void *pPropValue, /// [out][optional] bytes returned in USM allocation property size_t *pPropSizeRet) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hContext); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnGetMemAllocInfo = dditable->ur.USM.pfnGetMemAllocInfo; + auto *pfnGetMemAllocInfo = dditable->USM.pfnGetMemAllocInfo; if (nullptr == pfnGetMemAllocInfo) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - - // this value is needed for converting adapter handles to loader handles - size_t sizeret = 0; - if 
(pPropSizeRet == NULL) - pPropSizeRet = &sizeret; - - // forward to device-platform - result = pfnGetMemAllocInfo(hContext, pMem, propName, propSize, pPropValue, - pPropSizeRet); - - if (UR_RESULT_SUCCESS != result) - return result; - - try { - if (pPropValue != nullptr) { - switch (propName) { - case UR_USM_ALLOC_INFO_DEVICE: { - ur_device_handle_t *handles = - reinterpret_cast(pPropValue); - size_t nelements = *pPropSizeRet / sizeof(ur_device_handle_t); - for (size_t i = 0; i < nelements; ++i) { - if (handles[i] != nullptr) { - handles[i] = reinterpret_cast( - context->factories.ur_device_factory.getInstance(handles[i], - dditable)); - } - } - } break; - case UR_USM_ALLOC_INFO_POOL: { - ur_usm_pool_handle_t *handles = - reinterpret_cast(pPropValue); - size_t nelements = *pPropSizeRet / sizeof(ur_usm_pool_handle_t); - for (size_t i = 0; i < nelements; ++i) { - if (handles[i] != nullptr) { - handles[i] = reinterpret_cast( - context->factories.ur_usm_pool_factory.getInstance(handles[i], - dditable)); - } - } - } break; - default: { - } break; - } - } - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + // forward to device-platform + return pfnGetMemAllocInfo(hContext, pMem, propName, propSize, pPropValue, + pPropSizeRet); } /////////////////////////////////////////////////////////////////////////////// @@ -2054,34 +1163,15 @@ __urdlllocal ur_result_t UR_APICALL urUSMPoolCreate( ur_usm_pool_desc_t *pPoolDesc, /// [out][alloc] pointer to USM memory pool ur_usm_pool_handle_t *ppPool) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hContext); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnPoolCreate = dditable->ur.USM.pfnPoolCreate; + auto *pfnPoolCreate = dditable->USM.pfnPoolCreate; if (nullptr == pfnPoolCreate) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader 
handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - // forward to device-platform - result = pfnPoolCreate(hContext, pPoolDesc, ppPool); - - if (UR_RESULT_SUCCESS != result) - return result; - - try { - // convert platform handle to loader handle - *ppPool = reinterpret_cast( - context->factories.ur_usm_pool_factory.getInstance(*ppPool, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + return pfnPoolCreate(hContext, pPoolDesc, ppPool); } /////////////////////////////////////////////////////////////////////////////// @@ -2089,26 +1179,15 @@ __urdlllocal ur_result_t UR_APICALL urUSMPoolCreate( __urdlllocal ur_result_t UR_APICALL urUSMPoolRetain( /// [in][retain] pointer to USM memory pool ur_usm_pool_handle_t pPool) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(pPool); - // extract platform's function pointer table - auto dditable = reinterpret_cast(pPool)->dditable; - auto pfnPoolRetain = dditable->ur.USM.pfnPoolRetain; + auto *pfnPoolRetain = dditable->USM.pfnPoolRetain; if (nullptr == pfnPoolRetain) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - pPool = reinterpret_cast(pPool)->handle; - // forward to device-platform - result = pfnPoolRetain(pPool); - - // increment refcount of handle - context->factories.ur_usm_pool_factory.retain(pPool); - - return result; + return pfnPoolRetain(pPool); } /////////////////////////////////////////////////////////////////////////////// @@ -2116,26 +1195,15 @@ __urdlllocal ur_result_t UR_APICALL urUSMPoolRetain( __urdlllocal ur_result_t UR_APICALL urUSMPoolRelease( /// [in][release] pointer to USM memory pool ur_usm_pool_handle_t pPool) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(pPool); - // extract platform's function pointer table - auto 
dditable = reinterpret_cast(pPool)->dditable; - auto pfnPoolRelease = dditable->ur.USM.pfnPoolRelease; + auto *pfnPoolRelease = dditable->USM.pfnPoolRelease; if (nullptr == pfnPoolRelease) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - pPool = reinterpret_cast(pPool)->handle; - // forward to device-platform - result = pfnPoolRelease(pPool); - - // release loader handle - context->factories.ur_usm_pool_factory.release(pPool); - - return result; + return pfnPoolRelease(pPool); } /////////////////////////////////////////////////////////////////////////////// @@ -2152,54 +1220,15 @@ __urdlllocal ur_result_t UR_APICALL urUSMPoolGetInfo( void *pPropValue, /// [out][optional] size in bytes returned in pool property value size_t *pPropSizeRet) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hPool); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hPool)->dditable; - auto pfnPoolGetInfo = dditable->ur.USM.pfnPoolGetInfo; + auto *pfnPoolGetInfo = dditable->USM.pfnPoolGetInfo; if (nullptr == pfnPoolGetInfo) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hPool = reinterpret_cast(hPool)->handle; - - // this value is needed for converting adapter handles to loader handles - size_t sizeret = 0; - if (pPropSizeRet == NULL) - pPropSizeRet = &sizeret; - - // forward to device-platform - result = pfnPoolGetInfo(hPool, propName, propSize, pPropValue, pPropSizeRet); - - if (UR_RESULT_SUCCESS != result) - return result; - - try { - if (pPropValue != nullptr) { - switch (propName) { - case UR_USM_POOL_INFO_CONTEXT: { - ur_context_handle_t *handles = - reinterpret_cast(pPropValue); - size_t nelements = *pPropSizeRet / sizeof(ur_context_handle_t); - for (size_t i = 0; i < nelements; ++i) { - if (handles[i] != nullptr) { - handles[i] = reinterpret_cast( - 
context->factories.ur_context_factory.getInstance(handles[i], - dditable)); - } - } - } break; - default: { - } break; - } - } - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + // forward to device-platform + return pfnPoolGetInfo(hPool, propName, propSize, pPropValue, pPropSizeRet); } /////////////////////////////////////////////////////////////////////////////// @@ -2223,28 +1252,16 @@ __urdlllocal ur_result_t UR_APICALL urVirtualMemGranularityGetInfo( /// [out][optional] pointer to the actual size in bytes of the queried /// propName." size_t *pPropSizeRet) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hContext); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnGranularityGetInfo = dditable->ur.VirtualMem.pfnGranularityGetInfo; + auto *pfnGranularityGetInfo = dditable->VirtualMem.pfnGranularityGetInfo; if (nullptr == pfnGranularityGetInfo) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - - // convert loader handle to platform handle - hDevice = (hDevice) ? reinterpret_cast(hDevice)->handle - : nullptr; - // forward to device-platform - result = pfnGranularityGetInfo(hContext, hDevice, propName, propSize, - pPropValue, pPropSizeRet); - - return result; + return pfnGranularityGetInfo(hContext, hDevice, propName, propSize, + pPropValue, pPropSizeRet); } /////////////////////////////////////////////////////////////////////////////// @@ -2261,23 +1278,15 @@ __urdlllocal ur_result_t UR_APICALL urVirtualMemReserve( /// [out] pointer to the returned address at the start of reserved virtual /// memory range. 
void **ppStart) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hContext); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnReserve = dditable->ur.VirtualMem.pfnReserve; + auto *pfnReserve = dditable->VirtualMem.pfnReserve; if (nullptr == pfnReserve) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - // forward to device-platform - result = pfnReserve(hContext, pStart, size, ppStart); - - return result; + return pfnReserve(hContext, pStart, size, ppStart); } /////////////////////////////////////////////////////////////////////////////// @@ -2289,23 +1298,15 @@ __urdlllocal ur_result_t UR_APICALL urVirtualMemFree( const void *pStart, /// [in] size in bytes of the virtual memory range to free. size_t size) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hContext); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnFree = dditable->ur.VirtualMem.pfnFree; + auto *pfnFree = dditable->VirtualMem.pfnFree; if (nullptr == pfnFree) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - // forward to device-platform - result = pfnFree(hContext, pStart, size); - - return result; + return pfnFree(hContext, pStart, size); } /////////////////////////////////////////////////////////////////////////////// @@ -2323,27 +1324,15 @@ __urdlllocal ur_result_t UR_APICALL urVirtualMemMap( size_t offset, /// [in] access flags for the physical memory mapping. 
ur_virtual_mem_access_flags_t flags) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hContext); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnMap = dditable->ur.VirtualMem.pfnMap; + auto *pfnMap = dditable->VirtualMem.pfnMap; if (nullptr == pfnMap) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - - // convert loader handle to platform handle - hPhysicalMem = - reinterpret_cast(hPhysicalMem)->handle; - // forward to device-platform - result = pfnMap(hContext, pStart, size, hPhysicalMem, offset, flags); - - return result; + return pfnMap(hContext, pStart, size, hPhysicalMem, offset, flags); } /////////////////////////////////////////////////////////////////////////////// @@ -2355,23 +1344,15 @@ __urdlllocal ur_result_t UR_APICALL urVirtualMemUnmap( const void *pStart, /// [in] size in bytes of the virtual memory range. size_t size) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hContext); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnUnmap = dditable->ur.VirtualMem.pfnUnmap; + auto *pfnUnmap = dditable->VirtualMem.pfnUnmap; if (nullptr == pfnUnmap) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - // forward to device-platform - result = pfnUnmap(hContext, pStart, size); - - return result; + return pfnUnmap(hContext, pStart, size); } /////////////////////////////////////////////////////////////////////////////// @@ -2385,23 +1366,15 @@ __urdlllocal ur_result_t UR_APICALL urVirtualMemSetAccess( size_t size, /// [in] access flags to set for the mapped virtual memory range. 
ur_virtual_mem_access_flags_t flags) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hContext); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnSetAccess = dditable->ur.VirtualMem.pfnSetAccess; + auto *pfnSetAccess = dditable->VirtualMem.pfnSetAccess; if (nullptr == pfnSetAccess) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - // forward to device-platform - result = pfnSetAccess(hContext, pStart, size, flags); - - return result; + return pfnSetAccess(hContext, pStart, size, flags); } /////////////////////////////////////////////////////////////////////////////// @@ -2425,24 +1398,16 @@ __urdlllocal ur_result_t UR_APICALL urVirtualMemGetInfo( /// [out][optional] pointer to the actual size in bytes of the queried /// propName." size_t *pPropSizeRet) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hContext); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnGetInfo = dditable->ur.VirtualMem.pfnGetInfo; + auto *pfnGetInfo = dditable->VirtualMem.pfnGetInfo; if (nullptr == pfnGetInfo) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - // forward to device-platform - result = pfnGetInfo(hContext, pStart, size, propName, propSize, pPropValue, - pPropSizeRet); - - return result; + return pfnGetInfo(hContext, pStart, size, propName, propSize, pPropValue, + pPropSizeRet); } /////////////////////////////////////////////////////////////////////////////// @@ -2459,38 +1424,15 @@ __urdlllocal ur_result_t UR_APICALL urPhysicalMemCreate( const ur_physical_mem_properties_t *pProperties, /// [out][alloc] pointer to handle of 
physical memory object created. ur_physical_mem_handle_t *phPhysicalMem) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hContext); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnCreate = dditable->ur.PhysicalMem.pfnCreate; + auto *pfnCreate = dditable->PhysicalMem.pfnCreate; if (nullptr == pfnCreate) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - - // convert loader handle to platform handle - hDevice = reinterpret_cast(hDevice)->handle; - // forward to device-platform - result = pfnCreate(hContext, hDevice, size, pProperties, phPhysicalMem); - - if (UR_RESULT_SUCCESS != result) - return result; - - try { - // convert platform handle to loader handle - *phPhysicalMem = reinterpret_cast( - context->factories.ur_physical_mem_factory.getInstance(*phPhysicalMem, - dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + return pfnCreate(hContext, hDevice, size, pProperties, phPhysicalMem); } /////////////////////////////////////////////////////////////////////////////// @@ -2498,28 +1440,15 @@ __urdlllocal ur_result_t UR_APICALL urPhysicalMemCreate( __urdlllocal ur_result_t UR_APICALL urPhysicalMemRetain( /// [in][retain] handle of the physical memory object to retain. 
ur_physical_mem_handle_t hPhysicalMem) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hPhysicalMem); - // extract platform's function pointer table - auto dditable = - reinterpret_cast(hPhysicalMem)->dditable; - auto pfnRetain = dditable->ur.PhysicalMem.pfnRetain; + auto *pfnRetain = dditable->PhysicalMem.pfnRetain; if (nullptr == pfnRetain) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hPhysicalMem = - reinterpret_cast(hPhysicalMem)->handle; - // forward to device-platform - result = pfnRetain(hPhysicalMem); - - // increment refcount of handle - context->factories.ur_physical_mem_factory.retain(hPhysicalMem); - - return result; + return pfnRetain(hPhysicalMem); } /////////////////////////////////////////////////////////////////////////////// @@ -2527,28 +1456,15 @@ __urdlllocal ur_result_t UR_APICALL urPhysicalMemRetain( __urdlllocal ur_result_t UR_APICALL urPhysicalMemRelease( /// [in][release] handle of the physical memory object to release. 
ur_physical_mem_handle_t hPhysicalMem) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hPhysicalMem); - // extract platform's function pointer table - auto dditable = - reinterpret_cast(hPhysicalMem)->dditable; - auto pfnRelease = dditable->ur.PhysicalMem.pfnRelease; + auto *pfnRelease = dditable->PhysicalMem.pfnRelease; if (nullptr == pfnRelease) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hPhysicalMem = - reinterpret_cast(hPhysicalMem)->handle; - // forward to device-platform - result = pfnRelease(hPhysicalMem); - - // release loader handle - context->factories.ur_physical_mem_factory.release(hPhysicalMem); - - return result; + return pfnRelease(hPhysicalMem); } /////////////////////////////////////////////////////////////////////////////// @@ -2568,69 +1484,15 @@ __urdlllocal ur_result_t UR_APICALL urPhysicalMemGetInfo( /// [out][optional] pointer to the actual size in bytes of the queried /// propName." 
size_t *pPropSizeRet) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hPhysicalMem); - // extract platform's function pointer table - auto dditable = - reinterpret_cast(hPhysicalMem)->dditable; - auto pfnGetInfo = dditable->ur.PhysicalMem.pfnGetInfo; + auto *pfnGetInfo = dditable->PhysicalMem.pfnGetInfo; if (nullptr == pfnGetInfo) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hPhysicalMem = - reinterpret_cast(hPhysicalMem)->handle; - - // this value is needed for converting adapter handles to loader handles - size_t sizeret = 0; - if (pPropSizeRet == NULL) - pPropSizeRet = &sizeret; - - // forward to device-platform - result = - pfnGetInfo(hPhysicalMem, propName, propSize, pPropValue, pPropSizeRet); - - if (UR_RESULT_SUCCESS != result) - return result; - - try { - if (pPropValue != nullptr) { - switch (propName) { - case UR_PHYSICAL_MEM_INFO_CONTEXT: { - ur_context_handle_t *handles = - reinterpret_cast(pPropValue); - size_t nelements = *pPropSizeRet / sizeof(ur_context_handle_t); - for (size_t i = 0; i < nelements; ++i) { - if (handles[i] != nullptr) { - handles[i] = reinterpret_cast( - context->factories.ur_context_factory.getInstance(handles[i], - dditable)); - } - } - } break; - case UR_PHYSICAL_MEM_INFO_DEVICE: { - ur_device_handle_t *handles = - reinterpret_cast(pPropValue); - size_t nelements = *pPropSizeRet / sizeof(ur_device_handle_t); - for (size_t i = 0; i < nelements; ++i) { - if (handles[i] != nullptr) { - handles[i] = reinterpret_cast( - context->factories.ur_device_factory.getInstance(handles[i], - dditable)); - } - } - } break; - default: { - } break; - } - } - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + // forward to device-platform + return pfnGetInfo(hPhysicalMem, propName, propSize, pPropValue, pPropSizeRet); } 
/////////////////////////////////////////////////////////////////////////////// @@ -2646,35 +1508,15 @@ __urdlllocal ur_result_t UR_APICALL urProgramCreateWithIL( const ur_program_properties_t *pProperties, /// [out][alloc] pointer to handle of program object created. ur_program_handle_t *phProgram) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hContext); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnCreateWithIL = dditable->ur.Program.pfnCreateWithIL; + auto *pfnCreateWithIL = dditable->Program.pfnCreateWithIL; if (nullptr == pfnCreateWithIL) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - // forward to device-platform - result = pfnCreateWithIL(hContext, pIL, length, pProperties, phProgram); - - if (UR_RESULT_SUCCESS != result) - return result; - - try { - // convert platform handle to loader handle - *phProgram = reinterpret_cast( - context->factories.ur_program_factory.getInstance(*phProgram, - dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + return pfnCreateWithIL(hContext, pIL, length, pProperties, phProgram); } /////////////////////////////////////////////////////////////////////////////// @@ -2697,42 +1539,16 @@ __urdlllocal ur_result_t UR_APICALL urProgramCreateWithBinary( const ur_program_properties_t *pProperties, /// [out][alloc] pointer to handle of Program object created. 
ur_program_handle_t *phProgram) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hContext); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnCreateWithBinary = dditable->ur.Program.pfnCreateWithBinary; + auto *pfnCreateWithBinary = dditable->Program.pfnCreateWithBinary; if (nullptr == pfnCreateWithBinary) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - - // convert loader handles to platform handles - auto phDevicesLocal = std::vector(numDevices); - for (size_t i = 0; i < numDevices; ++i) - phDevicesLocal[i] = - reinterpret_cast(phDevices[i])->handle; - // forward to device-platform - result = pfnCreateWithBinary(hContext, numDevices, phDevicesLocal.data(), - pLengths, ppBinaries, pProperties, phProgram); - - if (UR_RESULT_SUCCESS != result) - return result; - - try { - // convert platform handle to loader handle - *phProgram = reinterpret_cast( - context->factories.ur_program_factory.getInstance(*phProgram, - dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + return pfnCreateWithBinary(hContext, numDevices, phDevices, pLengths, + ppBinaries, pProperties, phProgram); } /////////////////////////////////////////////////////////////////////////////// @@ -2744,26 +1560,15 @@ __urdlllocal ur_result_t UR_APICALL urProgramBuild( ur_program_handle_t hProgram, /// [in][optional] pointer to build options null-terminated string. 
const char *pOptions) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hContext); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnBuild = dditable->ur.Program.pfnBuild; + auto *pfnBuild = dditable->Program.pfnBuild; if (nullptr == pfnBuild) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - - // convert loader handle to platform handle - hProgram = reinterpret_cast(hProgram)->handle; - // forward to device-platform - result = pfnBuild(hContext, hProgram, pOptions); - - return result; + return pfnBuild(hContext, hProgram, pOptions); } /////////////////////////////////////////////////////////////////////////////// @@ -2775,26 +1580,15 @@ __urdlllocal ur_result_t UR_APICALL urProgramCompile( ur_program_handle_t hProgram, /// [in][optional] pointer to build options null-terminated string. 
const char *pOptions) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hContext); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnCompile = dditable->ur.Program.pfnCompile; + auto *pfnCompile = dditable->Program.pfnCompile; if (nullptr == pfnCompile) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - - // convert loader handle to platform handle - hProgram = reinterpret_cast(hProgram)->handle; - // forward to device-platform - result = pfnCompile(hContext, hProgram, pOptions); - - return result; + return pfnCompile(hContext, hProgram, pOptions); } /////////////////////////////////////////////////////////////////////////////// @@ -2810,43 +1604,17 @@ __urdlllocal ur_result_t UR_APICALL urProgramLink( const char *pOptions, /// [out][alloc] pointer to handle of program object created. 
ur_program_handle_t *phProgram) { - ur_result_t result = UR_RESULT_SUCCESS; if (nullptr != phProgram) { *phProgram = nullptr; } + auto *dditable = *reinterpret_cast(hContext); - [[maybe_unused]] auto context = getContext(); - - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnLink = dditable->ur.Program.pfnLink; + auto *pfnLink = dditable->Program.pfnLink; if (nullptr == pfnLink) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - - // convert loader handles to platform handles - auto phProgramsLocal = std::vector(count); - for (size_t i = 0; i < count; ++i) - phProgramsLocal[i] = - reinterpret_cast(phPrograms[i])->handle; - // forward to device-platform - result = - pfnLink(hContext, count, phProgramsLocal.data(), pOptions, phProgram); - - try { - // convert platform handle to loader handle - if (nullptr != phProgram) - *phProgram = reinterpret_cast( - context->factories.ur_program_factory.getInstance(*phProgram, - dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + return pfnLink(hContext, count, phPrograms, pOptions, phProgram); } /////////////////////////////////////////////////////////////////////////////// @@ -2854,26 +1622,15 @@ __urdlllocal ur_result_t UR_APICALL urProgramLink( __urdlllocal ur_result_t UR_APICALL urProgramRetain( /// [in][retain] handle for the Program to retain ur_program_handle_t hProgram) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hProgram); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hProgram)->dditable; - auto pfnRetain = dditable->ur.Program.pfnRetain; + auto *pfnRetain = dditable->Program.pfnRetain; if (nullptr == pfnRetain) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle 
- hProgram = reinterpret_cast(hProgram)->handle; - // forward to device-platform - result = pfnRetain(hProgram); - - // increment refcount of handle - context->factories.ur_program_factory.retain(hProgram); - - return result; + return pfnRetain(hProgram); } /////////////////////////////////////////////////////////////////////////////// @@ -2881,26 +1638,15 @@ __urdlllocal ur_result_t UR_APICALL urProgramRetain( __urdlllocal ur_result_t UR_APICALL urProgramRelease( /// [in][release] handle for the Program to release ur_program_handle_t hProgram) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hProgram); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hProgram)->dditable; - auto pfnRelease = dditable->ur.Program.pfnRelease; + auto *pfnRelease = dditable->Program.pfnRelease; if (nullptr == pfnRelease) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hProgram = reinterpret_cast(hProgram)->handle; - // forward to device-platform - result = pfnRelease(hProgram); - - // release loader handle - context->factories.ur_program_factory.release(hProgram); - - return result; + return pfnRelease(hProgram); } /////////////////////////////////////////////////////////////////////////////// @@ -2916,27 +1662,16 @@ __urdlllocal ur_result_t UR_APICALL urProgramGetFunctionPointer( const char *pFunctionName, /// [out] Returns the pointer to the function if it is found in the program. 
void **ppFunctionPointer) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hDevice); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hDevice)->dditable; - auto pfnGetFunctionPointer = dditable->ur.Program.pfnGetFunctionPointer; + auto *pfnGetFunctionPointer = dditable->Program.pfnGetFunctionPointer; if (nullptr == pfnGetFunctionPointer) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hDevice = reinterpret_cast(hDevice)->handle; - - // convert loader handle to platform handle - hProgram = reinterpret_cast(hProgram)->handle; - // forward to device-platform - result = pfnGetFunctionPointer(hDevice, hProgram, pFunctionName, - ppFunctionPointer); - - return result; + return pfnGetFunctionPointer(hDevice, hProgram, pFunctionName, + ppFunctionPointer); } /////////////////////////////////////////////////////////////////////////////// @@ -2954,29 +1689,18 @@ __urdlllocal ur_result_t UR_APICALL urProgramGetGlobalVariablePointer( /// [out] Returns the pointer to the global variable if it is found in the /// program. 
void **ppGlobalVariablePointerRet) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hDevice); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hDevice)->dditable; - auto pfnGetGlobalVariablePointer = - dditable->ur.Program.pfnGetGlobalVariablePointer; + auto *pfnGetGlobalVariablePointer = + dditable->Program.pfnGetGlobalVariablePointer; if (nullptr == pfnGetGlobalVariablePointer) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hDevice = reinterpret_cast(hDevice)->handle; - - // convert loader handle to platform handle - hProgram = reinterpret_cast(hProgram)->handle; - // forward to device-platform - result = pfnGetGlobalVariablePointer(hDevice, hProgram, pGlobalVariableName, - pGlobalVariableSizeRet, - ppGlobalVariablePointerRet); - - return result; + return pfnGetGlobalVariablePointer(hDevice, hProgram, pGlobalVariableName, + pGlobalVariableSizeRet, + ppGlobalVariablePointerRet); } /////////////////////////////////////////////////////////////////////////////// @@ -2998,66 +1722,15 @@ __urdlllocal ur_result_t UR_APICALL urProgramGetInfo( /// [out][optional] pointer to the actual size in bytes of the queried /// propName. 
size_t *pPropSizeRet) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hProgram); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hProgram)->dditable; - auto pfnGetInfo = dditable->ur.Program.pfnGetInfo; + auto *pfnGetInfo = dditable->Program.pfnGetInfo; if (nullptr == pfnGetInfo) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hProgram = reinterpret_cast(hProgram)->handle; - - // this value is needed for converting adapter handles to loader handles - size_t sizeret = 0; - if (pPropSizeRet == NULL) - pPropSizeRet = &sizeret; - - // forward to device-platform - result = pfnGetInfo(hProgram, propName, propSize, pPropValue, pPropSizeRet); - - if (UR_RESULT_SUCCESS != result) - return result; - - try { - if (pPropValue != nullptr) { - switch (propName) { - case UR_PROGRAM_INFO_CONTEXT: { - ur_context_handle_t *handles = - reinterpret_cast(pPropValue); - size_t nelements = *pPropSizeRet / sizeof(ur_context_handle_t); - for (size_t i = 0; i < nelements; ++i) { - if (handles[i] != nullptr) { - handles[i] = reinterpret_cast( - context->factories.ur_context_factory.getInstance(handles[i], - dditable)); - } - } - } break; - case UR_PROGRAM_INFO_DEVICES: { - ur_device_handle_t *handles = - reinterpret_cast(pPropValue); - size_t nelements = *pPropSizeRet / sizeof(ur_device_handle_t); - for (size_t i = 0; i < nelements; ++i) { - if (handles[i] != nullptr) { - handles[i] = reinterpret_cast( - context->factories.ur_device_factory.getInstance(handles[i], - dditable)); - } - } - } break; - default: { - } break; - } - } - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + // forward to device-platform + return pfnGetInfo(hProgram, propName, propSize, pPropValue, pPropSizeRet); } /////////////////////////////////////////////////////////////////////////////// @@ -3080,27 +1753,16 @@ 
__urdlllocal ur_result_t UR_APICALL urProgramGetBuildInfo( /// [out][optional] pointer to the actual size in bytes of data being /// queried by propName. size_t *pPropSizeRet) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hProgram); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hProgram)->dditable; - auto pfnGetBuildInfo = dditable->ur.Program.pfnGetBuildInfo; + auto *pfnGetBuildInfo = dditable->Program.pfnGetBuildInfo; if (nullptr == pfnGetBuildInfo) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hProgram = reinterpret_cast(hProgram)->handle; - - // convert loader handle to platform handle - hDevice = reinterpret_cast(hDevice)->handle; - // forward to device-platform - result = pfnGetBuildInfo(hProgram, hDevice, propName, propSize, pPropValue, - pPropSizeRet); - - return result; + return pfnGetBuildInfo(hProgram, hDevice, propName, propSize, pPropValue, + pPropSizeRet); } /////////////////////////////////////////////////////////////////////////////// @@ -3113,24 +1775,16 @@ __urdlllocal ur_result_t UR_APICALL urProgramSetSpecializationConstants( /// [in][range(0, count)] array of specialization constant value /// descriptions const ur_specialization_constant_info_t *pSpecConstants) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hProgram); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hProgram)->dditable; - auto pfnSetSpecializationConstants = - dditable->ur.Program.pfnSetSpecializationConstants; + auto *pfnSetSpecializationConstants = + dditable->Program.pfnSetSpecializationConstants; if (nullptr == pfnSetSpecializationConstants) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hProgram = reinterpret_cast(hProgram)->handle; - // forward to device-platform - 
result = pfnSetSpecializationConstants(hProgram, count, pSpecConstants); - - return result; + return pfnSetSpecializationConstants(hProgram, count, pSpecConstants); } /////////////////////////////////////////////////////////////////////////////// @@ -3140,26 +1794,15 @@ __urdlllocal ur_result_t UR_APICALL urProgramGetNativeHandle( ur_program_handle_t hProgram, /// [out] a pointer to the native handle of the program. ur_native_handle_t *phNativeProgram) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hProgram); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hProgram)->dditable; - auto pfnGetNativeHandle = dditable->ur.Program.pfnGetNativeHandle; + auto *pfnGetNativeHandle = dditable->Program.pfnGetNativeHandle; if (nullptr == pfnGetNativeHandle) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hProgram = reinterpret_cast(hProgram)->handle; - // forward to device-platform - result = pfnGetNativeHandle(hProgram, phNativeProgram); - - if (UR_RESULT_SUCCESS != result) - return result; - - return result; + return pfnGetNativeHandle(hProgram, phNativeProgram); } /////////////////////////////////////////////////////////////////////////////// @@ -3173,37 +1816,16 @@ __urdlllocal ur_result_t UR_APICALL urProgramCreateWithNativeHandle( const ur_program_native_properties_t *pProperties, /// [out][alloc] pointer to the handle of the program object created. 
ur_program_handle_t *phProgram) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hContext); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnCreateWithNativeHandle = - dditable->ur.Program.pfnCreateWithNativeHandle; + auto *pfnCreateWithNativeHandle = dditable->Program.pfnCreateWithNativeHandle; if (nullptr == pfnCreateWithNativeHandle) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - // forward to device-platform - result = pfnCreateWithNativeHandle(hNativeProgram, hContext, pProperties, - phProgram); - - if (UR_RESULT_SUCCESS != result) - return result; - - try { - // convert platform handle to loader handle - *phProgram = reinterpret_cast( - context->factories.ur_program_factory.getInstance(*phProgram, - dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + return pfnCreateWithNativeHandle(hNativeProgram, hContext, pProperties, + phProgram); } /////////////////////////////////////////////////////////////////////////////// @@ -3215,34 +1837,15 @@ __urdlllocal ur_result_t UR_APICALL urKernelCreate( const char *pKernelName, /// [out][alloc] pointer to handle of kernel object created. 
ur_kernel_handle_t *phKernel) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hProgram); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hProgram)->dditable; - auto pfnCreate = dditable->ur.Kernel.pfnCreate; + auto *pfnCreate = dditable->Kernel.pfnCreate; if (nullptr == pfnCreate) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hProgram = reinterpret_cast(hProgram)->handle; - // forward to device-platform - result = pfnCreate(hProgram, pKernelName, phKernel); - - if (UR_RESULT_SUCCESS != result) - return result; - - try { - // convert platform handle to loader handle - *phKernel = reinterpret_cast( - context->factories.ur_kernel_factory.getInstance(*phKernel, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + return pfnCreate(hProgram, pKernelName, phKernel); } /////////////////////////////////////////////////////////////////////////////// @@ -3260,23 +1863,15 @@ __urdlllocal ur_result_t UR_APICALL urKernelSetArgValue( /// The data pointed to will be copied and therefore can be reused on /// return. 
const void *pArgValue) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hKernel); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hKernel)->dditable; - auto pfnSetArgValue = dditable->ur.Kernel.pfnSetArgValue; + auto *pfnSetArgValue = dditable->Kernel.pfnSetArgValue; if (nullptr == pfnSetArgValue) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hKernel = reinterpret_cast(hKernel)->handle; - // forward to device-platform - result = pfnSetArgValue(hKernel, argIndex, argSize, pProperties, pArgValue); - - return result; + return pfnSetArgValue(hKernel, argIndex, argSize, pProperties, pArgValue); } /////////////////////////////////////////////////////////////////////////////// @@ -3290,23 +1885,15 @@ __urdlllocal ur_result_t UR_APICALL urKernelSetArgLocal( size_t argSize, /// [in][optional] pointer to local buffer properties. const ur_kernel_arg_local_properties_t *pProperties) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hKernel); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hKernel)->dditable; - auto pfnSetArgLocal = dditable->ur.Kernel.pfnSetArgLocal; + auto *pfnSetArgLocal = dditable->Kernel.pfnSetArgLocal; if (nullptr == pfnSetArgLocal) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hKernel = reinterpret_cast(hKernel)->handle; - // forward to device-platform - result = pfnSetArgLocal(hKernel, argIndex, argSize, pProperties); - - return result; + return pfnSetArgLocal(hKernel, argIndex, argSize, pProperties); } /////////////////////////////////////////////////////////////////////////////// @@ -3328,66 +1915,15 @@ __urdlllocal ur_result_t UR_APICALL urKernelGetInfo( /// [out][optional] pointer to the actual size in bytes of data being /// queried by propName. 
size_t *pPropSizeRet) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hKernel); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hKernel)->dditable; - auto pfnGetInfo = dditable->ur.Kernel.pfnGetInfo; + auto *pfnGetInfo = dditable->Kernel.pfnGetInfo; if (nullptr == pfnGetInfo) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hKernel = reinterpret_cast(hKernel)->handle; - - // this value is needed for converting adapter handles to loader handles - size_t sizeret = 0; - if (pPropSizeRet == NULL) - pPropSizeRet = &sizeret; - - // forward to device-platform - result = pfnGetInfo(hKernel, propName, propSize, pPropValue, pPropSizeRet); - - if (UR_RESULT_SUCCESS != result) - return result; - - try { - if (pPropValue != nullptr) { - switch (propName) { - case UR_KERNEL_INFO_CONTEXT: { - ur_context_handle_t *handles = - reinterpret_cast(pPropValue); - size_t nelements = *pPropSizeRet / sizeof(ur_context_handle_t); - for (size_t i = 0; i < nelements; ++i) { - if (handles[i] != nullptr) { - handles[i] = reinterpret_cast( - context->factories.ur_context_factory.getInstance(handles[i], - dditable)); - } - } - } break; - case UR_KERNEL_INFO_PROGRAM: { - ur_program_handle_t *handles = - reinterpret_cast(pPropValue); - size_t nelements = *pPropSizeRet / sizeof(ur_program_handle_t); - for (size_t i = 0; i < nelements; ++i) { - if (handles[i] != nullptr) { - handles[i] = reinterpret_cast( - context->factories.ur_program_factory.getInstance(handles[i], - dditable)); - } - } - } break; - default: { - } break; - } - } - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + // forward to device-platform + return pfnGetInfo(hKernel, propName, propSize, pPropValue, pPropSizeRet); } /////////////////////////////////////////////////////////////////////////////// @@ -3407,27 +1943,16 @@ 
__urdlllocal ur_result_t UR_APICALL urKernelGetGroupInfo( /// [out][optional] pointer to the actual size in bytes of data being /// queried by propName. size_t *pPropSizeRet) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hKernel); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hKernel)->dditable; - auto pfnGetGroupInfo = dditable->ur.Kernel.pfnGetGroupInfo; + auto *pfnGetGroupInfo = dditable->Kernel.pfnGetGroupInfo; if (nullptr == pfnGetGroupInfo) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hKernel = reinterpret_cast(hKernel)->handle; - - // convert loader handle to platform handle - hDevice = reinterpret_cast(hDevice)->handle; - // forward to device-platform - result = pfnGetGroupInfo(hKernel, hDevice, propName, propSize, pPropValue, - pPropSizeRet); - - return result; + return pfnGetGroupInfo(hKernel, hDevice, propName, propSize, pPropValue, + pPropSizeRet); } /////////////////////////////////////////////////////////////////////////////// @@ -3447,27 +1972,16 @@ __urdlllocal ur_result_t UR_APICALL urKernelGetSubGroupInfo( /// [out][optional] pointer to the actual size in bytes of data being /// queried by propName. 
size_t *pPropSizeRet) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hKernel); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hKernel)->dditable; - auto pfnGetSubGroupInfo = dditable->ur.Kernel.pfnGetSubGroupInfo; + auto *pfnGetSubGroupInfo = dditable->Kernel.pfnGetSubGroupInfo; if (nullptr == pfnGetSubGroupInfo) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hKernel = reinterpret_cast(hKernel)->handle; - - // convert loader handle to platform handle - hDevice = reinterpret_cast(hDevice)->handle; - // forward to device-platform - result = pfnGetSubGroupInfo(hKernel, hDevice, propName, propSize, pPropValue, - pPropSizeRet); - - return result; + return pfnGetSubGroupInfo(hKernel, hDevice, propName, propSize, pPropValue, + pPropSizeRet); } /////////////////////////////////////////////////////////////////////////////// @@ -3475,26 +1989,15 @@ __urdlllocal ur_result_t UR_APICALL urKernelGetSubGroupInfo( __urdlllocal ur_result_t UR_APICALL urKernelRetain( /// [in][retain] handle for the Kernel to retain ur_kernel_handle_t hKernel) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hKernel); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hKernel)->dditable; - auto pfnRetain = dditable->ur.Kernel.pfnRetain; + auto *pfnRetain = dditable->Kernel.pfnRetain; if (nullptr == pfnRetain) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hKernel = reinterpret_cast(hKernel)->handle; - // forward to device-platform - result = pfnRetain(hKernel); - - // increment refcount of handle - context->factories.ur_kernel_factory.retain(hKernel); - - return result; + return pfnRetain(hKernel); } /////////////////////////////////////////////////////////////////////////////// @@ -3502,26 +2005,15 
@@ __urdlllocal ur_result_t UR_APICALL urKernelRetain( __urdlllocal ur_result_t UR_APICALL urKernelRelease( /// [in][release] handle for the Kernel to release ur_kernel_handle_t hKernel) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hKernel); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hKernel)->dditable; - auto pfnRelease = dditable->ur.Kernel.pfnRelease; + auto *pfnRelease = dditable->Kernel.pfnRelease; if (nullptr == pfnRelease) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hKernel = reinterpret_cast(hKernel)->handle; - // forward to device-platform - result = pfnRelease(hKernel); - - // release loader handle - context->factories.ur_kernel_factory.release(hKernel); - - return result; + return pfnRelease(hKernel); } /////////////////////////////////////////////////////////////////////////////// @@ -3536,23 +2028,15 @@ __urdlllocal ur_result_t UR_APICALL urKernelSetArgPointer( /// [in][optional] Pointer obtained by USM allocation or virtual memory /// mapping operation. If null then argument value is considered null. 
const void *pArgValue) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hKernel); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hKernel)->dditable; - auto pfnSetArgPointer = dditable->ur.Kernel.pfnSetArgPointer; + auto *pfnSetArgPointer = dditable->Kernel.pfnSetArgPointer; if (nullptr == pfnSetArgPointer) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hKernel = reinterpret_cast(hKernel)->handle; - // forward to device-platform - result = pfnSetArgPointer(hKernel, argIndex, pProperties, pArgValue); - - return result; + return pfnSetArgPointer(hKernel, argIndex, pProperties, pArgValue); } /////////////////////////////////////////////////////////////////////////////// @@ -3569,23 +2053,15 @@ __urdlllocal ur_result_t UR_APICALL urKernelSetExecInfo( /// [in][typename(propName, propSize)] pointer to memory location holding /// the property value. 
const void *pPropValue) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hKernel); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hKernel)->dditable; - auto pfnSetExecInfo = dditable->ur.Kernel.pfnSetExecInfo; + auto *pfnSetExecInfo = dditable->Kernel.pfnSetExecInfo; if (nullptr == pfnSetExecInfo) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hKernel = reinterpret_cast(hKernel)->handle; - // forward to device-platform - result = pfnSetExecInfo(hKernel, propName, propSize, pProperties, pPropValue); - - return result; + return pfnSetExecInfo(hKernel, propName, propSize, pProperties, pPropValue); } /////////////////////////////////////////////////////////////////////////////// @@ -3599,26 +2075,15 @@ __urdlllocal ur_result_t UR_APICALL urKernelSetArgSampler( const ur_kernel_arg_sampler_properties_t *pProperties, /// [in] handle of Sampler object. 
ur_sampler_handle_t hArgValue) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hKernel); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hKernel)->dditable; - auto pfnSetArgSampler = dditable->ur.Kernel.pfnSetArgSampler; + auto *pfnSetArgSampler = dditable->Kernel.pfnSetArgSampler; if (nullptr == pfnSetArgSampler) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hKernel = reinterpret_cast(hKernel)->handle; - - // convert loader handle to platform handle - hArgValue = reinterpret_cast(hArgValue)->handle; - // forward to device-platform - result = pfnSetArgSampler(hKernel, argIndex, pProperties, hArgValue); - - return result; + return pfnSetArgSampler(hKernel, argIndex, pProperties, hArgValue); } /////////////////////////////////////////////////////////////////////////////// @@ -3632,28 +2097,15 @@ __urdlllocal ur_result_t UR_APICALL urKernelSetArgMemObj( const ur_kernel_arg_mem_obj_properties_t *pProperties, /// [in][optional] handle of Memory object. ur_mem_handle_t hArgValue) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hKernel); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hKernel)->dditable; - auto pfnSetArgMemObj = dditable->ur.Kernel.pfnSetArgMemObj; + auto *pfnSetArgMemObj = dditable->Kernel.pfnSetArgMemObj; if (nullptr == pfnSetArgMemObj) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hKernel = reinterpret_cast(hKernel)->handle; - - // convert loader handle to platform handle - hArgValue = (hArgValue) - ? 
reinterpret_cast(hArgValue)->handle - : nullptr; - // forward to device-platform - result = pfnSetArgMemObj(hKernel, argIndex, pProperties, hArgValue); - - return result; + return pfnSetArgMemObj(hKernel, argIndex, pProperties, hArgValue); } /////////////////////////////////////////////////////////////////////////////// @@ -3665,24 +2117,16 @@ __urdlllocal ur_result_t UR_APICALL urKernelSetSpecializationConstants( uint32_t count, /// [in] array of specialization constant value descriptions const ur_specialization_constant_info_t *pSpecConstants) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hKernel); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hKernel)->dditable; - auto pfnSetSpecializationConstants = - dditable->ur.Kernel.pfnSetSpecializationConstants; + auto *pfnSetSpecializationConstants = + dditable->Kernel.pfnSetSpecializationConstants; if (nullptr == pfnSetSpecializationConstants) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hKernel = reinterpret_cast(hKernel)->handle; - // forward to device-platform - result = pfnSetSpecializationConstants(hKernel, count, pSpecConstants); - - return result; + return pfnSetSpecializationConstants(hKernel, count, pSpecConstants); } /////////////////////////////////////////////////////////////////////////////// @@ -3692,26 +2136,15 @@ __urdlllocal ur_result_t UR_APICALL urKernelGetNativeHandle( ur_kernel_handle_t hKernel, /// [out] a pointer to the native handle of the kernel. 
ur_native_handle_t *phNativeKernel) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hKernel); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hKernel)->dditable; - auto pfnGetNativeHandle = dditable->ur.Kernel.pfnGetNativeHandle; + auto *pfnGetNativeHandle = dditable->Kernel.pfnGetNativeHandle; if (nullptr == pfnGetNativeHandle) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hKernel = reinterpret_cast(hKernel)->handle; - // forward to device-platform - result = pfnGetNativeHandle(hKernel, phNativeKernel); - - if (UR_RESULT_SUCCESS != result) - return result; - - return result; + return pfnGetNativeHandle(hKernel, phNativeKernel); } /////////////////////////////////////////////////////////////////////////////// @@ -3727,41 +2160,16 @@ __urdlllocal ur_result_t UR_APICALL urKernelCreateWithNativeHandle( const ur_kernel_native_properties_t *pProperties, /// [out][alloc] pointer to the handle of the kernel object created. ur_kernel_handle_t *phKernel) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hContext); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnCreateWithNativeHandle = - dditable->ur.Kernel.pfnCreateWithNativeHandle; + auto *pfnCreateWithNativeHandle = dditable->Kernel.pfnCreateWithNativeHandle; if (nullptr == pfnCreateWithNativeHandle) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - - // convert loader handle to platform handle - hProgram = (hProgram) - ? 
reinterpret_cast(hProgram)->handle - : nullptr; - // forward to device-platform - result = pfnCreateWithNativeHandle(hNativeKernel, hContext, hProgram, - pProperties, phKernel); - - if (UR_RESULT_SUCCESS != result) - return result; - - try { - // convert platform handle to loader handle - *phKernel = reinterpret_cast( - context->factories.ur_kernel_factory.getInstance(*phKernel, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + return pfnCreateWithNativeHandle(hNativeKernel, hContext, hProgram, + pProperties, phKernel); } /////////////////////////////////////////////////////////////////////////////// @@ -3784,29 +2192,18 @@ __urdlllocal ur_result_t UR_APICALL urKernelGetSuggestedLocalWorkSize( /// [out] pointer to an array of numWorkDim unsigned values that specify /// suggested local work size that will contain the result of the query size_t *pSuggestedLocalWorkSize) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hKernel); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hKernel)->dditable; - auto pfnGetSuggestedLocalWorkSize = - dditable->ur.Kernel.pfnGetSuggestedLocalWorkSize; + auto *pfnGetSuggestedLocalWorkSize = + dditable->Kernel.pfnGetSuggestedLocalWorkSize; if (nullptr == pfnGetSuggestedLocalWorkSize) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hKernel = reinterpret_cast(hKernel)->handle; - - // convert loader handle to platform handle - hQueue = reinterpret_cast(hQueue)->handle; - // forward to device-platform - result = pfnGetSuggestedLocalWorkSize(hKernel, hQueue, numWorkDim, - pGlobalWorkOffset, pGlobalWorkSize, - pSuggestedLocalWorkSize); - - return result; + return pfnGetSuggestedLocalWorkSize(hKernel, hQueue, numWorkDim, + pGlobalWorkOffset, pGlobalWorkSize, + pSuggestedLocalWorkSize); } 
/////////////////////////////////////////////////////////////////////////////// @@ -3823,78 +2220,15 @@ __urdlllocal ur_result_t UR_APICALL urQueueGetInfo( void *pPropValue, /// [out][optional] size in bytes returned in queue property value size_t *pPropSizeRet) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hQueue); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hQueue)->dditable; - auto pfnGetInfo = dditable->ur.Queue.pfnGetInfo; + auto *pfnGetInfo = dditable->Queue.pfnGetInfo; if (nullptr == pfnGetInfo) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hQueue = reinterpret_cast(hQueue)->handle; - - // this value is needed for converting adapter handles to loader handles - size_t sizeret = 0; - if (pPropSizeRet == NULL) - pPropSizeRet = &sizeret; - - // forward to device-platform - result = pfnGetInfo(hQueue, propName, propSize, pPropValue, pPropSizeRet); - - if (UR_RESULT_SUCCESS != result) - return result; - - try { - if (pPropValue != nullptr) { - switch (propName) { - case UR_QUEUE_INFO_CONTEXT: { - ur_context_handle_t *handles = - reinterpret_cast(pPropValue); - size_t nelements = *pPropSizeRet / sizeof(ur_context_handle_t); - for (size_t i = 0; i < nelements; ++i) { - if (handles[i] != nullptr) { - handles[i] = reinterpret_cast( - context->factories.ur_context_factory.getInstance(handles[i], - dditable)); - } - } - } break; - case UR_QUEUE_INFO_DEVICE: { - ur_device_handle_t *handles = - reinterpret_cast(pPropValue); - size_t nelements = *pPropSizeRet / sizeof(ur_device_handle_t); - for (size_t i = 0; i < nelements; ++i) { - if (handles[i] != nullptr) { - handles[i] = reinterpret_cast( - context->factories.ur_device_factory.getInstance(handles[i], - dditable)); - } - } - } break; - case UR_QUEUE_INFO_DEVICE_DEFAULT: { - ur_queue_handle_t *handles = - reinterpret_cast(pPropValue); - size_t nelements = 
*pPropSizeRet / sizeof(ur_queue_handle_t); - for (size_t i = 0; i < nelements; ++i) { - if (handles[i] != nullptr) { - handles[i] = reinterpret_cast( - context->factories.ur_queue_factory.getInstance(handles[i], - dditable)); - } - } - } break; - default: { - } break; - } - } - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + // forward to device-platform + return pfnGetInfo(hQueue, propName, propSize, pPropValue, pPropSizeRet); } /////////////////////////////////////////////////////////////////////////////// @@ -3908,37 +2242,15 @@ __urdlllocal ur_result_t UR_APICALL urQueueCreate( const ur_queue_properties_t *pProperties, /// [out][alloc] pointer to handle of queue object created ur_queue_handle_t *phQueue) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hContext); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnCreate = dditable->ur.Queue.pfnCreate; + auto *pfnCreate = dditable->Queue.pfnCreate; if (nullptr == pfnCreate) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - - // convert loader handle to platform handle - hDevice = reinterpret_cast(hDevice)->handle; - // forward to device-platform - result = pfnCreate(hContext, hDevice, pProperties, phQueue); - - if (UR_RESULT_SUCCESS != result) - return result; - - try { - // convert platform handle to loader handle - *phQueue = reinterpret_cast( - context->factories.ur_queue_factory.getInstance(*phQueue, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + return pfnCreate(hContext, hDevice, pProperties, phQueue); } /////////////////////////////////////////////////////////////////////////////// @@ -3946,26 +2258,15 @@ __urdlllocal ur_result_t UR_APICALL urQueueCreate( 
__urdlllocal ur_result_t UR_APICALL urQueueRetain( /// [in][retain] handle of the queue object to get access ur_queue_handle_t hQueue) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hQueue); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hQueue)->dditable; - auto pfnRetain = dditable->ur.Queue.pfnRetain; + auto *pfnRetain = dditable->Queue.pfnRetain; if (nullptr == pfnRetain) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hQueue = reinterpret_cast(hQueue)->handle; - // forward to device-platform - result = pfnRetain(hQueue); - - // increment refcount of handle - context->factories.ur_queue_factory.retain(hQueue); - - return result; + return pfnRetain(hQueue); } /////////////////////////////////////////////////////////////////////////////// @@ -3973,26 +2274,15 @@ __urdlllocal ur_result_t UR_APICALL urQueueRetain( __urdlllocal ur_result_t UR_APICALL urQueueRelease( /// [in][release] handle of the queue object to release ur_queue_handle_t hQueue) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hQueue); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hQueue)->dditable; - auto pfnRelease = dditable->ur.Queue.pfnRelease; + auto *pfnRelease = dditable->Queue.pfnRelease; if (nullptr == pfnRelease) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hQueue = reinterpret_cast(hQueue)->handle; - // forward to device-platform - result = pfnRelease(hQueue); - - // release loader handle - context->factories.ur_queue_factory.release(hQueue); - - return result; + return pfnRelease(hQueue); } /////////////////////////////////////////////////////////////////////////////// @@ -4004,26 +2294,15 @@ __urdlllocal ur_result_t UR_APICALL urQueueGetNativeHandle( ur_queue_native_desc_t 
*pDesc, /// [out] a pointer to the native handle of the queue. ur_native_handle_t *phNativeQueue) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hQueue); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hQueue)->dditable; - auto pfnGetNativeHandle = dditable->ur.Queue.pfnGetNativeHandle; + auto *pfnGetNativeHandle = dditable->Queue.pfnGetNativeHandle; if (nullptr == pfnGetNativeHandle) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hQueue = reinterpret_cast(hQueue)->handle; - // forward to device-platform - result = pfnGetNativeHandle(hQueue, pDesc, phNativeQueue); - - if (UR_RESULT_SUCCESS != result) - return result; - - return result; + return pfnGetNativeHandle(hQueue, pDesc, phNativeQueue); } /////////////////////////////////////////////////////////////////////////////// @@ -4039,39 +2318,16 @@ __urdlllocal ur_result_t UR_APICALL urQueueCreateWithNativeHandle( const ur_queue_native_properties_t *pProperties, /// [out][alloc] pointer to the handle of the queue object created. ur_queue_handle_t *phQueue) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hContext); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnCreateWithNativeHandle = dditable->ur.Queue.pfnCreateWithNativeHandle; + auto *pfnCreateWithNativeHandle = dditable->Queue.pfnCreateWithNativeHandle; if (nullptr == pfnCreateWithNativeHandle) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - - // convert loader handle to platform handle - hDevice = (hDevice) ? 
reinterpret_cast(hDevice)->handle - : nullptr; - // forward to device-platform - result = pfnCreateWithNativeHandle(hNativeQueue, hContext, hDevice, - pProperties, phQueue); - - if (UR_RESULT_SUCCESS != result) - return result; - - try { - // convert platform handle to loader handle - *phQueue = reinterpret_cast( - context->factories.ur_queue_factory.getInstance(*phQueue, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + return pfnCreateWithNativeHandle(hNativeQueue, hContext, hDevice, pProperties, + phQueue); } /////////////////////////////////////////////////////////////////////////////// @@ -4079,23 +2335,15 @@ __urdlllocal ur_result_t UR_APICALL urQueueCreateWithNativeHandle( __urdlllocal ur_result_t UR_APICALL urQueueFinish( /// [in] handle of the queue to be finished. ur_queue_handle_t hQueue) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hQueue); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hQueue)->dditable; - auto pfnFinish = dditable->ur.Queue.pfnFinish; + auto *pfnFinish = dditable->Queue.pfnFinish; if (nullptr == pfnFinish) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hQueue = reinterpret_cast(hQueue)->handle; - // forward to device-platform - result = pfnFinish(hQueue); - - return result; + return pfnFinish(hQueue); } /////////////////////////////////////////////////////////////////////////////// @@ -4103,23 +2351,15 @@ __urdlllocal ur_result_t UR_APICALL urQueueFinish( __urdlllocal ur_result_t UR_APICALL urQueueFlush( /// [in] handle of the queue to be flushed. 
ur_queue_handle_t hQueue) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hQueue); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hQueue)->dditable; - auto pfnFlush = dditable->ur.Queue.pfnFlush; + auto *pfnFlush = dditable->Queue.pfnFlush; if (nullptr == pfnFlush) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hQueue = reinterpret_cast(hQueue)->handle; - // forward to device-platform - result = pfnFlush(hQueue); - - return result; + return pfnFlush(hQueue); } /////////////////////////////////////////////////////////////////////////////// @@ -4136,66 +2376,15 @@ __urdlllocal ur_result_t UR_APICALL urEventGetInfo( void *pPropValue, /// [out][optional] bytes returned in event property size_t *pPropSizeRet) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hEvent); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hEvent)->dditable; - auto pfnGetInfo = dditable->ur.Event.pfnGetInfo; + auto *pfnGetInfo = dditable->Event.pfnGetInfo; if (nullptr == pfnGetInfo) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hEvent = reinterpret_cast(hEvent)->handle; - - // this value is needed for converting adapter handles to loader handles - size_t sizeret = 0; - if (pPropSizeRet == NULL) - pPropSizeRet = &sizeret; - - // forward to device-platform - result = pfnGetInfo(hEvent, propName, propSize, pPropValue, pPropSizeRet); - - if (UR_RESULT_SUCCESS != result) - return result; - - try { - if (pPropValue != nullptr) { - switch (propName) { - case UR_EVENT_INFO_COMMAND_QUEUE: { - ur_queue_handle_t *handles = - reinterpret_cast(pPropValue); - size_t nelements = *pPropSizeRet / sizeof(ur_queue_handle_t); - for (size_t i = 0; i < nelements; ++i) { - if (handles[i] != nullptr) { - handles[i] = 
reinterpret_cast( - context->factories.ur_queue_factory.getInstance(handles[i], - dditable)); - } - } - } break; - case UR_EVENT_INFO_CONTEXT: { - ur_context_handle_t *handles = - reinterpret_cast(pPropValue); - size_t nelements = *pPropSizeRet / sizeof(ur_context_handle_t); - for (size_t i = 0; i < nelements; ++i) { - if (handles[i] != nullptr) { - handles[i] = reinterpret_cast( - context->factories.ur_context_factory.getInstance(handles[i], - dditable)); - } - } - } break; - default: { - } break; - } - } - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + // forward to device-platform + return pfnGetInfo(hEvent, propName, propSize, pPropValue, pPropSizeRet); } /////////////////////////////////////////////////////////////////////////////// @@ -4213,24 +2402,16 @@ __urdlllocal ur_result_t UR_APICALL urEventGetProfilingInfo( /// [out][optional] pointer to the actual size in bytes returned in /// propValue size_t *pPropSizeRet) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hEvent); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hEvent)->dditable; - auto pfnGetProfilingInfo = dditable->ur.Event.pfnGetProfilingInfo; + auto *pfnGetProfilingInfo = dditable->Event.pfnGetProfilingInfo; if (nullptr == pfnGetProfilingInfo) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hEvent = reinterpret_cast(hEvent)->handle; - // forward to device-platform - result = - pfnGetProfilingInfo(hEvent, propName, propSize, pPropValue, pPropSizeRet); - - return result; + return pfnGetProfilingInfo(hEvent, propName, propSize, pPropValue, + pPropSizeRet); } /////////////////////////////////////////////////////////////////////////////// @@ -4241,27 +2422,15 @@ __urdlllocal ur_result_t UR_APICALL urEventWait( /// [in][range(0, numEvents)] pointer to a list of events to wait for /// completion 
const ur_event_handle_t *phEventWaitList) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(phEventWaitList[0]); - // extract platform's function pointer table - auto dditable = - reinterpret_cast(*phEventWaitList)->dditable; - auto pfnWait = dditable->ur.Event.pfnWait; + auto *pfnWait = dditable->Event.pfnWait; if (nullptr == pfnWait) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handles to platform handles - auto phEventWaitListLocal = std::vector(numEvents); - for (size_t i = 0; i < numEvents; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // forward to device-platform - result = pfnWait(numEvents, phEventWaitListLocal.data()); - - return result; + return pfnWait(numEvents, phEventWaitList); } /////////////////////////////////////////////////////////////////////////////// @@ -4269,26 +2438,15 @@ __urdlllocal ur_result_t UR_APICALL urEventWait( __urdlllocal ur_result_t UR_APICALL urEventRetain( /// [in][retain] handle of the event object ur_event_handle_t hEvent) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hEvent); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hEvent)->dditable; - auto pfnRetain = dditable->ur.Event.pfnRetain; + auto *pfnRetain = dditable->Event.pfnRetain; if (nullptr == pfnRetain) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hEvent = reinterpret_cast(hEvent)->handle; - // forward to device-platform - result = pfnRetain(hEvent); - - // increment refcount of handle - context->factories.ur_event_factory.retain(hEvent); - - return result; + return pfnRetain(hEvent); } /////////////////////////////////////////////////////////////////////////////// @@ -4296,26 +2454,15 @@ __urdlllocal ur_result_t UR_APICALL urEventRetain( __urdlllocal ur_result_t UR_APICALL 
urEventRelease( /// [in][release] handle of the event object ur_event_handle_t hEvent) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hEvent); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hEvent)->dditable; - auto pfnRelease = dditable->ur.Event.pfnRelease; + auto *pfnRelease = dditable->Event.pfnRelease; if (nullptr == pfnRelease) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hEvent = reinterpret_cast(hEvent)->handle; - // forward to device-platform - result = pfnRelease(hEvent); - - // release loader handle - context->factories.ur_event_factory.release(hEvent); - - return result; + return pfnRelease(hEvent); } /////////////////////////////////////////////////////////////////////////////// @@ -4325,26 +2472,15 @@ __urdlllocal ur_result_t UR_APICALL urEventGetNativeHandle( ur_event_handle_t hEvent, /// [out] a pointer to the native handle of the event. 
ur_native_handle_t *phNativeEvent) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hEvent); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hEvent)->dditable; - auto pfnGetNativeHandle = dditable->ur.Event.pfnGetNativeHandle; + auto *pfnGetNativeHandle = dditable->Event.pfnGetNativeHandle; if (nullptr == pfnGetNativeHandle) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hEvent = reinterpret_cast(hEvent)->handle; - // forward to device-platform - result = pfnGetNativeHandle(hEvent, phNativeEvent); - - if (UR_RESULT_SUCCESS != result) - return result; - - return result; + return pfnGetNativeHandle(hEvent, phNativeEvent); } /////////////////////////////////////////////////////////////////////////////// @@ -4358,51 +2494,17 @@ __urdlllocal ur_result_t UR_APICALL urEventCreateWithNativeHandle( const ur_event_native_properties_t *pProperties, /// [out][alloc] pointer to the handle of the event object created. 
ur_event_handle_t *phEvent) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hContext); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnCreateWithNativeHandle = dditable->ur.Event.pfnCreateWithNativeHandle; + auto *pfnCreateWithNativeHandle = dditable->Event.pfnCreateWithNativeHandle; if (nullptr == pfnCreateWithNativeHandle) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - // forward to device-platform - result = - pfnCreateWithNativeHandle(hNativeEvent, hContext, pProperties, phEvent); - - if (UR_RESULT_SUCCESS != result) - return result; - - try { - // convert platform handle to loader handle - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; -} - -namespace { -struct event_callback_wrapper_data_t { - ur_event_callback_t fn; - ur_event_handle_t event; - void *userData; -}; - -void event_callback_wrapper([[maybe_unused]] ur_event_handle_t hEvent, - ur_execution_info_t execStatus, void *pUserData) { - auto *wrapper = reinterpret_cast(pUserData); - (wrapper->fn)(wrapper->event, execStatus, wrapper->userData); - delete wrapper; + return pfnCreateWithNativeHandle(hNativeEvent, hContext, pProperties, + phEvent); } -} // namespace /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEventSetCallback @@ -4415,30 +2517,15 @@ __urdlllocal ur_result_t UR_APICALL urEventSetCallback( ur_event_callback_t pfnNotify, /// [in][out][optional] pointer to data to be passed to callback. 
void *pUserData) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hEvent); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hEvent)->dditable; - auto pfnSetCallback = dditable->ur.Event.pfnSetCallback; + auto *pfnSetCallback = dditable->Event.pfnSetCallback; if (nullptr == pfnSetCallback) return UR_RESULT_ERROR_UNINITIALIZED; - // Replace the callback with a wrapper function that gives the callback the - // loader event rather than a backend-specific event - auto *wrapper_data = - new event_callback_wrapper_data_t{pfnNotify, hEvent, pUserData}; - pUserData = wrapper_data; - pfnNotify = event_callback_wrapper; - - // convert loader handle to platform handle - hEvent = reinterpret_cast(hEvent)->handle; - // forward to device-platform - result = pfnSetCallback(hEvent, execStatus, pfnNotify, pUserData); - - return result; + return pfnSetCallback(hEvent, execStatus, pfnNotify, pUserData); } /////////////////////////////////////////////////////////////////////////////// @@ -4475,48 +2562,17 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueKernelLaunch( /// are not NULL, phEvent must not refer to an element of the /// phEventWaitList array. 
ur_event_handle_t *phEvent) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hQueue); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hQueue)->dditable; - auto pfnKernelLaunch = dditable->ur.Enqueue.pfnKernelLaunch; + auto *pfnKernelLaunch = dditable->Enqueue.pfnKernelLaunch; if (nullptr == pfnKernelLaunch) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hQueue = reinterpret_cast(hQueue)->handle; - - // convert loader handle to platform handle - hKernel = reinterpret_cast(hKernel)->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // forward to device-platform - result = pfnKernelLaunch(hQueue, hKernel, workDim, pGlobalWorkOffset, - pGlobalWorkSize, pLocalWorkSize, numEventsInWaitList, - phEventWaitListLocal.data(), phEvent); - - // In the event of ERROR_ADAPTER_SPECIFIC we should still attempt to wrap any - // output handles below. - if (UR_RESULT_SUCCESS != result && UR_RESULT_ERROR_ADAPTER_SPECIFIC != result) - return result; - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + return pfnKernelLaunch(hQueue, hKernel, workDim, pGlobalWorkOffset, + pGlobalWorkSize, pLocalWorkSize, numEventsInWaitList, + phEventWaitList, phEvent); } /////////////////////////////////////////////////////////////////////////////// @@ -4536,44 +2592,15 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueEventsWait( /// particular command instance. 
If phEventWaitList and phEvent are not /// NULL, phEvent must not refer to an element of the phEventWaitList array. ur_event_handle_t *phEvent) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hQueue); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hQueue)->dditable; - auto pfnEventsWait = dditable->ur.Enqueue.pfnEventsWait; + auto *pfnEventsWait = dditable->Enqueue.pfnEventsWait; if (nullptr == pfnEventsWait) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hQueue = reinterpret_cast(hQueue)->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // forward to device-platform - result = pfnEventsWait(hQueue, numEventsInWaitList, - phEventWaitListLocal.data(), phEvent); - - // In the event of ERROR_ADAPTER_SPECIFIC we should still attempt to wrap any - // output handles below. - if (UR_RESULT_SUCCESS != result && UR_RESULT_ERROR_ADAPTER_SPECIFIC != result) - return result; - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + return pfnEventsWait(hQueue, numEventsInWaitList, phEventWaitList, phEvent); } /////////////////////////////////////////////////////////////////////////////// @@ -4593,44 +2620,16 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier( /// particular command instance. If phEventWaitList and phEvent are not /// NULL, phEvent must not refer to an element of the phEventWaitList array. 
ur_event_handle_t *phEvent) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hQueue); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hQueue)->dditable; - auto pfnEventsWaitWithBarrier = dditable->ur.Enqueue.pfnEventsWaitWithBarrier; + auto *pfnEventsWaitWithBarrier = dditable->Enqueue.pfnEventsWaitWithBarrier; if (nullptr == pfnEventsWaitWithBarrier) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hQueue = reinterpret_cast(hQueue)->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // forward to device-platform - result = pfnEventsWaitWithBarrier(hQueue, numEventsInWaitList, - phEventWaitListLocal.data(), phEvent); - - // In the event of ERROR_ADAPTER_SPECIFIC we should still attempt to wrap any - // output handles below. - if (UR_RESULT_SUCCESS != result && UR_RESULT_ERROR_ADAPTER_SPECIFIC != result) - return result; - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + return pfnEventsWaitWithBarrier(hQueue, numEventsInWaitList, phEventWaitList, + phEvent); } /////////////////////////////////////////////////////////////////////////////// @@ -4659,48 +2658,16 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferRead( /// particular command instance. If phEventWaitList and phEvent are not /// NULL, phEvent must not refer to an element of the phEventWaitList array. 
ur_event_handle_t *phEvent) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hQueue); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hQueue)->dditable; - auto pfnMemBufferRead = dditable->ur.Enqueue.pfnMemBufferRead; + auto *pfnMemBufferRead = dditable->Enqueue.pfnMemBufferRead; if (nullptr == pfnMemBufferRead) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hQueue = reinterpret_cast(hQueue)->handle; - - // convert loader handle to platform handle - hBuffer = reinterpret_cast(hBuffer)->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // forward to device-platform - result = pfnMemBufferRead(hQueue, hBuffer, blockingRead, offset, size, pDst, - numEventsInWaitList, phEventWaitListLocal.data(), - phEvent); - - // In the event of ERROR_ADAPTER_SPECIFIC we should still attempt to wrap any - // output handles below. - if (UR_RESULT_SUCCESS != result && UR_RESULT_ERROR_ADAPTER_SPECIFIC != result) - return result; - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + return pfnMemBufferRead(hQueue, hBuffer, blockingRead, offset, size, pDst, + numEventsInWaitList, phEventWaitList, phEvent); } /////////////////////////////////////////////////////////////////////////////// @@ -4729,48 +2696,16 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferWrite( /// particular command instance. 
If phEventWaitList and phEvent are not /// NULL, phEvent must not refer to an element of the phEventWaitList array. ur_event_handle_t *phEvent) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hQueue); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hQueue)->dditable; - auto pfnMemBufferWrite = dditable->ur.Enqueue.pfnMemBufferWrite; + auto *pfnMemBufferWrite = dditable->Enqueue.pfnMemBufferWrite; if (nullptr == pfnMemBufferWrite) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hQueue = reinterpret_cast(hQueue)->handle; - - // convert loader handle to platform handle - hBuffer = reinterpret_cast(hBuffer)->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // forward to device-platform - result = pfnMemBufferWrite(hQueue, hBuffer, blockingWrite, offset, size, pSrc, - numEventsInWaitList, phEventWaitListLocal.data(), - phEvent); - - // In the event of ERROR_ADAPTER_SPECIFIC we should still attempt to wrap any - // output handles below. 
- if (UR_RESULT_SUCCESS != result && UR_RESULT_ERROR_ADAPTER_SPECIFIC != result) - return result; - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + return pfnMemBufferWrite(hQueue, hBuffer, blockingWrite, offset, size, pSrc, + numEventsInWaitList, phEventWaitList, phEvent); } /////////////////////////////////////////////////////////////////////////////// @@ -4811,49 +2746,18 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferReadRect( /// particular command instance. If phEventWaitList and phEvent are not /// NULL, phEvent must not refer to an element of the phEventWaitList array. ur_event_handle_t *phEvent) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hQueue); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hQueue)->dditable; - auto pfnMemBufferReadRect = dditable->ur.Enqueue.pfnMemBufferReadRect; + auto *pfnMemBufferReadRect = dditable->Enqueue.pfnMemBufferReadRect; if (nullptr == pfnMemBufferReadRect) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hQueue = reinterpret_cast(hQueue)->handle; - - // convert loader handle to platform handle - hBuffer = reinterpret_cast(hBuffer)->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // forward to device-platform - result = pfnMemBufferReadRect( + return pfnMemBufferReadRect( hQueue, hBuffer, blockingRead, bufferOrigin, hostOrigin, region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, pDst, - numEventsInWaitList, 
phEventWaitListLocal.data(), phEvent); - - // In the event of ERROR_ADAPTER_SPECIFIC we should still attempt to wrap any - // output handles below. - if (UR_RESULT_SUCCESS != result && UR_RESULT_ERROR_ADAPTER_SPECIFIC != result) - return result; - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + numEventsInWaitList, phEventWaitList, phEvent); } /////////////////////////////////////////////////////////////////////////////// @@ -4895,49 +2799,18 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferWriteRect( /// particular command instance. If phEventWaitList and phEvent are not /// NULL, phEvent must not refer to an element of the phEventWaitList array. ur_event_handle_t *phEvent) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hQueue); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hQueue)->dditable; - auto pfnMemBufferWriteRect = dditable->ur.Enqueue.pfnMemBufferWriteRect; + auto *pfnMemBufferWriteRect = dditable->Enqueue.pfnMemBufferWriteRect; if (nullptr == pfnMemBufferWriteRect) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hQueue = reinterpret_cast(hQueue)->handle; - - // convert loader handle to platform handle - hBuffer = reinterpret_cast(hBuffer)->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // forward to device-platform - result = pfnMemBufferWriteRect( + return pfnMemBufferWriteRect( hQueue, hBuffer, blockingWrite, bufferOrigin, hostOrigin, region, bufferRowPitch, 
bufferSlicePitch, hostRowPitch, hostSlicePitch, pSrc, - numEventsInWaitList, phEventWaitListLocal.data(), phEvent); - - // In the event of ERROR_ADAPTER_SPECIFIC we should still attempt to wrap any - // output handles below. - if (UR_RESULT_SUCCESS != result && UR_RESULT_ERROR_ADAPTER_SPECIFIC != result) - return result; - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + numEventsInWaitList, phEventWaitList, phEvent); } /////////////////////////////////////////////////////////////////////////////// @@ -4966,51 +2839,16 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferCopy( /// particular command instance. If phEventWaitList and phEvent are not /// NULL, phEvent must not refer to an element of the phEventWaitList array. ur_event_handle_t *phEvent) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hQueue); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hQueue)->dditable; - auto pfnMemBufferCopy = dditable->ur.Enqueue.pfnMemBufferCopy; + auto *pfnMemBufferCopy = dditable->Enqueue.pfnMemBufferCopy; if (nullptr == pfnMemBufferCopy) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hQueue = reinterpret_cast(hQueue)->handle; - - // convert loader handle to platform handle - hBufferSrc = reinterpret_cast(hBufferSrc)->handle; - - // convert loader handle to platform handle - hBufferDst = reinterpret_cast(hBufferDst)->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // forward to 
device-platform - result = pfnMemBufferCopy(hQueue, hBufferSrc, hBufferDst, srcOffset, - dstOffset, size, numEventsInWaitList, - phEventWaitListLocal.data(), phEvent); - - // In the event of ERROR_ADAPTER_SPECIFIC we should still attempt to wrap any - // output handles below. - if (UR_RESULT_SUCCESS != result && UR_RESULT_ERROR_ADAPTER_SPECIFIC != result) - return result; - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + return pfnMemBufferCopy(hQueue, hBufferSrc, hBufferDst, srcOffset, dstOffset, + size, numEventsInWaitList, phEventWaitList, phEvent); } /////////////////////////////////////////////////////////////////////////////// @@ -5047,52 +2885,18 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferCopyRect( /// particular command instance. If phEventWaitList and phEvent are not /// NULL, phEvent must not refer to an element of the phEventWaitList array. 
ur_event_handle_t *phEvent) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hQueue); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hQueue)->dditable; - auto pfnMemBufferCopyRect = dditable->ur.Enqueue.pfnMemBufferCopyRect; + auto *pfnMemBufferCopyRect = dditable->Enqueue.pfnMemBufferCopyRect; if (nullptr == pfnMemBufferCopyRect) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hQueue = reinterpret_cast(hQueue)->handle; - - // convert loader handle to platform handle - hBufferSrc = reinterpret_cast(hBufferSrc)->handle; - - // convert loader handle to platform handle - hBufferDst = reinterpret_cast(hBufferDst)->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // forward to device-platform - result = pfnMemBufferCopyRect(hQueue, hBufferSrc, hBufferDst, srcOrigin, - dstOrigin, region, srcRowPitch, srcSlicePitch, - dstRowPitch, dstSlicePitch, numEventsInWaitList, - phEventWaitListLocal.data(), phEvent); - - // In the event of ERROR_ADAPTER_SPECIFIC we should still attempt to wrap any - // output handles below. 
- if (UR_RESULT_SUCCESS != result && UR_RESULT_ERROR_ADAPTER_SPECIFIC != result) - return result; - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + return pfnMemBufferCopyRect(hQueue, hBufferSrc, hBufferDst, srcOrigin, + dstOrigin, region, srcRowPitch, srcSlicePitch, + dstRowPitch, dstSlicePitch, numEventsInWaitList, + phEventWaitList, phEvent); } /////////////////////////////////////////////////////////////////////////////// @@ -5121,48 +2925,16 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferFill( /// particular command instance. If phEventWaitList and phEvent are not /// NULL, phEvent must not refer to an element of the phEventWaitList array. ur_event_handle_t *phEvent) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hQueue); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hQueue)->dditable; - auto pfnMemBufferFill = dditable->ur.Enqueue.pfnMemBufferFill; + auto *pfnMemBufferFill = dditable->Enqueue.pfnMemBufferFill; if (nullptr == pfnMemBufferFill) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hQueue = reinterpret_cast(hQueue)->handle; - - // convert loader handle to platform handle - hBuffer = reinterpret_cast(hBuffer)->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // forward to device-platform - result = pfnMemBufferFill(hQueue, hBuffer, pPattern, patternSize, offset, - size, numEventsInWaitList, - phEventWaitListLocal.data(), phEvent); - - // In the event of 
ERROR_ADAPTER_SPECIFIC we should still attempt to wrap any - // output handles below. - if (UR_RESULT_SUCCESS != result && UR_RESULT_ERROR_ADAPTER_SPECIFIC != result) - return result; - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + return pfnMemBufferFill(hQueue, hBuffer, pPattern, patternSize, offset, size, + numEventsInWaitList, phEventWaitList, phEvent); } /////////////////////////////////////////////////////////////////////////////// @@ -5196,48 +2968,17 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemImageRead( /// particular command instance. If phEventWaitList and phEvent are not /// NULL, phEvent must not refer to an element of the phEventWaitList array. ur_event_handle_t *phEvent) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hQueue); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hQueue)->dditable; - auto pfnMemImageRead = dditable->ur.Enqueue.pfnMemImageRead; + auto *pfnMemImageRead = dditable->Enqueue.pfnMemImageRead; if (nullptr == pfnMemImageRead) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hQueue = reinterpret_cast(hQueue)->handle; - - // convert loader handle to platform handle - hImage = reinterpret_cast(hImage)->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // forward to device-platform - result = pfnMemImageRead(hQueue, hImage, blockingRead, origin, region, - rowPitch, slicePitch, pDst, numEventsInWaitList, - phEventWaitListLocal.data(), phEvent); - 
- // In the event of ERROR_ADAPTER_SPECIFIC we should still attempt to wrap any - // output handles below. - if (UR_RESULT_SUCCESS != result && UR_RESULT_ERROR_ADAPTER_SPECIFIC != result) - return result; - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + return pfnMemImageRead(hQueue, hImage, blockingRead, origin, region, rowPitch, + slicePitch, pDst, numEventsInWaitList, phEventWaitList, + phEvent); } /////////////////////////////////////////////////////////////////////////////// @@ -5271,48 +3012,17 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemImageWrite( /// particular command instance. If phEventWaitList and phEvent are not /// NULL, phEvent must not refer to an element of the phEventWaitList array. ur_event_handle_t *phEvent) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hQueue); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hQueue)->dditable; - auto pfnMemImageWrite = dditable->ur.Enqueue.pfnMemImageWrite; + auto *pfnMemImageWrite = dditable->Enqueue.pfnMemImageWrite; if (nullptr == pfnMemImageWrite) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hQueue = reinterpret_cast(hQueue)->handle; - - // convert loader handle to platform handle - hImage = reinterpret_cast(hImage)->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // forward to device-platform - result = pfnMemImageWrite(hQueue, hImage, blockingWrite, origin, region, - rowPitch, slicePitch, pSrc, 
numEventsInWaitList, - phEventWaitListLocal.data(), phEvent); - - // In the event of ERROR_ADAPTER_SPECIFIC we should still attempt to wrap any - // output handles below. - if (UR_RESULT_SUCCESS != result && UR_RESULT_ERROR_ADAPTER_SPECIFIC != result) - return result; - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + return pfnMemImageWrite(hQueue, hImage, blockingWrite, origin, region, + rowPitch, slicePitch, pSrc, numEventsInWaitList, + phEventWaitList, phEvent); } /////////////////////////////////////////////////////////////////////////////// @@ -5344,51 +3054,16 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemImageCopy( /// particular command instance. If phEventWaitList and phEvent are not /// NULL, phEvent must not refer to an element of the phEventWaitList array. 
ur_event_handle_t *phEvent) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hQueue); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hQueue)->dditable; - auto pfnMemImageCopy = dditable->ur.Enqueue.pfnMemImageCopy; + auto *pfnMemImageCopy = dditable->Enqueue.pfnMemImageCopy; if (nullptr == pfnMemImageCopy) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hQueue = reinterpret_cast(hQueue)->handle; - - // convert loader handle to platform handle - hImageSrc = reinterpret_cast(hImageSrc)->handle; - - // convert loader handle to platform handle - hImageDst = reinterpret_cast(hImageDst)->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // forward to device-platform - result = pfnMemImageCopy(hQueue, hImageSrc, hImageDst, srcOrigin, dstOrigin, - region, numEventsInWaitList, - phEventWaitListLocal.data(), phEvent); - - // In the event of ERROR_ADAPTER_SPECIFIC we should still attempt to wrap any - // output handles below. - if (UR_RESULT_SUCCESS != result && UR_RESULT_ERROR_ADAPTER_SPECIFIC != result) - return result; - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + return pfnMemImageCopy(hQueue, hImageSrc, hImageDst, srcOrigin, dstOrigin, + region, numEventsInWaitList, phEventWaitList, phEvent); } /////////////////////////////////////////////////////////////////////////////// @@ -5420,48 +3095,17 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferMap( /// [out] return mapped pointer. 
TODO: move it before /// numEventsInWaitList? void **ppRetMap) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hQueue); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hQueue)->dditable; - auto pfnMemBufferMap = dditable->ur.Enqueue.pfnMemBufferMap; + auto *pfnMemBufferMap = dditable->Enqueue.pfnMemBufferMap; if (nullptr == pfnMemBufferMap) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hQueue = reinterpret_cast(hQueue)->handle; - - // convert loader handle to platform handle - hBuffer = reinterpret_cast(hBuffer)->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // forward to device-platform - result = pfnMemBufferMap(hQueue, hBuffer, blockingMap, mapFlags, offset, size, - numEventsInWaitList, phEventWaitListLocal.data(), - phEvent, ppRetMap); - - // In the event of ERROR_ADAPTER_SPECIFIC we should still attempt to wrap any - // output handles below. - if (UR_RESULT_SUCCESS != result && UR_RESULT_ERROR_ADAPTER_SPECIFIC != result) - return result; - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + return pfnMemBufferMap(hQueue, hBuffer, blockingMap, mapFlags, offset, size, + numEventsInWaitList, phEventWaitList, phEvent, + ppRetMap); } /////////////////////////////////////////////////////////////////////////////// @@ -5484,47 +3128,16 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemUnmap( /// particular command instance. 
If phEventWaitList and phEvent are not /// NULL, phEvent must not refer to an element of the phEventWaitList array. ur_event_handle_t *phEvent) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hQueue); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hQueue)->dditable; - auto pfnMemUnmap = dditable->ur.Enqueue.pfnMemUnmap; + auto *pfnMemUnmap = dditable->Enqueue.pfnMemUnmap; if (nullptr == pfnMemUnmap) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hQueue = reinterpret_cast(hQueue)->handle; - - // convert loader handle to platform handle - hMem = reinterpret_cast(hMem)->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // forward to device-platform - result = pfnMemUnmap(hQueue, hMem, pMappedPtr, numEventsInWaitList, - phEventWaitListLocal.data(), phEvent); - - // In the event of ERROR_ADAPTER_SPECIFIC we should still attempt to wrap any - // output handles below. - if (UR_RESULT_SUCCESS != result && UR_RESULT_ERROR_ADAPTER_SPECIFIC != result) - return result; - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + return pfnMemUnmap(hQueue, hMem, pMappedPtr, numEventsInWaitList, + phEventWaitList, phEvent); } /////////////////////////////////////////////////////////////////////////////// @@ -5552,45 +3165,16 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueUSMFill( /// particular command instance. 
If phEventWaitList and phEvent are not /// NULL, phEvent must not refer to an element of the phEventWaitList array. ur_event_handle_t *phEvent) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hQueue); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hQueue)->dditable; - auto pfnUSMFill = dditable->ur.Enqueue.pfnUSMFill; + auto *pfnUSMFill = dditable->Enqueue.pfnUSMFill; if (nullptr == pfnUSMFill) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hQueue = reinterpret_cast(hQueue)->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - - // forward to device-platform - result = - pfnUSMFill(hQueue, pMem, patternSize, pPattern, size, numEventsInWaitList, - phEventWaitListLocal.data(), phEvent); - - // In the event of ERROR_ADAPTER_SPECIFIC we should still attempt to wrap any - // output handles below. - if (UR_RESULT_SUCCESS != result && UR_RESULT_ERROR_ADAPTER_SPECIFIC != result) - return result; - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + // forward to device-platform + return pfnUSMFill(hQueue, pMem, patternSize, pPattern, size, + numEventsInWaitList, phEventWaitList, phEvent); } /////////////////////////////////////////////////////////////////////////////// @@ -5617,44 +3201,16 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueUSMMemcpy( /// particular command instance. If phEventWaitList and phEvent are not /// NULL, phEvent must not refer to an element of the phEventWaitList array. 
ur_event_handle_t *phEvent) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hQueue); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hQueue)->dditable; - auto pfnUSMMemcpy = dditable->ur.Enqueue.pfnUSMMemcpy; + auto *pfnUSMMemcpy = dditable->Enqueue.pfnUSMMemcpy; if (nullptr == pfnUSMMemcpy) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hQueue = reinterpret_cast(hQueue)->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // forward to device-platform - result = pfnUSMMemcpy(hQueue, blocking, pDst, pSrc, size, numEventsInWaitList, - phEventWaitListLocal.data(), phEvent); - - // In the event of ERROR_ADAPTER_SPECIFIC we should still attempt to wrap any - // output handles below. - if (UR_RESULT_SUCCESS != result && UR_RESULT_ERROR_ADAPTER_SPECIFIC != result) - return result; - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + return pfnUSMMemcpy(hQueue, blocking, pDst, pSrc, size, numEventsInWaitList, + phEventWaitList, phEvent); } /////////////////////////////////////////////////////////////////////////////// @@ -5679,44 +3235,16 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueUSMPrefetch( /// particular command instance. If phEventWaitList and phEvent are not /// NULL, phEvent must not refer to an element of the phEventWaitList array. 
ur_event_handle_t *phEvent) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hQueue); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hQueue)->dditable; - auto pfnUSMPrefetch = dditable->ur.Enqueue.pfnUSMPrefetch; + auto *pfnUSMPrefetch = dditable->Enqueue.pfnUSMPrefetch; if (nullptr == pfnUSMPrefetch) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hQueue = reinterpret_cast(hQueue)->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // forward to device-platform - result = pfnUSMPrefetch(hQueue, pMem, size, flags, numEventsInWaitList, - phEventWaitListLocal.data(), phEvent); - - // In the event of ERROR_ADAPTER_SPECIFIC we should still attempt to wrap any - // output handles below. - if (UR_RESULT_SUCCESS != result && UR_RESULT_ERROR_ADAPTER_SPECIFIC != result) - return result; - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + return pfnUSMPrefetch(hQueue, pMem, size, flags, numEventsInWaitList, + phEventWaitList, phEvent); } /////////////////////////////////////////////////////////////////////////////// @@ -5733,36 +3261,15 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueUSMAdvise( /// [out][optional][alloc] return an event object that identifies this /// particular command instance. 
ur_event_handle_t *phEvent) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hQueue); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hQueue)->dditable; - auto pfnUSMAdvise = dditable->ur.Enqueue.pfnUSMAdvise; + auto *pfnUSMAdvise = dditable->Enqueue.pfnUSMAdvise; if (nullptr == pfnUSMAdvise) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hQueue = reinterpret_cast(hQueue)->handle; - // forward to device-platform - result = pfnUSMAdvise(hQueue, pMem, size, advice, phEvent); - - // In the event of ERROR_ADAPTER_SPECIFIC we should still attempt to wrap any - // output handles below. - if (UR_RESULT_SUCCESS != result && UR_RESULT_ERROR_ADAPTER_SPECIFIC != result) - return result; - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + return pfnUSMAdvise(hQueue, pMem, size, advice, phEvent); } /////////////////////////////////////////////////////////////////////////////// @@ -5796,45 +3303,16 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueUSMFill2D( /// are not NULL, phEvent must not refer to an element of the /// phEventWaitList array. 
ur_event_handle_t *phEvent) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hQueue); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hQueue)->dditable; - auto pfnUSMFill2D = dditable->ur.Enqueue.pfnUSMFill2D; + auto *pfnUSMFill2D = dditable->Enqueue.pfnUSMFill2D; if (nullptr == pfnUSMFill2D) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hQueue = reinterpret_cast(hQueue)->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - - // forward to device-platform - result = - pfnUSMFill2D(hQueue, pMem, pitch, patternSize, pPattern, width, height, - numEventsInWaitList, phEventWaitListLocal.data(), phEvent); - - // In the event of ERROR_ADAPTER_SPECIFIC we should still attempt to wrap any - // output handles below. - if (UR_RESULT_SUCCESS != result && UR_RESULT_ERROR_ADAPTER_SPECIFIC != result) - return result; - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + // forward to device-platform + return pfnUSMFill2D(hQueue, pMem, pitch, patternSize, pPattern, width, height, + numEventsInWaitList, phEventWaitList, phEvent); } /////////////////////////////////////////////////////////////////////////////// @@ -5869,45 +3347,16 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( /// are not NULL, phEvent must not refer to an element of the /// phEventWaitList array. 
ur_event_handle_t *phEvent) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hQueue); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hQueue)->dditable; - auto pfnUSMMemcpy2D = dditable->ur.Enqueue.pfnUSMMemcpy2D; + auto *pfnUSMMemcpy2D = dditable->Enqueue.pfnUSMMemcpy2D; if (nullptr == pfnUSMMemcpy2D) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hQueue = reinterpret_cast(hQueue)->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - - // forward to device-platform - result = pfnUSMMemcpy2D(hQueue, blocking, pDst, dstPitch, pSrc, srcPitch, - width, height, numEventsInWaitList, - phEventWaitListLocal.data(), phEvent); - - // In the event of ERROR_ADAPTER_SPECIFIC we should still attempt to wrap any - // output handles below. - if (UR_RESULT_SUCCESS != result && UR_RESULT_ERROR_ADAPTER_SPECIFIC != result) - return result; - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + // forward to device-platform + return pfnUSMMemcpy2D(hQueue, blocking, pDst, dstPitch, pSrc, srcPitch, width, + height, numEventsInWaitList, phEventWaitList, phEvent); } /////////////////////////////////////////////////////////////////////////////// @@ -5939,49 +3388,18 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableWrite( /// are not NULL, phEvent must not refer to an element of the /// phEventWaitList array. 
ur_event_handle_t *phEvent) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hQueue); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hQueue)->dditable; - auto pfnDeviceGlobalVariableWrite = - dditable->ur.Enqueue.pfnDeviceGlobalVariableWrite; + auto *pfnDeviceGlobalVariableWrite = + dditable->Enqueue.pfnDeviceGlobalVariableWrite; if (nullptr == pfnDeviceGlobalVariableWrite) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hQueue = reinterpret_cast(hQueue)->handle; - - // convert loader handle to platform handle - hProgram = reinterpret_cast(hProgram)->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // forward to device-platform - result = pfnDeviceGlobalVariableWrite( - hQueue, hProgram, name, blockingWrite, count, offset, pSrc, - numEventsInWaitList, phEventWaitListLocal.data(), phEvent); - - // In the event of ERROR_ADAPTER_SPECIFIC we should still attempt to wrap any - // output handles below. 
- if (UR_RESULT_SUCCESS != result && UR_RESULT_ERROR_ADAPTER_SPECIFIC != result) - return result; - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + return pfnDeviceGlobalVariableWrite(hQueue, hProgram, name, blockingWrite, + count, offset, pSrc, numEventsInWaitList, + phEventWaitList, phEvent); } /////////////////////////////////////////////////////////////////////////////// @@ -6013,49 +3431,18 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableRead( /// are not NULL, phEvent must not refer to an element of the /// phEventWaitList array. ur_event_handle_t *phEvent) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hQueue); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hQueue)->dditable; - auto pfnDeviceGlobalVariableRead = - dditable->ur.Enqueue.pfnDeviceGlobalVariableRead; + auto *pfnDeviceGlobalVariableRead = + dditable->Enqueue.pfnDeviceGlobalVariableRead; if (nullptr == pfnDeviceGlobalVariableRead) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hQueue = reinterpret_cast(hQueue)->handle; - - // convert loader handle to platform handle - hProgram = reinterpret_cast(hProgram)->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // forward to device-platform - result = pfnDeviceGlobalVariableRead(hQueue, hProgram, name, blockingRead, - count, offset, pDst, numEventsInWaitList, - phEventWaitListLocal.data(), phEvent); - - // In the event of ERROR_ADAPTER_SPECIFIC we 
should still attempt to wrap any - // output handles below. - if (UR_RESULT_SUCCESS != result && UR_RESULT_ERROR_ADAPTER_SPECIFIC != result) - return result; - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + return pfnDeviceGlobalVariableRead(hQueue, hProgram, name, blockingRead, + count, offset, pDst, numEventsInWaitList, + phEventWaitList, phEvent); } /////////////////////////////////////////////////////////////////////////////// @@ -6089,48 +3476,16 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueReadHostPipe( /// If phEventWaitList and phEvent are not NULL, phEvent must not refer to /// an element of the phEventWaitList array. ur_event_handle_t *phEvent) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hQueue); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hQueue)->dditable; - auto pfnReadHostPipe = dditable->ur.Enqueue.pfnReadHostPipe; + auto *pfnReadHostPipe = dditable->Enqueue.pfnReadHostPipe; if (nullptr == pfnReadHostPipe) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hQueue = reinterpret_cast(hQueue)->handle; - - // convert loader handle to platform handle - hProgram = reinterpret_cast(hProgram)->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // forward to device-platform - result = pfnReadHostPipe(hQueue, hProgram, pipe_symbol, blocking, pDst, size, - numEventsInWaitList, phEventWaitListLocal.data(), - phEvent); - - // In the event of ERROR_ADAPTER_SPECIFIC we should 
still attempt to wrap any - // output handles below. - if (UR_RESULT_SUCCESS != result && UR_RESULT_ERROR_ADAPTER_SPECIFIC != result) - return result; - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + return pfnReadHostPipe(hQueue, hProgram, pipe_symbol, blocking, pDst, size, + numEventsInWaitList, phEventWaitList, phEvent); } /////////////////////////////////////////////////////////////////////////////// @@ -6165,48 +3520,16 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueWriteHostPipe( /// If phEventWaitList and phEvent are not NULL, phEvent must not refer to /// an element of the phEventWaitList array. ur_event_handle_t *phEvent) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hQueue); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hQueue)->dditable; - auto pfnWriteHostPipe = dditable->ur.Enqueue.pfnWriteHostPipe; + auto *pfnWriteHostPipe = dditable->Enqueue.pfnWriteHostPipe; if (nullptr == pfnWriteHostPipe) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hQueue = reinterpret_cast(hQueue)->handle; - - // convert loader handle to platform handle - hProgram = reinterpret_cast(hProgram)->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // forward to device-platform - result = pfnWriteHostPipe(hQueue, hProgram, pipe_symbol, blocking, pSrc, size, - numEventsInWaitList, phEventWaitListLocal.data(), - phEvent); - - // In the event of ERROR_ADAPTER_SPECIFIC we should still attempt to 
wrap any - // output handles below. - if (UR_RESULT_SUCCESS != result && UR_RESULT_ERROR_ADAPTER_SPECIFIC != result) - return result; - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + return pfnWriteHostPipe(hQueue, hProgram, pipe_symbol, blocking, pSrc, size, + numEventsInWaitList, phEventWaitList, phEvent); } /////////////////////////////////////////////////////////////////////////////// @@ -6231,49 +3554,17 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueUSMDeviceAllocExp( void **ppMem, /// [out][optional] return an event object that identifies the async alloc ur_event_handle_t *phEvent) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hQueue); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hQueue)->dditable; - auto pfnUSMDeviceAllocExp = dditable->ur.EnqueueExp.pfnUSMDeviceAllocExp; + auto *pfnUSMDeviceAllocExp = dditable->EnqueueExp.pfnUSMDeviceAllocExp; if (nullptr == pfnUSMDeviceAllocExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hQueue = reinterpret_cast(hQueue)->handle; - - // convert loader handle to platform handle - pPool = (pPool) ? 
reinterpret_cast(pPool)->handle - : nullptr; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - - // forward to device-platform - result = pfnUSMDeviceAllocExp(hQueue, pPool, size, pProperties, - numEventsInWaitList, - phEventWaitListLocal.data(), ppMem, phEvent); - - // In the event of ERROR_ADAPTER_SPECIFIC we should still attempt to wrap any - // output handles below. - if (UR_RESULT_SUCCESS != result && UR_RESULT_ERROR_ADAPTER_SPECIFIC != result) - return result; - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + // forward to device-platform + return pfnUSMDeviceAllocExp(hQueue, pPool, size, pProperties, + numEventsInWaitList, phEventWaitList, ppMem, + phEvent); } /////////////////////////////////////////////////////////////////////////////// @@ -6298,49 +3589,17 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueUSMSharedAllocExp( void **ppMem, /// [out][optional] return an event object that identifies the async alloc ur_event_handle_t *phEvent) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hQueue); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hQueue)->dditable; - auto pfnUSMSharedAllocExp = dditable->ur.EnqueueExp.pfnUSMSharedAllocExp; + auto *pfnUSMSharedAllocExp = dditable->EnqueueExp.pfnUSMSharedAllocExp; if (nullptr == pfnUSMSharedAllocExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hQueue = reinterpret_cast(hQueue)->handle; - - // convert loader handle to platform handle - 
pPool = (pPool) ? reinterpret_cast(pPool)->handle - : nullptr; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - - // forward to device-platform - result = pfnUSMSharedAllocExp(hQueue, pPool, size, pProperties, - numEventsInWaitList, - phEventWaitListLocal.data(), ppMem, phEvent); - - // In the event of ERROR_ADAPTER_SPECIFIC we should still attempt to wrap any - // output handles below. - if (UR_RESULT_SUCCESS != result && UR_RESULT_ERROR_ADAPTER_SPECIFIC != result) - return result; - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + // forward to device-platform + return pfnUSMSharedAllocExp(hQueue, pPool, size, pProperties, + numEventsInWaitList, phEventWaitList, ppMem, + phEvent); } /////////////////////////////////////////////////////////////////////////////// @@ -6365,49 +3624,17 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueUSMHostAllocExp( void **ppMem, /// [out][optional] return an event object that identifies the async alloc ur_event_handle_t *phEvent) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hQueue); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hQueue)->dditable; - auto pfnUSMHostAllocExp = dditable->ur.EnqueueExp.pfnUSMHostAllocExp; + auto *pfnUSMHostAllocExp = dditable->EnqueueExp.pfnUSMHostAllocExp; if (nullptr == pfnUSMHostAllocExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hQueue = reinterpret_cast(hQueue)->handle; - - // convert loader handle to platform 
handle - pPool = (pPool) ? reinterpret_cast(pPool)->handle - : nullptr; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - - // forward to device-platform - result = - pfnUSMHostAllocExp(hQueue, pPool, size, pProperties, numEventsInWaitList, - phEventWaitListLocal.data(), ppMem, phEvent); - - // In the event of ERROR_ADAPTER_SPECIFIC we should still attempt to wrap any - // output handles below. - if (UR_RESULT_SUCCESS != result && UR_RESULT_ERROR_ADAPTER_SPECIFIC != result) - return result; - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + // forward to device-platform + return pfnUSMHostAllocExp(hQueue, pPool, size, pProperties, + numEventsInWaitList, phEventWaitList, ppMem, + phEvent); } /////////////////////////////////////////////////////////////////////////////// @@ -6428,48 +3655,16 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueUSMFreeExp( const ur_event_handle_t *phEventWaitList, /// [out][optional] return an event object that identifies the async alloc ur_event_handle_t *phEvent) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hQueue); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hQueue)->dditable; - auto pfnUSMFreeExp = dditable->ur.EnqueueExp.pfnUSMFreeExp; + auto *pfnUSMFreeExp = dditable->EnqueueExp.pfnUSMFreeExp; if (nullptr == pfnUSMFreeExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hQueue = reinterpret_cast(hQueue)->handle; - - // convert loader handle to platform 
handle - pPool = (pPool) ? reinterpret_cast(pPool)->handle - : nullptr; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // forward to device-platform - result = pfnUSMFreeExp(hQueue, pPool, pMem, numEventsInWaitList, - phEventWaitListLocal.data(), phEvent); - - // In the event of ERROR_ADAPTER_SPECIFIC we should still attempt to wrap any - // output handles below. - if (UR_RESULT_SUCCESS != result && UR_RESULT_ERROR_ADAPTER_SPECIFIC != result) - return result; - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + return pfnUSMFreeExp(hQueue, pPool, pMem, numEventsInWaitList, + phEventWaitList, phEvent); } /////////////////////////////////////////////////////////////////////////////// @@ -6484,37 +3679,15 @@ __urdlllocal ur_result_t UR_APICALL urUSMPoolCreateExp( ur_usm_pool_desc_t *pPoolDesc, /// [out] pointer to USM memory pool ur_usm_pool_handle_t *pPool) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hContext); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnPoolCreateExp = dditable->ur.USMExp.pfnPoolCreateExp; + auto *pfnPoolCreateExp = dditable->USMExp.pfnPoolCreateExp; if (nullptr == pfnPoolCreateExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - - // convert loader handle to platform handle - hDevice = reinterpret_cast(hDevice)->handle; - // forward to device-platform - result = pfnPoolCreateExp(hContext, 
hDevice, pPoolDesc, pPool); - - if (UR_RESULT_SUCCESS != result) - return result; - - try { - // convert platform handle to loader handle - *pPool = reinterpret_cast( - context->factories.ur_usm_pool_factory.getInstance(*pPool, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + return pfnPoolCreateExp(hContext, hDevice, pPoolDesc, pPool); } /////////////////////////////////////////////////////////////////////////////// @@ -6526,29 +3699,15 @@ __urdlllocal ur_result_t UR_APICALL urUSMPoolDestroyExp( ur_device_handle_t hDevice, /// [in] handle to USM memory pool to be destroyed ur_usm_pool_handle_t hPool) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hContext); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnPoolDestroyExp = dditable->ur.USMExp.pfnPoolDestroyExp; + auto *pfnPoolDestroyExp = dditable->USMExp.pfnPoolDestroyExp; if (nullptr == pfnPoolDestroyExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - - // convert loader handle to platform handle - hDevice = reinterpret_cast(hDevice)->handle; - - // convert loader handle to platform handle - hPool = reinterpret_cast(hPool)->handle; - // forward to device-platform - result = pfnPoolDestroyExp(hContext, hDevice, hPool); - - return result; + return pfnPoolDestroyExp(hContext, hDevice, hPool); } /////////////////////////////////////////////////////////////////////////////// @@ -6560,38 +3719,16 @@ __urdlllocal ur_result_t UR_APICALL urUSMPoolGetDefaultDevicePoolExp( ur_device_handle_t hDevice, /// [out] pointer to USM memory pool ur_usm_pool_handle_t *pPool) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hContext); - // extract platform's 
function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnPoolGetDefaultDevicePoolExp = - dditable->ur.USMExp.pfnPoolGetDefaultDevicePoolExp; + auto *pfnPoolGetDefaultDevicePoolExp = + dditable->USMExp.pfnPoolGetDefaultDevicePoolExp; if (nullptr == pfnPoolGetDefaultDevicePoolExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - - // convert loader handle to platform handle - hDevice = reinterpret_cast(hDevice)->handle; - // forward to device-platform - result = pfnPoolGetDefaultDevicePoolExp(hContext, hDevice, pPool); - - if (UR_RESULT_SUCCESS != result) - return result; - - try { - // convert platform handle to loader handle - *pPool = reinterpret_cast( - context->factories.ur_usm_pool_factory.getInstance(*pPool, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + return pfnPoolGetDefaultDevicePoolExp(hContext, hDevice, pPool); } /////////////////////////////////////////////////////////////////////////////// @@ -6605,23 +3742,15 @@ __urdlllocal ur_result_t UR_APICALL urUSMPoolGetInfoExp( void *pPropValue, /// [out][optional] returned query value size size_t *pPropSizeRet) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hPool); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hPool)->dditable; - auto pfnPoolGetInfoExp = dditable->ur.USMExp.pfnPoolGetInfoExp; + auto *pfnPoolGetInfoExp = dditable->USMExp.pfnPoolGetInfoExp; if (nullptr == pfnPoolGetInfoExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hPool = reinterpret_cast(hPool)->handle; - // forward to device-platform - result = pfnPoolGetInfoExp(hPool, propName, pPropValue, pPropSizeRet); - - return result; + return pfnPoolGetInfoExp(hPool, propName, pPropValue, pPropSizeRet); } 
/////////////////////////////////////////////////////////////////////////////// @@ -6635,23 +3764,15 @@ __urdlllocal ur_result_t UR_APICALL urUSMPoolSetInfoExp( void *pPropValue, /// [in] size of value to assign size_t propSize) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hPool); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hPool)->dditable; - auto pfnPoolSetInfoExp = dditable->ur.USMExp.pfnPoolSetInfoExp; + auto *pfnPoolSetInfoExp = dditable->USMExp.pfnPoolSetInfoExp; if (nullptr == pfnPoolSetInfoExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hPool = reinterpret_cast(hPool)->handle; - // forward to device-platform - result = pfnPoolSetInfoExp(hPool, propName, pPropValue, propSize); - - return result; + return pfnPoolSetInfoExp(hPool, propName, pPropValue, propSize); } /////////////////////////////////////////////////////////////////////////////// @@ -6663,29 +3784,15 @@ __urdlllocal ur_result_t UR_APICALL urUSMPoolSetDevicePoolExp( ur_device_handle_t hDevice, /// [in] handle to USM memory pool to set for a device ur_usm_pool_handle_t hPool) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hContext); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnPoolSetDevicePoolExp = dditable->ur.USMExp.pfnPoolSetDevicePoolExp; + auto *pfnPoolSetDevicePoolExp = dditable->USMExp.pfnPoolSetDevicePoolExp; if (nullptr == pfnPoolSetDevicePoolExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - - // convert loader handle to platform handle - hDevice = reinterpret_cast(hDevice)->handle; - - // convert loader handle to platform handle - hPool = reinterpret_cast(hPool)->handle; - // forward to 
device-platform - result = pfnPoolSetDevicePoolExp(hContext, hDevice, hPool); - - return result; + return pfnPoolSetDevicePoolExp(hContext, hDevice, hPool); } /////////////////////////////////////////////////////////////////////////////// @@ -6697,37 +3804,15 @@ __urdlllocal ur_result_t UR_APICALL urUSMPoolGetDevicePoolExp( ur_device_handle_t hDevice, /// [out] pointer to USM memory pool ur_usm_pool_handle_t *pPool) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hContext); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnPoolGetDevicePoolExp = dditable->ur.USMExp.pfnPoolGetDevicePoolExp; + auto *pfnPoolGetDevicePoolExp = dditable->USMExp.pfnPoolGetDevicePoolExp; if (nullptr == pfnPoolGetDevicePoolExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - - // convert loader handle to platform handle - hDevice = reinterpret_cast(hDevice)->handle; - // forward to device-platform - result = pfnPoolGetDevicePoolExp(hContext, hDevice, pPool); - - if (UR_RESULT_SUCCESS != result) - return result; - - try { - // convert platform handle to loader handle - *pPool = reinterpret_cast( - context->factories.ur_usm_pool_factory.getInstance(*pPool, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + return pfnPoolGetDevicePoolExp(hContext, hDevice, pPool); } /////////////////////////////////////////////////////////////////////////////// @@ -6741,29 +3826,15 @@ __urdlllocal ur_result_t UR_APICALL urUSMPoolTrimToExp( ur_usm_pool_handle_t hPool, /// [in] minimum number of bytes to keep in the pool size_t minBytesToKeep) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hContext); - // extract platform's function pointer 
table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnPoolTrimToExp = dditable->ur.USMExp.pfnPoolTrimToExp; + auto *pfnPoolTrimToExp = dditable->USMExp.pfnPoolTrimToExp; if (nullptr == pfnPoolTrimToExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - - // convert loader handle to platform handle - hDevice = reinterpret_cast(hDevice)->handle; - - // convert loader handle to platform handle - hPool = reinterpret_cast(hPool)->handle; - // forward to device-platform - result = pfnPoolTrimToExp(hContext, hDevice, hPool, minBytesToKeep); - - return result; + return pfnPoolTrimToExp(hContext, hDevice, hPool, minBytesToKeep); } /////////////////////////////////////////////////////////////////////////////// @@ -6787,31 +3858,16 @@ __urdlllocal ur_result_t UR_APICALL urUSMPitchedAllocExp( void **ppMem, /// [out] pitch of the allocation size_t *pResultPitch) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hContext); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnPitchedAllocExp = dditable->ur.USMExp.pfnPitchedAllocExp; + auto *pfnPitchedAllocExp = dditable->USMExp.pfnPitchedAllocExp; if (nullptr == pfnPitchedAllocExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - - // convert loader handle to platform handle - hDevice = reinterpret_cast(hDevice)->handle; - - // convert loader handle to platform handle - pool = - (pool) ? 
reinterpret_cast(pool)->handle : nullptr; - // forward to device-platform - result = pfnPitchedAllocExp(hContext, hDevice, pUSMDesc, pool, widthInBytes, - height, elementSizeBytes, ppMem, pResultPitch); - - return result; + return pfnPitchedAllocExp(hContext, hDevice, pUSMDesc, pool, widthInBytes, + height, elementSizeBytes, ppMem, pResultPitch); } /////////////////////////////////////////////////////////////////////////////// @@ -6824,27 +3880,16 @@ urBindlessImagesUnsampledImageHandleDestroyExp( ur_device_handle_t hDevice, /// [in][release] pointer to handle of image object to destroy ur_exp_image_native_handle_t hImage) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hContext); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnUnsampledImageHandleDestroyExp = - dditable->ur.BindlessImagesExp.pfnUnsampledImageHandleDestroyExp; + auto *pfnUnsampledImageHandleDestroyExp = + dditable->BindlessImagesExp.pfnUnsampledImageHandleDestroyExp; if (nullptr == pfnUnsampledImageHandleDestroyExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - - // convert loader handle to platform handle - hDevice = reinterpret_cast(hDevice)->handle; - // forward to device-platform - result = pfnUnsampledImageHandleDestroyExp(hContext, hDevice, hImage); - - return result; + return pfnUnsampledImageHandleDestroyExp(hContext, hDevice, hImage); } /////////////////////////////////////////////////////////////////////////////// @@ -6857,27 +3902,16 @@ urBindlessImagesSampledImageHandleDestroyExp( ur_device_handle_t hDevice, /// [in][release] pointer to handle of image object to destroy ur_exp_image_native_handle_t hImage) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hContext); - // extract 
platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnSampledImageHandleDestroyExp = - dditable->ur.BindlessImagesExp.pfnSampledImageHandleDestroyExp; + auto *pfnSampledImageHandleDestroyExp = + dditable->BindlessImagesExp.pfnSampledImageHandleDestroyExp; if (nullptr == pfnSampledImageHandleDestroyExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - - // convert loader handle to platform handle - hDevice = reinterpret_cast(hDevice)->handle; - // forward to device-platform - result = pfnSampledImageHandleDestroyExp(hContext, hDevice, hImage); - - return result; + return pfnSampledImageHandleDestroyExp(hContext, hDevice, hImage); } /////////////////////////////////////////////////////////////////////////////// @@ -6893,30 +3927,16 @@ __urdlllocal ur_result_t UR_APICALL urBindlessImagesImageAllocateExp( const ur_image_desc_t *pImageDesc, /// [out][alloc] pointer to handle of image memory allocated ur_exp_image_mem_native_handle_t *phImageMem) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hContext); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnImageAllocateExp = dditable->ur.BindlessImagesExp.pfnImageAllocateExp; + auto *pfnImageAllocateExp = dditable->BindlessImagesExp.pfnImageAllocateExp; if (nullptr == pfnImageAllocateExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - - // convert loader handle to platform handle - hDevice = reinterpret_cast(hDevice)->handle; - // forward to device-platform - result = pfnImageAllocateExp(hContext, hDevice, pImageFormat, pImageDesc, - phImageMem); - - if (UR_RESULT_SUCCESS != result) - return result; - - return result; + return pfnImageAllocateExp(hContext, hDevice, 
pImageFormat, pImageDesc, + phImageMem); } /////////////////////////////////////////////////////////////////////////////// @@ -6928,26 +3948,15 @@ __urdlllocal ur_result_t UR_APICALL urBindlessImagesImageFreeExp( ur_device_handle_t hDevice, /// [in][release] handle of image memory to be freed ur_exp_image_mem_native_handle_t hImageMem) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hContext); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnImageFreeExp = dditable->ur.BindlessImagesExp.pfnImageFreeExp; + auto *pfnImageFreeExp = dditable->BindlessImagesExp.pfnImageFreeExp; if (nullptr == pfnImageFreeExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - - // convert loader handle to platform handle - hDevice = reinterpret_cast(hDevice)->handle; - // forward to device-platform - result = pfnImageFreeExp(hContext, hDevice, hImageMem); - - return result; + return pfnImageFreeExp(hContext, hDevice, hImageMem); } /////////////////////////////////////////////////////////////////////////////// @@ -6965,31 +3974,17 @@ __urdlllocal ur_result_t UR_APICALL urBindlessImagesUnsampledImageCreateExp( const ur_image_desc_t *pImageDesc, /// [out][alloc] pointer to handle of image object created ur_exp_image_native_handle_t *phImage) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hContext); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnUnsampledImageCreateExp = - dditable->ur.BindlessImagesExp.pfnUnsampledImageCreateExp; + auto *pfnUnsampledImageCreateExp = + dditable->BindlessImagesExp.pfnUnsampledImageCreateExp; if (nullptr == pfnUnsampledImageCreateExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert 
loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - - // convert loader handle to platform handle - hDevice = reinterpret_cast(hDevice)->handle; - // forward to device-platform - result = pfnUnsampledImageCreateExp(hContext, hDevice, hImageMem, - pImageFormat, pImageDesc, phImage); - - if (UR_RESULT_SUCCESS != result) - return result; - - return result; + return pfnUnsampledImageCreateExp(hContext, hDevice, hImageMem, pImageFormat, + pImageDesc, phImage); } /////////////////////////////////////////////////////////////////////////////// @@ -7009,34 +4004,17 @@ __urdlllocal ur_result_t UR_APICALL urBindlessImagesSampledImageCreateExp( ur_sampler_handle_t hSampler, /// [out][alloc] pointer to handle of image object created ur_exp_image_native_handle_t *phImage) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hContext); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnSampledImageCreateExp = - dditable->ur.BindlessImagesExp.pfnSampledImageCreateExp; + auto *pfnSampledImageCreateExp = + dditable->BindlessImagesExp.pfnSampledImageCreateExp; if (nullptr == pfnSampledImageCreateExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - - // convert loader handle to platform handle - hDevice = reinterpret_cast(hDevice)->handle; - - // convert loader handle to platform handle - hSampler = reinterpret_cast(hSampler)->handle; - // forward to device-platform - result = pfnSampledImageCreateExp(hContext, hDevice, hImageMem, pImageFormat, - pImageDesc, hSampler, phImage); - - if (UR_RESULT_SUCCESS != result) - return result; - - return result; + return pfnSampledImageCreateExp(hContext, hDevice, hImageMem, pImageFormat, + pImageDesc, hSampler, phImage); } 
/////////////////////////////////////////////////////////////////////////////// @@ -7073,45 +4051,18 @@ __urdlllocal ur_result_t UR_APICALL urBindlessImagesImageCopyExp( /// particular command instance. If phEventWaitList and phEvent are not /// NULL, phEvent must not refer to an element of the phEventWaitList array. ur_event_handle_t *phEvent) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hQueue); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hQueue)->dditable; - auto pfnImageCopyExp = dditable->ur.BindlessImagesExp.pfnImageCopyExp; + auto *pfnImageCopyExp = dditable->BindlessImagesExp.pfnImageCopyExp; if (nullptr == pfnImageCopyExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hQueue = reinterpret_cast(hQueue)->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // forward to device-platform - result = pfnImageCopyExp(hQueue, pSrc, pDst, pSrcImageDesc, pDstImageDesc, - pSrcImageFormat, pDstImageFormat, pCopyRegion, - imageCopyFlags, numEventsInWaitList, - phEventWaitListLocal.data(), phEvent); - - if (UR_RESULT_SUCCESS != result) - return result; - - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + return pfnImageCopyExp(hQueue, pSrc, pDst, pSrcImageDesc, pDstImageDesc, + pSrcImageFormat, pDstImageFormat, pCopyRegion, + imageCopyFlags, numEventsInWaitList, phEventWaitList, + phEvent); } /////////////////////////////////////////////////////////////////////////////// @@ -7127,24 
+4078,16 @@ __urdlllocal ur_result_t UR_APICALL urBindlessImagesImageGetInfoExp( void *pPropValue, /// [out][optional] returned query value size size_t *pPropSizeRet) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hContext); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnImageGetInfoExp = dditable->ur.BindlessImagesExp.pfnImageGetInfoExp; + auto *pfnImageGetInfoExp = dditable->BindlessImagesExp.pfnImageGetInfoExp; if (nullptr == pfnImageGetInfoExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - // forward to device-platform - result = pfnImageGetInfoExp(hContext, hImageMem, propName, pPropValue, - pPropSizeRet); - - return result; + return pfnImageGetInfoExp(hContext, hImageMem, propName, pPropValue, + pPropSizeRet); } /////////////////////////////////////////////////////////////////////////////// @@ -7160,31 +4103,16 @@ __urdlllocal ur_result_t UR_APICALL urBindlessImagesMipmapGetLevelExp( uint32_t mipmapLevel, /// [out] returning memory handle to the individual image ur_exp_image_mem_native_handle_t *phImageMem) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hContext); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnMipmapGetLevelExp = - dditable->ur.BindlessImagesExp.pfnMipmapGetLevelExp; + auto *pfnMipmapGetLevelExp = dditable->BindlessImagesExp.pfnMipmapGetLevelExp; if (nullptr == pfnMipmapGetLevelExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - - // convert loader handle to platform handle - hDevice = reinterpret_cast(hDevice)->handle; - // forward to device-platform - result = 
pfnMipmapGetLevelExp(hContext, hDevice, hImageMem, mipmapLevel, - phImageMem); - - if (UR_RESULT_SUCCESS != result) - return result; - - return result; + return pfnMipmapGetLevelExp(hContext, hDevice, hImageMem, mipmapLevel, + phImageMem); } /////////////////////////////////////////////////////////////////////////////// @@ -7196,26 +4124,15 @@ __urdlllocal ur_result_t UR_APICALL urBindlessImagesMipmapFreeExp( ur_device_handle_t hDevice, /// [in][release] handle of image memory to be freed ur_exp_image_mem_native_handle_t hMem) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hContext); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnMipmapFreeExp = dditable->ur.BindlessImagesExp.pfnMipmapFreeExp; + auto *pfnMipmapFreeExp = dditable->BindlessImagesExp.pfnMipmapFreeExp; if (nullptr == pfnMipmapFreeExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - - // convert loader handle to platform handle - hDevice = reinterpret_cast(hDevice)->handle; - // forward to device-platform - result = pfnMipmapFreeExp(hContext, hDevice, hMem); - - return result; + return pfnMipmapFreeExp(hContext, hDevice, hMem); } /////////////////////////////////////////////////////////////////////////////// @@ -7233,40 +4150,17 @@ __urdlllocal ur_result_t UR_APICALL urBindlessImagesImportExternalMemoryExp( ur_exp_external_mem_desc_t *pExternalMemDesc, /// [out][alloc] external memory handle to the external memory ur_exp_external_mem_handle_t *phExternalMem) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hContext); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnImportExternalMemoryExp = - 
dditable->ur.BindlessImagesExp.pfnImportExternalMemoryExp; + auto *pfnImportExternalMemoryExp = + dditable->BindlessImagesExp.pfnImportExternalMemoryExp; if (nullptr == pfnImportExternalMemoryExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - - // convert loader handle to platform handle - hDevice = reinterpret_cast(hDevice)->handle; - // forward to device-platform - result = pfnImportExternalMemoryExp(hContext, hDevice, size, memHandleType, - pExternalMemDesc, phExternalMem); - - if (UR_RESULT_SUCCESS != result) - return result; - - try { - // convert platform handle to loader handle - *phExternalMem = reinterpret_cast( - context->factories.ur_exp_external_mem_factory.getInstance( - *phExternalMem, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + return pfnImportExternalMemoryExp(hContext, hDevice, size, memHandleType, + pExternalMemDesc, phExternalMem); } /////////////////////////////////////////////////////////////////////////////// @@ -7284,35 +4178,17 @@ __urdlllocal ur_result_t UR_APICALL urBindlessImagesMapExternalArrayExp( ur_exp_external_mem_handle_t hExternalMem, /// [out] image memory handle to the externally allocated memory ur_exp_image_mem_native_handle_t *phImageMem) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hContext); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnMapExternalArrayExp = - dditable->ur.BindlessImagesExp.pfnMapExternalArrayExp; + auto *pfnMapExternalArrayExp = + dditable->BindlessImagesExp.pfnMapExternalArrayExp; if (nullptr == pfnMapExternalArrayExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - - // convert loader handle to platform handle - 
hDevice = reinterpret_cast(hDevice)->handle; - - // convert loader handle to platform handle - hExternalMem = - reinterpret_cast(hExternalMem)->handle; - // forward to device-platform - result = pfnMapExternalArrayExp(hContext, hDevice, pImageFormat, pImageDesc, - hExternalMem, phImageMem); - - if (UR_RESULT_SUCCESS != result) - return result; - - return result; + return pfnMapExternalArrayExp(hContext, hDevice, pImageFormat, pImageDesc, + hExternalMem, phImageMem); } /////////////////////////////////////////////////////////////////////////////// @@ -7330,32 +4206,17 @@ __urdlllocal ur_result_t UR_APICALL urBindlessImagesMapExternalLinearMemoryExp( ur_exp_external_mem_handle_t hExternalMem, /// [out] pointer of the externally allocated memory void **ppRetMem) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hContext); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnMapExternalLinearMemoryExp = - dditable->ur.BindlessImagesExp.pfnMapExternalLinearMemoryExp; + auto *pfnMapExternalLinearMemoryExp = + dditable->BindlessImagesExp.pfnMapExternalLinearMemoryExp; if (nullptr == pfnMapExternalLinearMemoryExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - - // convert loader handle to platform handle - hDevice = reinterpret_cast(hDevice)->handle; - - // convert loader handle to platform handle - hExternalMem = - reinterpret_cast(hExternalMem)->handle; - // forward to device-platform - result = pfnMapExternalLinearMemoryExp(hContext, hDevice, offset, size, - hExternalMem, ppRetMem); - - return result; + return pfnMapExternalLinearMemoryExp(hContext, hDevice, offset, size, + hExternalMem, ppRetMem); } /////////////////////////////////////////////////////////////////////////////// @@ -7367,34 +4228,16 @@ __urdlllocal ur_result_t UR_APICALL 
urBindlessImagesReleaseExternalMemoryExp( ur_device_handle_t hDevice, /// [in][release] handle of external memory to be destroyed ur_exp_external_mem_handle_t hExternalMem) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hContext); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnReleaseExternalMemoryExp = - dditable->ur.BindlessImagesExp.pfnReleaseExternalMemoryExp; + auto *pfnReleaseExternalMemoryExp = + dditable->BindlessImagesExp.pfnReleaseExternalMemoryExp; if (nullptr == pfnReleaseExternalMemoryExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - - // convert loader handle to platform handle - hDevice = reinterpret_cast(hDevice)->handle; - - // convert loader handle to platform handle - hExternalMem = - reinterpret_cast(hExternalMem)->handle; - // forward to device-platform - result = pfnReleaseExternalMemoryExp(hContext, hDevice, hExternalMem); - - // release loader handle - context->factories.ur_exp_external_mem_factory.release(hExternalMem); - - return result; + return pfnReleaseExternalMemoryExp(hContext, hDevice, hExternalMem); } /////////////////////////////////////////////////////////////////////////////// @@ -7410,41 +4253,18 @@ __urdlllocal ur_result_t UR_APICALL urBindlessImagesImportExternalSemaphoreExp( ur_exp_external_semaphore_desc_t *pExternalSemaphoreDesc, /// [out][alloc] external semaphore handle to the external semaphore ur_exp_external_semaphore_handle_t *phExternalSemaphore) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hContext); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnImportExternalSemaphoreExp = - 
dditable->ur.BindlessImagesExp.pfnImportExternalSemaphoreExp; + auto *pfnImportExternalSemaphoreExp = + dditable->BindlessImagesExp.pfnImportExternalSemaphoreExp; if (nullptr == pfnImportExternalSemaphoreExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - - // convert loader handle to platform handle - hDevice = reinterpret_cast(hDevice)->handle; - // forward to device-platform - result = pfnImportExternalSemaphoreExp(hContext, hDevice, semHandleType, - pExternalSemaphoreDesc, - phExternalSemaphore); - - if (UR_RESULT_SUCCESS != result) - return result; - - try { - // convert platform handle to loader handle - *phExternalSemaphore = reinterpret_cast( - context->factories.ur_exp_external_semaphore_factory.getInstance( - *phExternalSemaphore, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + return pfnImportExternalSemaphoreExp(hContext, hDevice, semHandleType, + pExternalSemaphoreDesc, + phExternalSemaphore); } /////////////////////////////////////////////////////////////////////////////// @@ -7456,37 +4276,16 @@ __urdlllocal ur_result_t UR_APICALL urBindlessImagesReleaseExternalSemaphoreExp( ur_device_handle_t hDevice, /// [in][release] handle of external semaphore to be destroyed ur_exp_external_semaphore_handle_t hExternalSemaphore) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hContext); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnReleaseExternalSemaphoreExp = - dditable->ur.BindlessImagesExp.pfnReleaseExternalSemaphoreExp; + auto *pfnReleaseExternalSemaphoreExp = + dditable->BindlessImagesExp.pfnReleaseExternalSemaphoreExp; if (nullptr == pfnReleaseExternalSemaphoreExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - 
hContext = reinterpret_cast(hContext)->handle; - - // convert loader handle to platform handle - hDevice = reinterpret_cast(hDevice)->handle; - - // convert loader handle to platform handle - hExternalSemaphore = - reinterpret_cast(hExternalSemaphore) - ->handle; - // forward to device-platform - result = - pfnReleaseExternalSemaphoreExp(hContext, hDevice, hExternalSemaphore); - - // release loader handle - context->factories.ur_exp_external_semaphore_factory.release( - hExternalSemaphore); - - return result; + return pfnReleaseExternalSemaphoreExp(hContext, hDevice, hExternalSemaphore); } /////////////////////////////////////////////////////////////////////////////// @@ -7515,50 +4314,18 @@ __urdlllocal ur_result_t UR_APICALL urBindlessImagesWaitExternalSemaphoreExp( /// particular command instance. If phEventWaitList and phEvent are not /// NULL, phEvent must not refer to an element of the phEventWaitList array. ur_event_handle_t *phEvent) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hQueue); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hQueue)->dditable; - auto pfnWaitExternalSemaphoreExp = - dditable->ur.BindlessImagesExp.pfnWaitExternalSemaphoreExp; + auto *pfnWaitExternalSemaphoreExp = + dditable->BindlessImagesExp.pfnWaitExternalSemaphoreExp; if (nullptr == pfnWaitExternalSemaphoreExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hQueue = reinterpret_cast(hQueue)->handle; - - // convert loader handle to platform handle - hSemaphore = - reinterpret_cast(hSemaphore) - ->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // forward to device-platform - result = pfnWaitExternalSemaphoreExp(hQueue, 
hSemaphore, hasWaitValue, - waitValue, numEventsInWaitList, - phEventWaitListLocal.data(), phEvent); - - if (UR_RESULT_SUCCESS != result) - return result; - - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + return pfnWaitExternalSemaphoreExp(hQueue, hSemaphore, hasWaitValue, + waitValue, numEventsInWaitList, + phEventWaitList, phEvent); } /////////////////////////////////////////////////////////////////////////////// @@ -7587,50 +4354,18 @@ __urdlllocal ur_result_t UR_APICALL urBindlessImagesSignalExternalSemaphoreExp( /// particular command instance. If phEventWaitList and phEvent are not /// NULL, phEvent must not refer to an element of the phEventWaitList array. ur_event_handle_t *phEvent) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hQueue); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hQueue)->dditable; - auto pfnSignalExternalSemaphoreExp = - dditable->ur.BindlessImagesExp.pfnSignalExternalSemaphoreExp; + auto *pfnSignalExternalSemaphoreExp = + dditable->BindlessImagesExp.pfnSignalExternalSemaphoreExp; if (nullptr == pfnSignalExternalSemaphoreExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hQueue = reinterpret_cast(hQueue)->handle; - - // convert loader handle to platform handle - hSemaphore = - reinterpret_cast(hSemaphore) - ->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // forward to device-platform - result = pfnSignalExternalSemaphoreExp(hQueue, hSemaphore, 
hasSignalValue, - signalValue, numEventsInWaitList, - phEventWaitListLocal.data(), phEvent); - - if (UR_RESULT_SUCCESS != result) - return result; - - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + return pfnSignalExternalSemaphoreExp(hQueue, hSemaphore, hasSignalValue, + signalValue, numEventsInWaitList, + phEventWaitList, phEvent); } /////////////////////////////////////////////////////////////////////////////// @@ -7644,38 +4379,15 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferCreateExp( const ur_exp_command_buffer_desc_t *pCommandBufferDesc, /// [out][alloc] Pointer to command-Buffer handle. ur_exp_command_buffer_handle_t *phCommandBuffer) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hContext); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnCreateExp = dditable->ur.CommandBufferExp.pfnCreateExp; + auto *pfnCreateExp = dditable->CommandBufferExp.pfnCreateExp; if (nullptr == pfnCreateExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - - // convert loader handle to platform handle - hDevice = reinterpret_cast(hDevice)->handle; - // forward to device-platform - result = pfnCreateExp(hContext, hDevice, pCommandBufferDesc, phCommandBuffer); - - if (UR_RESULT_SUCCESS != result) - return result; - - try { - // convert platform handle to loader handle - *phCommandBuffer = reinterpret_cast( - context->factories.ur_exp_command_buffer_factory.getInstance( - *phCommandBuffer, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + return 
pfnCreateExp(hContext, hDevice, pCommandBufferDesc, phCommandBuffer); } /////////////////////////////////////////////////////////////////////////////// @@ -7683,30 +4395,15 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferCreateExp( __urdlllocal ur_result_t UR_APICALL urCommandBufferRetainExp( /// [in][retain] Handle of the command-buffer object. ur_exp_command_buffer_handle_t hCommandBuffer) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hCommandBuffer); - // extract platform's function pointer table - auto dditable = - reinterpret_cast(hCommandBuffer) - ->dditable; - auto pfnRetainExp = dditable->ur.CommandBufferExp.pfnRetainExp; + auto *pfnRetainExp = dditable->CommandBufferExp.pfnRetainExp; if (nullptr == pfnRetainExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hCommandBuffer = - reinterpret_cast(hCommandBuffer) - ->handle; - // forward to device-platform - result = pfnRetainExp(hCommandBuffer); - - // increment refcount of handle - context->factories.ur_exp_command_buffer_factory.retain(hCommandBuffer); - - return result; + return pfnRetainExp(hCommandBuffer); } /////////////////////////////////////////////////////////////////////////////// @@ -7714,30 +4411,15 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferRetainExp( __urdlllocal ur_result_t UR_APICALL urCommandBufferReleaseExp( /// [in][release] Handle of the command-buffer object. 
ur_exp_command_buffer_handle_t hCommandBuffer) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hCommandBuffer); - // extract platform's function pointer table - auto dditable = - reinterpret_cast(hCommandBuffer) - ->dditable; - auto pfnReleaseExp = dditable->ur.CommandBufferExp.pfnReleaseExp; + auto *pfnReleaseExp = dditable->CommandBufferExp.pfnReleaseExp; if (nullptr == pfnReleaseExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hCommandBuffer = - reinterpret_cast(hCommandBuffer) - ->handle; - // forward to device-platform - result = pfnReleaseExp(hCommandBuffer); - - // release loader handle - context->factories.ur_exp_command_buffer_factory.release(hCommandBuffer); - - return result; + return pfnReleaseExp(hCommandBuffer); } /////////////////////////////////////////////////////////////////////////////// @@ -7745,27 +4427,15 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferReleaseExp( __urdlllocal ur_result_t UR_APICALL urCommandBufferFinalizeExp( /// [in] Handle of the command-buffer object. 
ur_exp_command_buffer_handle_t hCommandBuffer) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hCommandBuffer); - // extract platform's function pointer table - auto dditable = - reinterpret_cast(hCommandBuffer) - ->dditable; - auto pfnFinalizeExp = dditable->ur.CommandBufferExp.pfnFinalizeExp; + auto *pfnFinalizeExp = dditable->CommandBufferExp.pfnFinalizeExp; if (nullptr == pfnFinalizeExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hCommandBuffer = - reinterpret_cast(hCommandBuffer) - ->handle; - // forward to device-platform - result = pfnFinalizeExp(hCommandBuffer); - - return result; + return pfnFinalizeExp(hCommandBuffer); } /////////////////////////////////////////////////////////////////////////////// @@ -7814,71 +4484,20 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( /// [out][optional][alloc] Handle to this command. Only available if the /// command-buffer is updatable. 
ur_exp_command_buffer_command_handle_t *phCommand) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hCommandBuffer); - // extract platform's function pointer table - auto dditable = - reinterpret_cast(hCommandBuffer) - ->dditable; - auto pfnAppendKernelLaunchExp = - dditable->ur.CommandBufferExp.pfnAppendKernelLaunchExp; + auto *pfnAppendKernelLaunchExp = + dditable->CommandBufferExp.pfnAppendKernelLaunchExp; if (nullptr == pfnAppendKernelLaunchExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hCommandBuffer = - reinterpret_cast(hCommandBuffer) - ->handle; - - // convert loader handle to platform handle - hKernel = reinterpret_cast(hKernel)->handle; - - // convert loader handles to platform handles - auto phKernelAlternativesLocal = - std::vector(numKernelAlternatives); - for (size_t i = 0; i < numKernelAlternatives; ++i) - phKernelAlternativesLocal[i] = - reinterpret_cast(phKernelAlternatives[i])->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // forward to device-platform - result = pfnAppendKernelLaunchExp( + return pfnAppendKernelLaunchExp( hCommandBuffer, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, - pLocalWorkSize, numKernelAlternatives, phKernelAlternativesLocal.data(), + pLocalWorkSize, numKernelAlternatives, phKernelAlternatives, numSyncPointsInWaitList, pSyncPointWaitList, numEventsInWaitList, - phEventWaitListLocal.data(), pSyncPoint, phEvent, phCommand); - - if (UR_RESULT_SUCCESS != result) - return result; - - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - 
result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - try { - // convert platform handle to loader handle - if (nullptr != phCommand) - *phCommand = reinterpret_cast( - context->factories.ur_exp_command_buffer_command_factory.getInstance( - *phCommand, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + phEventWaitList, pSyncPoint, phEvent, phCommand); } /////////////////////////////////////////////////////////////////////////////// @@ -7911,60 +4530,19 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( ur_event_handle_t *phEvent, /// [out][optional][alloc] Handle to this command. ur_exp_command_buffer_command_handle_t *phCommand) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hCommandBuffer); - // extract platform's function pointer table - auto dditable = - reinterpret_cast(hCommandBuffer) - ->dditable; - auto pfnAppendUSMMemcpyExp = - dditable->ur.CommandBufferExp.pfnAppendUSMMemcpyExp; + auto *pfnAppendUSMMemcpyExp = + dditable->CommandBufferExp.pfnAppendUSMMemcpyExp; if (nullptr == pfnAppendUSMMemcpyExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hCommandBuffer = - reinterpret_cast(hCommandBuffer) - ->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // forward to device-platform - result = pfnAppendUSMMemcpyExp( - hCommandBuffer, pDst, pSrc, size, numSyncPointsInWaitList, - pSyncPointWaitList, numEventsInWaitList, phEventWaitListLocal.data(), - pSyncPoint, phEvent, phCommand); - - if (UR_RESULT_SUCCESS != result) - return result; - - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - 
context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - try { - // convert platform handle to loader handle - if (nullptr != phCommand) - *phCommand = reinterpret_cast( - context->factories.ur_exp_command_buffer_command_factory.getInstance( - *phCommand, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + return pfnAppendUSMMemcpyExp(hCommandBuffer, pDst, pSrc, size, + numSyncPointsInWaitList, pSyncPointWaitList, + numEventsInWaitList, phEventWaitList, pSyncPoint, + phEvent, phCommand); } /////////////////////////////////////////////////////////////////////////////// @@ -7999,59 +4577,18 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( ur_event_handle_t *phEvent, /// [out][optional][alloc] Handle to this command. ur_exp_command_buffer_command_handle_t *phCommand) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hCommandBuffer); - // extract platform's function pointer table - auto dditable = - reinterpret_cast(hCommandBuffer) - ->dditable; - auto pfnAppendUSMFillExp = dditable->ur.CommandBufferExp.pfnAppendUSMFillExp; + auto *pfnAppendUSMFillExp = dditable->CommandBufferExp.pfnAppendUSMFillExp; if (nullptr == pfnAppendUSMFillExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hCommandBuffer = - reinterpret_cast(hCommandBuffer) - ->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // forward to device-platform - result = pfnAppendUSMFillExp( - hCommandBuffer, pMemory, pPattern, patternSize, size, - numSyncPointsInWaitList, pSyncPointWaitList, numEventsInWaitList, - 
phEventWaitListLocal.data(), pSyncPoint, phEvent, phCommand); - - if (UR_RESULT_SUCCESS != result) - return result; - - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - try { - // convert platform handle to loader handle - if (nullptr != phCommand) - *phCommand = reinterpret_cast( - context->factories.ur_exp_command_buffer_command_factory.getInstance( - *phCommand, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + return pfnAppendUSMFillExp(hCommandBuffer, pMemory, pPattern, patternSize, + size, numSyncPointsInWaitList, pSyncPointWaitList, + numEventsInWaitList, phEventWaitList, pSyncPoint, + phEvent, phCommand); } /////////////////////////////////////////////////////////////////////////////// @@ -8088,66 +4625,19 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( ur_event_handle_t *phEvent, /// [out][optional][alloc] Handle to this command. 
ur_exp_command_buffer_command_handle_t *phCommand) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hCommandBuffer); - // extract platform's function pointer table - auto dditable = - reinterpret_cast(hCommandBuffer) - ->dditable; - auto pfnAppendMemBufferCopyExp = - dditable->ur.CommandBufferExp.pfnAppendMemBufferCopyExp; + auto *pfnAppendMemBufferCopyExp = + dditable->CommandBufferExp.pfnAppendMemBufferCopyExp; if (nullptr == pfnAppendMemBufferCopyExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hCommandBuffer = - reinterpret_cast(hCommandBuffer) - ->handle; - - // convert loader handle to platform handle - hSrcMem = reinterpret_cast(hSrcMem)->handle; - - // convert loader handle to platform handle - hDstMem = reinterpret_cast(hDstMem)->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // forward to device-platform - result = pfnAppendMemBufferCopyExp( + return pfnAppendMemBufferCopyExp( hCommandBuffer, hSrcMem, hDstMem, srcOffset, dstOffset, size, numSyncPointsInWaitList, pSyncPointWaitList, numEventsInWaitList, - phEventWaitListLocal.data(), pSyncPoint, phEvent, phCommand); - - if (UR_RESULT_SUCCESS != result) - return result; - - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - try { - // convert platform handle to loader handle - if (nullptr != phCommand) - *phCommand = reinterpret_cast( - context->factories.ur_exp_command_buffer_command_factory.getInstance( - *phCommand, dditable)); - } catch (std::bad_alloc &) { - result = 
UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + phEventWaitList, pSyncPoint, phEvent, phCommand); } /////////////////////////////////////////////////////////////////////////////// @@ -8182,63 +4672,19 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( ur_event_handle_t *phEvent, /// [out][optional][alloc] Handle to this command. ur_exp_command_buffer_command_handle_t *phCommand) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hCommandBuffer); - // extract platform's function pointer table - auto dditable = - reinterpret_cast(hCommandBuffer) - ->dditable; - auto pfnAppendMemBufferWriteExp = - dditable->ur.CommandBufferExp.pfnAppendMemBufferWriteExp; + auto *pfnAppendMemBufferWriteExp = + dditable->CommandBufferExp.pfnAppendMemBufferWriteExp; if (nullptr == pfnAppendMemBufferWriteExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hCommandBuffer = - reinterpret_cast(hCommandBuffer) - ->handle; - - // convert loader handle to platform handle - hBuffer = reinterpret_cast(hBuffer)->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // forward to device-platform - result = pfnAppendMemBufferWriteExp( - hCommandBuffer, hBuffer, offset, size, pSrc, numSyncPointsInWaitList, - pSyncPointWaitList, numEventsInWaitList, phEventWaitListLocal.data(), - pSyncPoint, phEvent, phCommand); - - if (UR_RESULT_SUCCESS != result) - return result; - - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - try { - // convert 
platform handle to loader handle - if (nullptr != phCommand) - *phCommand = reinterpret_cast( - context->factories.ur_exp_command_buffer_command_factory.getInstance( - *phCommand, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + return pfnAppendMemBufferWriteExp(hCommandBuffer, hBuffer, offset, size, pSrc, + numSyncPointsInWaitList, pSyncPointWaitList, + numEventsInWaitList, phEventWaitList, + pSyncPoint, phEvent, phCommand); } /////////////////////////////////////////////////////////////////////////////// @@ -8273,63 +4719,19 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( ur_event_handle_t *phEvent, /// [out][optional][alloc] Handle to this command. ur_exp_command_buffer_command_handle_t *phCommand) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hCommandBuffer); - // extract platform's function pointer table - auto dditable = - reinterpret_cast(hCommandBuffer) - ->dditable; - auto pfnAppendMemBufferReadExp = - dditable->ur.CommandBufferExp.pfnAppendMemBufferReadExp; + auto *pfnAppendMemBufferReadExp = + dditable->CommandBufferExp.pfnAppendMemBufferReadExp; if (nullptr == pfnAppendMemBufferReadExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hCommandBuffer = - reinterpret_cast(hCommandBuffer) - ->handle; - - // convert loader handle to platform handle - hBuffer = reinterpret_cast(hBuffer)->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // forward to device-platform - result = pfnAppendMemBufferReadExp( - hCommandBuffer, hBuffer, offset, size, pDst, numSyncPointsInWaitList, - pSyncPointWaitList, numEventsInWaitList, phEventWaitListLocal.data(), - 
pSyncPoint, phEvent, phCommand); - - if (UR_RESULT_SUCCESS != result) - return result; - - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - try { - // convert platform handle to loader handle - if (nullptr != phCommand) - *phCommand = reinterpret_cast( - context->factories.ur_exp_command_buffer_command_factory.getInstance( - *phCommand, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + return pfnAppendMemBufferReadExp(hCommandBuffer, hBuffer, offset, size, pDst, + numSyncPointsInWaitList, pSyncPointWaitList, + numEventsInWaitList, phEventWaitList, + pSyncPoint, phEvent, phCommand); } /////////////////////////////////////////////////////////////////////////////// @@ -8374,67 +4776,20 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( ur_event_handle_t *phEvent, /// [out][optional][alloc] Handle to this command. 
ur_exp_command_buffer_command_handle_t *phCommand) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hCommandBuffer); - // extract platform's function pointer table - auto dditable = - reinterpret_cast(hCommandBuffer) - ->dditable; - auto pfnAppendMemBufferCopyRectExp = - dditable->ur.CommandBufferExp.pfnAppendMemBufferCopyRectExp; + auto *pfnAppendMemBufferCopyRectExp = + dditable->CommandBufferExp.pfnAppendMemBufferCopyRectExp; if (nullptr == pfnAppendMemBufferCopyRectExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hCommandBuffer = - reinterpret_cast(hCommandBuffer) - ->handle; - - // convert loader handle to platform handle - hSrcMem = reinterpret_cast(hSrcMem)->handle; - - // convert loader handle to platform handle - hDstMem = reinterpret_cast(hDstMem)->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // forward to device-platform - result = pfnAppendMemBufferCopyRectExp( + return pfnAppendMemBufferCopyRectExp( hCommandBuffer, hSrcMem, hDstMem, srcOrigin, dstOrigin, region, srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch, numSyncPointsInWaitList, pSyncPointWaitList, numEventsInWaitList, - phEventWaitListLocal.data(), pSyncPoint, phEvent, phCommand); - - if (UR_RESULT_SUCCESS != result) - return result; - - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - try { - // convert platform handle to loader handle - if (nullptr != phCommand) - *phCommand = reinterpret_cast( - 
context->factories.ur_exp_command_buffer_command_factory.getInstance( - *phCommand, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + phEventWaitList, pSyncPoint, phEvent, phCommand); } /////////////////////////////////////////////////////////////////////////////// @@ -8482,64 +4837,20 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( ur_event_handle_t *phEvent, /// [out][optional][alloc] Handle to this command. ur_exp_command_buffer_command_handle_t *phCommand) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hCommandBuffer); - // extract platform's function pointer table - auto dditable = - reinterpret_cast(hCommandBuffer) - ->dditable; - auto pfnAppendMemBufferWriteRectExp = - dditable->ur.CommandBufferExp.pfnAppendMemBufferWriteRectExp; + auto *pfnAppendMemBufferWriteRectExp = + dditable->CommandBufferExp.pfnAppendMemBufferWriteRectExp; if (nullptr == pfnAppendMemBufferWriteRectExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hCommandBuffer = - reinterpret_cast(hCommandBuffer) - ->handle; - - // convert loader handle to platform handle - hBuffer = reinterpret_cast(hBuffer)->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // forward to device-platform - result = pfnAppendMemBufferWriteRectExp( + return pfnAppendMemBufferWriteRectExp( hCommandBuffer, hBuffer, bufferOffset, hostOffset, region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, pSrc, numSyncPointsInWaitList, pSyncPointWaitList, numEventsInWaitList, - phEventWaitListLocal.data(), pSyncPoint, phEvent, phCommand); - - if (UR_RESULT_SUCCESS != result) - return result; - - 
try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - try { - // convert platform handle to loader handle - if (nullptr != phCommand) - *phCommand = reinterpret_cast( - context->factories.ur_exp_command_buffer_command_factory.getInstance( - *phCommand, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + phEventWaitList, pSyncPoint, phEvent, phCommand); } /////////////////////////////////////////////////////////////////////////////// @@ -8586,64 +4897,20 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( ur_event_handle_t *phEvent, /// [out][optional] Handle to this command. ur_exp_command_buffer_command_handle_t *phCommand) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hCommandBuffer); - // extract platform's function pointer table - auto dditable = - reinterpret_cast(hCommandBuffer) - ->dditable; - auto pfnAppendMemBufferReadRectExp = - dditable->ur.CommandBufferExp.pfnAppendMemBufferReadRectExp; + auto *pfnAppendMemBufferReadRectExp = + dditable->CommandBufferExp.pfnAppendMemBufferReadRectExp; if (nullptr == pfnAppendMemBufferReadRectExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hCommandBuffer = - reinterpret_cast(hCommandBuffer) - ->handle; - - // convert loader handle to platform handle - hBuffer = reinterpret_cast(hBuffer)->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // forward to device-platform - result = 
pfnAppendMemBufferReadRectExp( + return pfnAppendMemBufferReadRectExp( hCommandBuffer, hBuffer, bufferOffset, hostOffset, region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, pDst, numSyncPointsInWaitList, pSyncPointWaitList, numEventsInWaitList, - phEventWaitListLocal.data(), pSyncPoint, phEvent, phCommand); - - if (UR_RESULT_SUCCESS != result) - return result; - - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - try { - // convert platform handle to loader handle - if (nullptr != phCommand) - *phCommand = reinterpret_cast( - context->factories.ur_exp_command_buffer_command_factory.getInstance( - *phCommand, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + phEventWaitList, pSyncPoint, phEvent, phCommand); } /////////////////////////////////////////////////////////////////////////////// @@ -8680,63 +4947,19 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( ur_event_handle_t *phEvent, /// [out][optional][alloc] Handle to this command. 
ur_exp_command_buffer_command_handle_t *phCommand) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hCommandBuffer); - // extract platform's function pointer table - auto dditable = - reinterpret_cast(hCommandBuffer) - ->dditable; - auto pfnAppendMemBufferFillExp = - dditable->ur.CommandBufferExp.pfnAppendMemBufferFillExp; + auto *pfnAppendMemBufferFillExp = + dditable->CommandBufferExp.pfnAppendMemBufferFillExp; if (nullptr == pfnAppendMemBufferFillExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hCommandBuffer = - reinterpret_cast(hCommandBuffer) - ->handle; - - // convert loader handle to platform handle - hBuffer = reinterpret_cast(hBuffer)->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // forward to device-platform - result = pfnAppendMemBufferFillExp( + return pfnAppendMemBufferFillExp( hCommandBuffer, hBuffer, pPattern, patternSize, offset, size, numSyncPointsInWaitList, pSyncPointWaitList, numEventsInWaitList, - phEventWaitListLocal.data(), pSyncPoint, phEvent, phCommand); - - if (UR_RESULT_SUCCESS != result) - return result; - - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - try { - // convert platform handle to loader handle - if (nullptr != phCommand) - *phCommand = reinterpret_cast( - context->factories.ur_exp_command_buffer_command_factory.getInstance( - *phCommand, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + phEventWaitList, pSyncPoint, 
phEvent, phCommand); } /////////////////////////////////////////////////////////////////////////////// @@ -8769,60 +4992,19 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( ur_event_handle_t *phEvent, /// [out][optional][alloc] Handle to this command. ur_exp_command_buffer_command_handle_t *phCommand) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hCommandBuffer); - // extract platform's function pointer table - auto dditable = - reinterpret_cast(hCommandBuffer) - ->dditable; - auto pfnAppendUSMPrefetchExp = - dditable->ur.CommandBufferExp.pfnAppendUSMPrefetchExp; + auto *pfnAppendUSMPrefetchExp = + dditable->CommandBufferExp.pfnAppendUSMPrefetchExp; if (nullptr == pfnAppendUSMPrefetchExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hCommandBuffer = - reinterpret_cast(hCommandBuffer) - ->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // forward to device-platform - result = pfnAppendUSMPrefetchExp( - hCommandBuffer, pMemory, size, flags, numSyncPointsInWaitList, - pSyncPointWaitList, numEventsInWaitList, phEventWaitListLocal.data(), - pSyncPoint, phEvent, phCommand); - - if (UR_RESULT_SUCCESS != result) - return result; - - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - try { - // convert platform handle to loader handle - if (nullptr != phCommand) - *phCommand = reinterpret_cast( - context->factories.ur_exp_command_buffer_command_factory.getInstance( - *phCommand, dditable)); - } catch 
(std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + return pfnAppendUSMPrefetchExp(hCommandBuffer, pMemory, size, flags, + numSyncPointsInWaitList, pSyncPointWaitList, + numEventsInWaitList, phEventWaitList, + pSyncPoint, phEvent, phCommand); } /////////////////////////////////////////////////////////////////////////////// @@ -8855,60 +5037,19 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( ur_event_handle_t *phEvent, /// [out][optional][alloc] Handle to this command. ur_exp_command_buffer_command_handle_t *phCommand) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hCommandBuffer); - // extract platform's function pointer table - auto dditable = - reinterpret_cast(hCommandBuffer) - ->dditable; - auto pfnAppendUSMAdviseExp = - dditable->ur.CommandBufferExp.pfnAppendUSMAdviseExp; + auto *pfnAppendUSMAdviseExp = + dditable->CommandBufferExp.pfnAppendUSMAdviseExp; if (nullptr == pfnAppendUSMAdviseExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hCommandBuffer = - reinterpret_cast(hCommandBuffer) - ->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // forward to device-platform - result = pfnAppendUSMAdviseExp( - hCommandBuffer, pMemory, size, advice, numSyncPointsInWaitList, - pSyncPointWaitList, numEventsInWaitList, phEventWaitListLocal.data(), - pSyncPoint, phEvent, phCommand); - - if (UR_RESULT_SUCCESS != result) - return result; - - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = 
UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - try { - // convert platform handle to loader handle - if (nullptr != phCommand) - *phCommand = reinterpret_cast( - context->factories.ur_exp_command_buffer_command_factory.getInstance( - *phCommand, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + return pfnAppendUSMAdviseExp(hCommandBuffer, pMemory, size, advice, + numSyncPointsInWaitList, pSyncPointWaitList, + numEventsInWaitList, phEventWaitList, pSyncPoint, + phEvent, phCommand); } /////////////////////////////////////////////////////////////////////////////// @@ -8934,38 +5075,18 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendNativeCommandExp( const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, /// [out][optional] Sync point associated with this command. ur_exp_command_buffer_sync_point_t *pSyncPoint) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hCommandBuffer); - // extract platform's function pointer table - auto dditable = - reinterpret_cast(hCommandBuffer) - ->dditable; - auto pfnAppendNativeCommandExp = - dditable->ur.CommandBufferExp.pfnAppendNativeCommandExp; + auto *pfnAppendNativeCommandExp = + dditable->CommandBufferExp.pfnAppendNativeCommandExp; if (nullptr == pfnAppendNativeCommandExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hCommandBuffer = - reinterpret_cast(hCommandBuffer) - ->handle; - - // convert loader handle to platform handle - hChildCommandBuffer = - (hChildCommandBuffer) - ? 
reinterpret_cast( - hChildCommandBuffer) - ->handle - : nullptr; - // forward to device-platform - result = pfnAppendNativeCommandExp( - hCommandBuffer, pfnNativeCommand, pData, hChildCommandBuffer, - numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); - - return result; + return pfnAppendNativeCommandExp(hCommandBuffer, pfnNativeCommand, pData, + hChildCommandBuffer, numSyncPointsInWaitList, + pSyncPointWaitList, pSyncPoint); } /////////////////////////////////////////////////////////////////////////////// @@ -8987,49 +5108,16 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueCommandBufferExp( /// phEvent are not NULL, phEvent must not refer to an element of the /// phEventWaitList array. ur_event_handle_t *phEvent) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hQueue); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hQueue)->dditable; - auto pfnCommandBufferExp = dditable->ur.EnqueueExp.pfnCommandBufferExp; + auto *pfnCommandBufferExp = dditable->EnqueueExp.pfnCommandBufferExp; if (nullptr == pfnCommandBufferExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hQueue = reinterpret_cast(hQueue)->handle; - - // convert loader handle to platform handle - hCommandBuffer = - reinterpret_cast(hCommandBuffer) - ->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - - // forward to device-platform - result = pfnCommandBufferExp(hQueue, hCommandBuffer, numEventsInWaitList, - phEventWaitListLocal.data(), phEvent); - - // In the event of ERROR_ADAPTER_SPECIFIC we should still attempt to wrap any - // output handles below. 
- if (UR_RESULT_SUCCESS != result && UR_RESULT_ERROR_ADAPTER_SPECIFIC != result) - return result; - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + // forward to device-platform + return pfnCommandBufferExp(hQueue, hCommandBuffer, numEventsInWaitList, + phEventWaitList, phEvent); } /////////////////////////////////////////////////////////////////////////////// @@ -9043,69 +5131,17 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( /// kernel commands are to be updated. const ur_exp_command_buffer_update_kernel_launch_desc_t *pUpdateKernelLaunch) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hCommandBuffer); - // extract platform's function pointer table - auto dditable = - reinterpret_cast(hCommandBuffer) - ->dditable; - auto pfnUpdateKernelLaunchExp = - dditable->ur.CommandBufferExp.pfnUpdateKernelLaunchExp; + auto *pfnUpdateKernelLaunchExp = + dditable->CommandBufferExp.pfnUpdateKernelLaunchExp; if (nullptr == pfnUpdateKernelLaunchExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hCommandBuffer = - reinterpret_cast(hCommandBuffer) - ->handle; - - // Deal with any struct parameters that have handle members we need to - // convert. 
- std::vector - pUpdateKernelLaunchVector = {}; - std::vector> - ppUpdateKernelLaunchpNewMemObjArgList(numKernelUpdates); - for (size_t Offset = 0; Offset < numKernelUpdates; Offset++) { - auto pUpdateKernelLaunchLocal = *pUpdateKernelLaunch; - - pUpdateKernelLaunchLocal.hCommand = - reinterpret_cast( - pUpdateKernelLaunchLocal.hCommand) - ->handle; - if (pUpdateKernelLaunchLocal.hNewKernel) - pUpdateKernelLaunchLocal.hNewKernel = - reinterpret_cast( - pUpdateKernelLaunchLocal.hNewKernel) - ->handle; - - std::vector - &pUpdateKernelLaunchpNewMemObjArgList = - ppUpdateKernelLaunchpNewMemObjArgList[Offset]; - for (uint32_t i = 0; i < pUpdateKernelLaunch->numNewMemObjArgs; i++) { - ur_exp_command_buffer_update_memobj_arg_desc_t NewRangeStruct = - pUpdateKernelLaunchLocal.pNewMemObjArgList[i]; - if (NewRangeStruct.hNewMemObjArg) - NewRangeStruct.hNewMemObjArg = - reinterpret_cast(NewRangeStruct.hNewMemObjArg) - ->handle; - - pUpdateKernelLaunchpNewMemObjArgList.push_back(NewRangeStruct); - } - pUpdateKernelLaunchLocal.pNewMemObjArgList = - pUpdateKernelLaunchpNewMemObjArgList.data(); - - pUpdateKernelLaunchVector.push_back(pUpdateKernelLaunchLocal); - pUpdateKernelLaunch++; - } - pUpdateKernelLaunch = pUpdateKernelLaunchVector.data(); - // forward to device-platform - result = pfnUpdateKernelLaunchExp(hCommandBuffer, numKernelUpdates, - pUpdateKernelLaunch); - - return result; + return pfnUpdateKernelLaunchExp(hCommandBuffer, numKernelUpdates, + pUpdateKernelLaunch); } /////////////////////////////////////////////////////////////////////////////// @@ -9115,40 +5151,16 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferUpdateSignalEventExp( ur_exp_command_buffer_command_handle_t hCommand, /// [out][alloc] Event to be signaled. 
ur_event_handle_t *phSignalEvent) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hCommand); - // extract platform's function pointer table - auto dditable = - reinterpret_cast(hCommand) - ->dditable; - auto pfnUpdateSignalEventExp = - dditable->ur.CommandBufferExp.pfnUpdateSignalEventExp; + auto *pfnUpdateSignalEventExp = + dditable->CommandBufferExp.pfnUpdateSignalEventExp; if (nullptr == pfnUpdateSignalEventExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hCommand = - reinterpret_cast(hCommand) - ->handle; - // forward to device-platform - result = pfnUpdateSignalEventExp(hCommand, phSignalEvent); - - if (UR_RESULT_SUCCESS != result) - return result; - - try { - // convert platform handle to loader handle - *phSignalEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phSignalEvent, - dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + return pfnUpdateSignalEventExp(hCommand, phSignalEvent); } /////////////////////////////////////////////////////////////////////////////// @@ -9162,36 +5174,16 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferUpdateWaitEventsExp( /// events that must be complete before the command execution. If nullptr, /// the numEventsInWaitList must be 0, indicating no wait events. 
const ur_event_handle_t *phEventWaitList) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hCommand); - // extract platform's function pointer table - auto dditable = - reinterpret_cast(hCommand) - ->dditable; - auto pfnUpdateWaitEventsExp = - dditable->ur.CommandBufferExp.pfnUpdateWaitEventsExp; + auto *pfnUpdateWaitEventsExp = + dditable->CommandBufferExp.pfnUpdateWaitEventsExp; if (nullptr == pfnUpdateWaitEventsExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hCommand = - reinterpret_cast(hCommand) - ->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // forward to device-platform - result = pfnUpdateWaitEventsExp(hCommand, numEventsInWaitList, - phEventWaitListLocal.data()); - - return result; + return pfnUpdateWaitEventsExp(hCommand, numEventsInWaitList, phEventWaitList); } /////////////////////////////////////////////////////////////////////////////// @@ -9208,28 +5200,16 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferGetInfoExp( void *pPropValue, /// [out][optional] bytes returned in command-buffer property size_t *pPropSizeRet) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hCommandBuffer); - // extract platform's function pointer table - auto dditable = - reinterpret_cast(hCommandBuffer) - ->dditable; - auto pfnGetInfoExp = dditable->ur.CommandBufferExp.pfnGetInfoExp; + auto *pfnGetInfoExp = dditable->CommandBufferExp.pfnGetInfoExp; if (nullptr == pfnGetInfoExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hCommandBuffer = - reinterpret_cast(hCommandBuffer) - ->handle; - // forward to device-platform - result 
= pfnGetInfoExp(hCommandBuffer, propName, propSize, pPropValue, - pPropSizeRet); - - return result; + return pfnGetInfoExp(hCommandBuffer, propName, propSize, pPropValue, + pPropSizeRet); } /////////////////////////////////////////////////////////////////////////////// @@ -9239,31 +5219,16 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferGetNativeHandleExp( ur_exp_command_buffer_handle_t hCommandBuffer, /// [out] A pointer to the native handle of the command-buffer. ur_native_handle_t *phNativeCommandBuffer) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hCommandBuffer); - // extract platform's function pointer table - auto dditable = - reinterpret_cast(hCommandBuffer) - ->dditable; - auto pfnGetNativeHandleExp = - dditable->ur.CommandBufferExp.pfnGetNativeHandleExp; + auto *pfnGetNativeHandleExp = + dditable->CommandBufferExp.pfnGetNativeHandleExp; if (nullptr == pfnGetNativeHandleExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hCommandBuffer = - reinterpret_cast(hCommandBuffer) - ->handle; - // forward to device-platform - result = pfnGetNativeHandleExp(hCommandBuffer, phNativeCommandBuffer); - - if (UR_RESULT_SUCCESS != result) - return result; - - return result; + return pfnGetNativeHandleExp(hCommandBuffer, phNativeCommandBuffer); } /////////////////////////////////////////////////////////////////////////////// @@ -9300,50 +5265,18 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp( /// are not NULL, phEvent must not refer to an element of the /// phEventWaitList array. 
ur_event_handle_t *phEvent) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hQueue); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hQueue)->dditable; - auto pfnCooperativeKernelLaunchExp = - dditable->ur.EnqueueExp.pfnCooperativeKernelLaunchExp; + auto *pfnCooperativeKernelLaunchExp = + dditable->EnqueueExp.pfnCooperativeKernelLaunchExp; if (nullptr == pfnCooperativeKernelLaunchExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hQueue = reinterpret_cast(hQueue)->handle; - - // convert loader handle to platform handle - hKernel = reinterpret_cast(hKernel)->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - - // forward to device-platform - result = pfnCooperativeKernelLaunchExp(hQueue, hKernel, workDim, - pGlobalWorkOffset, pGlobalWorkSize, - pLocalWorkSize, numEventsInWaitList, - phEventWaitListLocal.data(), phEvent); - - // In the event of ERROR_ADAPTER_SPECIFIC we should still attempt to wrap any - // output handles below. 
- if (UR_RESULT_SUCCESS != result && UR_RESULT_ERROR_ADAPTER_SPECIFIC != result) - return result; - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + // forward to device-platform + return pfnCooperativeKernelLaunchExp( + hQueue, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, + pLocalWorkSize, numEventsInWaitList, phEventWaitList, phEvent); } /////////////////////////////////////////////////////////////////////////////// @@ -9365,29 +5298,18 @@ __urdlllocal ur_result_t UR_APICALL urKernelSuggestMaxCooperativeGroupCountExp( size_t dynamicSharedMemorySize, /// [out] pointer to maximum number of groups uint32_t *pGroupCountRet) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hKernel); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hKernel)->dditable; - auto pfnSuggestMaxCooperativeGroupCountExp = - dditable->ur.KernelExp.pfnSuggestMaxCooperativeGroupCountExp; + auto *pfnSuggestMaxCooperativeGroupCountExp = + dditable->KernelExp.pfnSuggestMaxCooperativeGroupCountExp; if (nullptr == pfnSuggestMaxCooperativeGroupCountExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hKernel = reinterpret_cast(hKernel)->handle; - - // convert loader handle to platform handle - hDevice = reinterpret_cast(hDevice)->handle; - // forward to device-platform - result = pfnSuggestMaxCooperativeGroupCountExp( + return pfnSuggestMaxCooperativeGroupCountExp( hKernel, hDevice, workDim, pLocalWorkSize, dynamicSharedMemorySize, pGroupCountRet); - - return result; } /////////////////////////////////////////////////////////////////////////////// @@ -9417,44 +5339,17 @@ __urdlllocal ur_result_t UR_APICALL 
urEnqueueTimestampRecordingExp( /// not NULL, phEvent must not refer to an element of the phEventWaitList /// array. ur_event_handle_t *phEvent) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hQueue); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hQueue)->dditable; - auto pfnTimestampRecordingExp = - dditable->ur.EnqueueExp.pfnTimestampRecordingExp; + auto *pfnTimestampRecordingExp = + dditable->EnqueueExp.pfnTimestampRecordingExp; if (nullptr == pfnTimestampRecordingExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hQueue = reinterpret_cast(hQueue)->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // forward to device-platform - result = pfnTimestampRecordingExp(hQueue, blocking, numEventsInWaitList, - phEventWaitListLocal.data(), phEvent); - - // In the event of ERROR_ADAPTER_SPECIFIC we should still attempt to wrap any - // output handles below. - if (UR_RESULT_SUCCESS != result && UR_RESULT_ERROR_ADAPTER_SPECIFIC != result) - return result; - try { - // convert platform handle to loader handle - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + return pfnTimestampRecordingExp(hQueue, blocking, numEventsInWaitList, + phEventWaitList, phEvent); } /////////////////////////////////////////////////////////////////////////////// @@ -9495,50 +5390,19 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueKernelLaunchCustomExp( /// are not NULL, phEvent must not refer to an element of the /// phEventWaitList array. 
ur_event_handle_t *phEvent) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hQueue); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hQueue)->dditable; - auto pfnKernelLaunchCustomExp = - dditable->ur.EnqueueExp.pfnKernelLaunchCustomExp; + auto *pfnKernelLaunchCustomExp = + dditable->EnqueueExp.pfnKernelLaunchCustomExp; if (nullptr == pfnKernelLaunchCustomExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hQueue = reinterpret_cast(hQueue)->handle; - - // convert loader handle to platform handle - hKernel = reinterpret_cast(hKernel)->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // forward to device-platform - result = pfnKernelLaunchCustomExp( + return pfnKernelLaunchCustomExp( hQueue, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize, numPropsInLaunchPropList, launchPropList, - numEventsInWaitList, phEventWaitListLocal.data(), phEvent); - - // In the event of ERROR_ADAPTER_SPECIFIC we should still attempt to wrap any - // output handles below. 
- if (UR_RESULT_SUCCESS != result && UR_RESULT_ERROR_ADAPTER_SPECIFIC != result) - return result; - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + numEventsInWaitList, phEventWaitList, phEvent); } /////////////////////////////////////////////////////////////////////////////// @@ -9552,29 +5416,15 @@ __urdlllocal ur_result_t UR_APICALL urProgramBuildExp( ur_device_handle_t *phDevices, /// [in][optional] pointer to build options null-terminated string. const char *pOptions) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hProgram); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hProgram)->dditable; - auto pfnBuildExp = dditable->ur.ProgramExp.pfnBuildExp; + auto *pfnBuildExp = dditable->ProgramExp.pfnBuildExp; if (nullptr == pfnBuildExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hProgram = reinterpret_cast(hProgram)->handle; - - // convert loader handles to platform handles - auto phDevicesLocal = std::vector(numDevices); - for (size_t i = 0; i < numDevices; ++i) - phDevicesLocal[i] = - reinterpret_cast(phDevices[i])->handle; - // forward to device-platform - result = pfnBuildExp(hProgram, numDevices, phDevicesLocal.data(), pOptions); - - return result; + return pfnBuildExp(hProgram, numDevices, phDevices, pOptions); } /////////////////////////////////////////////////////////////////////////////// @@ -9588,29 +5438,15 @@ __urdlllocal ur_result_t UR_APICALL urProgramCompileExp( ur_device_handle_t *phDevices, /// [in][optional] pointer to build options null-terminated string. 
const char *pOptions) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hProgram); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hProgram)->dditable; - auto pfnCompileExp = dditable->ur.ProgramExp.pfnCompileExp; + auto *pfnCompileExp = dditable->ProgramExp.pfnCompileExp; if (nullptr == pfnCompileExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hProgram = reinterpret_cast(hProgram)->handle; - - // convert loader handles to platform handles - auto phDevicesLocal = std::vector(numDevices); - for (size_t i = 0; i < numDevices; ++i) - phDevicesLocal[i] = - reinterpret_cast(phDevices[i])->handle; - // forward to device-platform - result = pfnCompileExp(hProgram, numDevices, phDevicesLocal.data(), pOptions); - - return result; + return pfnCompileExp(hProgram, numDevices, phDevices, pOptions); } /////////////////////////////////////////////////////////////////////////////// @@ -9630,49 +5466,18 @@ __urdlllocal ur_result_t UR_APICALL urProgramLinkExp( const char *pOptions, /// [out][alloc] pointer to handle of program object created. 
ur_program_handle_t *phProgram) { - ur_result_t result = UR_RESULT_SUCCESS; if (nullptr != phProgram) { *phProgram = nullptr; } + auto *dditable = *reinterpret_cast(hContext); - [[maybe_unused]] auto context = getContext(); - - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnLinkExp = dditable->ur.ProgramExp.pfnLinkExp; + auto *pfnLinkExp = dditable->ProgramExp.pfnLinkExp; if (nullptr == pfnLinkExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - - // convert loader handles to platform handles - auto phDevicesLocal = std::vector(numDevices); - for (size_t i = 0; i < numDevices; ++i) - phDevicesLocal[i] = - reinterpret_cast(phDevices[i])->handle; - - // convert loader handles to platform handles - auto phProgramsLocal = std::vector(count); - for (size_t i = 0; i < count; ++i) - phProgramsLocal[i] = - reinterpret_cast(phPrograms[i])->handle; - // forward to device-platform - result = pfnLinkExp(hContext, numDevices, phDevicesLocal.data(), count, - phProgramsLocal.data(), pOptions, phProgram); - - try { - // convert platform handle to loader handle - if (nullptr != phProgram) - *phProgram = reinterpret_cast( - context->factories.ur_program_factory.getInstance(*phProgram, - dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + return pfnLinkExp(hContext, numDevices, phDevices, count, phPrograms, + pOptions, phProgram); } /////////////////////////////////////////////////////////////////////////////// @@ -9684,23 +5489,15 @@ __urdlllocal ur_result_t UR_APICALL urUSMImportExp( void *pMem, /// [in] size in bytes of the host memory object to be imported size_t size) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hContext); - // extract platform's function pointer table - auto dditable = 
reinterpret_cast(hContext)->dditable; - auto pfnImportExp = dditable->ur.USMExp.pfnImportExp; + auto *pfnImportExp = dditable->USMExp.pfnImportExp; if (nullptr == pfnImportExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - // forward to device-platform - result = pfnImportExp(hContext, pMem, size); - - return result; + return pfnImportExp(hContext, pMem, size); } /////////////////////////////////////////////////////////////////////////////// @@ -9710,23 +5507,15 @@ __urdlllocal ur_result_t UR_APICALL urUSMReleaseExp( ur_context_handle_t hContext, /// [in] pointer to host memory object void *pMem) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hContext); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnReleaseExp = dditable->ur.USMExp.pfnReleaseExp; + auto *pfnReleaseExp = dditable->USMExp.pfnReleaseExp; if (nullptr == pfnReleaseExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - // forward to device-platform - result = pfnReleaseExp(hContext, pMem); - - return result; + return pfnReleaseExp(hContext, pMem); } /////////////////////////////////////////////////////////////////////////////// @@ -9736,27 +5525,15 @@ __urdlllocal ur_result_t UR_APICALL urUsmP2PEnablePeerAccessExp( ur_device_handle_t commandDevice, /// [in] handle of the peer device object ur_device_handle_t peerDevice) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(commandDevice); - // extract platform's function pointer table - auto dditable = - reinterpret_cast(commandDevice)->dditable; - auto pfnEnablePeerAccessExp = dditable->ur.UsmP2PExp.pfnEnablePeerAccessExp; + auto *pfnEnablePeerAccessExp = 
dditable->UsmP2PExp.pfnEnablePeerAccessExp; if (nullptr == pfnEnablePeerAccessExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - commandDevice = reinterpret_cast(commandDevice)->handle; - - // convert loader handle to platform handle - peerDevice = reinterpret_cast(peerDevice)->handle; - // forward to device-platform - result = pfnEnablePeerAccessExp(commandDevice, peerDevice); - - return result; + return pfnEnablePeerAccessExp(commandDevice, peerDevice); } /////////////////////////////////////////////////////////////////////////////// @@ -9766,27 +5543,15 @@ __urdlllocal ur_result_t UR_APICALL urUsmP2PDisablePeerAccessExp( ur_device_handle_t commandDevice, /// [in] handle of the peer device object ur_device_handle_t peerDevice) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(commandDevice); - // extract platform's function pointer table - auto dditable = - reinterpret_cast(commandDevice)->dditable; - auto pfnDisablePeerAccessExp = dditable->ur.UsmP2PExp.pfnDisablePeerAccessExp; + auto *pfnDisablePeerAccessExp = dditable->UsmP2PExp.pfnDisablePeerAccessExp; if (nullptr == pfnDisablePeerAccessExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - commandDevice = reinterpret_cast(commandDevice)->handle; - - // convert loader handle to platform handle - peerDevice = reinterpret_cast(peerDevice)->handle; - // forward to device-platform - result = pfnDisablePeerAccessExp(commandDevice, peerDevice); - - return result; + return pfnDisablePeerAccessExp(commandDevice, peerDevice); } /////////////////////////////////////////////////////////////////////////////// @@ -9810,28 +5575,16 @@ __urdlllocal ur_result_t UR_APICALL urUsmP2PPeerAccessGetInfoExp( /// [out][optional] pointer to the actual size in bytes of the queried /// propName. 
size_t *pPropSizeRet) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(commandDevice); - // extract platform's function pointer table - auto dditable = - reinterpret_cast(commandDevice)->dditable; - auto pfnPeerAccessGetInfoExp = dditable->ur.UsmP2PExp.pfnPeerAccessGetInfoExp; + auto *pfnPeerAccessGetInfoExp = dditable->UsmP2PExp.pfnPeerAccessGetInfoExp; if (nullptr == pfnPeerAccessGetInfoExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - commandDevice = reinterpret_cast(commandDevice)->handle; - - // convert loader handle to platform handle - peerDevice = reinterpret_cast(peerDevice)->handle; - // forward to device-platform - result = pfnPeerAccessGetInfoExp(commandDevice, peerDevice, propName, - propSize, pPropValue, pPropSizeRet); - - return result; + return pfnPeerAccessGetInfoExp(commandDevice, peerDevice, propName, propSize, + pPropValue, pPropSizeRet); } /////////////////////////////////////////////////////////////////////////////// @@ -9853,45 +5606,17 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrierExt( /// particular command instance. If phEventWaitList and phEvent are not /// NULL, phEvent must not refer to an element of the phEventWaitList array. 
ur_event_handle_t *phEvent) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hQueue); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hQueue)->dditable; - auto pfnEventsWaitWithBarrierExt = - dditable->ur.Enqueue.pfnEventsWaitWithBarrierExt; + auto *pfnEventsWaitWithBarrierExt = + dditable->Enqueue.pfnEventsWaitWithBarrierExt; if (nullptr == pfnEventsWaitWithBarrierExt) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hQueue = reinterpret_cast(hQueue)->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // forward to device-platform - result = pfnEventsWaitWithBarrierExt(hQueue, pProperties, numEventsInWaitList, - phEventWaitListLocal.data(), phEvent); - - // In the event of ERROR_ADAPTER_SPECIFIC we should still attempt to wrap any - // output handles below. - if (UR_RESULT_SUCCESS != result && UR_RESULT_ERROR_ADAPTER_SPECIFIC != result) - return result; - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + return pfnEventsWaitWithBarrierExt(hQueue, pProperties, numEventsInWaitList, + phEventWaitList, phEvent); } /////////////////////////////////////////////////////////////////////////////// @@ -9926,51 +5651,17 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueNativeCommandExp( /// not NULL, phEvent must not refer to an element of the phEventWaitList /// array. 
ur_event_handle_t *phEvent) { - ur_result_t result = UR_RESULT_SUCCESS; - [[maybe_unused]] auto context = getContext(); + auto *dditable = *reinterpret_cast(hQueue); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hQueue)->dditable; - auto pfnNativeCommandExp = dditable->ur.EnqueueExp.pfnNativeCommandExp; + auto *pfnNativeCommandExp = dditable->EnqueueExp.pfnNativeCommandExp; if (nullptr == pfnNativeCommandExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hQueue = reinterpret_cast(hQueue)->handle; - - // convert loader handles to platform handles - auto phMemListLocal = std::vector(numMemsInMemList); - for (size_t i = 0; i < numMemsInMemList; ++i) - phMemListLocal[i] = - reinterpret_cast(phMemList[i])->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - - // forward to device-platform - result = pfnNativeCommandExp( - hQueue, pfnNativeEnqueue, data, numMemsInMemList, phMemListLocal.data(), - pProperties, numEventsInWaitList, phEventWaitListLocal.data(), phEvent); - - // In the event of ERROR_ADAPTER_SPECIFIC we should still attempt to wrap any - // output handles below. 
- if (UR_RESULT_SUCCESS != result && UR_RESULT_ERROR_ADAPTER_SPECIFIC != result) - return result; - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; + // forward to device-platform + return pfnNativeCommandExp(hQueue, pfnNativeEnqueue, data, numMemsInMemList, + phMemList, pProperties, numEventsInWaitList, + phEventWaitList, phEvent); } } // namespace ur_loader @@ -10014,7 +5705,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetGlobalProcAddrTable( "urGetGlobalProcAddrTable")); if (!getTable) continue; - platform.initStatus = getTable(version, &platform.dditable.ur.Global); + platform.initStatus = getTable(version, &platform.dditable.Global); } if (UR_RESULT_SUCCESS == result) { @@ -10028,8 +5719,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetGlobalProcAddrTable( pDdiTable->pfnAdapterGetInfo = ur_loader::urAdapterGetInfo; } else { // return pointers directly to platform's DDIs - *pDdiTable = - ur_loader::getContext()->platforms.front().dditable.ur.Global; + *pDdiTable = ur_loader::getContext()->platforms.front().dditable.Global; } } @@ -10071,7 +5761,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetAdapterProcAddrTable( "urGetAdapterProcAddrTable")); if (!getTable) continue; - platform.initStatus = getTable(version, &platform.dditable.ur.Adapter); + platform.initStatus = getTable(version, &platform.dditable.Adapter); } if (UR_RESULT_SUCCESS == result) { @@ -10083,8 +5773,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetAdapterProcAddrTable( ur_loader::urAdapterSetLoggerCallbackLevel; } else { // return pointers directly to platform's DDIs - *pDdiTable = - ur_loader::getContext()->platforms.front().dditable.ur.Adapter; + *pDdiTable = ur_loader::getContext()->platforms.front().dditable.Adapter; } } @@ -10127,7 +5816,7 @@ UR_DLLEXPORT ur_result_t 
UR_APICALL urGetBindlessImagesExpProcAddrTable( if (!getTable) continue; platform.initStatus = - getTable(version, &platform.dditable.ur.BindlessImagesExp); + getTable(version, &platform.dditable.BindlessImagesExp); } if (UR_RESULT_SUCCESS == result) { @@ -10169,9 +5858,8 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetBindlessImagesExpProcAddrTable( ur_loader::urBindlessImagesSignalExternalSemaphoreExp; } else { // return pointers directly to platform's DDIs - *pDdiTable = ur_loader::getContext() - ->platforms.front() - .dditable.ur.BindlessImagesExp; + *pDdiTable = + ur_loader::getContext()->platforms.front().dditable.BindlessImagesExp; } } @@ -10214,7 +5902,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( if (!getTable) continue; platform.initStatus = - getTable(version, &platform.dditable.ur.CommandBufferExp); + getTable(version, &platform.dditable.CommandBufferExp); } if (UR_RESULT_SUCCESS == result) { @@ -10262,9 +5950,8 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( ur_loader::urCommandBufferGetNativeHandleExp; } else { // return pointers directly to platform's DDIs - *pDdiTable = ur_loader::getContext() - ->platforms.front() - .dditable.ur.CommandBufferExp; + *pDdiTable = + ur_loader::getContext()->platforms.front().dditable.CommandBufferExp; } } @@ -10306,7 +5993,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetContextProcAddrTable( "urGetContextProcAddrTable")); if (!getTable) continue; - platform.initStatus = getTable(version, &platform.dditable.ur.Context); + platform.initStatus = getTable(version, &platform.dditable.Context); } if (UR_RESULT_SUCCESS == result) { @@ -10323,8 +6010,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetContextProcAddrTable( pDdiTable->pfnSetExtendedDeleter = ur_loader::urContextSetExtendedDeleter; } else { // return pointers directly to platform's DDIs - *pDdiTable = - ur_loader::getContext()->platforms.front().dditable.ur.Context; + *pDdiTable = 
ur_loader::getContext()->platforms.front().dditable.Context; } } @@ -10366,7 +6052,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueProcAddrTable( "urGetEnqueueProcAddrTable")); if (!getTable) continue; - platform.initStatus = getTable(version, &platform.dditable.ur.Enqueue); + platform.initStatus = getTable(version, &platform.dditable.Enqueue); } if (UR_RESULT_SUCCESS == result) { @@ -10405,8 +6091,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueProcAddrTable( ur_loader::urEnqueueEventsWaitWithBarrierExt; } else { // return pointers directly to platform's DDIs - *pDdiTable = - ur_loader::getContext()->platforms.front().dditable.ur.Enqueue; + *pDdiTable = ur_loader::getContext()->platforms.front().dditable.Enqueue; } } @@ -10448,7 +6133,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueExpProcAddrTable( "urGetEnqueueExpProcAddrTable")); if (!getTable) continue; - platform.initStatus = getTable(version, &platform.dditable.ur.EnqueueExp); + platform.initStatus = getTable(version, &platform.dditable.EnqueueExp); } if (UR_RESULT_SUCCESS == result) { @@ -10470,7 +6155,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueExpProcAddrTable( } else { // return pointers directly to platform's DDIs *pDdiTable = - ur_loader::getContext()->platforms.front().dditable.ur.EnqueueExp; + ur_loader::getContext()->platforms.front().dditable.EnqueueExp; } } @@ -10512,7 +6197,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEventProcAddrTable( "urGetEventProcAddrTable")); if (!getTable) continue; - platform.initStatus = getTable(version, &platform.dditable.ur.Event); + platform.initStatus = getTable(version, &platform.dditable.Event); } if (UR_RESULT_SUCCESS == result) { @@ -10530,7 +6215,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEventProcAddrTable( pDdiTable->pfnSetCallback = ur_loader::urEventSetCallback; } else { // return pointers directly to platform's DDIs - *pDdiTable = ur_loader::getContext()->platforms.front().dditable.ur.Event; + *pDdiTable = 
ur_loader::getContext()->platforms.front().dditable.Event; } } @@ -10572,7 +6257,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetKernelProcAddrTable( "urGetKernelProcAddrTable")); if (!getTable) continue; - platform.initStatus = getTable(version, &platform.dditable.ur.Kernel); + platform.initStatus = getTable(version, &platform.dditable.Kernel); } if (UR_RESULT_SUCCESS == result) { @@ -10600,8 +6285,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetKernelProcAddrTable( ur_loader::urKernelSetSpecializationConstants; } else { // return pointers directly to platform's DDIs - *pDdiTable = - ur_loader::getContext()->platforms.front().dditable.ur.Kernel; + *pDdiTable = ur_loader::getContext()->platforms.front().dditable.Kernel; } } @@ -10643,7 +6327,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetKernelExpProcAddrTable( "urGetKernelExpProcAddrTable")); if (!getTable) continue; - platform.initStatus = getTable(version, &platform.dditable.ur.KernelExp); + platform.initStatus = getTable(version, &platform.dditable.KernelExp); } if (UR_RESULT_SUCCESS == result) { @@ -10655,7 +6339,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetKernelExpProcAddrTable( } else { // return pointers directly to platform's DDIs *pDdiTable = - ur_loader::getContext()->platforms.front().dditable.ur.KernelExp; + ur_loader::getContext()->platforms.front().dditable.KernelExp; } } @@ -10697,7 +6381,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetMemProcAddrTable( "urGetMemProcAddrTable")); if (!getTable) continue; - platform.initStatus = getTable(version, &platform.dditable.ur.Mem); + platform.initStatus = getTable(version, &platform.dditable.Mem); } if (UR_RESULT_SUCCESS == result) { @@ -10718,7 +6402,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetMemProcAddrTable( pDdiTable->pfnImageGetInfo = ur_loader::urMemImageGetInfo; } else { // return pointers directly to platform's DDIs - *pDdiTable = ur_loader::getContext()->platforms.front().dditable.ur.Mem; + *pDdiTable = 
ur_loader::getContext()->platforms.front().dditable.Mem; } } @@ -10760,7 +6444,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetPhysicalMemProcAddrTable( "urGetPhysicalMemProcAddrTable")); if (!getTable) continue; - platform.initStatus = getTable(version, &platform.dditable.ur.PhysicalMem); + platform.initStatus = getTable(version, &platform.dditable.PhysicalMem); } if (UR_RESULT_SUCCESS == result) { @@ -10774,7 +6458,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetPhysicalMemProcAddrTable( } else { // return pointers directly to platform's DDIs *pDdiTable = - ur_loader::getContext()->platforms.front().dditable.ur.PhysicalMem; + ur_loader::getContext()->platforms.front().dditable.PhysicalMem; } } @@ -10816,7 +6500,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetPlatformProcAddrTable( "urGetPlatformProcAddrTable")); if (!getTable) continue; - platform.initStatus = getTable(version, &platform.dditable.ur.Platform); + platform.initStatus = getTable(version, &platform.dditable.Platform); } if (UR_RESULT_SUCCESS == result) { @@ -10832,8 +6516,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetPlatformProcAddrTable( pDdiTable->pfnGetBackendOption = ur_loader::urPlatformGetBackendOption; } else { // return pointers directly to platform's DDIs - *pDdiTable = - ur_loader::getContext()->platforms.front().dditable.ur.Platform; + *pDdiTable = ur_loader::getContext()->platforms.front().dditable.Platform; } } @@ -10875,7 +6558,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramProcAddrTable( "urGetProgramProcAddrTable")); if (!getTable) continue; - platform.initStatus = getTable(version, &platform.dditable.ur.Program); + platform.initStatus = getTable(version, &platform.dditable.Program); } if (UR_RESULT_SUCCESS == result) { @@ -10901,8 +6584,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramProcAddrTable( ur_loader::urProgramCreateWithNativeHandle; } else { // return pointers directly to platform's DDIs - *pDdiTable = - ur_loader::getContext()->platforms.front().dditable.ur.Program; + 
*pDdiTable = ur_loader::getContext()->platforms.front().dditable.Program; } } @@ -10944,7 +6626,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramExpProcAddrTable( "urGetProgramExpProcAddrTable")); if (!getTable) continue; - platform.initStatus = getTable(version, &platform.dditable.ur.ProgramExp); + platform.initStatus = getTable(version, &platform.dditable.ProgramExp); } if (UR_RESULT_SUCCESS == result) { @@ -10957,7 +6639,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramExpProcAddrTable( } else { // return pointers directly to platform's DDIs *pDdiTable = - ur_loader::getContext()->platforms.front().dditable.ur.ProgramExp; + ur_loader::getContext()->platforms.front().dditable.ProgramExp; } } @@ -10999,7 +6681,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetQueueProcAddrTable( "urGetQueueProcAddrTable")); if (!getTable) continue; - platform.initStatus = getTable(version, &platform.dditable.ur.Queue); + platform.initStatus = getTable(version, &platform.dditable.Queue); } if (UR_RESULT_SUCCESS == result) { @@ -11017,7 +6699,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetQueueProcAddrTable( pDdiTable->pfnFlush = ur_loader::urQueueFlush; } else { // return pointers directly to platform's DDIs - *pDdiTable = ur_loader::getContext()->platforms.front().dditable.ur.Queue; + *pDdiTable = ur_loader::getContext()->platforms.front().dditable.Queue; } } @@ -11059,7 +6741,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetSamplerProcAddrTable( "urGetSamplerProcAddrTable")); if (!getTable) continue; - platform.initStatus = getTable(version, &platform.dditable.ur.Sampler); + platform.initStatus = getTable(version, &platform.dditable.Sampler); } if (UR_RESULT_SUCCESS == result) { @@ -11075,8 +6757,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetSamplerProcAddrTable( ur_loader::urSamplerCreateWithNativeHandle; } else { // return pointers directly to platform's DDIs - *pDdiTable = - ur_loader::getContext()->platforms.front().dditable.ur.Sampler; + *pDdiTable = 
ur_loader::getContext()->platforms.front().dditable.Sampler; } } @@ -11118,7 +6799,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetUSMProcAddrTable( "urGetUSMProcAddrTable")); if (!getTable) continue; - platform.initStatus = getTable(version, &platform.dditable.ur.USM); + platform.initStatus = getTable(version, &platform.dditable.USM); } if (UR_RESULT_SUCCESS == result) { @@ -11136,7 +6817,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetUSMProcAddrTable( pDdiTable->pfnPoolGetInfo = ur_loader::urUSMPoolGetInfo; } else { // return pointers directly to platform's DDIs - *pDdiTable = ur_loader::getContext()->platforms.front().dditable.ur.USM; + *pDdiTable = ur_loader::getContext()->platforms.front().dditable.USM; } } @@ -11178,7 +6859,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetUSMExpProcAddrTable( "urGetUSMExpProcAddrTable")); if (!getTable) continue; - platform.initStatus = getTable(version, &platform.dditable.ur.USMExp); + platform.initStatus = getTable(version, &platform.dditable.USMExp); } if (UR_RESULT_SUCCESS == result) { @@ -11199,8 +6880,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetUSMExpProcAddrTable( pDdiTable->pfnReleaseExp = ur_loader::urUSMReleaseExp; } else { // return pointers directly to platform's DDIs - *pDdiTable = - ur_loader::getContext()->platforms.front().dditable.ur.USMExp; + *pDdiTable = ur_loader::getContext()->platforms.front().dditable.USMExp; } } @@ -11242,7 +6922,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetUsmP2PExpProcAddrTable( "urGetUsmP2PExpProcAddrTable")); if (!getTable) continue; - platform.initStatus = getTable(version, &platform.dditable.ur.UsmP2PExp); + platform.initStatus = getTable(version, &platform.dditable.UsmP2PExp); } if (UR_RESULT_SUCCESS == result) { @@ -11258,7 +6938,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetUsmP2PExpProcAddrTable( } else { // return pointers directly to platform's DDIs *pDdiTable = - ur_loader::getContext()->platforms.front().dditable.ur.UsmP2PExp; + 
ur_loader::getContext()->platforms.front().dditable.UsmP2PExp; } } @@ -11300,7 +6980,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetVirtualMemProcAddrTable( "urGetVirtualMemProcAddrTable")); if (!getTable) continue; - platform.initStatus = getTable(version, &platform.dditable.ur.VirtualMem); + platform.initStatus = getTable(version, &platform.dditable.VirtualMem); } if (UR_RESULT_SUCCESS == result) { @@ -11318,7 +6998,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetVirtualMemProcAddrTable( } else { // return pointers directly to platform's DDIs *pDdiTable = - ur_loader::getContext()->platforms.front().dditable.ur.VirtualMem; + ur_loader::getContext()->platforms.front().dditable.VirtualMem; } } @@ -11360,7 +7040,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetDeviceProcAddrTable( "urGetDeviceProcAddrTable")); if (!getTable) continue; - platform.initStatus = getTable(version, &platform.dditable.ur.Device); + platform.initStatus = getTable(version, &platform.dditable.Device); } if (UR_RESULT_SUCCESS == result) { @@ -11380,8 +7060,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetDeviceProcAddrTable( ur_loader::urDeviceGetGlobalTimestamps; } else { // return pointers directly to platform's DDIs - *pDdiTable = - ur_loader::getContext()->platforms.front().dditable.ur.Device; + *pDdiTable = ur_loader::getContext()->platforms.front().dditable.Device; } } diff --git a/unified-runtime/source/loader/ur_ldrddi.hpp b/unified-runtime/source/loader/ur_ldrddi.hpp deleted file mode 100644 index 1ca7d18bc2a2f..0000000000000 --- a/unified-runtime/source/loader/ur_ldrddi.hpp +++ /dev/null @@ -1,113 +0,0 @@ -/* - * - * Copyright (C) 2022-2023 Intel Corporation - * - * Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM - * Exceptions. 
- * See LICENSE.TXT - * - * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - * - * @file ur_ldrddi.hpp - * - */ -#ifndef UR_LOADER_LDRDDI_H -#define UR_LOADER_LDRDDI_H 1 - -#include "ur_object.hpp" -#include "ur_singleton.hpp" - -namespace ur_loader { -/////////////////////////////////////////////////////////////////////////////// - -using ur_adapter_object_t = object_t; -using ur_adapter_factory_t = - singleton_factory_t; - -using ur_platform_object_t = object_t; -using ur_platform_factory_t = - singleton_factory_t; - -using ur_device_object_t = object_t; -using ur_device_factory_t = - singleton_factory_t; - -using ur_context_object_t = object_t; -using ur_context_factory_t = - singleton_factory_t; - -using ur_event_object_t = object_t; -using ur_event_factory_t = - singleton_factory_t; - -using ur_program_object_t = object_t; -using ur_program_factory_t = - singleton_factory_t; - -using ur_kernel_object_t = object_t; -using ur_kernel_factory_t = - singleton_factory_t; - -using ur_queue_object_t = object_t; -using ur_queue_factory_t = - singleton_factory_t; - -using ur_sampler_object_t = object_t; -using ur_sampler_factory_t = - singleton_factory_t; - -using ur_mem_object_t = object_t; -using ur_mem_factory_t = singleton_factory_t; - -using ur_physical_mem_object_t = object_t; -using ur_physical_mem_factory_t = - singleton_factory_t; - -using ur_usm_pool_object_t = object_t; -using ur_usm_pool_factory_t = - singleton_factory_t; - -using ur_exp_external_mem_object_t = object_t; -using ur_exp_external_mem_factory_t = - singleton_factory_t; - -using ur_exp_external_semaphore_object_t = - object_t; -using ur_exp_external_semaphore_factory_t = - singleton_factory_t; - -using ur_exp_command_buffer_object_t = object_t; -using ur_exp_command_buffer_factory_t = - singleton_factory_t; - -using ur_exp_command_buffer_command_object_t = - object_t; -using ur_exp_command_buffer_command_factory_t = - singleton_factory_t; - -struct handle_factories { - 
ur_adapter_factory_t ur_adapter_factory; - ur_platform_factory_t ur_platform_factory; - ur_device_factory_t ur_device_factory; - ur_context_factory_t ur_context_factory; - ur_event_factory_t ur_event_factory; - ur_program_factory_t ur_program_factory; - ur_kernel_factory_t ur_kernel_factory; - ur_queue_factory_t ur_queue_factory; - ur_sampler_factory_t ur_sampler_factory; - ur_mem_factory_t ur_mem_factory; - ur_physical_mem_factory_t ur_physical_mem_factory; - ur_usm_pool_factory_t ur_usm_pool_factory; - ur_exp_external_mem_factory_t ur_exp_external_mem_factory; - ur_exp_external_semaphore_factory_t ur_exp_external_semaphore_factory; - ur_exp_command_buffer_factory_t ur_exp_command_buffer_factory; - ur_exp_command_buffer_command_factory_t ur_exp_command_buffer_command_factory; -}; - -} // namespace ur_loader - -#endif /* UR_LOADER_LDRDDI_H */ diff --git a/unified-runtime/source/loader/ur_lib.cpp b/unified-runtime/source/loader/ur_lib.cpp index cba443337bb9f..9310b7b23f51b 100644 --- a/unified-runtime/source/loader/ur_lib.cpp +++ b/unified-runtime/source/loader/ur_lib.cpp @@ -17,7 +17,6 @@ #ifndef NOMINMAX #define NOMINMAX #include "ur_api.h" -#include "ur_ldrddi.hpp" #endif // !NOMINMAX #include "logger/ur_logger.hpp" diff --git a/unified-runtime/source/loader/ur_loader.cpp b/unified-runtime/source/loader/ur_loader.cpp index dca283d9851fd..7eb4e397203a9 100644 --- a/unified-runtime/source/loader/ur_loader.cpp +++ b/unified-runtime/source/loader/ur_loader.cpp @@ -37,7 +37,7 @@ ur_result_t context_t::init() { // a specific adapter library. Don't load any static adapters. 
if (!adapter_registry.adaptersForceLoaded()) { auto &level_zero = platforms.emplace_back(nullptr); - ur::level_zero::urAdapterGetDdiTables(&level_zero.dditable.ur); + ur::level_zero::urAdapterGetDdiTables(&level_zero.dditable); } #endif diff --git a/unified-runtime/source/loader/ur_loader.hpp b/unified-runtime/source/loader/ur_loader.hpp index 4ed36264ee6a4..0f700fd2e8b8f 100644 --- a/unified-runtime/source/loader/ur_loader.hpp +++ b/unified-runtime/source/loader/ur_loader.hpp @@ -14,7 +14,7 @@ #define UR_LOADER_HPP 1 #include "ur_adapter_registry.hpp" -#include "ur_ldrddi.hpp" +#include "ur_ddi.h" #include "ur_lib_loader.hpp" namespace ur_loader { @@ -25,7 +25,7 @@ struct platform_t { std::unique_ptr handle; ur_result_t initStatus = UR_RESULT_SUCCESS; - dditable_t dditable = {}; + ur_dditable_t dditable = {}; }; using platform_vector_t = std::vector; @@ -41,8 +41,6 @@ class context_t : public AtomicSingleton { ur_result_t init(); bool intercept_enabled = false; - - struct handle_factories factories; }; context_t *getContext(); diff --git a/unified-runtime/source/loader/ur_object.hpp b/unified-runtime/source/loader/ur_object.hpp deleted file mode 100644 index fa03adb91aa83..0000000000000 --- a/unified-runtime/source/loader/ur_object.hpp +++ /dev/null @@ -1,41 +0,0 @@ -/* - * - * Copyright (C) 2022-2023 Intel Corporation - * - * Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM - * Exceptions. 
See LICENSE.TXT - * - * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - * - * @file ur_object.hpp - * - */ - -#ifndef UR_OBJECT_H -#define UR_OBJECT_H 1 - -#include "ur_ddi.h" -#include "ur_util.hpp" - -////////////////////////////////////////////////////////////////////////// -struct dditable_t { - ur_dditable_t ur; -}; - -////////////////////////////////////////////////////////////////////////// -template class object_t { -public: - using handle_t = _handle_t; - - handle_t handle; - dditable_t *dditable; - - object_t() = delete; - - object_t(handle_t _handle, dditable_t *_dditable) - : handle(_handle), dditable(_dditable) {} - - ~object_t() = default; -}; - -#endif /* UR_OBJECT_H */ diff --git a/unified-runtime/source/ur/ur.hpp b/unified-runtime/source/ur/ur.hpp index b784f10e5f33c..1d5b714119a95 100644 --- a/unified-runtime/source/ur/ur.hpp +++ b/unified-runtime/source/ur/ur.hpp @@ -371,6 +371,8 @@ template struct ZeCache : private T { T *operator->() { return &get(); } }; +struct ur_dditable_t; + // TODO: populate with target agnostic handling of UR platforms struct _ur_platform {}; @@ -380,6 +382,11 @@ extern bool PrintTrace; // The getInfo*/ReturnHelper facilities provide shortcut way of // writing return bytes for the various getInfo APIs. 
namespace ur { +template struct handle_base { + handle_base() { ddi_table = getddi::value(); }; + const ur_dditable_t *ddi_table = nullptr; +}; + template ur_result_t getInfoImpl(size_t param_value_size, void *param_value, size_t *param_value_size_ret, T value, diff --git a/unified-runtime/test/adapters/cuda/urQueueGetNativeHandle.cpp b/unified-runtime/test/adapters/cuda/urQueueGetNativeHandle.cpp index 7a7efc754ce0b..21c7882065027 100644 --- a/unified-runtime/test/adapters/cuda/urQueueGetNativeHandle.cpp +++ b/unified-runtime/test/adapters/cuda/urQueueGetNativeHandle.cpp @@ -6,10 +6,17 @@ #include "fixtures.h" #include "queue.hpp" +#include "ur_ddi.h" using urCudaQueueGetNativeHandleTest = uur::urQueueTest; UUR_INSTANTIATE_DEVICE_TEST_SUITE(urCudaQueueGetNativeHandleTest); +const ur_dditable_t *ur::cuda::ddi_getter::value() { + // Return a blank dditable + static ur_dditable_t table{}; + return &table; +}; + TEST_P(urCudaQueueGetNativeHandleTest, Success) { CUstream Stream; ASSERT_SUCCESS( diff --git a/unified-runtime/test/adapters/level_zero/v2/event_pool_test.cpp b/unified-runtime/test/adapters/level_zero/v2/event_pool_test.cpp index 9dc627d687658..52cd6bd35e22c 100644 --- a/unified-runtime/test/adapters/level_zero/v2/event_pool_test.cpp +++ b/unified-runtime/test/adapters/level_zero/v2/event_pool_test.cpp @@ -29,6 +29,12 @@ using namespace v2; static constexpr size_t MAX_DEVICES = 10; +const ur_dditable_t *ur::level_zero::ddi_getter::value() { + // Return a blank dditable + static ur_dditable_t table{}; + return &table; +}; + // mock necessary functions from context, we can't pull in entire context // implementation due to a lot of other dependencies std::vector mockVec{}; diff --git a/unified-runtime/test/loader/handles/fixtures.hpp b/unified-runtime/test/loader/handles/fixtures.hpp index 4f07812e9993c..e3d8874da93f5 100644 --- a/unified-runtime/test/loader/handles/fixtures.hpp +++ b/unified-runtime/test/loader/handles/fixtures.hpp @@ -15,41 +15,9 @@ #define 
ASSERT_SUCCESS(ACTUAL) ASSERT_EQ(UR_RESULT_SUCCESS, ACTUAL) #endif -ur_result_t replace_urPlatformGet(void *pParams) { - const auto ¶ms = *static_cast(pParams); - - if (*params.ppNumPlatforms) { - **params.ppNumPlatforms = 1; - } - - if (*params.pphPlatforms && *params.pNumEntries == 1) { - **params.pphPlatforms = reinterpret_cast(0x1); - } - - return UR_RESULT_SUCCESS; -} - -ur_result_t replace_urDeviceGetInfo(void *pParams) { - const auto ¶ms = *static_cast(pParams); - if (*params.ppropName == UR_DEVICE_INFO_PLATFORM) { - if (*params.ppPropSizeRet) { - **params.ppPropSizeRet = sizeof(ur_platform_handle_t); - } - if (*params.ppPropValue) { - **(reinterpret_cast(params.ppPropValue)) = - reinterpret_cast(0x1); - } - } - return UR_RESULT_SUCCESS; -} - struct LoaderHandleTest : ::testing::Test { void SetUp() override { urLoaderInit(0, nullptr); - mock::getCallbacks().set_replace_callback("urDeviceGetInfo", - &replace_urDeviceGetInfo); - mock::getCallbacks().set_replace_callback("urPlatformGet", - &replace_urPlatformGet); uint32_t nadapters = 0; adapter = nullptr; ASSERT_SUCCESS(urAdapterGet(1, &adapter, &nadapters)); diff --git a/unified-runtime/test/loader/handles/urLoaderHandles.cpp b/unified-runtime/test/loader/handles/urLoaderHandles.cpp index 4a66ad21b593c..82ebb5123fdc4 100644 --- a/unified-runtime/test/loader/handles/urLoaderHandles.cpp +++ b/unified-runtime/test/loader/handles/urLoaderHandles.cpp @@ -11,7 +11,8 @@ #include TEST_F(LoaderHandleTest, Success) { - ur_platform_handle_t query_platform; + ur_platform_handle_t query_platform = + reinterpret_cast(1234); size_t retsize; ASSERT_SUCCESS(urDeviceGetInfo(device, UR_DEVICE_INFO_PLATFORM, sizeof(intptr_t), &query_platform, &retsize));