Skip to content

Expose ObjectCode as public API + prune unnecessary input arguments #435

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Feb 19, 2025
1 change: 1 addition & 0 deletions cuda_core/cuda/core/experimental/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from cuda.core.experimental._event import EventOptions
from cuda.core.experimental._launcher import LaunchConfig, launch
from cuda.core.experimental._linker import Linker, LinkerOptions
from cuda.core.experimental._module import ObjectCode
from cuda.core.experimental._program import Program, ProgramOptions
from cuda.core.experimental._stream import Stream, StreamOptions
from cuda.core.experimental._system import System
Expand Down
4 changes: 1 addition & 3 deletions cuda_core/cuda/core/experimental/_event.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,9 +65,7 @@ def close(self):
__slots__ = ("__weakref__", "_mnff", "_timing_disabled", "_busy_waited")

def __init__(self):
raise NotImplementedError(
"directly creating an Event object can be ambiguous. Please call call Stream.record()."
)
raise NotImplementedError("directly creating an Event object can be ambiguous. Please call Stream.record().")

@staticmethod
def _init(options: Optional[EventOptions] = None):
Expand Down
2 changes: 1 addition & 1 deletion cuda_core/cuda/core/experimental/_linker.py
Original file line number Diff line number Diff line change
Expand Up @@ -439,7 +439,7 @@ def link(self, target_type) -> ObjectCode:
addr, size = handle_return(_driver.cuLinkComplete(self._mnff.handle))
code = (ctypes.c_char * size).from_address(addr)

return ObjectCode(bytes(code), target_type)
return ObjectCode._init(bytes(code), target_type)

def get_error_log(self) -> str:
"""Get the error log generated by the linker.
Expand Down
100 changes: 47 additions & 53 deletions cuda_core/cuda/core/experimental/_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
#
# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE


from typing import Optional, Union
from warnings import warn

from cuda.core.experimental._utils import driver, get_binding_version, handle_return, precondition
Expand Down Expand Up @@ -213,47 +213,42 @@ def attributes(self):


class ObjectCode:
"""Represent a compiled program that was loaded onto the device.
"""Represent a compiled program to be loaded onto the device.

This object provides a unified interface for different types of
compiled programs that are loaded onto the device.
compiled programs that will be loaded onto the device.

Loads the module library with specified module code and JIT options.
Note
----
This class has no default constructor. If you already have a cubin that you would
like to load, use the :meth:`from_cubin` alternative constructor. All other code
types (e.g. "ptx") must go through :class:`~cuda.core.experimental.Program`, whose
:meth:`~cuda.core.experimental.Program.compile` method returns an
:class:`ObjectCode` instance.

Note
----
Usage under CUDA 11.x will only load to the current device
context.

Parameters
----------
module : Union[bytes, str]
Either a bytes object containing the module to load, or
a file path string containing that module for loading.
code_type : Any
String of the compiled type.
Supported options are "ptx", "cubin", "ltoir" and "fatbin".
jit_options : Optional
Mapping of JIT options to use during module loading.
(Default to no options)
symbol_mapping : Optional
Keyword argument dictionary specifying how symbol names
should be mapped before trying to retrieve them.
(Default to no mappings)

"""

__slots__ = ("_handle", "_backend_version", "_jit_options", "_code_type", "_module", "_loader", "_sym_map")
__slots__ = ("_handle", "_backend_version", "_code_type", "_module", "_loader", "_sym_map")
_supported_code_type = ("cubin", "ptx", "ltoir", "fatbin")

def __init__(self, module, code_type, jit_options=None, *, symbol_mapping=None):
if code_type not in self._supported_code_type:
raise ValueError
def __init__(self):
raise NotImplementedError(
"directly creating an ObjectCode object can be ambiguous. Please either call Program.compile() "
"or one of the ObjectCode.from_*() constructors"
)

@staticmethod
def _init(module, code_type, *, symbol_mapping: Optional[dict] = None):
self = ObjectCode.__new__(ObjectCode)
assert code_type in self._supported_code_type, f"{code_type=} is not supported"
_lazy_init()

# handle is assigned during _lazy_load
self._handle = None
self._jit_options = jit_options

self._backend_version = "new" if (_py_major_ver >= 12 and _driver_ver >= 12000) else "old"
self._loader = _backend[self._backend_version]
Expand All @@ -262,42 +257,41 @@ def __init__(self, module, code_type, jit_options=None, *, symbol_mapping=None):
self._module = module
self._sym_map = {} if symbol_mapping is None else symbol_mapping

return self

@staticmethod
def from_cubin(module: Union[bytes, str], *, symbol_mapping: Optional[dict] = None) -> "ObjectCode":
    """Build an :class:`ObjectCode` from a cubin that already exists.

    Parameters
    ----------
    module : Union[bytes, str]
        The cubin to load: either a bytes object holding the cubin in
        memory, or a string giving the path of a cubin file on disk.
    symbol_mapping : Optional[dict]
        Dictionary whose keys are unmangled symbol names and whose values
        are the corresponding mangled names; it is consulted before a
        symbol is retrieved (defaults to no mappings).

    Returns
    -------
    ObjectCode
        The newly created instance, with its code type set to "cubin".
    """
    # Delegate to the private constructor with the code type pinned to "cubin".
    return ObjectCode._init(module, code_type="cubin", symbol_mapping=symbol_mapping)

# TODO: do we want to unload in a finalizer? Probably not..

def _lazy_load_module(self, *args, **kwargs):
if self._handle is not None:
return
jit_options = self._jit_options
module = self._module
if isinstance(module, str):
# TODO: this option is only taken by the new library APIs, but we have
# a bug that we can't easily support it just yet (NVIDIA/cuda-python#73).
if jit_options is not None:
raise ValueError
self._handle = handle_return(self._loader["file"](module))
if self._backend_version == "new":
self._handle = handle_return(self._loader["file"](module.encode(), [], [], 0, [], [], 0))
else: # "old" backend
self._handle = handle_return(self._loader["file"](module.encode()))
else:
assert isinstance(module, bytes)
if jit_options is None:
jit_options = {}
if self._backend_version == "new":
args = (
module,
list(jit_options.keys()),
list(jit_options.values()),
len(jit_options),
# TODO: support library options
[],
[],
0,
)
self._handle = handle_return(self._loader["data"](module, [], [], 0, [], [], 0))
else: # "old" backend
args = (
module,
len(jit_options),
list(jit_options.keys()),
list(jit_options.values()),
)
self._handle = handle_return(self._loader["data"](*args))
self._handle = handle_return(self._loader["data"](module, 0, [], []))

@precondition(_lazy_load_module)
def get_kernel(self, name):
Expand All @@ -314,12 +308,12 @@ def get_kernel(self, name):
Newly created kernel object.

"""
if self._code_type not in ("cubin", "ptx", "fatbin"):
raise RuntimeError(f"get_kernel() is not supported for {self._code_type}")
try:
name = self._sym_map[name]
except KeyError:
name = name.encode()

data = handle_return(self._loader["kernel"](self._handle, name))
return Kernel._from_obj(data, self)

# TODO: implement from_handle()
4 changes: 2 additions & 2 deletions cuda_core/cuda/core/experimental/_program.py
Original file line number Diff line number Diff line change
Expand Up @@ -386,7 +386,7 @@ def __init__(self, code, code_type, options: ProgramOptions = None):
if not isinstance(code, str):
raise TypeError("ptx Program expects code argument to be a string")
self._linker = Linker(
ObjectCode(code.encode(), code_type), options=self._translate_program_options(options)
ObjectCode._init(code.encode(), code_type), options=self._translate_program_options(options)
)
self._backend = "linker"
else:
Expand Down Expand Up @@ -472,7 +472,7 @@ def compile(self, target_type, name_expressions=(), logs=None):
handle_return(nvrtc.nvrtcGetProgramLog(self._mnff.handle, log), handle=self._mnff.handle)
logs.write(log.decode())

return ObjectCode(data, target_type, symbol_mapping=symbol_mapping)
return ObjectCode._init(data, target_type, symbol_mapping=symbol_mapping)

if self._backend == "linker":
return self._linker.link(target_type)
Expand Down
1 change: 1 addition & 0 deletions cuda_core/docs/source/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ CUDA compilation toolchain

Program
Linker
ObjectCode

:template: dataclass.rst

Expand Down
19 changes: 12 additions & 7 deletions cuda_core/docs/source/release/0.2.0-notes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
``cuda.core`` 0.2.0 Release Notes
=================================

Released on <TODO>, 2024
Released on <TODO>, 2025

Highlights
----------
Expand All @@ -12,14 +12,19 @@ Highlights
- Add :class:`~DeviceProperties` to provide pythonic access to device properties.
- Add kernel attributes to :class:`~Kernel`

Limitations
-----------

- <TODO>

Breaking Changes
----------------

- Change ``__cuda_stream__`` from attribute to method
- The :meth:`~Program.compile` method no longer accepts the ``options`` argument. Instead, you can optionally pass an instance of :class:`~ProgramOptions` to the constructor of :obj:`~Program`.
- :meth: `~Device.properties` now provides an instance of :class:`~DeviceProperties` instead of a dictionary.
- :meth:`~Device.properties` now provides an instance of :class:`~DeviceProperties` instead of a dictionary.

New features
------------

- Expose :class:`ObjectCode` as a public API, which allows loading cubins from memory or disk. For loading other kinds of code types, please continue using :class:`Program`.

Limitations
-----------

- <TODO>
20 changes: 4 additions & 16 deletions cuda_core/examples/simple_multi_gpu_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,14 +34,8 @@
}
"""
arch0 = "".join(f"{i}" for i in dev0.compute_capability)
prog_add = Program(code_add, code_type="c++")
mod_add = prog_add.compile(
"cubin",
options=(
"-std=c++17",
"-arch=sm_" + arch0,
),
)
prog_add = Program(code_add, code_type="c++", options={"std": "c++17", "arch": f"sm_{arch0}"})
mod_add = prog_add.compile("cubin")
ker_add = mod_add.get_kernel("vector_add")

# Set GPU 1
Expand All @@ -63,14 +57,8 @@
}
"""
arch1 = "".join(f"{i}" for i in dev1.compute_capability)
prog_sub = Program(code_sub, code_type="c++")
mod_sub = prog_sub.compile(
"cubin",
options=(
"-std=c++17",
"-arch=sm_" + arch1,
),
)
prog_sub = Program(code_sub, code_type="c++", options={"std": "c++17", "arch": f"sm_{arch1}"})
mod_sub = prog_sub.compile("cubin")
ker_sub = mod_sub.get_kernel("vector_sub")


Expand Down
26 changes: 23 additions & 3 deletions cuda_core/tests/test_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import pytest
from conftest import can_load_generated_ptx

from cuda.core.experimental import Program, ProgramOptions, system
from cuda.core.experimental import ObjectCode, Program, ProgramOptions, system


@pytest.fixture(scope="function")
Expand All @@ -37,7 +37,7 @@ def get_saxpy_kernel(init_cuda):
)

# run in single precision
return mod.get_kernel("saxpy<float>")
return mod.get_kernel("saxpy<float>"), mod


@pytest.mark.xfail(not can_load_generated_ptx(), reason="PTX version too new")
Expand Down Expand Up @@ -72,7 +72,7 @@ def test_get_kernel(init_cuda):
],
)
def test_read_only_kernel_attributes(get_saxpy_kernel, attr, expected_type):
kernel = get_saxpy_kernel
kernel, _ = get_saxpy_kernel
method = getattr(kernel.attributes, attr)
# get the value without providing a device ordinal
value = method()
Expand All @@ -82,3 +82,23 @@ def test_read_only_kernel_attributes(get_saxpy_kernel, attr, expected_type):
for device in system.devices:
value = method(device.device_id)
assert isinstance(value, expected_type), f"Expected {attr} to be of type {expected_type}, but got {type(value)}"


def test_object_code_load_cubin(get_saxpy_kernel):
    """Check that an in-memory cubin can be reloaded via ``ObjectCode.from_cubin``.

    The fixture yields a ``(kernel, module)`` pair; only the module is used.
    Its raw image and symbol map are fed back into ``from_cubin``.
    """
    _kernel, compiled = get_saxpy_kernel
    image = compiled._module
    name_map = compiled._sym_map
    # The in-memory code path requires the image to be a bytes object.
    assert isinstance(image, bytes)
    reloaded = ObjectCode.from_cubin(image, symbol_mapping=name_map)
    # Loading is lazy; requesting a kernel forces the module to be loaded.
    reloaded.get_kernel("saxpy<double>")


def test_object_code_load_cubin_from_file(get_saxpy_kernel, tmp_path):
    """Check that an on-disk cubin can be loaded via ``ObjectCode.from_cubin``.

    The module image from the fixture is written to a temporary file, and
    the file's path (as a string) is handed to ``from_cubin``.
    """
    _kernel, compiled = get_saxpy_kernel
    image = compiled._module
    name_map = compiled._sym_map
    assert isinstance(image, bytes)
    # Persist the image so that the file-path code path is exercised.
    target = tmp_path / "test.cubin"
    target.write_bytes(image)
    reloaded = ObjectCode.from_cubin(str(target), symbol_mapping=name_map)
    # Loading is lazy; requesting a kernel forces the module to be loaded.
    reloaded.get_kernel("saxpy<double>")
Loading