From 5408229b446e155ef7c8b83087aab5997dd453ca Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sat, 22 Feb 2025 05:22:59 +0000 Subject: [PATCH 01/16] ensure program/linker have consistent backend and handle --- cuda_core/cuda/core/experimental/_linker.py | 7 ++++++- cuda_core/cuda/core/experimental/_program.py | 16 ++++++++-------- cuda_core/tests/test_linker.py | 13 +++++++------ cuda_core/tests/test_program.py | 15 ++++++++------- 4 files changed, 29 insertions(+), 22 deletions(-) diff --git a/cuda_core/cuda/core/experimental/_linker.py b/cuda_core/cuda/core/experimental/_linker.py index 2f84f3502..ad47a2328 100644 --- a/cuda_core/cuda/core/experimental/_linker.py +++ b/cuda_core/cuda/core/experimental/_linker.py @@ -484,9 +484,14 @@ def _input_type_from_code_type(self, code_type: str): @property def handle(self): - """Return the linker handle object.""" + """Return the underlying handle object.""" return self._mnff.handle + @property + def backend(self) -> str: + """Return this Linker instance's underlying backend.""" + return "nvJitLink" if self._mnff.use_nvjitlink else "driver" + def close(self): """Destroy this linker.""" self._mnff.close() diff --git a/cuda_core/cuda/core/experimental/_program.py b/cuda_core/cuda/core/experimental/_program.py index 1bb48afc9..6ea20e64b 100644 --- a/cuda_core/cuda/core/experimental/_program.py +++ b/cuda_core/cuda/core/experimental/_program.py @@ -382,7 +382,7 @@ def __init__(self, code, code_type, options: ProgramOptions = None): # TODO: allow tuples once NVIDIA/cuda-python#72 is resolved self._mnff.handle = handle_return(nvrtc.nvrtcCreateProgram(code.encode(), b"", 0, [], [])) - self._backend = "nvrtc" + self._backend = "NVRTC" self._linker = None elif code_type == "ptx": @@ -391,7 +391,7 @@ def __init__(self, code, code_type, options: ProgramOptions = None): self._linker = Linker( ObjectCode._init(code.encode(), code_type), options=self._translate_program_options(options) ) - self._backend = "linker" + self._backend = self._linker.backend else: raise NotImplementedError @@ -445,9 +445,9 @@ def compile(self, target_type, name_expressions=(), logs=None): """ if target_type not in self._supported_target_type: - raise NotImplementedError + raise ValueError(f"the target type {target_type} is not supported") - if self._backend == "nvrtc": + if self._backend == "NVRTC": if target_type == "ptx" and not self._can_load_generated_ptx(): warn( "The CUDA driver version is older than the backend version. " @@ -489,15 +489,15 @@ def compile(self, target_type, name_expressions=(), logs=None): return ObjectCode._init(data, target_type, symbol_mapping=symbol_mapping) - if self._backend == "linker": + if self._backend in ("nvJitLink", "driver"): return self._linker.link(target_type) @property - def backend(self): - """Return the backend type string associated with this program.""" + def backend(self) -> str: + """Return this Program instance's underlying backend.""" return self._backend @property def handle(self): - """Return the program handle object.""" + """Return the underlying handle object.""" return self._mnff.handle diff --git a/cuda_core/tests/test_linker.py b/cuda_core/tests/test_linker.py index 732f70c32..939890f4c 100644 --- a/cuda_core/tests/test_linker.py +++ b/cuda_core/tests/test_linker.py @@ -17,8 +17,8 @@ device_function_b = "__device__ int B() { return 0; }" device_function_c = "__device__ int C(int a, int b) { return a + b; }" -culink_backend = _linker._decide_nvjitlink_or_driver() -if not culink_backend: +is_culink_backend = _linker._decide_nvjitlink_or_driver() +if not is_culink_backend: from cuda.bindings import nvjitlink @@ -54,7 +54,7 @@ def compile_ltoir_functions(init_cuda): LinkerOptions(arch=ARCH, debug=True), LinkerOptions(arch=ARCH, lineinfo=True), ] -if not culink_backend: +if not is_culink_backend: options += [ LinkerOptions(arch=ARCH, time=True), LinkerOptions(arch=ARCH, optimize_unused_variables=True), @@ -85,16 +85,17 @@ def test_linker_init(compile_ptx_functions, options): linker = Linker(*compile_ptx_functions, options=options) object_code = linker.link("cubin") assert isinstance(object_code, ObjectCode) + assert linker.backend == ("driver" if is_culink_backend else "nvJitLink") def test_linker_init_invalid_arch(compile_ptx_functions): - err = AttributeError if culink_backend else nvjitlink.nvJitLinkError + err = AttributeError if is_culink_backend else nvjitlink.nvJitLinkError with pytest.raises(err): options = LinkerOptions(arch="99", ptx=True) Linker(*compile_ptx_functions, options=options) -@pytest.mark.skipif(culink_backend, reason="culink does not support ptx option") +@pytest.mark.skipif(is_culink_backend, reason="culink does not support ptx option") def test_linker_link_ptx_nvjitlink(compile_ltoir_functions): options = LinkerOptions(arch=ARCH, link_time_optimization=True, ptx=True) linker = Linker(*compile_ltoir_functions, options=options) @@ -102,7 +103,7 @@ def test_linker_link_ptx_nvjitlink(compile_ltoir_functions): assert isinstance(linked_code, ObjectCode) -@pytest.mark.skipif(not culink_backend, reason="nvjitlink requires lto for ptx linking") +@pytest.mark.skipif(not is_culink_backend, reason="nvjitlink requires lto for ptx linking") def test_linker_link_ptx_culink(compile_ptx_functions): options = LinkerOptions(arch=ARCH) linker = Linker(*compile_ptx_functions, options=options) diff --git a/cuda_core/tests/test_program.py b/cuda_core/tests/test_program.py index 8d2ecd1ab..88ed76c85 100644 --- a/cuda_core/tests/test_program.py +++ b/cuda_core/tests/test_program.py @@ -14,6 +14,8 @@ from cuda.core.experimental._module import Kernel, ObjectCode from cuda.core.experimental._program import Program, ProgramOptions +is_culink_backend = _linker._decide_nvjitlink_or_driver() + @pytest.fixture(scope="module") def ptx_code_object(): @@ -50,7 +52,7 @@ def ptx_code_object(): def test_cpp_program_with_various_options(init_cuda, options): code = 'extern "C" __global__ void my_kernel() {}' program = Program(code, "c++", options) - assert program.backend == "nvrtc" + assert program.backend == "NVRTC" program.compile("ptx") program.close() assert program.handle is None @@ -65,8 +67,7 @@ def test_cpp_program_with_various_options(init_cuda, options): ProgramOptions(prec_sqrt=True), ProgramOptions(fma=True), ] -if not _linker._decide_nvjitlink_or_driver(): - print("Using nvjitlink as the backend because decide() returned false") +if not is_culink_backend: options += [ ProgramOptions(time=True), ProgramOptions(split_compile=True), @@ -76,7 +77,7 @@ def test_cpp_program_with_various_options(init_cuda, options): @pytest.mark.parametrize("options", options) def test_ptx_program_with_various_options(init_cuda, ptx_code_object, options): program = Program(ptx_code_object._module.decode(), "ptx", options=options) - assert program.backend == "linker" + assert program.backend == ("driver" if is_culink_backend else "nvJitLink") program.compile("cubin") program.close() assert program.handle is None @@ -85,7 +86,7 @@ def test_ptx_program_with_various_options(init_cuda, ptx_code_object, options): def test_program_init_valid_code_type(): code = 'extern "C" __global__ void my_kernel() {}' program = Program(code, "c++") - assert program.backend == "nvrtc" + assert program.backend == "NVRTC" assert program.handle is not None @@ -125,14 +126,14 @@ def test_program_compile_valid_target_type(init_cuda): def test_program_compile_invalid_target_type(): code = 'extern "C" __global__ void my_kernel() {}' program = Program(code, "c++") - with pytest.raises(NotImplementedError): + with pytest.raises(ValueError): program.compile("invalid_target") def test_program_backend_property(): code = 'extern "C" __global__ void my_kernel() {}' program = Program(code, "c++") - assert program.backend == "nvrtc" + assert program.backend == "NVRTC" def test_program_handle_property(): From 83849378dced4698335c68ac75a58dc1e485ba9f Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sun, 23 Feb 2025 21:45:48 +0000 Subject: [PATCH 02/16] support getting native handle in C++/Cython --- .github/workflows/test-wheel-linux.yml | 13 ++++++ cuda_core/README.md | 11 +++++ cuda_core/cuda/core/experimental/_event.py | 4 +- cuda_core/cuda/core/experimental/_linker.py | 6 ++- cuda_core/cuda/core/experimental/_program.py | 4 +- cuda_core/cuda/core/experimental/_stream.py | 4 +- .../core/experimental/include/utility.hpp | 17 ++++++++ cuda_core/tests/cython/build_tests.sh | 4 ++ cuda_core/tests/cython/test_cython.py | 42 +++++++++++++++++++ .../cython/test_get_cuda_native_handle.pyx | 32 ++++++++++++++ cuda_core/tests/pytest.ini | 2 + 11 files changed, 132 insertions(+), 7 deletions(-) create mode 100644 cuda_core/cuda/core/experimental/include/utility.hpp create mode 100755 cuda_core/tests/cython/build_tests.sh create mode 100644 cuda_core/tests/cython/test_cython.py create mode 100644 cuda_core/tests/cython/test_get_cuda_native_handle.pyx create mode 100644 cuda_core/tests/pytest.ini diff --git a/.github/workflows/test-wheel-linux.yml b/.github/workflows/test-wheel-linux.yml index c15de07e5..9d2bdde4b 100644 --- a/.github/workflows/test-wheel-linux.yml +++ b/.github/workflows/test-wheel-linux.yml @@ -223,6 +223,19 @@ jobs: pytest -rxXs tests/ popd + # It is a bit convoluted to run the Cython tests against CTK wheels, + # so let's just skip them. + if [[ "${{ inputs.local-ctk }}" == 1 ]]; then + if [[ "${{ inputs.host-platform }}" == linux* ]]; then + bash tests/cython/build_tests.sh + elif [[ "${{ inputs.host-platform }}" == win* ]]; then + # TODO: enable this once win-64 runners are up + exit 1 + fi + pytest -rxXs tests/cython + popd + fi + - name: Ensure cuda-python installable run: | if [[ "${{ inputs.local-ctk }}" == 1 ]]; then diff --git a/cuda_core/README.md b/cuda_core/README.md index 534cdb8fe..3743f8fae 100644 --- a/cuda_core/README.md +++ b/cuda_core/README.md @@ -41,3 +41,14 @@ for more details, including how to sign your commits. To run these tests: * `python -m pytest tests/` against editable installations * `pytest tests/` against installed packages + +### Cython Unit Tests + +Cython tests are located in `tests/cython` and need to be built. These builds have the same CUDA Toolkit header requirements as [those of cuda.bindings](https://nvidia.github.io/cuda-python/cuda-bindings/latest/install.html#requirements) where the major.minor version must match `cuda.bindings`. To build them: + +1. Setup environment variable `CUDA_HOME` with the path to the CUDA Toolkit installation. +2. Run `build_tests` script located in `test/cython` appropriate to your platform. This will both cythonize the tests and build them. + +To run these tests: +* `python -m pytest tests/cython/` against editable installations +* `pytest tests/cython/` against installed packages diff --git a/cuda_core/cuda/core/experimental/_event.py b/cuda_core/cuda/core/experimental/_event.py index 06005f95a..5d7032927 100644 --- a/cuda_core/cuda/core/experimental/_event.py +++ b/cuda_core/cuda/core/experimental/_event.py @@ -130,6 +130,6 @@ def is_done(self) -> bool: raise CUDAError(f"unexpected error: {result}") @property - def handle(self) -> int: + def handle(self) -> "CUevent": """Return the underlying cudaEvent_t pointer address as Python int.""" - return int(self._mnff.handle) + return self._mnff.handle diff --git a/cuda_core/cuda/core/experimental/_linker.py b/cuda_core/cuda/core/experimental/_linker.py index ad47a2328..fe8c462f4 100644 --- a/cuda_core/cuda/core/experimental/_linker.py +++ b/cuda_core/cuda/core/experimental/_linker.py @@ -323,6 +323,10 @@ def _exception_manager(self): raise e +nvJitLinkHandleT = int +LinkerHandleT = Union[nvJitLinkHandleT, "CUlinkState"] + + class Linker: """Represent a linking machinery to link one or multiple object codes into :obj:`~cuda.core.experimental._module.ObjectCode` with the specified options. @@ -483,7 +487,7 @@ def _input_type_from_code_type(self, code_type: str): return input_type @property - def handle(self): + def handle(self) -> LinkerHandleT: """Return the underlying handle object.""" return self._mnff.handle diff --git a/cuda_core/cuda/core/experimental/_program.py b/cuda_core/cuda/core/experimental/_program.py index 6ea20e64b..5e3957345 100644 --- a/cuda_core/cuda/core/experimental/_program.py +++ b/cuda_core/cuda/core/experimental/_program.py @@ -8,7 +8,7 @@ from warnings import warn from cuda.core.experimental._device import Device -from cuda.core.experimental._linker import Linker, LinkerOptions +from cuda.core.experimental._linker import Linker, LinkerHandleT, LinkerOptions from cuda.core.experimental._module import ObjectCode from cuda.core.experimental._utils import ( _handle_boolean_option, @@ -498,6 +498,6 @@ def backend(self) -> str: return self._backend @property - def handle(self): + def handle(self) -> Union["nvrtcProgram", LinkerHandleT]: """Return the underlying handle object.""" return self._mnff.handle diff --git a/cuda_core/cuda/core/experimental/_stream.py b/cuda_core/cuda/core/experimental/_stream.py index 09d672365..a980ae2c5 100644 --- a/cuda_core/cuda/core/experimental/_stream.py +++ b/cuda_core/cuda/core/experimental/_stream.py @@ -147,9 +147,9 @@ def __cuda_stream__(self) -> Tuple[int, int]: return (0, self.handle) @property - def handle(self) -> int: + def handle(self) -> "CUstream": """Return the underlying cudaStream_t pointer address as Python int.""" - return int(self._mnff.handle) + return self._mnff.handle @property def is_nonblocking(self) -> bool: diff --git a/cuda_core/cuda/core/experimental/include/utility.hpp b/cuda_core/cuda/core/experimental/include/utility.hpp new file mode 100644 index 000000000..b9f301a54 --- /dev/null +++ b/cuda_core/cuda/core/experimental/include/utility.hpp @@ -0,0 +1,17 @@ +#include + +// In cuda.bindings 12.8, the private member name was renamed from "_ptr" to "_pvt_ptr". +// We want to have the C++ layer supporting all past 12.x versions, so some tricks are needed. +// Since there's no std::has_member so we use SFINAE to create the same effect. + +template ::_pvt_ptr)>, bool> = true> +inline auto& get_cuda_native_handle(const T& obj) { + return *(obj->_pvt_ptr); +} + +template ::_ptr)>, bool> = true> +inline auto& get_cuda_native_handle(const T& obj) { + return *(obj->_ptr); +} diff --git a/cuda_core/tests/cython/build_tests.sh b/cuda_core/tests/cython/build_tests.sh new file mode 100755 index 000000000..d85ccec77 --- /dev/null +++ b/cuda_core/tests/cython/build_tests.sh @@ -0,0 +1,4 @@ +#!/bin/bash + +SCRIPTPATH=$(dirname $(realpath "$0")) +CPLUS_INCLUDE_PATH=$SCRIPTPATH/../../cuda/core/experimental/include:$CUDA_HOME/include:$CPLUS_INCLUDE_PATH cythonize -3 -i $(dirname "$0")/test_*.pyx diff --git a/cuda_core/tests/cython/test_cython.py b/cuda_core/tests/cython/test_cython.py new file mode 100644 index 000000000..9ffe7f2dc --- /dev/null +++ b/cuda_core/tests/cython/test_cython.py @@ -0,0 +1,42 @@ +# Copyright 2021-2024 NVIDIA Corporation. All rights reserved. +# +# Please refer to the NVIDIA end user license agreement (EULA) associated +# with this source code for terms and conditions that govern your use of +# this software. Any use, reproduction, disclosure, or distribution of +# this software and related documentation outside the terms of the EULA +# is strictly prohibited. +import functools +import importlib +import sys + + +def py_func(func): + """ + Wraps func in a plain Python function. + """ + + @functools.wraps(func) + def wrapped(*args, **kwargs): + return func(*args, **kwargs) + + return wrapped + + +cython_test_modules = [ + "test_get_cuda_native_handle", +] + + +for mod in cython_test_modules: + try: + # For each callable in `mod` with name `test_*`, + # wrap the callable in a plain Python function + # and set the result as an attribute of this module. + mod = importlib.import_module(mod) + for name in dir(mod): + item = getattr(mod, name) + if callable(item) and name.startswith("test_"): + item = py_func(item) + setattr(sys.modules[__name__], name, item) + except ImportError: + raise diff --git a/cuda_core/tests/cython/test_get_cuda_native_handle.pyx b/cuda_core/tests/cython/test_get_cuda_native_handle.pyx new file mode 100644 index 000000000..2d4a92597 --- /dev/null +++ b/cuda_core/tests/cython/test_get_cuda_native_handle.pyx @@ -0,0 +1,32 @@ +# distutils: language = c++ +# distutils: extra_compile_args = -std=c++17 + +from libc.stdint cimport intptr_t + +from cuda.bindings.driver cimport (CUstream as pyCUstream, + CUevent as pyCUevent) +from cuda.bindings.cydriver cimport CUstream, CUevent + +from cuda.core.experimental import Device + + +cdef extern from "utility.hpp": + void* get_cuda_native_handle[T](T) + + +def test_get_cuda_native_handle(): + dev = Device(0) + dev.set_current() + + s = dev.create_stream() + cdef pyCUstream s_py = s.handle + cdef CUstream s_c = get_cuda_native_handle(s_py) + assert (s_c) == (int(s_py)) + + e = s.record() + cdef pyCUevent e_py = e.handle + cdef CUevent e_c = get_cuda_native_handle(e_py) + assert (e_c) == (int(e_py)) + + e.close() + s.close() diff --git a/cuda_core/tests/pytest.ini b/cuda_core/tests/pytest.ini new file mode 100644 index 000000000..76f66832c --- /dev/null +++ b/cuda_core/tests/pytest.ini @@ -0,0 +1,2 @@ +[pytest] +norecursedirs = cython From 299ce8dc850376f6ee3cc2adf496f0004f77483e Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sun, 23 Feb 2025 23:52:48 +0000 Subject: [PATCH 03/16] improve type hints to make Ruff happy --- cuda_core/cuda/core/experimental/_event.py | 9 +++++++-- cuda_core/cuda/core/experimental/_linker.py | 9 +++++++-- cuda_core/cuda/core/experimental/_program.py | 12 ++++++++++-- cuda_core/cuda/core/experimental/_stream.py | 3 ++- 4 files changed, 26 insertions(+), 7 deletions(-) diff --git a/cuda_core/cuda/core/experimental/_event.py b/cuda_core/cuda/core/experimental/_event.py index 5d7032927..9f700c923 100644 --- a/cuda_core/cuda/core/experimental/_event.py +++ b/cuda_core/cuda/core/experimental/_event.py @@ -2,12 +2,17 @@ # # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE +from __future__ import annotations + import weakref from dataclasses import dataclass -from typing import Optional +from typing import TYPE_CHECKING, Optional from cuda.core.experimental._utils import CUDAError, check_or_create_options, driver, handle_return +if TYPE_CHECKING: + import cuda.bindings + @dataclass class EventOptions: @@ -130,6 +135,6 @@ def is_done(self) -> bool: raise CUDAError(f"unexpected error: {result}") @property - def handle(self) -> "CUevent": + def handle(self) -> cuda.bindings.driver.CUevent: """Return the underlying cudaEvent_t pointer address as Python int.""" return self._mnff.handle diff --git a/cuda_core/cuda/core/experimental/_linker.py b/cuda_core/cuda/core/experimental/_linker.py index fe8c462f4..994634180 100644 --- a/cuda_core/cuda/core/experimental/_linker.py +++ b/cuda_core/cuda/core/experimental/_linker.py @@ -2,13 +2,18 @@ # # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE +from __future__ import annotations + import ctypes import weakref from contextlib import contextmanager from dataclasses import dataclass -from typing import List, Optional, Tuple, Union +from typing import TYPE_CHECKING, List, Optional, Tuple, Union from warnings import warn +if TYPE_CHECKING: + import cuda.bindings + from cuda.core.experimental._device import Device from cuda.core.experimental._module import ObjectCode from cuda.core.experimental._utils import check_or_create_options, driver, handle_return, is_sequence @@ -324,7 +329,7 @@ def _exception_manager(self): nvJitLinkHandleT = int -LinkerHandleT = Union[nvJitLinkHandleT, "CUlinkState"] +LinkerHandleT = Union[nvJitLinkHandleT, "cuda.bindings.driver.CUlinkState"] class Linker: diff --git a/cuda_core/cuda/core/experimental/_program.py b/cuda_core/cuda/core/experimental/_program.py index 5e3957345..de3bea494 100644 --- a/cuda_core/cuda/core/experimental/_program.py +++ b/cuda_core/cuda/core/experimental/_program.py @@ -2,11 +2,16 @@ # # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE +from __future__ import annotations + import weakref from dataclasses import dataclass -from typing import List, Optional, Tuple, Union +from typing import TYPE_CHECKING, List, Optional, Tuple, Union from warnings import warn +if TYPE_CHECKING: + import cuda.bindings + from cuda.core.experimental._device import Device from cuda.core.experimental._linker import Linker, LinkerHandleT, LinkerOptions from cuda.core.experimental._module import ObjectCode @@ -331,6 +336,9 @@ def __repr__(self): return self._formatted_options +ProgramHandleT = Union["cuda.bindings.nvrtc.nvrtcProgram", LinkerHandleT] + + class Program: """Represent a compilation machinery to process programs into :obj:`~_module.ObjectCode`. @@ -498,6 +506,6 @@ def backend(self) -> str: return self._backend @property - def handle(self) -> Union["nvrtcProgram", LinkerHandleT]: + def handle(self) -> ProgramHandleT: """Return the underlying handle object.""" return self._mnff.handle diff --git a/cuda_core/cuda/core/experimental/_stream.py b/cuda_core/cuda/core/experimental/_stream.py index a980ae2c5..c4529bbbb 100644 --- a/cuda_core/cuda/core/experimental/_stream.py +++ b/cuda_core/cuda/core/experimental/_stream.py @@ -11,6 +11,7 @@ from typing import TYPE_CHECKING, Optional, Tuple, Union if TYPE_CHECKING: + import cuda.bindings from cuda.core.experimental._device import Device from cuda.core.experimental._context import Context from cuda.core.experimental._event import Event, EventOptions @@ -147,7 +148,7 @@ def __cuda_stream__(self) -> Tuple[int, int]: return (0, self.handle) @property - def handle(self) -> "CUstream": + def handle(self) -> cuda.bindings.driver.CUstream: """Return the underlying cudaStream_t pointer address as Python int.""" return self._mnff.handle From 674cee7e14d7ca9ca3baee436e3384239415a6c7 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Mon, 24 Feb 2025 00:01:35 +0000 Subject: [PATCH 04/16] update license headers --- cuda_core/cuda/core/experimental/_event.py | 2 +- cuda_core/cuda/core/experimental/_linker.py | 2 +- cuda_core/cuda/core/experimental/_stream.py | 2 +- cuda_core/cuda/core/experimental/include/utility.hpp | 6 ++++++ cuda_core/tests/cython/test_cython.py | 9 +++------ cuda_core/tests/cython/test_get_cuda_native_handle.pyx | 4 ++++ cuda_core/tests/pytest.ini | 4 ++++ cuda_core/tests/test_linker.py | 2 +- cuda_core/tests/test_program.py | 8 ++------ 9 files changed, 23 insertions(+), 16 deletions(-) diff --git a/cuda_core/cuda/core/experimental/_event.py b/cuda_core/cuda/core/experimental/_event.py index 9f700c923..e959f41e9 100644 --- a/cuda_core/cuda/core/experimental/_event.py +++ b/cuda_core/cuda/core/experimental/_event.py @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. +# Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. # # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE diff --git a/cuda_core/cuda/core/experimental/_linker.py b/cuda_core/cuda/core/experimental/_linker.py index 994634180..2d42a44e0 100644 --- a/cuda_core/cuda/core/experimental/_linker.py +++ b/cuda_core/cuda/core/experimental/_linker.py @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. +# Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. # # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE diff --git a/cuda_core/cuda/core/experimental/_stream.py b/cuda_core/cuda/core/experimental/_stream.py index c4529bbbb..2f822dbef 100644 --- a/cuda_core/cuda/core/experimental/_stream.py +++ b/cuda_core/cuda/core/experimental/_stream.py @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. +# Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. # # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE diff --git a/cuda_core/cuda/core/experimental/include/utility.hpp b/cuda_core/cuda/core/experimental/include/utility.hpp index b9f301a54..d70462ce6 100644 --- a/cuda_core/cuda/core/experimental/include/utility.hpp +++ b/cuda_core/cuda/core/experimental/include/utility.hpp @@ -1,3 +1,9 @@ +// Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. +// +// SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE + +#pragma once + #include // In cuda.bindings 12.8, the private member name was renamed from "_ptr" to "_pvt_ptr". diff --git a/cuda_core/tests/cython/test_cython.py b/cuda_core/tests/cython/test_cython.py index 9ffe7f2dc..dc887d656 100644 --- a/cuda_core/tests/cython/test_cython.py +++ b/cuda_core/tests/cython/test_cython.py @@ -1,10 +1,7 @@ -# Copyright 2021-2024 NVIDIA Corporation. All rights reserved. +# Copyright (c) 2021-2025, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. # -# Please refer to the NVIDIA end user license agreement (EULA) associated -# with this source code for terms and conditions that govern your use of -# this software. Any use, reproduction, disclosure, or distribution of -# this software and related documentation outside the terms of the EULA -# is strictly prohibited. +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE + import functools import importlib import sys diff --git a/cuda_core/tests/cython/test_get_cuda_native_handle.pyx b/cuda_core/tests/cython/test_get_cuda_native_handle.pyx index 2d4a92597..54e6aa013 100644 --- a/cuda_core/tests/cython/test_get_cuda_native_handle.pyx +++ b/cuda_core/tests/cython/test_get_cuda_native_handle.pyx @@ -1,3 +1,7 @@ +# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. +# +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE + # distutils: language = c++ # distutils: extra_compile_args = -std=c++17 diff --git a/cuda_core/tests/pytest.ini b/cuda_core/tests/pytest.ini index 76f66832c..b703c9832 100644 --- a/cuda_core/tests/pytest.ini +++ b/cuda_core/tests/pytest.ini @@ -1,2 +1,6 @@ +# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. +# +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE + [pytest] norecursedirs = cython diff --git a/cuda_core/tests/test_linker.py b/cuda_core/tests/test_linker.py index 939890f4c..59e8ca573 100644 --- a/cuda_core/tests/test_linker.py +++ b/cuda_core/tests/test_linker.py @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. +# Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. # # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE diff --git a/cuda_core/tests/test_program.py b/cuda_core/tests/test_program.py index 88ed76c85..05132a201 100644 --- a/cuda_core/tests/test_program.py +++ b/cuda_core/tests/test_program.py @@ -1,10 +1,6 @@ -# Copyright 2024 NVIDIA Corporation. All rights reserved. +# Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. # -# Please refer to the NVIDIA end user license agreement (EULA) associated -# with this source code for terms and conditions that govern your use of -# this software. Any use, reproduction, disclosure, or distribution of -# this software and related documentation outside the terms of the EULA -# is strictly prohibited. +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE import warnings From 650e3ae8cbac349c3a50f554879849c620c5e625 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Mon, 24 Feb 2025 00:34:55 +0000 Subject: [PATCH 05/16] update release notes --- cuda_core/docs/source/release/0.2.0-notes.rst | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/cuda_core/docs/source/release/0.2.0-notes.rst b/cuda_core/docs/source/release/0.2.0-notes.rst index 81e4e1292..39cb586c1 100644 --- a/cuda_core/docs/source/release/0.2.0-notes.rst +++ b/cuda_core/docs/source/release/0.2.0-notes.rst @@ -9,20 +9,22 @@ Highlights ---------- - Add :class:`~ProgramOptions` to facilitate the passing of runtime compile options to :obj:`~Program`. -- Add :class:`~DeviceProperties` to provide pythonic access to device properties. -- Add kernel attributes to :class:`~Kernel` +- Add pythonic access to :class:`Device` and :class:`~_module.Kernel` attributes. Breaking Changes ---------------- -- Change ``__cuda_stream__`` from attribute to method -- The :meth:`~Program.compile` method no longer accepts the `options` argument. Instead, you can optionally pass an instance of :class:`~ProgramOptions` to the constructor of :obj:`~Program`. -- :meth:`~Device.properties` now provides an instance of :class:`~DeviceProperties` instead of a dictionary. +- Change ``__cuda_stream__`` from attribute to method. +- The :meth:`Program.compile` method no longer accepts the ``options`` argument. Instead, you can optionally pass an instance of :class:`ProgramOptions` to the constructor of :class:`Program`. +- :meth:`Device.properties` now provides attribute getters instead of a dictionary interface. +- The ``.handle`` attribute of various ``cuda.core`` objects now returns the underlying Python object instead of a (type-erased) Python integer. New features ------------ - Expose :class:`ObjectCode` as a public API, which allows loading cubins from memory or disk. For loading other kinds of code types, please continue using :class:`Program`. +- A C++ helper function ``get_cuda_native_handle()`` is provided in the new ``include/utility.cuh`` header to retrive the underlying CUDA C objects (ex: ``CUstream``) from a Python object returned by the ``.handle`` attribute (ex: :attr:`Stream.handle`). +- For objects such as :class:`Program` and :class:`Linker` that could dispatch to different backends, a new ``.backend`` attribute is provided to query this information. Limitations ----------- From 18a9c85d813f5f57e36a6019bddd065c6f3eb086 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Mon, 24 Feb 2025 00:41:55 +0000 Subject: [PATCH 06/16] also test nvrtc --- .../tests/cython/test_get_cuda_native_handle.pyx | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/cuda_core/tests/cython/test_get_cuda_native_handle.pyx b/cuda_core/tests/cython/test_get_cuda_native_handle.pyx index 54e6aa013..efc5c9fef 100644 --- a/cuda_core/tests/cython/test_get_cuda_native_handle.pyx +++ b/cuda_core/tests/cython/test_get_cuda_native_handle.pyx @@ -9,9 +9,11 @@ from libc.stdint cimport intptr_t from cuda.bindings.driver cimport (CUstream as pyCUstream, CUevent as pyCUevent) +from cuda.bindings.nvrtc cimport nvrtcProgram as pynvrtcProgram from cuda.bindings.cydriver cimport CUstream, CUevent +from cuda.bindings.cynvrtc cimport nvrtcProgram -from cuda.core.experimental import Device +from cuda.core.experimental import Device, Program cdef extern from "utility.hpp": @@ -32,5 +34,12 @@ def test_get_cuda_native_handle(): cdef CUevent e_c = get_cuda_native_handle(e_py) assert (e_c) == (int(e_py)) + prog = Program("extern \"C\" __global__ void dummy() {}", "c++") + assert prog.backend == "NVRTC" + cdef pynvrtcProgram prog_py = prog.handle + cdef nvrtcProgram prog_c = get_cuda_native_handle(prog_py) + assert (prog_c) == (int(prog_py)) + + prog.close() e.close() s.close() From b59918bbe6ac68612f7b6fbe3adb5db2efa69453 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Mon, 24 Feb 2025 02:13:13 +0000 Subject: [PATCH 07/16] bug fixes and test updates --- cuda_core/cuda/core/experimental/_memoryview.pyx | 4 ++-- cuda_core/tests/example_tests/test_basic_examples.py | 3 +++ cuda_core/tests/test_stream.py | 6 ++++-- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/cuda_core/cuda/core/experimental/_memoryview.pyx b/cuda_core/cuda/core/experimental/_memoryview.pyx index feca22782..7ebfa4806 100644 --- a/cuda_core/cuda/core/experimental/_memoryview.pyx +++ b/cuda_core/cuda/core/experimental/_memoryview.pyx @@ -177,11 +177,11 @@ cdef StridedMemoryView view_as_dlpack(obj, stream_ptr, view=None): cdef object capsule try: capsule = obj.__dlpack__( - stream=stream_ptr, + stream=int(stream_ptr) if stream_ptr else None, max_version=(DLPACK_MAJOR_VERSION, DLPACK_MINOR_VERSION)) except TypeError: capsule = obj.__dlpack__( - stream=stream_ptr) + stream=int(stream_ptr) if stream_ptr else None) cdef void* data = NULL if cpython.PyCapsule_IsValid( diff --git a/cuda_core/tests/example_tests/test_basic_examples.py b/cuda_core/tests/example_tests/test_basic_examples.py index 9a9432cb8..1791007e6 100644 --- a/cuda_core/tests/example_tests/test_basic_examples.py +++ b/cuda_core/tests/example_tests/test_basic_examples.py @@ -13,6 +13,7 @@ import pytest +from cuda.core.experimental import Device from .utils import run_example samples_path = os.path.join(os.path.dirname(__file__), "..", "..", "examples") @@ -23,3 +24,5 @@ class TestExamples: def test_example(self, example, deinit_cuda): run_example(samples_path, example) + if Device().device_id != 0: + Device(0).set_current() diff --git a/cuda_core/tests/test_stream.py b/cuda_core/tests/test_stream.py index 9c661192f..e8b59b34f 100644 --- a/cuda_core/tests/test_stream.py +++ b/cuda_core/tests/test_stream.py @@ -11,6 +11,7 @@ from cuda.core.experimental import Device, Stream, StreamOptions from cuda.core.experimental._event import Event from cuda.core.experimental._stream import LEGACY_DEFAULT_STREAM, PER_THREAD_DEFAULT_STREAM, default_stream +from cuda.core.experimental._utils import driver def test_stream_init(): @@ -26,7 +27,7 @@ def test_stream_init_with_options(init_cuda): def test_stream_handle(init_cuda): stream = Device().create_stream(options=StreamOptions()) - assert isinstance(stream.handle, int) + assert isinstance(stream.handle, driver.CUstream) def test_stream_is_nonblocking(init_cuda): @@ -90,7 +91,8 @@ def test_stream_from_foreign_stream(init_cuda): device = Device() other_stream = device.create_stream(options=StreamOptions()) stream = device.create_stream(obj=other_stream) - assert other_stream.handle == stream.handle + # convert to int to work around NVIDIA/cuda-python#465 + assert int(other_stream.handle) == int(stream.handle) device = stream.device assert isinstance(device, Device) context = stream.context From aee4a4f6ecfea02c707e553d4b35d62ee4fe80b2 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Mon, 24 Feb 2025 02:56:52 +0000 Subject: [PATCH 08/16] more doc fixes --- cuda_core/cuda/core/experimental/_event.py | 2 +- cuda_core/cuda/core/experimental/_module.py | 4 ++-- cuda_core/cuda/core/experimental/_stream.py | 2 +- cuda_core/docs/source/_templates/autosummary/class.rst | 2 +- cuda_core/docs/source/api_private.rst | 2 +- cuda_core/docs/source/conf.py | 1 + cuda_core/tests/example_tests/test_basic_examples.py | 1 + 7 files changed, 8 insertions(+), 6 deletions(-) diff --git a/cuda_core/cuda/core/experimental/_event.py b/cuda_core/cuda/core/experimental/_event.py index e959f41e9..3b269179b 100644 --- a/cuda_core/cuda/core/experimental/_event.py +++ b/cuda_core/cuda/core/experimental/_event.py @@ -136,5 +136,5 @@ def is_done(self) -> bool: @property def handle(self) -> cuda.bindings.driver.CUevent: - """Return the underlying cudaEvent_t pointer address as Python int.""" + """Return the underlying CUevent object.""" return self._mnff.handle diff --git a/cuda_core/cuda/core/experimental/_module.py b/cuda_core/cuda/core/experimental/_module.py index c4535c279..14a5b6211 100644 --- a/cuda_core/cuda/core/experimental/_module.py +++ b/cuda_core/cuda/core/experimental/_module.py @@ -203,7 +203,7 @@ def _from_obj(obj, mod): return ker @property - def attributes(self): + def attributes(self) -> KernelAttributes: """Get the read-only attributes of this kernel.""" if self._attributes is None: self._attributes = KernelAttributes._init(self._handle) @@ -294,7 +294,7 @@ def _lazy_load_module(self, *args, **kwargs): self._handle = handle_return(self._loader["data"](module, 0, [], [])) @precondition(_lazy_load_module) - def get_kernel(self, name): + def get_kernel(self, name) -> Kernel: """Return the :obj:`~_module.Kernel` of a specified name from this object code. Parameters diff --git a/cuda_core/cuda/core/experimental/_stream.py b/cuda_core/cuda/core/experimental/_stream.py index 2f822dbef..93479340a 100644 --- a/cuda_core/cuda/core/experimental/_stream.py +++ b/cuda_core/cuda/core/experimental/_stream.py @@ -149,7 +149,7 @@ def __cuda_stream__(self) -> Tuple[int, int]: @property def handle(self) -> cuda.bindings.driver.CUstream: - """Return the underlying cudaStream_t pointer address as Python int.""" + """Return the underlying ``CUstream`` object.""" return self._mnff.handle @property diff --git a/cuda_core/docs/source/_templates/autosummary/class.rst b/cuda_core/docs/source/_templates/autosummary/class.rst index b45a3fd5b..25bc697d0 100644 --- a/cuda_core/docs/source/_templates/autosummary/class.rst +++ b/cuda_core/docs/source/_templates/autosummary/class.rst @@ -20,7 +20,7 @@ .. rubric:: {{ _('Attributes') }} {% for item in attributes %} - .. autoattribute:: {{ item }} + .. autoproperty:: {{ item }} {%- endfor %} {% endif %} {% endblock %} diff --git a/cuda_core/docs/source/api_private.rst b/cuda_core/docs/source/api_private.rst index 45d32808b..b5caaf479 100644 --- a/cuda_core/docs/source/api_private.rst +++ b/cuda_core/docs/source/api_private.rst @@ -17,6 +17,7 @@ CUDA runtime _stream.Stream _event.Event _device.DeviceProperties + _module.KernelAttributes CUDA compilation toolchain @@ -26,4 +27,3 @@ CUDA compilation toolchain :toctree: generated/ _module.Kernel - _module.ObjectCode diff --git a/cuda_core/docs/source/conf.py b/cuda_core/docs/source/conf.py index 9e0972e03..d3c89b6ee 100644 --- a/cuda_core/docs/source/conf.py +++ b/cuda_core/docs/source/conf.py @@ -95,6 +95,7 @@ intersphinx_mapping = { "python": ("https://docs.python.org/3/", None), "numpy": ("https://numpy.org/doc/stable/", None), + "cuda.bindings": ("https://nvidia.github.io/cuda-python/cuda-bindings/latest", None), } napoleon_google_docstring = False diff --git a/cuda_core/tests/example_tests/test_basic_examples.py b/cuda_core/tests/example_tests/test_basic_examples.py index 1791007e6..f085e3c61 100644 --- a/cuda_core/tests/example_tests/test_basic_examples.py +++ b/cuda_core/tests/example_tests/test_basic_examples.py @@ -14,6 +14,7 @@ import pytest from cuda.core.experimental import Device + from .utils import run_example samples_path = os.path.join(os.path.dirname(__file__), "..", "..", "examples") From 2189cb1486ddf290c39401da61cf42bd65a4a9fc Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Mon, 24 Feb 2025 04:48:32 +0000 Subject: [PATCH 09/16] fix workflow --- .github/workflows/test-wheel-linux.yml | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/.github/workflows/test-wheel-linux.yml b/.github/workflows/test-wheel-linux.yml index 9d2bdde4b..8e82c977b 100644 --- a/.github/workflows/test-wheel-linux.yml +++ b/.github/workflows/test-wheel-linux.yml @@ -62,8 +62,16 @@ jobs: TEST_CUDA_MAJOR="$(cut -d '.' -f 1 <<< ${{ inputs.cuda-version }})" if [[ $BUILD_CUDA_MAJOR != $TEST_CUDA_MAJOR ]]; then SKIP_CUDA_BINDINGS_TEST=1 + SKIP_CUDA_CORE_CYTHON_TEST=0 else SKIP_CUDA_BINDINGS_TEST=0 + BUILD_CUDA_MINOR="$(cut -d '.' -f 2 <<< ${{ inputs.build-ctk-ver }})" + TEST_CUDA_MINOR="$(cut -d '.' -f 2 <<< ${{ inputs.cuda-version }})" + if [[ $BUILD_CUDA_MINOR != $TEST_CUDA_MINOR ]]; then + SKIP_CUDA_CORE_CYTHON_TEST=1 + else + SKIP_CUDA_CORE_CYTHON_TEST=0 + fi fi # make outputs from the previous job as env vars @@ -77,6 +85,7 @@ jobs: echo "CUDA_BINDINGS_ARTIFACT_NAME=${CUDA_BINDINGS_ARTIFACT_BASENAME}-${{ github.sha }}" >> $GITHUB_ENV echo "CUDA_BINDINGS_ARTIFACTS_DIR=$(realpath "$REPO_DIR/cuda_bindings/dist")" >> $GITHUB_ENV echo "SKIP_CUDA_BINDINGS_TEST=${SKIP_CUDA_BINDINGS_TEST}" >> $GITHUB_ENV + echo "SKIP_CUDA_CORE_CYTHON_TEST=${SKIP_CUDA_CORE_CYTHON_TEST}" >> $GITHUB_ENV - name: Install dependencies uses: ./.github/actions/install_unix_deps @@ -197,8 +206,8 @@ jobs: exit 1 fi pytest -rxXs tests/cython - popd fi + popd - name: Run cuda.core tests run: | @@ -221,11 +230,13 @@ jobs: pushd ./cuda_core pip install -r "tests/requirements-cu${TEST_CUDA_MAJOR}.txt" pytest -rxXs tests/ - popd # It is a bit convoluted to run the Cython tests against CTK wheels, - # so let's just skip them. - if [[ "${{ inputs.local-ctk }}" == 1 ]]; then + # so let's just skip them. Also, currently our CI always installs the + # latest bindings (from either major version). This is not compatible + # with the test requirements. + if [[ "${{ inputs.local-ctk }}" == 1 && "${SKIP_CUDA_CORE_CYTHON_TEST}" == 0 ]]; then + pip install cython if [[ "${{ inputs.host-platform }}" == linux* ]]; then bash tests/cython/build_tests.sh elif [[ "${{ inputs.host-platform }}" == win* ]]; then @@ -233,8 +244,8 @@ jobs: exit 1 fi pytest -rxXs tests/cython - popd fi + popd - name: Ensure cuda-python installable run: | From 37a6850ff78691eea74e0cdbd58c4a5d1eb6b638 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Mon, 24 Feb 2025 14:08:29 +0000 Subject: [PATCH 10/16] fix headers not bundled; apply suggestion to header --- cuda_core/cuda/core/experimental/include/utility.hpp | 4 ++-- cuda_core/pyproject.toml | 3 +++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/cuda_core/cuda/core/experimental/include/utility.hpp b/cuda_core/cuda/core/experimental/include/utility.hpp index d70462ce6..0dd14f713 100644 --- a/cuda_core/cuda/core/experimental/include/utility.hpp +++ b/cuda_core/cuda/core/experimental/include/utility.hpp @@ -11,13 +11,13 @@ // Since there's no std::has_member so we use SFINAE to create the same effect. template ::_pvt_ptr)>, bool> = true> + std::enable_if_t::_pvt_ptr)>, int> = 0> inline auto& get_cuda_native_handle(const T& obj) { return *(obj->_pvt_ptr); } template ::_ptr)>, bool> = true> + std::enable_if_t::_ptr)>, int> = 0> inline auto& get_cuda_native_handle(const T& obj) { return *(obj->_ptr); } diff --git a/cuda_core/pyproject.toml b/cuda_core/pyproject.toml index 8d7bc74a9..74820761c 100644 --- a/cuda_core/pyproject.toml +++ b/cuda_core/pyproject.toml @@ -59,6 +59,9 @@ issues = "https://github.com/NVIDIA/cuda-python/issues/" [tool.setuptools.packages.find] include = ["cuda.core*"] +[tool.setuptools.package-data] +"cuda.core.experimental.include" = ["*.h", "*.hpp", "*.cuh"] + [tool.setuptools.dynamic] version = { attr = "cuda.core._version.__version__" } readme = { file = ["DESCRIPTION.rst"], content-type = "text/x-rst" } From 43654c4bf133f2be8f914a020a254ec49455a271 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Mon, 24 Feb 2025 14:11:52 +0000 Subject: [PATCH 11/16] ensure no chance to return None --- cuda_core/cuda/core/experimental/_program.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cuda_core/cuda/core/experimental/_program.py b/cuda_core/cuda/core/experimental/_program.py index de3bea494..fd3471512 100644 --- a/cuda_core/cuda/core/experimental/_program.py +++ b/cuda_core/cuda/core/experimental/_program.py @@ -497,8 +497,8 @@ def compile(self, target_type, name_expressions=(), logs=None): return ObjectCode._init(data, target_type, symbol_mapping=symbol_mapping) - if self._backend in ("nvJitLink", "driver"): - return self._linker.link(target_type) + assert self._backend in ("nvJitLink", "driver"): + return self._linker.link(target_type) @property def backend(self) -> str: From bc9595b1109aa3c35ad40d260428cf0ba4c1e4bd Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Mon, 24 Feb 2025 14:12:46 +0000 Subject: [PATCH 12/16] fix CI again --- .github/workflows/test-wheel-linux.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-wheel-linux.yml b/.github/workflows/test-wheel-linux.yml index 8e82c977b..41ea38d45 100644 --- a/.github/workflows/test-wheel-linux.yml +++ b/.github/workflows/test-wheel-linux.yml @@ -236,7 +236,7 @@ jobs: # latest bindings (from either major version). This is not compatible # with the test requirements. if [[ "${{ inputs.local-ctk }}" == 1 && "${SKIP_CUDA_CORE_CYTHON_TEST}" == 0 ]]; then - pip install cython + pip install cython setuptools # setuptools needed starting PY312 if [[ "${{ inputs.host-platform }}" == linux* ]]; then bash tests/cython/build_tests.sh elif [[ "${{ inputs.host-platform }}" == win* ]]; then From 7db66421dade697035ca601055a46673b5bd4afc Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Mon, 24 Feb 2025 14:15:36 +0000 Subject: [PATCH 13/16] fix typo --- cuda_core/cuda/core/experimental/_program.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuda_core/cuda/core/experimental/_program.py b/cuda_core/cuda/core/experimental/_program.py index fd3471512..61a4bf693 100644 --- a/cuda_core/cuda/core/experimental/_program.py +++ b/cuda_core/cuda/core/experimental/_program.py @@ -497,7 +497,7 @@ def compile(self, target_type, name_expressions=(), logs=None): return ObjectCode._init(data, target_type, symbol_mapping=symbol_mapping) - assert self._backend in ("nvJitLink", "driver"): + assert self._backend in ("nvJitLink", "driver") return self._linker.link(target_type) @property From 63f7feb74cffa3c460be5e78600926eff411dd6a Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Mon, 24 Feb 2025 21:56:43 +0000 Subject: [PATCH 14/16] add .code attribute to ObjectCode --- cuda_core/cuda/core/experimental/_module.py | 10 +++++++++- cuda_core/tests/test_module.py | 2 ++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/cuda_core/cuda/core/experimental/_module.py b/cuda_core/cuda/core/experimental/_module.py index 14a5b6211..041602691 100644 --- a/cuda_core/cuda/core/experimental/_module.py +++ b/cuda_core/cuda/core/experimental/_module.py @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. +# Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. # # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE @@ -212,6 +212,9 @@ def attributes(self) -> KernelAttributes: # TODO: implement from_handle() +CodeTypeT = Union[bytes, bytearray, str] + + class ObjectCode: """Represent a compiled program to be loaded onto the device. @@ -317,3 +320,8 @@ def get_kernel(self, name) -> Kernel: data = handle_return(self._loader["kernel"](self._handle, name)) return Kernel._from_obj(data, self) + + @property + def code(self) -> CodeTypeT: + """Return the underlying code object.""" + return self._module diff --git a/cuda_core/tests/test_module.py b/cuda_core/tests/test_module.py index 355a0c49a..8528c4d53 100644 --- a/cuda_core/tests/test_module.py +++ b/cuda_core/tests/test_module.py @@ -96,6 +96,7 @@ def test_object_code_load_cubin(get_saxpy_kernel): sym_map = mod._sym_map assert isinstance(cubin, bytes) mod = ObjectCode.from_cubin(cubin, symbol_mapping=sym_map) + assert mod.code == cubin mod.get_kernel("saxpy") # force loading @@ -107,4 +108,5 @@ def test_object_code_load_cubin_from_file(get_saxpy_kernel, tmp_path): cubin_file = tmp_path / "test.cubin" cubin_file.write_bytes(cubin) mod = ObjectCode.from_cubin(str(cubin_file), symbol_mapping=sym_map) + assert mod.code == str(cubin_file) mod.get_kernel("saxpy") # force loading From c96f6de31571dae3c78923a704c49ec1f7b9ce99 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Mon, 24 Feb 2025 22:03:07 +0000 Subject: [PATCH 15/16] add notes to objects that have different backends --- cuda_core/cuda/core/experimental/_linker.py | 7 ++++++- cuda_core/cuda/core/experimental/_program.py | 7 ++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/cuda_core/cuda/core/experimental/_linker.py b/cuda_core/cuda/core/experimental/_linker.py index 2d42a44e0..43d1eb3c6 100644 --- a/cuda_core/cuda/core/experimental/_linker.py +++ b/cuda_core/cuda/core/experimental/_linker.py @@ -493,7 +493,12 @@ def _input_type_from_code_type(self, code_type: str): @property def handle(self) -> LinkerHandleT: - """Return the underlying handle object.""" + """Return the underlying handle object. + + .. note:: + + The type of the returned object depends on the backend. + """ return self._mnff.handle @property diff --git a/cuda_core/cuda/core/experimental/_program.py b/cuda_core/cuda/core/experimental/_program.py index 61a4bf693..662add23c 100644 --- a/cuda_core/cuda/core/experimental/_program.py +++ b/cuda_core/cuda/core/experimental/_program.py @@ -507,5 +507,10 @@ def backend(self) -> str: @property def handle(self) -> ProgramHandleT: - """Return the underlying handle object.""" + """Return the underlying handle object. + + .. note:: + + The type of the returned object depends on the backend. + """ return self._mnff.handle From e4cde772e7761ff5e68271175b6543ff09f37698 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Tue, 25 Feb 2025 05:12:02 +0000 Subject: [PATCH 16/16] add -v --- .github/workflows/test-wheel-linux.yml | 8 ++++---- .github/workflows/test-wheel-windows.yml | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/test-wheel-linux.yml b/.github/workflows/test-wheel-linux.yml index 41ea38d45..322f859e3 100644 --- a/.github/workflows/test-wheel-linux.yml +++ b/.github/workflows/test-wheel-linux.yml @@ -194,7 +194,7 @@ jobs: pushd ./cuda_bindings pip install -r requirements.txt - pytest -rxXs tests/ + pytest -rxXs -v tests/ # It is a bit convoluted to run the Cython tests against CTK wheels, # so let's just skip them. @@ -205,7 +205,7 @@ jobs: # TODO: enable this once win-64 runners are up exit 1 fi - pytest -rxXs tests/cython + pytest -rxXs -v tests/cython fi popd @@ -229,7 +229,7 @@ jobs: pushd ./cuda_core pip install -r "tests/requirements-cu${TEST_CUDA_MAJOR}.txt" - pytest -rxXs tests/ + pytest -rxXs -v tests/ # It is a bit convoluted to run the Cython tests against CTK wheels, # so let's just skip them. Also, currently our CI always installs the @@ -243,7 +243,7 @@ jobs: # TODO: enable this once win-64 runners are up exit 1 fi - pytest -rxXs tests/cython + pytest -rxXs -v tests/cython fi popd diff --git a/.github/workflows/test-wheel-windows.yml b/.github/workflows/test-wheel-windows.yml index 2312085df..4e48590a3 100644 --- a/.github/workflows/test-wheel-windows.yml +++ b/.github/workflows/test-wheel-windows.yml @@ -186,8 +186,8 @@ jobs: Push-Location ./cuda_bindings pip install -r requirements.txt - pytest -rxXs tests/ - # skip Cython tests for now + pytest -rxXs -v tests/ + # skip Cython tests for now (NVIDIA/cuda-python#466) Pop-Location - name: Run cuda.core tests @@ -210,7 +210,7 @@ jobs: Push-Location ./cuda_core pip install -r "tests/requirements-cu${TEST_CUDA_MAJOR}.txt" - pytest -rxXs tests/ + pytest -rxXs -v tests/ Pop-Location - name: Ensure cuda-python installable