diff --git a/.github/workflows/test-wheel-linux.yml b/.github/workflows/test-wheel-linux.yml index c15de07e5..322f859e3 100644 --- a/.github/workflows/test-wheel-linux.yml +++ b/.github/workflows/test-wheel-linux.yml @@ -62,8 +62,16 @@ jobs: TEST_CUDA_MAJOR="$(cut -d '.' -f 1 <<< ${{ inputs.cuda-version }})" if [[ $BUILD_CUDA_MAJOR != $TEST_CUDA_MAJOR ]]; then SKIP_CUDA_BINDINGS_TEST=1 + SKIP_CUDA_CORE_CYTHON_TEST=0 else SKIP_CUDA_BINDINGS_TEST=0 + BUILD_CUDA_MINOR="$(cut -d '.' -f 2 <<< ${{ inputs.build-ctk-ver }})" + TEST_CUDA_MINOR="$(cut -d '.' -f 2 <<< ${{ inputs.cuda-version }})" + if [[ $BUILD_CUDA_MINOR != $TEST_CUDA_MINOR ]]; then + SKIP_CUDA_CORE_CYTHON_TEST=1 + else + SKIP_CUDA_CORE_CYTHON_TEST=0 + fi fi # make outputs from the previous job as env vars @@ -77,6 +85,7 @@ jobs: echo "CUDA_BINDINGS_ARTIFACT_NAME=${CUDA_BINDINGS_ARTIFACT_BASENAME}-${{ github.sha }}" >> $GITHUB_ENV echo "CUDA_BINDINGS_ARTIFACTS_DIR=$(realpath "$REPO_DIR/cuda_bindings/dist")" >> $GITHUB_ENV echo "SKIP_CUDA_BINDINGS_TEST=${SKIP_CUDA_BINDINGS_TEST}" >> $GITHUB_ENV + echo "SKIP_CUDA_CORE_CYTHON_TEST=${SKIP_CUDA_CORE_CYTHON_TEST}" >> $GITHUB_ENV - name: Install dependencies uses: ./.github/actions/install_unix_deps @@ -185,7 +194,7 @@ jobs: pushd ./cuda_bindings pip install -r requirements.txt - pytest -rxXs tests/ + pytest -rxXs -v tests/ # It is a bit convoluted to run the Cython tests against CTK wheels, # so let's just skip them. @@ -196,9 +205,9 @@ jobs: # TODO: enable this once win-64 runners are up exit 1 fi - pytest -rxXs tests/cython - popd + pytest -rxXs -v tests/cython fi + popd - name: Run cuda.core tests run: | @@ -220,7 +229,22 @@ jobs: pushd ./cuda_core pip install -r "tests/requirements-cu${TEST_CUDA_MAJOR}.txt" - pytest -rxXs tests/ + pytest -rxXs -v tests/ + + # It is a bit convoluted to run the Cython tests against CTK wheels, + # so let's just skip them. Also, currently our CI always installs the + # latest bindings (from either major version). This is not compatible + # with the test requirements. + if [[ "${{ inputs.local-ctk }}" == 1 && "${SKIP_CUDA_CORE_CYTHON_TEST}" == 0 ]]; then + pip install cython setuptools # setuptools needed starting PY312 + if [[ "${{ inputs.host-platform }}" == linux* ]]; then + bash tests/cython/build_tests.sh + elif [[ "${{ inputs.host-platform }}" == win* ]]; then + # TODO: enable this once win-64 runners are up + exit 1 + fi + pytest -rxXs -v tests/cython + fi popd - name: Ensure cuda-python installable diff --git a/.github/workflows/test-wheel-windows.yml b/.github/workflows/test-wheel-windows.yml index 2312085df..4e48590a3 100644 --- a/.github/workflows/test-wheel-windows.yml +++ b/.github/workflows/test-wheel-windows.yml @@ -186,8 +186,8 @@ jobs: Push-Location ./cuda_bindings pip install -r requirements.txt - pytest -rxXs tests/ - # skip Cython tests for now + pytest -rxXs -v tests/ + # skip Cython tests for now (NVIDIA/cuda-python#466) Pop-Location - name: Run cuda.core tests @@ -210,7 +210,7 @@ jobs: Push-Location ./cuda_core pip install -r "tests/requirements-cu${TEST_CUDA_MAJOR}.txt" - pytest -rxXs tests/ + pytest -rxXs -v tests/ Pop-Location - name: Ensure cuda-python installable diff --git a/cuda_core/README.md b/cuda_core/README.md index 534cdb8fe..3743f8fae 100644 --- a/cuda_core/README.md +++ b/cuda_core/README.md @@ -41,3 +41,14 @@ for more details, including how to sign your commits. 
 To run these tests:
 * `python -m pytest tests/` against editable installations
 * `pytest tests/` against installed packages
+
+### Cython Unit Tests
+
+Cython tests are located in `tests/cython` and need to be built. These builds have the same CUDA Toolkit header requirements as [those of cuda.bindings](https://nvidia.github.io/cuda-python/cuda-bindings/latest/install.html#requirements): the CUDA Toolkit's major.minor version must match that of the installed `cuda.bindings`. To build them:
+
+1. Set the environment variable `CUDA_HOME` to the path of the CUDA Toolkit installation.
+2. Run the `build_tests` script in `tests/cython` appropriate to your platform. This both cythonizes the tests and builds them.
+
+To run these tests:
+* `python -m pytest tests/cython/` against editable installations
+* `pytest tests/cython/` against installed packages
diff --git a/cuda_core/cuda/core/experimental/_event.py b/cuda_core/cuda/core/experimental/_event.py
index 06005f95a..3b269179b 100644
--- a/cuda_core/cuda/core/experimental/_event.py
+++ b/cuda_core/cuda/core/experimental/_event.py
@@ -1,13 +1,18 @@
-# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED.
+# Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED.
 #
 # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
 
+from __future__ import annotations
+
 import weakref
 from dataclasses import dataclass
-from typing import Optional
+from typing import TYPE_CHECKING, Optional
 
 from cuda.core.experimental._utils import CUDAError, check_or_create_options, driver, handle_return
 
+if TYPE_CHECKING:
+    import cuda.bindings
+
 
 @dataclass
 class EventOptions:
@@ -130,6 +135,6 @@ def is_done(self) -> bool:
         raise CUDAError(f"unexpected error: {result}")
 
     @property
-    def handle(self) -> int:
-        """Return the underlying cudaEvent_t pointer address as Python int."""
-        return int(self._mnff.handle)
+    def handle(self) -> cuda.bindings.driver.CUevent:
+        """Return the underlying CUevent object."""
+        return self._mnff.handle
diff --git a/cuda_core/cuda/core/experimental/_linker.py b/cuda_core/cuda/core/experimental/_linker.py
index 2f84f3502..43d1eb3c6 100644
--- a/cuda_core/cuda/core/experimental/_linker.py
+++ b/cuda_core/cuda/core/experimental/_linker.py
@@ -1,14 +1,19 @@
-# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED.
+# Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED.
 #
 # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
 
+from __future__ import annotations
+
 import ctypes
 import weakref
 from contextlib import contextmanager
 from dataclasses import dataclass
-from typing import List, Optional, Tuple, Union
+from typing import TYPE_CHECKING, List, Optional, Tuple, Union
 from warnings import warn
 
+if TYPE_CHECKING:
+    import cuda.bindings
+
 from cuda.core.experimental._device import Device
 from cuda.core.experimental._module import ObjectCode
 from cuda.core.experimental._utils import check_or_create_options, driver, handle_return, is_sequence
@@ -323,6 +328,10 @@ def _exception_manager(self):
         raise e
 
 
+nvJitLinkHandleT = int
+LinkerHandleT = Union[nvJitLinkHandleT, "cuda.bindings.driver.CUlinkState"]
+
+
 class Linker:
     """Represent a linking machinery to link one or multiple object codes into
     :obj:`~cuda.core.experimental._module.ObjectCode` with the specified options.
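As a quick illustration of the `LinkerHandleT` alias added above, together with the `Linker.handle` and `Linker.backend` properties introduced in the next hunk, here is a minimal sketch. The `describe_linker` helper is hypothetical and not part of the patch; it assumes an nvJitLink handle is a plain `int` (per `nvJitLinkHandleT`) and that a `CUlinkState` can be converted with `int()`, as the tests in this patch do.

```python
from cuda.core.experimental import Linker


def describe_linker(linker: Linker) -> str:
    # LinkerHandleT: an int for the nvJitLink backend, or driver.CUlinkState
    # for the driver (culink) backend.
    handle = linker.handle
    if linker.backend == "nvJitLink":
        return f"nvJitLink handle: {handle:#x}"
    # The typed CUlinkState object must be converted explicitly to get an address.
    return f"driver CUlinkState: {int(handle):#x}"
```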
@@ -483,10 +492,20 @@ def _input_type_from_code_type(self, code_type: str): return input_type @property - def handle(self): - """Return the linker handle object.""" + def handle(self) -> LinkerHandleT: + """Return the underlying handle object. + + .. note:: + + The type of the returned object depends on the backend. + """ return self._mnff.handle + @property + def backend(self) -> str: + """Return this Linker instance's underlying backend.""" + return "nvJitLink" if self._mnff.use_nvjitlink else "driver" + def close(self): """Destroy this linker.""" self._mnff.close() diff --git a/cuda_core/cuda/core/experimental/_memoryview.pyx b/cuda_core/cuda/core/experimental/_memoryview.pyx index feca22782..7ebfa4806 100644 --- a/cuda_core/cuda/core/experimental/_memoryview.pyx +++ b/cuda_core/cuda/core/experimental/_memoryview.pyx @@ -177,11 +177,11 @@ cdef StridedMemoryView view_as_dlpack(obj, stream_ptr, view=None): cdef object capsule try: capsule = obj.__dlpack__( - stream=stream_ptr, + stream=int(stream_ptr) if stream_ptr else None, max_version=(DLPACK_MAJOR_VERSION, DLPACK_MINOR_VERSION)) except TypeError: capsule = obj.__dlpack__( - stream=stream_ptr) + stream=int(stream_ptr) if stream_ptr else None) cdef void* data = NULL if cpython.PyCapsule_IsValid( diff --git a/cuda_core/cuda/core/experimental/_module.py b/cuda_core/cuda/core/experimental/_module.py index c4535c279..041602691 100644 --- a/cuda_core/cuda/core/experimental/_module.py +++ b/cuda_core/cuda/core/experimental/_module.py @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. +# Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. # # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE @@ -203,7 +203,7 @@ def _from_obj(obj, mod): return ker @property - def attributes(self): + def attributes(self) -> KernelAttributes: """Get the read-only attributes of this kernel.""" if self._attributes is None: self._attributes = KernelAttributes._init(self._handle) @@ -212,6 +212,9 @@ def attributes(self): # TODO: implement from_handle() +CodeTypeT = Union[bytes, bytearray, str] + + class ObjectCode: """Represent a compiled program to be loaded onto the device. @@ -294,7 +297,7 @@ def _lazy_load_module(self, *args, **kwargs): self._handle = handle_return(self._loader["data"](module, 0, [], [])) @precondition(_lazy_load_module) - def get_kernel(self, name): + def get_kernel(self, name) -> Kernel: """Return the :obj:`~_module.Kernel` of a specified name from this object code. 
Parameters @@ -317,3 +320,8 @@ def get_kernel(self, name): data = handle_return(self._loader["kernel"](self._handle, name)) return Kernel._from_obj(data, self) + + @property + def code(self) -> CodeTypeT: + """Return the underlying code object.""" + return self._module diff --git a/cuda_core/cuda/core/experimental/_program.py b/cuda_core/cuda/core/experimental/_program.py index 1bb48afc9..662add23c 100644 --- a/cuda_core/cuda/core/experimental/_program.py +++ b/cuda_core/cuda/core/experimental/_program.py @@ -2,13 +2,18 @@ # # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE +from __future__ import annotations + import weakref from dataclasses import dataclass -from typing import List, Optional, Tuple, Union +from typing import TYPE_CHECKING, List, Optional, Tuple, Union from warnings import warn +if TYPE_CHECKING: + import cuda.bindings + from cuda.core.experimental._device import Device -from cuda.core.experimental._linker import Linker, LinkerOptions +from cuda.core.experimental._linker import Linker, LinkerHandleT, LinkerOptions from cuda.core.experimental._module import ObjectCode from cuda.core.experimental._utils import ( _handle_boolean_option, @@ -331,6 +336,9 @@ def __repr__(self): return self._formatted_options +ProgramHandleT = Union["cuda.bindings.nvrtc.nvrtcProgram", LinkerHandleT] + + class Program: """Represent a compilation machinery to process programs into :obj:`~_module.ObjectCode`. @@ -382,7 +390,7 @@ def __init__(self, code, code_type, options: ProgramOptions = None): # TODO: allow tuples once NVIDIA/cuda-python#72 is resolved self._mnff.handle = handle_return(nvrtc.nvrtcCreateProgram(code.encode(), b"", 0, [], [])) - self._backend = "nvrtc" + self._backend = "NVRTC" self._linker = None elif code_type == "ptx": @@ -391,7 +399,7 @@ def __init__(self, code, code_type, options: ProgramOptions = None): self._linker = Linker( ObjectCode._init(code.encode(), code_type), options=self._translate_program_options(options) ) - self._backend = "linker" + self._backend = self._linker.backend else: raise NotImplementedError @@ -445,9 +453,9 @@ def compile(self, target_type, name_expressions=(), logs=None): """ if target_type not in self._supported_target_type: - raise NotImplementedError + raise ValueError(f"the target type {target_type} is not supported") - if self._backend == "nvrtc": + if self._backend == "NVRTC": if target_type == "ptx" and not self._can_load_generated_ptx(): warn( "The CUDA driver version is older than the backend version. " @@ -489,15 +497,20 @@ def compile(self, target_type, name_expressions=(), logs=None): return ObjectCode._init(data, target_type, symbol_mapping=symbol_mapping) - if self._backend == "linker": - return self._linker.link(target_type) + assert self._backend in ("nvJitLink", "driver") + return self._linker.link(target_type) @property - def backend(self): - """Return the backend type string associated with this program.""" + def backend(self) -> str: + """Return this Program instance's underlying backend.""" return self._backend @property - def handle(self): - """Return the program handle object.""" + def handle(self) -> ProgramHandleT: + """Return the underlying handle object. + + .. note:: + + The type of the returned object depends on the backend. 
+        """
         return self._mnff.handle
diff --git a/cuda_core/cuda/core/experimental/_stream.py b/cuda_core/cuda/core/experimental/_stream.py
index 09d672365..93479340a 100644
--- a/cuda_core/cuda/core/experimental/_stream.py
+++ b/cuda_core/cuda/core/experimental/_stream.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED.
+# Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED.
 #
 # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
 
@@ -11,6 +11,7 @@ from typing import TYPE_CHECKING, Optional, Tuple, Union
 
 if TYPE_CHECKING:
+    import cuda.bindings
     from cuda.core.experimental._device import Device
 from cuda.core.experimental._context import Context
 from cuda.core.experimental._event import Event, EventOptions
@@ -147,9 +148,9 @@ def __cuda_stream__(self) -> Tuple[int, int]:
         return (0, self.handle)
 
     @property
-    def handle(self) -> int:
-        """Return the underlying cudaStream_t pointer address as Python int."""
-        return int(self._mnff.handle)
+    def handle(self) -> cuda.bindings.driver.CUstream:
+        """Return the underlying ``CUstream`` object."""
+        return self._mnff.handle
 
     @property
     def is_nonblocking(self) -> bool:
diff --git a/cuda_core/cuda/core/experimental/include/utility.hpp b/cuda_core/cuda/core/experimental/include/utility.hpp
new file mode 100644
index 000000000..0dd14f713
--- /dev/null
+++ b/cuda_core/cuda/core/experimental/include/utility.hpp
@@ -0,0 +1,23 @@
+// Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED.
+//
+// SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
+
+#pragma once
+
+#include <type_traits>
+
+// In cuda.bindings 12.8, the private member name was renamed from "_ptr" to "_pvt_ptr".
+// We want the C++ layer to support all past 12.x versions, so some tricks are needed.
+// Since there's no std::has_member, we use SFINAE to create the same effect.
+
+template <typename T,
+          std::enable_if_t<std::is_pointer_v<decltype(std::remove_pointer_t<T>::_pvt_ptr)>, int> = 0>
+inline auto& get_cuda_native_handle(const T& obj) {
+    return *(obj->_pvt_ptr);
+}
+
+template <typename T,
+          std::enable_if_t<std::is_pointer_v<decltype(std::remove_pointer_t<T>::_ptr)>, int> = 0>
+inline auto& get_cuda_native_handle(const T& obj) {
+    return *(obj->_ptr);
+}
diff --git a/cuda_core/docs/source/_templates/autosummary/class.rst b/cuda_core/docs/source/_templates/autosummary/class.rst
index b45a3fd5b..25bc697d0 100644
--- a/cuda_core/docs/source/_templates/autosummary/class.rst
+++ b/cuda_core/docs/source/_templates/autosummary/class.rst
@@ -20,7 +20,7 @@
    .. rubric:: {{ _('Attributes') }}
 
    {% for item in attributes %}
-   .. autoattribute:: {{ item }}
+   .. autoproperty:: {{ item }}
 {%- endfor %}
 {% endif %}
 {% endblock %}
diff --git a/cuda_core/docs/source/api_private.rst b/cuda_core/docs/source/api_private.rst
index 45d32808b..b5caaf479 100644
--- a/cuda_core/docs/source/api_private.rst
+++ b/cuda_core/docs/source/api_private.rst
@@ -17,6 +17,7 @@ CUDA runtime
    _stream.Stream
    _event.Event
    _device.DeviceProperties
+   _module.KernelAttributes
 
 
 CUDA compilation toolchain
@@ -26,4 +27,3 @@ CUDA compilation toolchain
    :toctree: generated/
 
    _module.Kernel
-   _module.ObjectCode
diff --git a/cuda_core/docs/source/conf.py b/cuda_core/docs/source/conf.py
index 9e0972e03..d3c89b6ee 100644
--- a/cuda_core/docs/source/conf.py
+++ b/cuda_core/docs/source/conf.py
@@ -95,6 +95,7 @@
 intersphinx_mapping = {
     "python": ("https://docs.python.org/3/", None),
     "numpy": ("https://numpy.org/doc/stable/", None),
+    "cuda.bindings": ("https://nvidia.github.io/cuda-python/cuda-bindings/latest", None),
 }
 
 napoleon_google_docstring = False
diff --git a/cuda_core/docs/source/release/0.2.0-notes.rst b/cuda_core/docs/source/release/0.2.0-notes.rst
index 81e4e1292..39cb586c1 100644
--- a/cuda_core/docs/source/release/0.2.0-notes.rst
+++ b/cuda_core/docs/source/release/0.2.0-notes.rst
@@ -9,20 +9,22 @@ Highlights
 ----------
 
 - Add :class:`~ProgramOptions` to facilitate the passing of runtime compile options to :obj:`~Program`.
-- Add :class:`~DeviceProperties` to provide pythonic access to device properties.
-- Add kernel attributes to :class:`~Kernel`
+- Add pythonic access to :class:`Device` and :class:`~_module.Kernel` attributes.
 
 Breaking Changes
 ----------------
 
-- Change ``__cuda_stream__`` from attribute to method
-- The :meth:`~Program.compile` method no longer accepts the `options` argument. Instead, you can optionally pass an instance of :class:`~ProgramOptions` to the constructor of :obj:`~Program`.
-- :meth:`~Device.properties` now provides an instance of :class:`~DeviceProperties` instead of a dictionary.
+- Change ``__cuda_stream__`` from attribute to method.
+- The :meth:`Program.compile` method no longer accepts the ``options`` argument. Instead, you can optionally pass an instance of :class:`ProgramOptions` to the constructor of :class:`Program`.
+- :meth:`Device.properties` now provides attribute getters instead of a dictionary interface.
+- The ``.handle`` attribute of various ``cuda.core`` objects now returns the underlying Python object instead of a (type-erased) Python integer.
 
 New features
 ------------
 
 - Expose :class:`ObjectCode` as a public API, which allows loading cubins from memory or disk. For loading other kinds of code types, please continue using :class:`Program`.
+- A C++ helper function ``get_cuda_native_handle()`` is provided in the new ``include/utility.hpp`` header to retrieve the underlying CUDA C objects (e.g. ``CUstream``) from a Python object returned by the ``.handle`` attribute (e.g. :attr:`Stream.handle`).
+- For objects such as :class:`Program` and :class:`Linker` that could dispatch to different backends, a new ``.backend`` attribute is provided to query this information.
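A minimal Python sketch of the ``.handle``/``.backend`` changes described above (assuming a CUDA-capable environment and the NVRTC backend); it only mirrors assertions added to `tests/test_program.py` and `tests/test_module.py` later in this patch, without introducing any new API:

```python
from cuda.core.experimental import Device, Program

Device(0).set_current()
prog = Program('extern "C" __global__ void noop() {}', "c++")
assert prog.backend == "NVRTC"      # new .backend attribute
assert prog.handle is not None      # now an nvrtc.nvrtcProgram object, not an int
mod = prog.compile("ptx")
assert isinstance(mod.code, bytes)  # new ObjectCode.code property
prog.close()
```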
Limitations ----------- diff --git a/cuda_core/pyproject.toml b/cuda_core/pyproject.toml index 8d7bc74a9..74820761c 100644 --- a/cuda_core/pyproject.toml +++ b/cuda_core/pyproject.toml @@ -59,6 +59,9 @@ issues = "https://github.com/NVIDIA/cuda-python/issues/" [tool.setuptools.packages.find] include = ["cuda.core*"] +[tool.setuptools.package-data] +"cuda.core.experimental.include" = ["*.h", "*.hpp", "*.cuh"] + [tool.setuptools.dynamic] version = { attr = "cuda.core._version.__version__" } readme = { file = ["DESCRIPTION.rst"], content-type = "text/x-rst" } diff --git a/cuda_core/tests/cython/build_tests.sh b/cuda_core/tests/cython/build_tests.sh new file mode 100755 index 000000000..d85ccec77 --- /dev/null +++ b/cuda_core/tests/cython/build_tests.sh @@ -0,0 +1,4 @@ +#!/bin/bash + +SCRIPTPATH=$(dirname $(realpath "$0")) +CPLUS_INCLUDE_PATH=$SCRIPTPATH/../../cuda/core/experimental/include:$CUDA_HOME/include:$CPLUS_INCLUDE_PATH cythonize -3 -i $(dirname "$0")/test_*.pyx diff --git a/cuda_core/tests/cython/test_cython.py b/cuda_core/tests/cython/test_cython.py new file mode 100644 index 000000000..dc887d656 --- /dev/null +++ b/cuda_core/tests/cython/test_cython.py @@ -0,0 +1,39 @@ +# Copyright (c) 2021-2025, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. +# +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE + +import functools +import importlib +import sys + + +def py_func(func): + """ + Wraps func in a plain Python function. + """ + + @functools.wraps(func) + def wrapped(*args, **kwargs): + return func(*args, **kwargs) + + return wrapped + + +cython_test_modules = [ + "test_get_cuda_native_handle", +] + + +for mod in cython_test_modules: + try: + # For each callable in `mod` with name `test_*`, + # wrap the callable in a plain Python function + # and set the result as an attribute of this module. + mod = importlib.import_module(mod) + for name in dir(mod): + item = getattr(mod, name) + if callable(item) and name.startswith("test_"): + item = py_func(item) + setattr(sys.modules[__name__], name, item) + except ImportError: + raise diff --git a/cuda_core/tests/cython/test_get_cuda_native_handle.pyx b/cuda_core/tests/cython/test_get_cuda_native_handle.pyx new file mode 100644 index 000000000..efc5c9fef --- /dev/null +++ b/cuda_core/tests/cython/test_get_cuda_native_handle.pyx @@ -0,0 +1,45 @@ +# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. 
+#
+# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
+
+# distutils: language = c++
+# distutils: extra_compile_args = -std=c++17
+
+from libc.stdint cimport intptr_t
+
+from cuda.bindings.driver cimport (CUstream as pyCUstream,
+                                   CUevent as pyCUevent)
+from cuda.bindings.nvrtc cimport nvrtcProgram as pynvrtcProgram
+from cuda.bindings.cydriver cimport CUstream, CUevent
+from cuda.bindings.cynvrtc cimport nvrtcProgram
+
+from cuda.core.experimental import Device, Program
+
+
+cdef extern from "utility.hpp":
+    void* get_cuda_native_handle[T](T)
+
+
+def test_get_cuda_native_handle():
+    dev = Device(0)
+    dev.set_current()
+
+    s = dev.create_stream()
+    cdef pyCUstream s_py = s.handle
+    cdef CUstream s_c = <CUstream>get_cuda_native_handle(s_py)
+    assert <intptr_t>(s_c) == <intptr_t>(int(s_py))
+
+    e = s.record()
+    cdef pyCUevent e_py = e.handle
+    cdef CUevent e_c = <CUevent>get_cuda_native_handle(e_py)
+    assert <intptr_t>(e_c) == <intptr_t>(int(e_py))
+
+    prog = Program("extern \"C\" __global__ void dummy() {}", "c++")
+    assert prog.backend == "NVRTC"
+    cdef pynvrtcProgram prog_py = prog.handle
+    cdef nvrtcProgram prog_c = <nvrtcProgram>get_cuda_native_handle(prog_py)
+    assert <intptr_t>(prog_c) == <intptr_t>(int(prog_py))
+
+    prog.close()
+    e.close()
+    s.close()
diff --git a/cuda_core/tests/example_tests/test_basic_examples.py b/cuda_core/tests/example_tests/test_basic_examples.py
index 9a9432cb8..f085e3c61 100644
--- a/cuda_core/tests/example_tests/test_basic_examples.py
+++ b/cuda_core/tests/example_tests/test_basic_examples.py
@@ -13,6 +13,8 @@
 
 import pytest
 
+from cuda.core.experimental import Device
+
 from .utils import run_example
 
 samples_path = os.path.join(os.path.dirname(__file__), "..", "..", "examples")
@@ -23,3 +25,5 @@ class TestExamples:
     def test_example(self, example, deinit_cuda):
         run_example(samples_path, example)
+        if Device().device_id != 0:
+            Device(0).set_current()
diff --git a/cuda_core/tests/pytest.ini b/cuda_core/tests/pytest.ini
new file mode 100644
index 000000000..b703c9832
--- /dev/null
+++ b/cuda_core/tests/pytest.ini
@@ -0,0 +1,6 @@
+# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED.
+#
+# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
+
+[pytest]
+norecursedirs = cython
diff --git a/cuda_core/tests/test_linker.py b/cuda_core/tests/test_linker.py
index 732f70c32..59e8ca573 100644
--- a/cuda_core/tests/test_linker.py
+++ b/cuda_core/tests/test_linker.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED.
+# Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED.
# # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE @@ -17,8 +17,8 @@ device_function_b = "__device__ int B() { return 0; }" device_function_c = "__device__ int C(int a, int b) { return a + b; }" -culink_backend = _linker._decide_nvjitlink_or_driver() -if not culink_backend: +is_culink_backend = _linker._decide_nvjitlink_or_driver() +if not is_culink_backend: from cuda.bindings import nvjitlink @@ -54,7 +54,7 @@ def compile_ltoir_functions(init_cuda): LinkerOptions(arch=ARCH, debug=True), LinkerOptions(arch=ARCH, lineinfo=True), ] -if not culink_backend: +if not is_culink_backend: options += [ LinkerOptions(arch=ARCH, time=True), LinkerOptions(arch=ARCH, optimize_unused_variables=True), @@ -85,16 +85,17 @@ def test_linker_init(compile_ptx_functions, options): linker = Linker(*compile_ptx_functions, options=options) object_code = linker.link("cubin") assert isinstance(object_code, ObjectCode) + assert linker.backend == ("driver" if is_culink_backend else "nvJitLink") def test_linker_init_invalid_arch(compile_ptx_functions): - err = AttributeError if culink_backend else nvjitlink.nvJitLinkError + err = AttributeError if is_culink_backend else nvjitlink.nvJitLinkError with pytest.raises(err): options = LinkerOptions(arch="99", ptx=True) Linker(*compile_ptx_functions, options=options) -@pytest.mark.skipif(culink_backend, reason="culink does not support ptx option") +@pytest.mark.skipif(is_culink_backend, reason="culink does not support ptx option") def test_linker_link_ptx_nvjitlink(compile_ltoir_functions): options = LinkerOptions(arch=ARCH, link_time_optimization=True, ptx=True) linker = Linker(*compile_ltoir_functions, options=options) @@ -102,7 +103,7 @@ def test_linker_link_ptx_nvjitlink(compile_ltoir_functions): assert isinstance(linked_code, ObjectCode) -@pytest.mark.skipif(not culink_backend, reason="nvjitlink requires lto for ptx linking") +@pytest.mark.skipif(not is_culink_backend, reason="nvjitlink requires lto for ptx linking") def test_linker_link_ptx_culink(compile_ptx_functions): options = LinkerOptions(arch=ARCH) linker = Linker(*compile_ptx_functions, options=options) diff --git a/cuda_core/tests/test_module.py b/cuda_core/tests/test_module.py index 355a0c49a..8528c4d53 100644 --- a/cuda_core/tests/test_module.py +++ b/cuda_core/tests/test_module.py @@ -96,6 +96,7 @@ def test_object_code_load_cubin(get_saxpy_kernel): sym_map = mod._sym_map assert isinstance(cubin, bytes) mod = ObjectCode.from_cubin(cubin, symbol_mapping=sym_map) + assert mod.code == cubin mod.get_kernel("saxpy") # force loading @@ -107,4 +108,5 @@ def test_object_code_load_cubin_from_file(get_saxpy_kernel, tmp_path): cubin_file = tmp_path / "test.cubin" cubin_file.write_bytes(cubin) mod = ObjectCode.from_cubin(str(cubin_file), symbol_mapping=sym_map) + assert mod.code == str(cubin_file) mod.get_kernel("saxpy") # force loading diff --git a/cuda_core/tests/test_program.py b/cuda_core/tests/test_program.py index 8d2ecd1ab..05132a201 100644 --- a/cuda_core/tests/test_program.py +++ b/cuda_core/tests/test_program.py @@ -1,10 +1,6 @@ -# Copyright 2024 NVIDIA Corporation. All rights reserved. +# Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. # -# Please refer to the NVIDIA end user license agreement (EULA) associated -# with this source code for terms and conditions that govern your use of -# this software. Any use, reproduction, disclosure, or distribution of -# this software and related documentation outside the terms of the EULA -# is strictly prohibited. 
+# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE import warnings @@ -14,6 +10,8 @@ from cuda.core.experimental._module import Kernel, ObjectCode from cuda.core.experimental._program import Program, ProgramOptions +is_culink_backend = _linker._decide_nvjitlink_or_driver() + @pytest.fixture(scope="module") def ptx_code_object(): @@ -50,7 +48,7 @@ def ptx_code_object(): def test_cpp_program_with_various_options(init_cuda, options): code = 'extern "C" __global__ void my_kernel() {}' program = Program(code, "c++", options) - assert program.backend == "nvrtc" + assert program.backend == "NVRTC" program.compile("ptx") program.close() assert program.handle is None @@ -65,8 +63,7 @@ def test_cpp_program_with_various_options(init_cuda, options): ProgramOptions(prec_sqrt=True), ProgramOptions(fma=True), ] -if not _linker._decide_nvjitlink_or_driver(): - print("Using nvjitlink as the backend because decide() returned false") +if not is_culink_backend: options += [ ProgramOptions(time=True), ProgramOptions(split_compile=True), @@ -76,7 +73,7 @@ def test_cpp_program_with_various_options(init_cuda, options): @pytest.mark.parametrize("options", options) def test_ptx_program_with_various_options(init_cuda, ptx_code_object, options): program = Program(ptx_code_object._module.decode(), "ptx", options=options) - assert program.backend == "linker" + assert program.backend == ("driver" if is_culink_backend else "nvJitLink") program.compile("cubin") program.close() assert program.handle is None @@ -85,7 +82,7 @@ def test_ptx_program_with_various_options(init_cuda, ptx_code_object, options): def test_program_init_valid_code_type(): code = 'extern "C" __global__ void my_kernel() {}' program = Program(code, "c++") - assert program.backend == "nvrtc" + assert program.backend == "NVRTC" assert program.handle is not None @@ -125,14 +122,14 @@ def test_program_compile_valid_target_type(init_cuda): def test_program_compile_invalid_target_type(): code = 'extern "C" __global__ void my_kernel() {}' program = Program(code, "c++") - with pytest.raises(NotImplementedError): + with pytest.raises(ValueError): program.compile("invalid_target") def test_program_backend_property(): code = 'extern "C" __global__ void my_kernel() {}' program = Program(code, "c++") - assert program.backend == "nvrtc" + assert program.backend == "NVRTC" def test_program_handle_property(): diff --git a/cuda_core/tests/test_stream.py b/cuda_core/tests/test_stream.py index 9c661192f..e8b59b34f 100644 --- a/cuda_core/tests/test_stream.py +++ b/cuda_core/tests/test_stream.py @@ -11,6 +11,7 @@ from cuda.core.experimental import Device, Stream, StreamOptions from cuda.core.experimental._event import Event from cuda.core.experimental._stream import LEGACY_DEFAULT_STREAM, PER_THREAD_DEFAULT_STREAM, default_stream +from cuda.core.experimental._utils import driver def test_stream_init(): @@ -26,7 +27,7 @@ def test_stream_init_with_options(init_cuda): def test_stream_handle(init_cuda): stream = Device().create_stream(options=StreamOptions()) - assert isinstance(stream.handle, int) + assert isinstance(stream.handle, driver.CUstream) def test_stream_is_nonblocking(init_cuda): @@ -90,7 +91,8 @@ def test_stream_from_foreign_stream(init_cuda): device = Device() other_stream = device.create_stream(options=StreamOptions()) stream = device.create_stream(obj=other_stream) - assert other_stream.handle == stream.handle + # convert to int to work around NVIDIA/cuda-python#465 + assert int(other_stream.handle) == int(stream.handle) device = stream.device assert 
isinstance(device, Device) context = stream.context
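Taken together, the handle-typing changes above mean that ``.handle`` now returns driver objects such as ``CUstream`` and ``CUevent`` rather than integers. A short summary sketch (assuming a CUDA-capable GPU is present), using only behavior exercised by the tests in this patch:

```python
from cuda.core.experimental import Device
from cuda.core.experimental._utils import driver

dev = Device(0)
dev.set_current()
stream = dev.create_stream()
event = stream.record()

assert isinstance(stream.handle, driver.CUstream)  # previously a plain int
assert isinstance(event.handle, driver.CUevent)    # previously a plain int

# Raw pointer addresses are still available via explicit conversion:
stream_addr = int(stream.handle)
event_addr = int(event.handle)
```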