diff --git a/.github/workflows/test-wheel-linux.yml b/.github/workflows/test-wheel-linux.yml index c15de07e5..322f859e3 100644 --- a/.github/workflows/test-wheel-linux.yml +++ b/.github/workflows/test-wheel-linux.yml @@ -62,8 +62,16 @@ jobs: TEST_CUDA_MAJOR="$(cut -d '.' -f 1 <<< ${{ inputs.cuda-version }})" if [[ $BUILD_CUDA_MAJOR != $TEST_CUDA_MAJOR ]]; then SKIP_CUDA_BINDINGS_TEST=1 + SKIP_CUDA_CORE_CYTHON_TEST=0 else SKIP_CUDA_BINDINGS_TEST=0 + BUILD_CUDA_MINOR="$(cut -d '.' -f 2 <<< ${{ inputs.build-ctk-ver }})" + TEST_CUDA_MINOR="$(cut -d '.' -f 2 <<< ${{ inputs.cuda-version }})" + if [[ $BUILD_CUDA_MINOR != $TEST_CUDA_MINOR ]]; then + SKIP_CUDA_CORE_CYTHON_TEST=1 + else + SKIP_CUDA_CORE_CYTHON_TEST=0 + fi fi # make outputs from the previous job as env vars @@ -77,6 +85,7 @@ jobs: echo "CUDA_BINDINGS_ARTIFACT_NAME=${CUDA_BINDINGS_ARTIFACT_BASENAME}-${{ github.sha }}" >> $GITHUB_ENV echo "CUDA_BINDINGS_ARTIFACTS_DIR=$(realpath "$REPO_DIR/cuda_bindings/dist")" >> $GITHUB_ENV echo "SKIP_CUDA_BINDINGS_TEST=${SKIP_CUDA_BINDINGS_TEST}" >> $GITHUB_ENV + echo "SKIP_CUDA_CORE_CYTHON_TEST=${SKIP_CUDA_CORE_CYTHON_TEST}" >> $GITHUB_ENV - name: Install dependencies uses: ./.github/actions/install_unix_deps @@ -185,7 +194,7 @@ jobs: pushd ./cuda_bindings pip install -r requirements.txt - pytest -rxXs tests/ + pytest -rxXs -v tests/ # It is a bit convoluted to run the Cython tests against CTK wheels, # so let's just skip them. @@ -196,9 +205,9 @@ jobs: # TODO: enable this once win-64 runners are up exit 1 fi - pytest -rxXs tests/cython - popd + pytest -rxXs -v tests/cython fi + popd - name: Run cuda.core tests run: | @@ -220,7 +229,22 @@ jobs: pushd ./cuda_core pip install -r "tests/requirements-cu${TEST_CUDA_MAJOR}.txt" - pytest -rxXs tests/ + pytest -rxXs -v tests/ + + # It is a bit convoluted to run the Cython tests against CTK wheels, + # so let's just skip them. Also, currently our CI always installs the + # latest bindings (from either major version). This is not compatible + # with the test requirements. + if [[ "${{ inputs.local-ctk }}" == 1 && "${SKIP_CUDA_CORE_CYTHON_TEST}" == 0 ]]; then + pip install cython setuptools # setuptools needed starting PY312 + if [[ "${{ inputs.host-platform }}" == linux* ]]; then + bash tests/cython/build_tests.sh + elif [[ "${{ inputs.host-platform }}" == win* ]]; then + # TODO: enable this once win-64 runners are up + exit 1 + fi + pytest -rxXs -v tests/cython + fi popd - name: Ensure cuda-python installable diff --git a/.github/workflows/test-wheel-windows.yml b/.github/workflows/test-wheel-windows.yml index 2312085df..4e48590a3 100644 --- a/.github/workflows/test-wheel-windows.yml +++ b/.github/workflows/test-wheel-windows.yml @@ -186,8 +186,8 @@ jobs: Push-Location ./cuda_bindings pip install -r requirements.txt - pytest -rxXs tests/ - # skip Cython tests for now + pytest -rxXs -v tests/ + # skip Cython tests for now (NVIDIA/cuda-python#466) Pop-Location - name: Run cuda.core tests @@ -210,7 +210,7 @@ jobs: Push-Location ./cuda_core pip install -r "tests/requirements-cu${TEST_CUDA_MAJOR}.txt" - pytest -rxXs tests/ + pytest -rxXs -v tests/ Pop-Location - name: Ensure cuda-python installable diff --git a/cuda_core/README.md b/cuda_core/README.md index 534cdb8fe..3743f8fae 100644 --- a/cuda_core/README.md +++ b/cuda_core/README.md @@ -41,3 +41,14 @@ for more details, including how to sign your commits. 
 To run these tests:
 * `python -m pytest tests/` against editable installations
 * `pytest tests/` against installed packages
+
+### Cython Unit Tests
+
+Cython tests are located in `tests/cython` and need to be built. These builds have the same CUDA Toolkit header requirements as [those of cuda.bindings](https://nvidia.github.io/cuda-python/cuda-bindings/latest/install.html#requirements): the CUDA Toolkit's major.minor version must match that of the installed `cuda.bindings`. To build them:
+
+1. Set the environment variable `CUDA_HOME` to the path of the CUDA Toolkit installation.
+2. Run the `build_tests` script in `tests/cython` appropriate to your platform. This both cythonizes the tests and builds them.
+
+To run these tests:
+* `python -m pytest tests/cython/` against editable installations
+* `pytest tests/cython/` against installed packages
diff --git a/cuda_core/cuda/core/experimental/_event.py b/cuda_core/cuda/core/experimental/_event.py
index 06005f95a..3b269179b 100644
--- a/cuda_core/cuda/core/experimental/_event.py
+++ b/cuda_core/cuda/core/experimental/_event.py
@@ -1,13 +1,18 @@
-# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED.
+# Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED.
 #
 # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
 
+from __future__ import annotations
+
 import weakref
 from dataclasses import dataclass
-from typing import Optional
+from typing import TYPE_CHECKING, Optional
 
 from cuda.core.experimental._utils import CUDAError, check_or_create_options, driver, handle_return
 
+if TYPE_CHECKING:
+    import cuda.bindings
+
 
 @dataclass
 class EventOptions:
@@ -130,6 +135,6 @@ def is_done(self) -> bool:
         raise CUDAError(f"unexpected error: {result}")
 
     @property
-    def handle(self) -> int:
-        """Return the underlying cudaEvent_t pointer address as Python int."""
-        return int(self._mnff.handle)
+    def handle(self) -> cuda.bindings.driver.CUevent:
+        """Return the underlying CUevent object."""
+        return self._mnff.handle
diff --git a/cuda_core/cuda/core/experimental/_linker.py b/cuda_core/cuda/core/experimental/_linker.py
index 2f84f3502..43d1eb3c6 100644
--- a/cuda_core/cuda/core/experimental/_linker.py
+++ b/cuda_core/cuda/core/experimental/_linker.py
@@ -1,14 +1,19 @@
-# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED.
+# Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED.
 #
 # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
 
+from __future__ import annotations
+
 import ctypes
 import weakref
 from contextlib import contextmanager
 from dataclasses import dataclass
-from typing import List, Optional, Tuple, Union
+from typing import TYPE_CHECKING, List, Optional, Tuple, Union
 from warnings import warn
 
+if TYPE_CHECKING:
+    import cuda.bindings
+
 from cuda.core.experimental._device import Device
 from cuda.core.experimental._module import ObjectCode
 from cuda.core.experimental._utils import check_or_create_options, driver, handle_return, is_sequence
@@ -323,6 +328,10 @@ def _exception_manager(self):
         raise e
 
 
+nvJitLinkHandleT = int
+LinkerHandleT = Union[nvJitLinkHandleT, "cuda.bindings.driver.CUlinkState"]
+
+
 class Linker:
     """Represent a linking machinery to link one or multiple object codes into
     :obj:`~cuda.core.experimental._module.ObjectCode` with the specified options.
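As a quick illustration of the `LinkerHandleT` alias added above, together with the `Linker.handle` and `Linker.backend` properties introduced in the next hunk, here is a minimal sketch. The `describe_linker` helper is hypothetical and not part of the patch; it assumes an nvJitLink handle is a plain `int` (per `nvJitLinkHandleT`) and that a `CUlinkState` can be converted with `int()`, as the tests in this patch do.

```python
from cuda.core.experimental import Linker


def describe_linker(linker: Linker) -> str:
    # LinkerHandleT: an int for the nvJitLink backend, or driver.CUlinkState
    # for the driver (culink) backend.
    handle = linker.handle
    if linker.backend == "nvJitLink":
        return f"nvJitLink handle: {handle:#x}"
    # The typed CUlinkState object must be converted explicitly to get an address.
    return f"driver CUlinkState: {int(handle):#x}"
```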
@@ -483,10 +492,20 @@ def _input_type_from_code_type(self, code_type: str): return input_type @property - def handle(self): - """Return the linker handle object.""" + def handle(self) -> LinkerHandleT: + """Return the underlying handle object. + + .. note:: + + The type of the returned object depends on the backend. + """ return self._mnff.handle + @property + def backend(self) -> str: + """Return this Linker instance's underlying backend.""" + return "nvJitLink" if self._mnff.use_nvjitlink else "driver" + def close(self): """Destroy this linker.""" self._mnff.close() diff --git a/cuda_core/cuda/core/experimental/_memoryview.pyx b/cuda_core/cuda/core/experimental/_memoryview.pyx index feca22782..7ebfa4806 100644 --- a/cuda_core/cuda/core/experimental/_memoryview.pyx +++ b/cuda_core/cuda/core/experimental/_memoryview.pyx @@ -177,11 +177,11 @@ cdef StridedMemoryView view_as_dlpack(obj, stream_ptr, view=None): cdef object capsule try: capsule = obj.__dlpack__( - stream=stream_ptr, + stream=int(stream_ptr) if stream_ptr else None, max_version=(DLPACK_MAJOR_VERSION, DLPACK_MINOR_VERSION)) except TypeError: capsule = obj.__dlpack__( - stream=stream_ptr) + stream=int(stream_ptr) if stream_ptr else None) cdef void* data = NULL if cpython.PyCapsule_IsValid( diff --git a/cuda_core/cuda/core/experimental/_module.py b/cuda_core/cuda/core/experimental/_module.py index c4535c279..041602691 100644 --- a/cuda_core/cuda/core/experimental/_module.py +++ b/cuda_core/cuda/core/experimental/_module.py @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. +# Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. # # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE @@ -203,7 +203,7 @@ def _from_obj(obj, mod): return ker @property - def attributes(self): + def attributes(self) -> KernelAttributes: """Get the read-only attributes of this kernel.""" if self._attributes is None: self._attributes = KernelAttributes._init(self._handle) @@ -212,6 +212,9 @@ def attributes(self): # TODO: implement from_handle() +CodeTypeT = Union[bytes, bytearray, str] + + class ObjectCode: """Represent a compiled program to be loaded onto the device. @@ -294,7 +297,7 @@ def _lazy_load_module(self, *args, **kwargs): self._handle = handle_return(self._loader["data"](module, 0, [], [])) @precondition(_lazy_load_module) - def get_kernel(self, name): + def get_kernel(self, name) -> Kernel: """Return the :obj:`~_module.Kernel` of a specified name from this object code. 
Parameters @@ -317,3 +320,8 @@ def get_kernel(self, name): data = handle_return(self._loader["kernel"](self._handle, name)) return Kernel._from_obj(data, self) + + @property + def code(self) -> CodeTypeT: + """Return the underlying code object.""" + return self._module diff --git a/cuda_core/cuda/core/experimental/_program.py b/cuda_core/cuda/core/experimental/_program.py index 1bb48afc9..662add23c 100644 --- a/cuda_core/cuda/core/experimental/_program.py +++ b/cuda_core/cuda/core/experimental/_program.py @@ -2,13 +2,18 @@ # # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE +from __future__ import annotations + import weakref from dataclasses import dataclass -from typing import List, Optional, Tuple, Union +from typing import TYPE_CHECKING, List, Optional, Tuple, Union from warnings import warn +if TYPE_CHECKING: + import cuda.bindings + from cuda.core.experimental._device import Device -from cuda.core.experimental._linker import Linker, LinkerOptions +from cuda.core.experimental._linker import Linker, LinkerHandleT, LinkerOptions from cuda.core.experimental._module import ObjectCode from cuda.core.experimental._utils import ( _handle_boolean_option, @@ -331,6 +336,9 @@ def __repr__(self): return self._formatted_options +ProgramHandleT = Union["cuda.bindings.nvrtc.nvrtcProgram", LinkerHandleT] + + class Program: """Represent a compilation machinery to process programs into :obj:`~_module.ObjectCode`. @@ -382,7 +390,7 @@ def __init__(self, code, code_type, options: ProgramOptions = None): # TODO: allow tuples once NVIDIA/cuda-python#72 is resolved self._mnff.handle = handle_return(nvrtc.nvrtcCreateProgram(code.encode(), b"", 0, [], [])) - self._backend = "nvrtc" + self._backend = "NVRTC" self._linker = None elif code_type == "ptx": @@ -391,7 +399,7 @@ def __init__(self, code, code_type, options: ProgramOptions = None): self._linker = Linker( ObjectCode._init(code.encode(), code_type), options=self._translate_program_options(options) ) - self._backend = "linker" + self._backend = self._linker.backend else: raise NotImplementedError @@ -445,9 +453,9 @@ def compile(self, target_type, name_expressions=(), logs=None): """ if target_type not in self._supported_target_type: - raise NotImplementedError + raise ValueError(f"the target type {target_type} is not supported") - if self._backend == "nvrtc": + if self._backend == "NVRTC": if target_type == "ptx" and not self._can_load_generated_ptx(): warn( "The CUDA driver version is older than the backend version. " @@ -489,15 +497,20 @@ def compile(self, target_type, name_expressions=(), logs=None): return ObjectCode._init(data, target_type, symbol_mapping=symbol_mapping) - if self._backend == "linker": - return self._linker.link(target_type) + assert self._backend in ("nvJitLink", "driver") + return self._linker.link(target_type) @property - def backend(self): - """Return the backend type string associated with this program.""" + def backend(self) -> str: + """Return this Program instance's underlying backend.""" return self._backend @property - def handle(self): - """Return the program handle object.""" + def handle(self) -> ProgramHandleT: + """Return the underlying handle object. + + .. note:: + + The type of the returned object depends on the backend. 
+        """
         return self._mnff.handle
diff --git a/cuda_core/cuda/core/experimental/_stream.py b/cuda_core/cuda/core/experimental/_stream.py
index 09d672365..93479340a 100644
--- a/cuda_core/cuda/core/experimental/_stream.py
+++ b/cuda_core/cuda/core/experimental/_stream.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED.
+# Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED.
 #
 # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
 
@@ -11,6 +11,7 @@ from typing import TYPE_CHECKING, Optional, Tuple, Union
 
 if TYPE_CHECKING:
+    import cuda.bindings
     from cuda.core.experimental._device import Device
 from cuda.core.experimental._context import Context
 from cuda.core.experimental._event import Event, EventOptions
@@ -147,9 +148,9 @@ def __cuda_stream__(self) -> Tuple[int, int]:
         return (0, self.handle)
 
     @property
-    def handle(self) -> int:
-        """Return the underlying cudaStream_t pointer address as Python int."""
-        return int(self._mnff.handle)
+    def handle(self) -> cuda.bindings.driver.CUstream:
+        """Return the underlying ``CUstream`` object."""
+        return self._mnff.handle
 
     @property
     def is_nonblocking(self) -> bool:
diff --git a/cuda_core/cuda/core/experimental/include/utility.hpp b/cuda_core/cuda/core/experimental/include/utility.hpp
new file mode 100644
index 000000000..0dd14f713
--- /dev/null
+++ b/cuda_core/cuda/core/experimental/include/utility.hpp
@@ -0,0 +1,23 @@
+// Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED.
+//
+// SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
+
+#pragma once
+
+#include <type_traits>
+
+// In cuda.bindings 12.8, the private member name was renamed from "_ptr" to "_pvt_ptr".
+// We want the C++ layer to support all past 12.x versions, so some tricks are needed.
+// Since there's no std::has_member, we use SFINAE to create the same effect.
+
+template <typename T,
+          std::enable_if_t<std::is_pointer_v<decltype(std::remove_pointer_t<T>::_pvt_ptr)>, int> = 0>
+inline auto& get_cuda_native_handle(const T& obj) {
+    return *(obj->_pvt_ptr);
+}
+
+template <typename T,
+          std::enable_if_t<std::is_pointer_v<decltype(std::remove_pointer_t<T>::_ptr)>, int> = 0>
+inline auto& get_cuda_native_handle(const T& obj) {
+    return *(obj->_ptr);
+}
diff --git a/cuda_core/docs/source/_templates/autosummary/class.rst b/cuda_core/docs/source/_templates/autosummary/class.rst
index b45a3fd5b..25bc697d0 100644
--- a/cuda_core/docs/source/_templates/autosummary/class.rst
+++ b/cuda_core/docs/source/_templates/autosummary/class.rst
@@ -20,7 +20,7 @@
    .. rubric:: {{ _('Attributes') }}
 
    {% for item in attributes %}
-   .. autoattribute:: {{ item }}
+   .. autoproperty:: {{ item }}
 {%- endfor %}
 {% endif %}
 {% endblock %}
diff --git a/cuda_core/docs/source/api_private.rst b/cuda_core/docs/source/api_private.rst
index 45d32808b..b5caaf479 100644
--- a/cuda_core/docs/source/api_private.rst
+++ b/cuda_core/docs/source/api_private.rst
@@ -17,6 +17,7 @@ CUDA runtime
    _stream.Stream
    _event.Event
    _device.DeviceProperties
+   _module.KernelAttributes
 
 
 CUDA compilation toolchain
@@ -26,4 +27,3 @@ CUDA compilation toolchain
    :toctree: generated/
 
    _module.Kernel
-   _module.ObjectCode
diff --git a/cuda_core/docs/source/conf.py b/cuda_core/docs/source/conf.py
index 9e0972e03..d3c89b6ee 100644
--- a/cuda_core/docs/source/conf.py
+++ b/cuda_core/docs/source/conf.py
@@ -95,6 +95,7 @@
 intersphinx_mapping = {
     "python": ("https://docs.python.org/3/", None),
     "numpy": ("https://numpy.org/doc/stable/", None),
+    "cuda.bindings": ("https://nvidia.github.io/cuda-python/cuda-bindings/latest", None),
 }
 
 napoleon_google_docstring = False
diff --git a/cuda_core/docs/source/release/0.2.0-notes.rst b/cuda_core/docs/source/release/0.2.0-notes.rst
index 81e4e1292..39cb586c1 100644
--- a/cuda_core/docs/source/release/0.2.0-notes.rst
+++ b/cuda_core/docs/source/release/0.2.0-notes.rst
@@ -9,20 +9,22 @@ Highlights
 ----------
 
 - Add :class:`~ProgramOptions` to facilitate the passing of runtime compile options to :obj:`~Program`.
-- Add :class:`~DeviceProperties` to provide pythonic access to device properties.
-- Add kernel attributes to :class:`~Kernel`
+- Add pythonic access to :class:`Device` and :class:`~_module.Kernel` attributes.
 
 Breaking Changes
 ----------------
 
-- Change ``__cuda_stream__`` from attribute to method
-- The :meth:`~Program.compile` method no longer accepts the `options` argument. Instead, you can optionally pass an instance of :class:`~ProgramOptions` to the constructor of :obj:`~Program`.
-- :meth:`~Device.properties` now provides an instance of :class:`~DeviceProperties` instead of a dictionary.
+- Change ``__cuda_stream__`` from attribute to method.
+- The :meth:`Program.compile` method no longer accepts the ``options`` argument. Instead, you can optionally pass an instance of :class:`ProgramOptions` to the constructor of :class:`Program`.
+- :meth:`Device.properties` now provides attribute getters instead of a dictionary interface.
+- The ``.handle`` attribute of various ``cuda.core`` objects now returns the underlying Python object instead of a (type-erased) Python integer.
 
 New features
 ------------
 
 - Expose :class:`ObjectCode` as a public API, which allows loading cubins from memory or disk. For loading other kinds of code types, please continue using :class:`Program`.
+- A C++ helper function ``get_cuda_native_handle()`` is provided in the new ``include/utility.hpp`` header to retrieve the underlying CUDA C objects (e.g. ``CUstream``) from a Python object returned by the ``.handle`` attribute (e.g. :attr:`Stream.handle`).
+- For objects such as :class:`Program` and :class:`Linker` that could dispatch to different backends, a new ``.backend`` attribute is provided to query this information.
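A minimal Python sketch of the ``.handle``/``.backend`` changes described above (assuming a CUDA-capable environment and the NVRTC backend); it only mirrors assertions added to `tests/test_program.py` and `tests/test_module.py` later in this patch, without introducing any new API:

```python
from cuda.core.experimental import Device, Program

Device(0).set_current()
prog = Program('extern "C" __global__ void noop() {}', "c++")
assert prog.backend == "NVRTC"      # new .backend attribute
assert prog.handle is not None      # now an nvrtc.nvrtcProgram object, not an int
mod = prog.compile("ptx")
assert isinstance(mod.code, bytes)  # new ObjectCode.code property
prog.close()
```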
Limitations ----------- diff --git a/cuda_core/pyproject.toml b/cuda_core/pyproject.toml index 8d7bc74a9..74820761c 100644 --- a/cuda_core/pyproject.toml +++ b/cuda_core/pyproject.toml @@ -59,6 +59,9 @@ issues = "https://github.com/NVIDIA/cuda-python/issues/" [tool.setuptools.packages.find] include = ["cuda.core*"] +[tool.setuptools.package-data] +"cuda.core.experimental.include" = ["*.h", "*.hpp", "*.cuh"] + [tool.setuptools.dynamic] version = { attr = "cuda.core._version.__version__" } readme = { file = ["DESCRIPTION.rst"], content-type = "text/x-rst" } diff --git a/cuda_core/tests/cython/build_tests.sh b/cuda_core/tests/cython/build_tests.sh new file mode 100755 index 000000000..d85ccec77 --- /dev/null +++ b/cuda_core/tests/cython/build_tests.sh @@ -0,0 +1,4 @@ +#!/bin/bash + +SCRIPTPATH=$(dirname $(realpath "$0")) +CPLUS_INCLUDE_PATH=$SCRIPTPATH/../../cuda/core/experimental/include:$CUDA_HOME/include:$CPLUS_INCLUDE_PATH cythonize -3 -i $(dirname "$0")/test_*.pyx diff --git a/cuda_core/tests/cython/test_cython.py b/cuda_core/tests/cython/test_cython.py new file mode 100644 index 000000000..dc887d656 --- /dev/null +++ b/cuda_core/tests/cython/test_cython.py @@ -0,0 +1,39 @@ +# Copyright (c) 2021-2025, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. +# +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE + +import functools +import importlib +import sys + + +def py_func(func): + """ + Wraps func in a plain Python function. + """ + + @functools.wraps(func) + def wrapped(*args, **kwargs): + return func(*args, **kwargs) + + return wrapped + + +cython_test_modules = [ + "test_get_cuda_native_handle", +] + + +for mod in cython_test_modules: + try: + # For each callable in `mod` with name `test_*`, + # wrap the callable in a plain Python function + # and set the result as an attribute of this module. + mod = importlib.import_module(mod) + for name in dir(mod): + item = getattr(mod, name) + if callable(item) and name.startswith("test_"): + item = py_func(item) + setattr(sys.modules[__name__], name, item) + except ImportError: + raise diff --git a/cuda_core/tests/cython/test_get_cuda_native_handle.pyx b/cuda_core/tests/cython/test_get_cuda_native_handle.pyx new file mode 100644 index 000000000..efc5c9fef --- /dev/null +++ b/cuda_core/tests/cython/test_get_cuda_native_handle.pyx @@ -0,0 +1,45 @@ +# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. 
+#
+# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
+
+# distutils: language = c++
+# distutils: extra_compile_args = -std=c++17
+
+from libc.stdint cimport intptr_t
+
+from cuda.bindings.driver cimport (CUstream as pyCUstream,
+                                   CUevent as pyCUevent)
+from cuda.bindings.nvrtc cimport nvrtcProgram as pynvrtcProgram
+from cuda.bindings.cydriver cimport CUstream, CUevent
+from cuda.bindings.cynvrtc cimport nvrtcProgram
+
+from cuda.core.experimental import Device, Program
+
+
+cdef extern from "utility.hpp":
+    void* get_cuda_native_handle[T](T)
+
+
+def test_get_cuda_native_handle():
+    dev = Device(0)
+    dev.set_current()
+
+    s = dev.create_stream()
+    cdef pyCUstream s_py = s.handle
+    cdef CUstream s_c = <CUstream>get_cuda_native_handle(s_py)
+    assert <intptr_t>(s_c) == <intptr_t>(int(s_py))
+
+    e = s.record()
+    cdef pyCUevent e_py = e.handle
+    cdef CUevent e_c = <CUevent>get_cuda_native_handle(e_py)
+    assert <intptr_t>(e_c) == <intptr_t>(int(e_py))
+
+    prog = Program("extern \"C\" __global__ void dummy() {}", "c++")
+    assert prog.backend == "NVRTC"
+    cdef pynvrtcProgram prog_py = prog.handle
+    cdef nvrtcProgram prog_c = <nvrtcProgram>get_cuda_native_handle(prog_py)
+    assert <intptr_t>(prog_c) == <intptr_t>(int(prog_py))
+
+    prog.close()
+    e.close()
+    s.close()
diff --git a/cuda_core/tests/example_tests/test_basic_examples.py b/cuda_core/tests/example_tests/test_basic_examples.py
index 9a9432cb8..f085e3c61 100644
--- a/cuda_core/tests/example_tests/test_basic_examples.py
+++ b/cuda_core/tests/example_tests/test_basic_examples.py
@@ -13,6 +13,8 @@
 
 import pytest
 
+from cuda.core.experimental import Device
+
 from .utils import run_example
 
 samples_path = os.path.join(os.path.dirname(__file__), "..", "..", "examples")
@@ -23,3 +25,5 @@ class TestExamples:
     def test_example(self, example, deinit_cuda):
         run_example(samples_path, example)
+        if Device().device_id != 0:
+            Device(0).set_current()
diff --git a/cuda_core/tests/pytest.ini b/cuda_core/tests/pytest.ini
new file mode 100644
index 000000000..b703c9832
--- /dev/null
+++ b/cuda_core/tests/pytest.ini
@@ -0,0 +1,6 @@
+# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED.
+#
+# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
+
+[pytest]
+norecursedirs = cython
diff --git a/cuda_core/tests/test_linker.py b/cuda_core/tests/test_linker.py
index 732f70c32..59e8ca573 100644
--- a/cuda_core/tests/test_linker.py
+++ b/cuda_core/tests/test_linker.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED.
+# Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED.
# # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE @@ -17,8 +17,8 @@ device_function_b = "__device__ int B() { return 0; }" device_function_c = "__device__ int C(int a, int b) { return a + b; }" -culink_backend = _linker._decide_nvjitlink_or_driver() -if not culink_backend: +is_culink_backend = _linker._decide_nvjitlink_or_driver() +if not is_culink_backend: from cuda.bindings import nvjitlink @@ -54,7 +54,7 @@ def compile_ltoir_functions(init_cuda): LinkerOptions(arch=ARCH, debug=True), LinkerOptions(arch=ARCH, lineinfo=True), ] -if not culink_backend: +if not is_culink_backend: options += [ LinkerOptions(arch=ARCH, time=True), LinkerOptions(arch=ARCH, optimize_unused_variables=True), @@ -85,16 +85,17 @@ def test_linker_init(compile_ptx_functions, options): linker = Linker(*compile_ptx_functions, options=options) object_code = linker.link("cubin") assert isinstance(object_code, ObjectCode) + assert linker.backend == ("driver" if is_culink_backend else "nvJitLink") def test_linker_init_invalid_arch(compile_ptx_functions): - err = AttributeError if culink_backend else nvjitlink.nvJitLinkError + err = AttributeError if is_culink_backend else nvjitlink.nvJitLinkError with pytest.raises(err): options = LinkerOptions(arch="99", ptx=True) Linker(*compile_ptx_functions, options=options) -@pytest.mark.skipif(culink_backend, reason="culink does not support ptx option") +@pytest.mark.skipif(is_culink_backend, reason="culink does not support ptx option") def test_linker_link_ptx_nvjitlink(compile_ltoir_functions): options = LinkerOptions(arch=ARCH, link_time_optimization=True, ptx=True) linker = Linker(*compile_ltoir_functions, options=options) @@ -102,7 +103,7 @@ def test_linker_link_ptx_nvjitlink(compile_ltoir_functions): assert isinstance(linked_code, ObjectCode) -@pytest.mark.skipif(not culink_backend, reason="nvjitlink requires lto for ptx linking") +@pytest.mark.skipif(not is_culink_backend, reason="nvjitlink requires lto for ptx linking") def test_linker_link_ptx_culink(compile_ptx_functions): options = LinkerOptions(arch=ARCH) linker = Linker(*compile_ptx_functions, options=options) diff --git a/cuda_core/tests/test_module.py b/cuda_core/tests/test_module.py index 355a0c49a..8528c4d53 100644 --- a/cuda_core/tests/test_module.py +++ b/cuda_core/tests/test_module.py @@ -96,6 +96,7 @@ def test_object_code_load_cubin(get_saxpy_kernel): sym_map = mod._sym_map assert isinstance(cubin, bytes) mod = ObjectCode.from_cubin(cubin, symbol_mapping=sym_map) + assert mod.code == cubin mod.get_kernel("saxpy") # force loading @@ -107,4 +108,5 @@ def test_object_code_load_cubin_from_file(get_saxpy_kernel, tmp_path): cubin_file = tmp_path / "test.cubin" cubin_file.write_bytes(cubin) mod = ObjectCode.from_cubin(str(cubin_file), symbol_mapping=sym_map) + assert mod.code == str(cubin_file) mod.get_kernel("saxpy") # force loading diff --git a/cuda_core/tests/test_program.py b/cuda_core/tests/test_program.py index 8d2ecd1ab..05132a201 100644 --- a/cuda_core/tests/test_program.py +++ b/cuda_core/tests/test_program.py @@ -1,10 +1,6 @@ -# Copyright 2024 NVIDIA Corporation. All rights reserved. +# Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. # -# Please refer to the NVIDIA end user license agreement (EULA) associated -# with this source code for terms and conditions that govern your use of -# this software. Any use, reproduction, disclosure, or distribution of -# this software and related documentation outside the terms of the EULA -# is strictly prohibited. 
+# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE import warnings @@ -14,6 +10,8 @@ from cuda.core.experimental._module import Kernel, ObjectCode from cuda.core.experimental._program import Program, ProgramOptions +is_culink_backend = _linker._decide_nvjitlink_or_driver() + @pytest.fixture(scope="module") def ptx_code_object(): @@ -50,7 +48,7 @@ def ptx_code_object(): def test_cpp_program_with_various_options(init_cuda, options): code = 'extern "C" __global__ void my_kernel() {}' program = Program(code, "c++", options) - assert program.backend == "nvrtc" + assert program.backend == "NVRTC" program.compile("ptx") program.close() assert program.handle is None @@ -65,8 +63,7 @@ def test_cpp_program_with_various_options(init_cuda, options): ProgramOptions(prec_sqrt=True), ProgramOptions(fma=True), ] -if not _linker._decide_nvjitlink_or_driver(): - print("Using nvjitlink as the backend because decide() returned false") +if not is_culink_backend: options += [ ProgramOptions(time=True), ProgramOptions(split_compile=True), @@ -76,7 +73,7 @@ def test_cpp_program_with_various_options(init_cuda, options): @pytest.mark.parametrize("options", options) def test_ptx_program_with_various_options(init_cuda, ptx_code_object, options): program = Program(ptx_code_object._module.decode(), "ptx", options=options) - assert program.backend == "linker" + assert program.backend == ("driver" if is_culink_backend else "nvJitLink") program.compile("cubin") program.close() assert program.handle is None @@ -85,7 +82,7 @@ def test_ptx_program_with_various_options(init_cuda, ptx_code_object, options): def test_program_init_valid_code_type(): code = 'extern "C" __global__ void my_kernel() {}' program = Program(code, "c++") - assert program.backend == "nvrtc" + assert program.backend == "NVRTC" assert program.handle is not None @@ -125,14 +122,14 @@ def test_program_compile_valid_target_type(init_cuda): def test_program_compile_invalid_target_type(): code = 'extern "C" __global__ void my_kernel() {}' program = Program(code, "c++") - with pytest.raises(NotImplementedError): + with pytest.raises(ValueError): program.compile("invalid_target") def test_program_backend_property(): code = 'extern "C" __global__ void my_kernel() {}' program = Program(code, "c++") - assert program.backend == "nvrtc" + assert program.backend == "NVRTC" def test_program_handle_property(): diff --git a/cuda_core/tests/test_stream.py b/cuda_core/tests/test_stream.py index 9c661192f..e8b59b34f 100644 --- a/cuda_core/tests/test_stream.py +++ b/cuda_core/tests/test_stream.py @@ -11,6 +11,7 @@ from cuda.core.experimental import Device, Stream, StreamOptions from cuda.core.experimental._event import Event from cuda.core.experimental._stream import LEGACY_DEFAULT_STREAM, PER_THREAD_DEFAULT_STREAM, default_stream +from cuda.core.experimental._utils import driver def test_stream_init(): @@ -26,7 +27,7 @@ def test_stream_init_with_options(init_cuda): def test_stream_handle(init_cuda): stream = Device().create_stream(options=StreamOptions()) - assert isinstance(stream.handle, int) + assert isinstance(stream.handle, driver.CUstream) def test_stream_is_nonblocking(init_cuda): @@ -90,7 +91,8 @@ def test_stream_from_foreign_stream(init_cuda): device = Device() other_stream = device.create_stream(options=StreamOptions()) stream = device.create_stream(obj=other_stream) - assert other_stream.handle == stream.handle + # convert to int to work around NVIDIA/cuda-python#465 + assert int(other_stream.handle) == int(stream.handle) device = stream.device assert 
isinstance(device, Device) context = stream.context
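Taken together, the handle-typing changes above mean that ``.handle`` now returns driver objects such as ``CUstream`` and ``CUevent`` rather than integers. A short summary sketch (assuming a CUDA-capable GPU is present), using only behavior exercised by the tests in this patch:

```python
from cuda.core.experimental import Device
from cuda.core.experimental._utils import driver

dev = Device(0)
dev.set_current()
stream = dev.create_stream()
event = stream.record()

assert isinstance(stream.handle, driver.CUstream)  # previously a plain int
assert isinstance(event.handle, driver.CUevent)    # previously a plain int

# Raw pointer addresses are still available via explicit conversion:
stream_addr = int(stream.handle)
event_addr = int(event.handle)
```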