Skip to content

Commit eb82b80

Browse files
committed
Merge branch 'main' into reduce_cudart
2 parents d279e50 + 414b124 commit eb82b80

File tree

22 files changed

+93
-57
lines changed

22 files changed

+93
-57
lines changed

README.md

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,10 @@ CUDA Python is the home for accessing NVIDIA’s CUDA platform from Python. It c
44

55
* [cuda.core](https://nvidia.github.io/cuda-python/cuda-core/latest): Pythonic access to CUDA Runtime and other core functionalities
66
* [cuda.bindings](https://nvidia.github.io/cuda-python/cuda-bindings/latest): Low-level Python bindings to CUDA C APIs
7-
* [cuda.cooperative](https://nvidia.github.io/cccl/cuda_cooperative/): A Python package providing CCCL's reusable block-wide and warp-wide *device* primitives for use within Numba CUDA kernels
8-
* [cuda.parallel](https://nvidia.github.io/cccl/cuda_parallel/): A Python package for easy access to CCCL's highly efficient and customizable parallel algorithms, like `sort`, `scan`, `reduce`, `transform`, etc, that are callable on the *host*
7+
* [cuda.cccl.cooperative](https://nvidia.github.io/cccl/cuda_cooperative/): A Python module providing CCCL's reusable block-wide and warp-wide *device* primitives for use within Numba CUDA kernels
8+
* [cuda.cccl.parallel](https://nvidia.github.io/cccl/cuda_parallel/): A Python module for easy access to CCCL's highly efficient and customizable parallel algorithms, like `sort`, `scan`, `reduce`, `transform`, etc, that are callable on the *host*
99
* [numba.cuda](https://nvidia.github.io/numba-cuda/): Numba's target for CUDA GPU programming by directly compiling a restricted subset of Python code into CUDA kernels and device functions following the CUDA execution model.
10-
11-
For access to NVIDIA CPU & GPU Math Libraries, please refer to [nvmath-python](https://docs.nvidia.com/cuda/nvmath-python/latest).
10+
* [nvmath-python](https://docs.nvidia.com/cuda/nvmath-python/latest): Pythonic access to NVIDIA CPU & GPU Math Libraries, with both [*host*](https://docs.nvidia.com/cuda/nvmath-python/latest/overview.html#host-apis) and [*device* (nvmath.device)](https://docs.nvidia.com/cuda/nvmath-python/latest/overview.html#device-apis) APIs. It also provides low-level Python bindings to host C APIs ([nvmath.bindings](https://docs.nvidia.com/cuda/nvmath-python/latest/bindings/index.html)).
1211

1312
CUDA Python is currently undergoing an overhaul to improve existing components and bring up new ones. All of the functionality previously available from the `cuda-python` package will continue to be available; please refer to the [cuda.bindings](https://nvidia.github.io/cuda-python/cuda-bindings/latest) documentation for the installation guide and further details.
1413

ci/tools/setup-sanitizer

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,10 @@ set -euo pipefail
1212
if [[ "${SETUP_SANITIZER}" == 1 ]]; then
1313
COMPUTE_SANITIZER="${CUDA_HOME}/bin/compute-sanitizer"
1414
COMPUTE_SANITIZER_VERSION=$(${COMPUTE_SANITIZER} --version | grep -Eo "[0-9]{4}\.[0-9]\.[0-9]" | sed -e 's/\.//g')
15-
SANITIZER_CMD="${COMPUTE_SANITIZER} --target-processes=all --launch-timeout=0 --tool=memcheck --error-exitcode=1"
15+
SANITIZER_CMD="${COMPUTE_SANITIZER} --target-processes=all --launch-timeout=0 --tool=memcheck --error-exitcode=1 --report-api-errors=no"
1616
if [[ "$COMPUTE_SANITIZER_VERSION" -ge 202111 ]]; then
1717
SANITIZER_CMD="${SANITIZER_CMD} --padding=32"
1818
fi
19-
echo "CUDA_PYTHON_TESTING_WITH_COMPUTE_SANITIZER=1" >> $GITHUB_ENV
2019
else
2120
SANITIZER_CMD=""
2221
fi

cuda_bindings/cuda/bindings/_internal/nvvm.pxd

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ from ..cynvvm cimport *
1111
# Wrapper functions
1212
###############################################################################
1313

14+
cdef const char* _nvvmGetErrorString(nvvmResult result) except?NULL nogil
1415
cdef nvvmResult _nvvmVersion(int* major, int* minor) except?_NVVMRESULT_INTERNAL_LOADING_ERROR nogil
1516
cdef nvvmResult _nvvmIRVersion(int* majorIR, int* minorIR, int* majorDbg, int* minorDbg) except?_NVVMRESULT_INTERNAL_LOADING_ERROR nogil
1617
cdef nvvmResult _nvvmCreateProgram(nvvmProgram* prog) except?_NVVMRESULT_INTERNAL_LOADING_ERROR nogil

cuda_bindings/cuda/bindings/_internal/nvvm_linux.pyx

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ cdef extern from "<dlfcn.h>" nogil:
3636
cdef bint __py_nvvm_init = False
3737
cdef void* __cuDriverGetVersion = NULL
3838

39+
cdef void* __nvvmGetErrorString = NULL
3940
cdef void* __nvvmVersion = NULL
4041
cdef void* __nvvmIRVersion = NULL
4142
cdef void* __nvvmCreateProgram = NULL
@@ -82,6 +83,13 @@ cdef int _check_or_init_nvvm() except -1 nogil:
8283
handle = NULL
8384

8485
# Load function
86+
global __nvvmGetErrorString
87+
__nvvmGetErrorString = dlsym(RTLD_DEFAULT, 'nvvmGetErrorString')
88+
if __nvvmGetErrorString == NULL:
89+
if handle == NULL:
90+
handle = load_library(driver_ver)
91+
__nvvmGetErrorString = dlsym(handle, 'nvvmGetErrorString')
92+
8593
global __nvvmVersion
8694
__nvvmVersion = dlsym(RTLD_DEFAULT, 'nvvmVersion')
8795
if __nvvmVersion == NULL:
@@ -181,6 +189,9 @@ cpdef dict _inspect_function_pointers():
181189
_check_or_init_nvvm()
182190
cdef dict data = {}
183191

192+
global __nvvmGetErrorString
193+
data["__nvvmGetErrorString"] = <intptr_t>__nvvmGetErrorString
194+
184195
global __nvvmVersion
185196
data["__nvvmVersion"] = <intptr_t>__nvvmVersion
186197

@@ -232,6 +243,16 @@ cpdef _inspect_function_pointer(str name):
232243
# Wrapper functions
233244
###############################################################################
234245

246+
cdef const char* _nvvmGetErrorString(nvvmResult result) except?NULL nogil:
    # Forward to the dynamically resolved ``nvvmGetErrorString`` entry point.
    # The pointer is a module-level global that _check_or_init_nvvm() fills in.
    global __nvvmGetErrorString
    _check_or_init_nvvm()
    if __nvvmGetErrorString is NULL:
        # Raising a Python exception requires the GIL; this function runs without it.
        with gil:
            raise FunctionNotFoundError("function nvvmGetErrorString is not found")
    # Cast the untyped pointer to the C signature before calling through it.
    return (<const char* (*)(nvvmResult) noexcept nogil>__nvvmGetErrorString)(result)
254+
255+
235256
cdef nvvmResult _nvvmVersion(int* major, int* minor) except?_NVVMRESULT_INTERNAL_LOADING_ERROR nogil:
236257
global __nvvmVersion
237258
_check_or_init_nvvm()

cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR = 0x00000100
2323
cdef bint __py_nvvm_init = False
2424
cdef void* __cuDriverGetVersion = NULL
2525

26+
cdef void* __nvvmGetErrorString = NULL
2627
cdef void* __nvvmVersion = NULL
2728
cdef void* __nvvmIRVersion = NULL
2829
cdef void* __nvvmCreateProgram = NULL
@@ -62,6 +63,12 @@ cdef int _check_or_init_nvvm() except -1 nogil:
6263
handle = path_finder._load_nvidia_dynamic_library("nvvm").handle
6364

6465
# Load function
66+
global __nvvmGetErrorString
67+
try:
68+
__nvvmGetErrorString = <void*><intptr_t>win32api.GetProcAddress(handle, 'nvvmGetErrorString')
69+
except:
70+
pass
71+
6572
global __nvvmVersion
6673
try:
6774
__nvvmVersion = <void*><intptr_t>win32api.GetProcAddress(handle, 'nvvmVersion')
@@ -149,6 +156,9 @@ cpdef dict _inspect_function_pointers():
149156
_check_or_init_nvvm()
150157
cdef dict data = {}
151158

159+
global __nvvmGetErrorString
160+
data["__nvvmGetErrorString"] = <intptr_t>__nvvmGetErrorString
161+
152162
global __nvvmVersion
153163
data["__nvvmVersion"] = <intptr_t>__nvvmVersion
154164

@@ -200,6 +210,16 @@ cpdef _inspect_function_pointer(str name):
200210
# Wrapper functions
201211
###############################################################################
202212

213+
cdef const char* _nvvmGetErrorString(nvvmResult result) except?NULL nogil:
    # Forward to the dynamically resolved ``nvvmGetErrorString`` entry point.
    # The pointer is a module-level global that _check_or_init_nvvm() fills in.
    global __nvvmGetErrorString
    _check_or_init_nvvm()
    if __nvvmGetErrorString is NULL:
        # Raising a Python exception requires the GIL; this function runs without it.
        with gil:
            raise FunctionNotFoundError("function nvvmGetErrorString is not found")
    # Cast the untyped pointer to the C signature before calling through it.
    return (<const char* (*)(nvvmResult) noexcept nogil>__nvvmGetErrorString)(result)
221+
222+
203223
cdef nvvmResult _nvvmVersion(int* major, int* minor) except?_NVVMRESULT_INTERNAL_LOADING_ERROR nogil:
204224
global __nvvmVersion
205225
_check_or_init_nvvm()

cuda_bindings/cuda/bindings/cynvvm.pxd

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ ctypedef void* nvvmProgram 'nvvmProgram'
3333
# Functions
3434
###############################################################################
3535

36+
cdef const char* nvvmGetErrorString(nvvmResult result) except?NULL nogil
3637
cdef nvvmResult nvvmVersion(int* major, int* minor) except?_NVVMRESULT_INTERNAL_LOADING_ERROR nogil
3738
cdef nvvmResult nvvmIRVersion(int* majorIR, int* minorIR, int* majorDbg, int* minorDbg) except?_NVVMRESULT_INTERNAL_LOADING_ERROR nogil
3839
cdef nvvmResult nvvmCreateProgram(nvvmProgram* prog) except?_NVVMRESULT_INTERNAL_LOADING_ERROR nogil

cuda_bindings/cuda/bindings/cynvvm.pyx

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,10 @@ from ._internal cimport nvvm as _nvvm
1111
# Wrapper functions
1212
###############################################################################
1313

14+
cdef const char* nvvmGetErrorString(nvvmResult result) except?NULL nogil:
    # Thin shim: delegate to the implementation in the ``_internal.nvvm`` module.
    cdef const char* message = _nvvm._nvvmGetErrorString(result)
    return message
16+
17+
1418
cdef nvvmResult nvvmVersion(int* major, int* minor) except?_NVVMRESULT_INTERNAL_LOADING_ERROR nogil:
1519
return _nvvm._nvvmVersion(major, minor)
1620

cuda_bindings/cuda/bindings/nvvm.pxd

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ ctypedef nvvmResult _Result
2727
# Functions
2828
###############################################################################
2929

30+
cpdef str get_error_string(int result)
3031
cpdef tuple version()
3132
cpdef tuple ir_version()
3233
cpdef intptr_t create_program() except? 0

cuda_bindings/cuda/bindings/nvvm.pyx

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,19 @@ cpdef destroy_program(intptr_t prog):
7373
check_status(status)
7474

7575

76+
cpdef str get_error_string(int result):
    """Return the message string for an ``nvvmResult`` code.

    Args:
        result (Result): NVVM API result code.

    Returns:
        str: The decoded message that ``nvvmGetErrorString`` reports for ``result``.

    .. seealso:: `nvvmGetErrorString`
    """
    # The C string is copied into a Python ``bytes`` object before decoding.
    cdef bytes message = nvvmGetErrorString(<_Result>result)
    return message.decode()
87+
88+
7689
cpdef tuple version():
7790
"""Get the NVVM version.
7891

cuda_bindings/docs/source/environment_variables.md

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,3 @@
1111
## Runtime Environment Variables
1212

1313
- `CUDA_PYTHON_CUDA_PER_THREAD_DEFAULT_STREAM` : When set to 1, the default stream is the per-thread default stream. When set to 0, the default stream is the legacy default stream. This defaults to 0, for the legacy default stream. See [Stream Synchronization Behavior](https://docs.nvidia.com/cuda/cuda-runtime-api/stream-sync-behavior.html) for an explanation of the legacy and per-thread default streams.
14-
15-
16-
## Test-Time Environment Variables
17-
18-
- `CUDA_PYTHON_TESTING_WITH_COMPUTE_SANITIZER` : When set to 1, tests are skipped that would cause [compute-sanitizer](https://docs.nvidia.com/compute-sanitizer/ComputeSanitizer/index.html) to raise an error.

cuda_bindings/tests/conftest.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,9 @@
11
# Copyright 2025 NVIDIA Corporation. All rights reserved.
22
# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
33

4-
import os
54

65
import pytest
76

8-
skipif_testing_with_compute_sanitizer = pytest.mark.skipif(
9-
os.environ.get("CUDA_PYTHON_TESTING_WITH_COMPUTE_SANITIZER", "0") == "1",
10-
reason="The compute-sanitizer is running, and this test causes an API error.",
11-
)
12-
137

148
def pytest_configure(config):
159
config.custom_info = []

cuda_bindings/tests/test_cuda.py

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77

88
import numpy as np
99
import pytest
10-
from conftest import skipif_testing_with_compute_sanitizer
1110

1211
import cuda.cuda as cuda
1312
import cuda.cudart as cudart
@@ -80,7 +79,6 @@ def test_cuda_memcpy():
8079
assert err == cuda.CUresult.CUDA_SUCCESS
8180

8281

83-
@skipif_testing_with_compute_sanitizer
8482
def test_cuda_array():
8583
(err,) = cuda.cuInit(0)
8684
assert err == cuda.CUresult.CUDA_SUCCESS
@@ -234,7 +232,6 @@ def test_cuda_uuid_list_access():
234232
assert err == cuda.CUresult.CUDA_SUCCESS
235233

236234

237-
@skipif_testing_with_compute_sanitizer
238235
def test_cuda_cuModuleLoadDataEx():
239236
(err,) = cuda.cuInit(0)
240237
assert err == cuda.CUresult.CUDA_SUCCESS
@@ -622,7 +619,6 @@ def test_cuda_coredump_attr():
622619
assert err == cuda.CUresult.CUDA_SUCCESS
623620

624621

625-
@skipif_testing_with_compute_sanitizer
626622
def test_get_error_name_and_string():
627623
(err,) = cuda.cuInit(0)
628624
assert err == cuda.CUresult.CUDA_SUCCESS
@@ -952,7 +948,6 @@ def test_CUmemDecompressParams_st():
952948
assert int(desc.dstActBytes) == 0
953949

954950

955-
@skipif_testing_with_compute_sanitizer
956951
def test_all_CUresult_codes():
957952
max_code = int(max(cuda.CUresult))
958953
# Smoke test. CUDA_ERROR_UNKNOWN = 999, but intentionally using literal value.
@@ -985,21 +980,18 @@ def test_all_CUresult_codes():
985980
assert num_good >= 76 # CTK 11.0.3_450.51.06
986981

987982

988-
@skipif_testing_with_compute_sanitizer
989983
def test_cuKernelGetName_failure():
    """Passing 0 as the kernel handle must report INVALID_VALUE and return no name."""
    status, kernel_name = cuda.cuKernelGetName(0)
    assert status == cuda.CUresult.CUDA_ERROR_INVALID_VALUE
    assert kernel_name is None
993987

994988

995-
@skipif_testing_with_compute_sanitizer
996989
def test_cuFuncGetName_failure():
    """Passing 0 as the function handle must report INVALID_VALUE and return no name."""
    status, func_name = cuda.cuFuncGetName(0)
    assert status == cuda.CUresult.CUDA_ERROR_INVALID_VALUE
    assert func_name is None
1000993

1001994

1002-
@skipif_testing_with_compute_sanitizer
1003995
@pytest.mark.skipif(
1004996
driverVersionLessThan(12080) or not supportsCudaAPI("cuCheckpointProcessGetState"),
1005997
reason="When API was introduced",

cuda_bindings/tests/test_cudart.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66

77
import numpy as np
88
import pytest
9-
from conftest import skipif_testing_with_compute_sanitizer
109

1110
import cuda.cuda as cuda
1211
import cuda.cudart as cudart
@@ -67,7 +66,6 @@ def test_cudart_memcpy():
6766
assertSuccess(err)
6867

6968

70-
@skipif_testing_with_compute_sanitizer
7169
def test_cudart_hostRegister():
7270
# Use hostRegister API to check for correct enum return values
7371
page_size = 80

cuda_bindings/tests/test_nvjitlink.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -66,12 +66,13 @@ def check_nvjitlink_usable():
6666
def get_dummy_ltoir():
6767
def CHECK_NVRTC(err):
6868
if err != nvrtc.nvrtcResult.NVRTC_SUCCESS:
69-
raise RuntimeError(f"Nvrtc Error: {err}")
69+
raise RuntimeError(repr(err))
7070

7171
empty_cplusplus_kernel = "__global__ void A() {}"
7272
err, program_handle = nvrtc.nvrtcCreateProgram(empty_cplusplus_kernel.encode(), b"", 0, [], [])
7373
CHECK_NVRTC(err)
74-
nvrtc.nvrtcCompileProgram(program_handle, 1, [b"-dlto"])
74+
err = nvrtc.nvrtcCompileProgram(program_handle, 1, [b"-dlto"])[0]
75+
CHECK_NVRTC(err)
7576
err, size = nvrtc.nvrtcGetLTOIRSize(program_handle)
7677
CHECK_NVRTC(err)
7778
empty_kernel_ltoir = b" " * size

cuda_bindings/tests/test_nvvm.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,20 @@ def get_program_log(prog):
194194
return buffer.decode(errors="backslashreplace")
195195

196196

197+
def test_get_error_string():
    """Smoke-test ``nvvm.get_error_string`` across every ``nvvm.Result`` member.

    Each result code must map to a non-empty ``str``. Error codes must produce
    a message starting with ``NVVM_ERROR``; the single SUCCESS code must not.
    """
    num_success = 0
    num_errors = 0
    for enum_obj in nvvm.Result:
        es = nvvm.get_error_string(enum_obj)
        # Every code, including SUCCESS, has to yield a usable message.
        assert isinstance(es, str)
        assert es
        if enum_obj is nvvm.Result.SUCCESS:
            # Previously the success message was never inspected at all.
            assert not es.startswith("NVVM_ERROR")
            num_success += 1
        else:
            assert es.startswith("NVVM_ERROR")
            num_errors += 1
    assert num_success == 1
    assert num_errors > 1  # smoke check is sufficient
209+
210+
197211
def test_nvvm_version():
198212
ver = nvvm.version()
199213
assert len(ver) == 2

cuda_core/tests/conftest.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -65,11 +65,5 @@ def pop_all_contexts():
6565
return pop_all_contexts
6666

6767

68-
skipif_testing_with_compute_sanitizer = pytest.mark.skipif(
69-
os.environ.get("CUDA_PYTHON_TESTING_WITH_COMPUTE_SANITIZER", "0") == "1",
70-
reason="The compute-sanitizer is running, and this test causes an API error.",
71-
)
72-
73-
7468
# TODO: make the fixture more sophisticated using path finder
7569
skipif_need_cuda_headers = pytest.mark.skipif(os.environ.get("CUDA_PATH") is None, reason="need CUDA header")

cuda_core/tests/test_cuda_utils.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
44

55
import pytest
6-
from conftest import skipif_testing_with_compute_sanitizer
76

87
from cuda.bindings import driver, runtime
98
from cuda.core.experimental._utils import cuda_utils
@@ -41,8 +40,6 @@ def test_runtime_cuda_error_explanations_health():
4140
assert not extra_expl
4241

4342

44-
# this test causes an API error when the driver is too old to know about all of the error codes
45-
@skipif_testing_with_compute_sanitizer
4643
def test_check_driver_error():
4744
num_unexpected = 0
4845
for error in driver.CUresult:

cuda_core/tests/test_event.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
import numpy as np
99
import pytest
10-
from conftest import skipif_need_cuda_headers, skipif_testing_with_compute_sanitizer
10+
from conftest import skipif_need_cuda_headers
1111

1212
import cuda.core.experimental
1313
from cuda.core.experimental import Device, EventOptions, LaunchConfig, Program, ProgramOptions, launch
@@ -71,7 +71,6 @@ def test_is_done(init_cuda):
7171
assert event.is_done in (True, False)
7272

7373

74-
@skipif_testing_with_compute_sanitizer
7574
def test_error_timing_disabled():
7675
device = Device()
7776
device.set_current()
@@ -94,7 +93,6 @@ def test_error_timing_disabled():
9493
event2 - event1
9594

9695

97-
@skipif_testing_with_compute_sanitizer
9896
def test_error_timing_recorded():
9997
device = Device()
10098
device.set_current()
@@ -114,7 +112,6 @@ def test_error_timing_recorded():
114112
event3 - event2
115113

116114

117-
@skipif_testing_with_compute_sanitizer
118115
@skipif_need_cuda_headers # libcu++
119116
@pytest.mark.skipif(tuple(int(i) for i in np.__version__.split(".")[:2]) < (2, 1), reason="need numpy 2.1.0+")
120117
def test_error_timing_incomplete():

cuda_core/tests/test_linker.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
44

55
import pytest
6-
from conftest import skipif_testing_with_compute_sanitizer
76

87
from cuda.core.experimental import Device, Linker, LinkerOptions, Program, ProgramOptions, _linker
98
from cuda.core.experimental._module import ObjectCode
@@ -145,8 +144,6 @@ def test_linker_link_invalid_target_type(compile_ptx_functions):
145144
linker.link("invalid_target")
146145

147146

148-
# this test causes an API error when using the culink API
149-
@skipif_testing_with_compute_sanitizer
150147
def test_linker_get_error_log(compile_ptx_functions):
151148
options = LinkerOptions(name="ABC", arch=ARCH)
152149

0 commit comments

Comments
 (0)