Skip to content

Commit d8e0f08

Browse files
committed
enhanced typing in buffer, MR, Stream, ...
1 parent a6387a0 commit d8e0f08

File tree

11 files changed

+213
-103
lines changed

11 files changed

+213
-103
lines changed

cuda_core/cuda/core/experimental/_device.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
from cuda.core.experimental._context import Context, ContextOptions
99
from cuda.core.experimental._event import Event, EventOptions
1010
from cuda.core.experimental._memory import Buffer, MemoryResource, _DefaultAsyncMempool, _SynchronousMemoryResource
11-
from cuda.core.experimental._stream import Stream, StreamOptions, default_stream
11+
from cuda.core.experimental._stream import IsStreamT, Stream, StreamOptions, default_stream
1212
from cuda.core.experimental._utils.clear_error_support import assert_type
1313
from cuda.core.experimental._utils.cuda_utils import (
1414
ComputeCapability,
@@ -1206,7 +1206,7 @@ def create_context(self, options: ContextOptions = None) -> Context:
12061206
raise NotImplementedError("WIP: https://github.com/NVIDIA/cuda-python/issues/189")
12071207

12081208
@precondition(_check_context_initialized)
1209-
def create_stream(self, obj=None, options: StreamOptions = None) -> Stream:
1209+
def create_stream(self, obj: Optional[IsStreamT] = None, options: StreamOptions = None) -> Stream:
12101210
"""Create a Stream object.
12111211
12121212
New stream objects can be created in two different ways:
@@ -1258,7 +1258,7 @@ def create_event(self, options: Optional[EventOptions] = None) -> Event:
12581258
return Event._init(self._id, self.context._handle, options)
12591259

12601260
@precondition(_check_context_initialized)
1261-
def allocate(self, size, stream=None) -> Buffer:
1261+
def allocate(self, size, stream: Optional[Stream] = None) -> Buffer:
12621262
"""Allocate device memory from a specified stream.
12631263
12641264
Allocates device memory of `size` bytes on the specified `stream`

cuda_core/cuda/core/experimental/_launch_config.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,8 @@ class LaunchConfig:
5050
shmem_size : int, optional
5151
Dynamic shared-memory size per thread block in bytes.
5252
(Default to size 0)
53-
53+
cooperative_launch : bool, optional
54+
Whether this config can be used to launch a cooperative kernel.
5455
"""
5556

5657
# TODO: expand LaunchConfig to include other attributes

cuda_core/cuda/core/experimental/_launcher.py

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,12 @@
22
#
33
# SPDX-License-Identifier: Apache-2.0
44

5+
from typing import Union
56

67
from cuda.core.experimental._kernel_arg_handler import ParamHolder
78
from cuda.core.experimental._launch_config import LaunchConfig, _to_native_launch_config
89
from cuda.core.experimental._module import Kernel
9-
from cuda.core.experimental._stream import Stream
10+
from cuda.core.experimental._stream import IsStreamT, Stream, _try_to_get_stream_ptr
1011
from cuda.core.experimental._utils.clear_error_support import assert_type
1112
from cuda.core.experimental._utils.cuda_utils import (
1213
_reduce_3_tuple,
@@ -34,7 +35,7 @@ def _lazy_init():
3435
_inited = True
3536

3637

37-
def launch(stream, config, kernel, *kernel_args):
38+
def launch(stream: Union[Stream, IsStreamT], config: LaunchConfig, kernel: Kernel, *kernel_args):
3839
"""Launches a :obj:`~_module.Kernel`
3940
object with launch-time configuration.
4041
@@ -43,7 +44,7 @@ def launch(stream, config, kernel, *kernel_args):
4344
stream : :obj:`~_stream.Stream`
4445
The stream establishing the stream ordering semantic of a
4546
launch.
46-
config : :obj:`~_launcher.LaunchConfig`
47+
config : :obj:`LaunchConfig`
4748
Launch configurations inline with options provided by
4849
:obj:`~_launcher.LaunchConfig` dataclass.
4950
kernel : :obj:`~_module.Kernel`
@@ -55,13 +56,15 @@ def launch(stream, config, kernel, *kernel_args):
5556
"""
5657
if stream is None:
5758
raise ValueError("stream cannot be None, stream must either be a Stream object or support __cuda_stream__")
58-
if not isinstance(stream, Stream):
59+
try:
60+
stream_handle = stream.handle
61+
except AttributeError:
5962
try:
60-
stream = Stream._init(stream)
61-
except Exception as e:
63+
stream_handle = _try_to_get_stream_ptr(stream)
64+
except Exception:
6265
raise ValueError(
6366
f"stream must either be a Stream object or support __cuda_stream__ (got {type(stream)})"
64-
) from e
67+
) from None
6568
assert_type(kernel, Kernel)
6669
_lazy_init()
6770
config = check_or_create_options(LaunchConfig, config, "launch config")
@@ -78,15 +81,15 @@ def launch(stream, config, kernel, *kernel_args):
7881
# rich.
7982
if _use_ex:
8083
drv_cfg = _to_native_launch_config(config)
81-
drv_cfg.hStream = stream.handle
84+
drv_cfg.hStream = stream_handle
8285
if config.cooperative_launch:
8386
_check_cooperative_launch(kernel, config, stream)
8487
handle_return(driver.cuLaunchKernelEx(drv_cfg, int(kernel._handle), args_ptr, 0))
8588
else:
8689
# TODO: check if config has any unsupported attrs
8790
handle_return(
8891
driver.cuLaunchKernel(
89-
int(kernel._handle), *config.grid, *config.block, config.shmem_size, stream.handle, args_ptr, 0
92+
int(kernel._handle), *config.grid, *config.block, config.shmem_size, stream_handle, args_ptr, 0
9093
)
9194
)
9295

cuda_core/cuda/core/experimental/_memory.py

Lines changed: 102 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -9,17 +9,18 @@
99
from typing import Optional, Tuple, TypeVar, Union
1010

1111
from cuda.core.experimental._dlpack import DLDeviceType, make_py_capsule
12-
from cuda.core.experimental._stream import default_stream
12+
from cuda.core.experimental._stream import Stream, default_stream
1313
from cuda.core.experimental._utils.cuda_utils import driver, handle_return
1414

15-
PyCapsule = TypeVar("PyCapsule")
16-
17-
1815
# TODO: define a memory property mixin class and make Buffer and
1916
# MemoryResource both inherit from it
2017

18+
19+
PyCapsule = TypeVar("PyCapsule")
20+
"""Represent the capsule type."""
21+
2122
DevicePointerT = Union[driver.CUdeviceptr, int, None]
22-
"""A type union of `Cudeviceptr`, `int` and `None` for hinting Buffer.handle."""
23+
"""A type union of :obj:`~driver.CUdeviceptr`, `int` and `None` for hinting :attr:`Buffer.handle`."""
2324

2425

2526
class Buffer:
@@ -29,19 +30,7 @@ class Buffer:
2930
different memory resources are to give access to their memory
3031
allocations.
3132
32-
Support for data interchange mechanisms are provided by
33-
establishing both the DLPack and the Python-level buffer
34-
protocols.
35-
36-
Parameters
37-
----------
38-
ptr : Any
39-
Allocated buffer handle object
40-
size : Any
41-
Memory size of the buffer
42-
mr : :obj:`~_memory.MemoryResource`, optional
43-
Memory resource associated with the buffer
44-
33+
Support for data interchange mechanisms is provided by DLPack.
4534
"""
4635

4736
class _MembersNeededForFinalize:
@@ -64,22 +53,26 @@ def close(self, stream=None):
6453
# TODO: handle ownership? (_mr could be None)
6554
__slots__ = ("__weakref__", "_mnff")
6655

67-
def __init__(self, ptr, size, mr: MemoryResource = None):
56+
def __new__(self, *args, **kwargs):
57+
raise RuntimeError("Buffer objects cannot be instantiated directly. Please use MemoryResource APIs.")
58+
59+
@classmethod
60+
def _init(cls, ptr: DevicePointerT, size: int, mr: Optional[MemoryResource] = None):
61+
self = super().__new__(cls)
6862
self._mnff = Buffer._MembersNeededForFinalize(self, ptr, size, mr)
63+
return self
6964

70-
def close(self, stream=None):
65+
def close(self, stream: Stream = None):
7166
"""Deallocate this buffer asynchronously on the given stream.
7267
7368
This buffer is released back to their memory resource
7469
asynchronously on the given stream.
7570
7671
Parameters
7772
----------
78-
stream : Any, optional
79-
The stream object with a __cuda_stream__ protocol to
80-
use for asynchronous deallocation. Defaults to using
81-
the default stream.
82-
73+
stream : Stream, optional
74+
The stream object to use for asynchronous deallocation. If not set,
75+
the default stream is used.
8376
"""
8477
self._mnff.close(stream)
8578

@@ -95,7 +88,7 @@ def handle(self) -> DevicePointerT:
9588
return self._mnff.ptr
9689

9790
@property
98-
def size(self):
91+
def size(self) -> int:
9992
"""Return the memory size of this buffer."""
10093
return self._mnff.size
10194

@@ -125,7 +118,7 @@ def device_id(self) -> int:
125118
return self._mnff.mr.device_id
126119
raise NotImplementedError("WIP: Currently this property only supports buffers with associated MemoryResource")
127120

128-
def copy_to(self, dst: Buffer = None, *, stream) -> Buffer:
121+
def copy_to(self, dst: Buffer = None, *, stream: Stream) -> Buffer:
129122
"""Copy from this buffer to the dst buffer asynchronously on the given stream.
130123
131124
Copies the data from this buffer to the provided dst buffer.
@@ -136,7 +129,7 @@ def copy_to(self, dst: Buffer = None, *, stream) -> Buffer:
136129
----------
137130
dst : :obj:`~_memory.Buffer`
138131
Destination buffer to copy data into
139-
stream : Any
132+
stream : Stream
140133
Keyword argument specifying the stream for the
141134
asynchronous copy
142135
@@ -154,14 +147,14 @@ def copy_to(self, dst: Buffer = None, *, stream) -> Buffer:
154147
handle_return(driver.cuMemcpyAsync(dst._mnff.ptr, self._mnff.ptr, self._mnff.size, stream.handle))
155148
return dst
156149

157-
def copy_from(self, src: Buffer, *, stream):
150+
def copy_from(self, src: Buffer, *, stream: Stream):
158151
"""Copy from the src buffer to this buffer asynchronously on the given stream.
159152
160153
Parameters
161154
----------
162155
src : :obj:`~_memory.Buffer`
163156
Source buffer to copy data from
164-
stream : Any
157+
stream : Stream
165158
Keyword argument specifying the stream for the
166159
asynchronous copy
167160
@@ -219,55 +212,117 @@ def __release_buffer__(self, buffer: memoryview, /):
219212
# Supporting method paired with __buffer__.
220213
raise NotImplementedError("WIP: Buffer.__release_buffer__ hasn't been implemented yet.")
221214

215+
@staticmethod
216+
def from_handle(ptr: DevicePointerT, size: int, mr: Optional[MemoryResource] = None) -> Buffer:
217+
"""Create a new :class:`Buffer` object from a pointer.
218+
219+
Parameters
220+
----------
221+
ptr : :obj:`~_memory.DevicePointerT`
222+
Allocated buffer handle object
223+
size : int
224+
Memory size of the buffer
225+
mr : :obj:`~_memory.MemoryResource`, optional
226+
Memory resource associated with the buffer
227+
"""
228+
return Buffer._init(ptr, size, mr=mr)
229+
222230

223231
class MemoryResource(abc.ABC):
232+
"""Abstract base class for memory resources that manage allocation and deallocation of buffers.
233+
234+
Subclasses must implement methods for allocating and deallocation, as well as properties
235+
associated with this memory resource from which all allocated buffers will inherit. (Since
236+
all :class:`Buffer` instances allocated and returned by the :meth:`allocate` method would
237+
hold a reference to self, the buffer properties are retrieved simply by looking up the underlying
238+
memory resource's respective property.)
239+
"""
240+
224241
__slots__ = ("_handle",)
225242

226243
@abc.abstractmethod
227-
def __init__(self, *args, **kwargs): ...
244+
def __init__(self, *args, **kwargs):
245+
"""Initialize the memory resource.
246+
247+
Subclasses may use additional arguments to configure the resource.
248+
"""
249+
...
228250

229251
@abc.abstractmethod
230-
def allocate(self, size, stream=None) -> Buffer: ...
252+
def allocate(self, size: int, stream: Stream = None) -> Buffer:
253+
"""Allocate a buffer of the requested size.
254+
255+
Parameters
256+
----------
257+
size : int
258+
The size of the buffer to allocate, in bytes.
259+
stream : object, optional
260+
The stream on which to perform the allocation asynchronously.
261+
If None, allocation is synchronous.
262+
263+
Returns
264+
-------
265+
Buffer
266+
The allocated buffer object, which can be used for device or host operations
267+
depending on the resource's properties.
268+
"""
269+
...
231270

232271
@abc.abstractmethod
233-
def deallocate(self, ptr, size, stream=None): ...
272+
def deallocate(self, ptr: DevicePointerT, size: int, stream: Stream = None):
273+
"""Deallocate a buffer previously allocated by this resource.
274+
275+
Parameters
276+
----------
277+
ptr : object
278+
The pointer or handle to the buffer to deallocate.
279+
size : int
280+
The size of the buffer to deallocate, in bytes.
281+
stream : object, optional
282+
The stream on which to perform the deallocation asynchronously.
283+
If None, deallocation is synchronous.
284+
"""
285+
...
234286

235287
@property
236288
@abc.abstractmethod
237289
def is_device_accessible(self) -> bool:
238-
# Check if the buffers allocated from this MR can be accessed from
239-
# GPUs.
290+
"""bool: True if buffers allocated by this resource can be accessed on the device."""
240291
...
241292

242293
@property
243294
@abc.abstractmethod
244295
def is_host_accessible(self) -> bool:
245-
# Check if the buffers allocated from this MR can be accessed from
246-
# CPUs.
296+
"""bool: True if buffers allocated by this resource can be accessed on the host."""
247297
...
248298

249299
@property
250300
@abc.abstractmethod
251301
def device_id(self) -> int:
252-
# Return the device ID if this MR is for single devices. Raise an
253-
# exception if it is not.
302+
"""int: The device ordinal for which this memory resource is responsible.
303+
304+
Raises
305+
------
306+
RuntimeError
307+
If the resource is not bound to a specific device.
308+
"""
254309
...
255310

256311

257312
class _DefaultAsyncMempool(MemoryResource):
258313
__slots__ = ("_dev_id",)
259314

260-
def __init__(self, dev_id):
315+
def __init__(self, dev_id: int):
261316
self._handle = handle_return(driver.cuDeviceGetMemPool(dev_id))
262317
self._dev_id = dev_id
263318

264-
def allocate(self, size, stream=None) -> Buffer:
319+
def allocate(self, size: int, stream: Stream = None) -> Buffer:
265320
if stream is None:
266321
stream = default_stream()
267322
ptr = handle_return(driver.cuMemAllocFromPoolAsync(size, self._handle, stream.handle))
268-
return Buffer(ptr, size, self)
323+
return Buffer._init(ptr, size, self)
269324

270-
def deallocate(self, ptr, size, stream=None):
325+
def deallocate(self, ptr: DevicePointerT, size: int, stream: Stream = None):
271326
if stream is None:
272327
stream = default_stream()
273328
handle_return(driver.cuMemFreeAsync(ptr, stream.handle))
@@ -290,11 +345,11 @@ def __init__(self):
290345
# TODO: support flags from cuMemHostAlloc?
291346
self._handle = None
292347

293-
def allocate(self, size, stream=None) -> Buffer:
348+
def allocate(self, size: int, stream: Stream = None) -> Buffer:
294349
ptr = handle_return(driver.cuMemAllocHost(size))
295-
return Buffer(ptr, size, self)
350+
return Buffer._init(ptr, size, self)
296351

297-
def deallocate(self, ptr, size, stream=None):
352+
def deallocate(self, ptr: DevicePointerT, size: int, stream: Stream = None):
298353
handle_return(driver.cuMemFreeHost(ptr))
299354

300355
@property
@@ -319,7 +374,7 @@ def __init__(self, dev_id):
319374

320375
def allocate(self, size, stream=None) -> Buffer:
321376
ptr = handle_return(driver.cuMemAlloc(size))
322-
return Buffer(ptr, size, self)
377+
return Buffer._init(ptr, size, self)
323378

324379
def deallocate(self, ptr, size, stream=None):
325380
if stream is None:

0 commit comments

Comments
 (0)