9
9
from typing import Optional , Tuple , TypeVar , Union
10
10
11
11
from cuda .core .experimental ._dlpack import DLDeviceType , make_py_capsule
12
- from cuda .core .experimental ._stream import default_stream
12
+ from cuda .core .experimental ._stream import Stream , default_stream
13
13
from cuda .core .experimental ._utils .cuda_utils import driver , handle_return
14
14
15
- PyCapsule = TypeVar ("PyCapsule" )
16
-
17
-
18
15
# TODO: define a memory property mixin class and make Buffer and
19
16
# MemoryResource both inherit from it
20
17
18
+
19
+ PyCapsule = TypeVar ("PyCapsule" )
20
+ """Represent the capsule type."""
21
+
21
22
DevicePointerT = Union [driver .CUdeviceptr , int , None ]
22
- """A type union of `Cudeviceptr `, `int` and `None` for hinting Buffer.handle."""
23
+ """A type union of :obj:`~driver.CUdeviceptr `, `int` and `None` for hinting :attr:` Buffer.handle` ."""
23
24
24
25
25
26
class Buffer :
@@ -29,19 +30,7 @@ class Buffer:
29
30
different memory resources are to give access to their memory
30
31
allocations.
31
32
32
- Support for data interchange mechanisms are provided by
33
- establishing both the DLPack and the Python-level buffer
34
- protocols.
35
-
36
- Parameters
37
- ----------
38
- ptr : Any
39
- Allocated buffer handle object
40
- size : Any
41
- Memory size of the buffer
42
- mr : :obj:`~_memory.MemoryResource`, optional
43
- Memory resource associated with the buffer
44
-
33
+ Support for data interchange mechanisms is provided by DLPack.
45
34
"""
46
35
47
36
class _MembersNeededForFinalize :
@@ -64,22 +53,26 @@ def close(self, stream=None):
64
53
# TODO: handle ownership? (_mr could be None)
65
54
__slots__ = ("__weakref__" , "_mnff" )
66
55
67
- def __init__ (self , ptr , size , mr : MemoryResource = None ):
56
+ def __new__ (self , * args , ** kwargs ):
57
+ raise RuntimeError ("Buffer objects cannot be instantiated directly. Please use MemoryResource APIs." )
58
+
59
+ @classmethod
60
+ def _init (cls , ptr : DevicePointerT , size : int , mr : Optional [MemoryResource ] = None ):
61
+ self = super ().__new__ (cls )
68
62
self ._mnff = Buffer ._MembersNeededForFinalize (self , ptr , size , mr )
63
+ return self
69
64
70
- def close (self , stream = None ):
65
+ def close (self , stream : Stream = None ):
71
66
"""Deallocate this buffer asynchronously on the given stream.
72
67
73
68
This buffer is released back to its memory resource
74
69
asynchronously on the given stream.
75
70
76
71
Parameters
77
72
----------
78
- stream : Any, optional
79
- The stream object with a __cuda_stream__ protocol to
80
- use for asynchronous deallocation. Defaults to using
81
- the default stream.
82
-
73
+ stream : Stream, optional
74
+ The stream object to use for asynchronous deallocation. If not set,
75
+ the current default is to use the default stream.
83
76
"""
84
77
self ._mnff .close (stream )
85
78
@@ -95,7 +88,7 @@ def handle(self) -> DevicePointerT:
95
88
return self ._mnff .ptr
96
89
97
90
@property
98
- def size (self ):
91
+ def size (self ) -> int :
99
92
"""Return the memory size of this buffer."""
100
93
return self ._mnff .size
101
94
@@ -125,7 +118,7 @@ def device_id(self) -> int:
125
118
return self ._mnff .mr .device_id
126
119
raise NotImplementedError ("WIP: Currently this property only supports buffers with associated MemoryResource" )
127
120
128
- def copy_to (self , dst : Buffer = None , * , stream ) -> Buffer :
121
+ def copy_to (self , dst : Buffer = None , * , stream : Stream ) -> Buffer :
129
122
"""Copy from this buffer to the dst buffer asynchronously on the given stream.
130
123
131
124
Copies the data from this buffer to the provided dst buffer.
@@ -136,7 +129,7 @@ def copy_to(self, dst: Buffer = None, *, stream) -> Buffer:
136
129
----------
137
130
dst : :obj:`~_memory.Buffer`
138
131
Destination buffer to copy data to
139
- stream : Any
132
+ stream : Stream
140
133
Keyword argument specifying the stream for the
141
134
asynchronous copy
142
135
@@ -154,14 +147,14 @@ def copy_to(self, dst: Buffer = None, *, stream) -> Buffer:
154
147
handle_return (driver .cuMemcpyAsync (dst ._mnff .ptr , self ._mnff .ptr , self ._mnff .size , stream .handle ))
155
148
return dst
156
149
157
- def copy_from (self , src : Buffer , * , stream ):
150
+ def copy_from (self , src : Buffer , * , stream : Stream ):
158
151
"""Copy from the src buffer to this buffer asynchronously on the given stream.
159
152
160
153
Parameters
161
154
----------
162
155
src : :obj:`~_memory.Buffer`
163
156
Source buffer to copy data from
164
- stream : Any
157
+ stream : Stream
165
158
Keyword argument specifying the stream for the
166
159
asynchronous copy
167
160
@@ -219,55 +212,117 @@ def __release_buffer__(self, buffer: memoryview, /):
219
212
# Supporting method paired with __buffer__.
220
213
raise NotImplementedError ("WIP: Buffer.__release_buffer__ hasn't been implemented yet." )
221
214
215
+ @staticmethod
216
+ def from_handle (ptr : DevicePointerT , size : int , mr : Optional [MemoryResource ] = None ) -> Buffer :
217
+ """Create a new :class:`Buffer` object from a pointer.
218
+
219
+ Parameters
220
+ ----------
221
+ ptr : :obj:`~_memory.DevicePointerT`
222
+ Allocated buffer handle object
223
+ size : int
224
+ Memory size of the buffer
225
+ mr : :obj:`~_memory.MemoryResource`, optional
226
+ Memory resource associated with the buffer
227
+ """
228
+ return Buffer ._init (ptr , size , mr = mr )
229
+
222
230
223
231
class MemoryResource (abc .ABC ):
232
+ """Abstract base class for memory resources that manage allocation and deallocation of buffers.
233
+
234
+ Subclasses must implement methods for allocation and deallocation, as well as properties
235
+ associated with this memory resource from which all allocated buffers will inherit. (Since
236
+ all :class:`Buffer` instances allocated and returned by the :meth:`allocate` method would
237
+ hold a reference to self, the buffer properties are retrieved simply by looking up the underlying
238
+ memory resource's respective property.)
239
+ """
240
+
224
241
__slots__ = ("_handle" ,)
225
242
226
243
@abc .abstractmethod
227
- def __init__ (self , * args , ** kwargs ): ...
244
+ def __init__ (self , * args , ** kwargs ):
245
+ """Initialize the memory resource.
246
+
247
+ Subclasses may use additional arguments to configure the resource.
248
+ """
249
+ ...
228
250
229
251
@abc .abstractmethod
230
- def allocate (self , size , stream = None ) -> Buffer : ...
252
+ def allocate (self , size : int , stream : Stream = None ) -> Buffer :
253
+ """Allocate a buffer of the requested size.
254
+
255
+ Parameters
256
+ ----------
257
+ size : int
258
+ The size of the buffer to allocate, in bytes.
259
+ stream : Stream, optional
260
+ The stream on which to perform the allocation asynchronously.
261
+ If None, allocation is synchronous.
262
+
263
+ Returns
264
+ -------
265
+ Buffer
266
+ The allocated buffer object, which can be used for device or host operations
267
+ depending on the resource's properties.
268
+ """
269
+ ...
231
270
232
271
@abc .abstractmethod
233
- def deallocate (self , ptr , size , stream = None ): ...
272
+ def deallocate (self , ptr : DevicePointerT , size : int , stream : Stream = None ):
273
+ """Deallocate a buffer previously allocated by this resource.
274
+
275
+ Parameters
276
+ ----------
277
+ ptr : object
278
+ The pointer or handle to the buffer to deallocate.
279
+ size : int
280
+ The size of the buffer to deallocate, in bytes.
281
+ stream : Stream, optional
282
+ The stream on which to perform the deallocation asynchronously.
283
+ If None, deallocation is synchronous.
284
+ """
285
+ ...
234
286
235
287
@property
236
288
@abc .abstractmethod
237
289
def is_device_accessible (self ) -> bool :
238
- # Check if the buffers allocated from this MR can be accessed from
239
- # GPUs.
290
+ """bool: True if buffers allocated by this resource can be accessed on the device."""
240
291
...
241
292
242
293
@property
243
294
@abc .abstractmethod
244
295
def is_host_accessible (self ) -> bool :
245
- # Check if the buffers allocated from this MR can be accessed from
246
- # CPUs.
296
+ """bool: True if buffers allocated by this resource can be accessed on the host."""
247
297
...
248
298
249
299
@property
250
300
@abc .abstractmethod
251
301
def device_id (self ) -> int :
252
- # Return the device ID if this MR is for single devices. Raise an
253
- # exception if it is not.
302
+ """int: The device ordinal for which this memory resource is responsible.
303
+
304
+ Raises
305
+ ------
306
+ RuntimeError
307
+ If the resource is not bound to a specific device.
308
+ """
254
309
...
255
310
256
311
257
312
class _DefaultAsyncMempool (MemoryResource ):
258
313
__slots__ = ("_dev_id" ,)
259
314
260
- def __init__ (self , dev_id ):
315
+ def __init__ (self , dev_id : int ):
261
316
self ._handle = handle_return (driver .cuDeviceGetMemPool (dev_id ))
262
317
self ._dev_id = dev_id
263
318
264
- def allocate (self , size , stream = None ) -> Buffer :
319
+ def allocate (self , size : int , stream : Stream = None ) -> Buffer :
265
320
if stream is None :
266
321
stream = default_stream ()
267
322
ptr = handle_return (driver .cuMemAllocFromPoolAsync (size , self ._handle , stream .handle ))
268
- return Buffer (ptr , size , self )
323
+ return Buffer . _init (ptr , size , self )
269
324
270
- def deallocate (self , ptr , size , stream = None ):
325
+ def deallocate (self , ptr : DevicePointerT , size : int , stream : Stream = None ):
271
326
if stream is None :
272
327
stream = default_stream ()
273
328
handle_return (driver .cuMemFreeAsync (ptr , stream .handle ))
@@ -290,11 +345,11 @@ def __init__(self):
290
345
# TODO: support flags from cuMemHostAlloc?
291
346
self ._handle = None
292
347
293
- def allocate (self , size , stream = None ) -> Buffer :
348
+ def allocate (self , size : int , stream : Stream = None ) -> Buffer :
294
349
ptr = handle_return (driver .cuMemAllocHost (size ))
295
- return Buffer (ptr , size , self )
350
+ return Buffer . _init (ptr , size , self )
296
351
297
- def deallocate (self , ptr , size , stream = None ):
352
+ def deallocate (self , ptr : DevicePointerT , size : int , stream : Stream = None ):
298
353
handle_return (driver .cuMemFreeHost (ptr ))
299
354
300
355
@property
@@ -319,7 +374,7 @@ def __init__(self, dev_id):
319
374
320
375
def allocate (self , size , stream = None ) -> Buffer :
321
376
ptr = handle_return (driver .cuMemAlloc (size ))
322
- return Buffer (ptr , size , self )
377
+ return Buffer . _init (ptr , size , self )
323
378
324
379
def deallocate (self , ptr , size , stream = None ):
325
380
if stream is None :
0 commit comments