Skip to content

Commit 58323ac

Browse files
committed
fix buffer destructor; highlight each MR needs to clarify the handling of stream=None; more docs
1 parent cc0f6ce commit 58323ac

File tree

3 files changed

+98
-23
lines changed

3 files changed

+98
-23
lines changed

cuda_core/cuda/core/experimental/_device.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
from cuda.core.experimental._context import Context, ContextOptions
99
from cuda.core.experimental._event import Event, EventOptions
10-
from cuda.core.experimental._memory import Buffer, MemoryResource, _DefaultAsyncMempool, _SynchronousMemoryResource
10+
from cuda.core.experimental._memory import Buffer, DeviceMemoryResource, MemoryResource, _SynchronousMemoryResource
1111
from cuda.core.experimental._stream import IsStreamT, Stream, StreamOptions, default_stream
1212
from cuda.core.experimental._utils.clear_error_support import assert_type
1313
from cuda.core.experimental._utils.cuda_utils import (
@@ -1003,7 +1003,7 @@ def __new__(cls, device_id: Optional[int] = None):
10031003
)
10041004
)
10051005
) == 1:
1006-
dev._mr = _DefaultAsyncMempool(dev_id)
1006+
dev._mr = DeviceMemoryResource(dev_id)
10071007
else:
10081008
dev._mr = _SynchronousMemoryResource(dev_id)
10091009

cuda_core/cuda/core/experimental/_memory.py

Lines changed: 92 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,6 @@ def __init__(self, buffer_obj, ptr, size, mr):
4444

4545
def close(self, stream=None):
4646
if self.ptr and self.mr is not None:
47-
if stream is None:
48-
stream = default_stream()
4947
self.mr.deallocate(self.ptr, self.size, stream)
5048
self.ptr = 0
5149
self.mr = None
@@ -71,8 +69,8 @@ def close(self, stream: Stream = None):
7169
Parameters
7270
----------
7371
stream : Stream, optional
74-
The stream object to use for asynchronous deallocation. If not set,
75-
the current default is to the default stream.
72+
The stream object to use for asynchronous deallocation. If None,
73+
the behavior depends on the underlying memory resource.
7674
"""
7775
self._mnff.close(stream)
7876

@@ -256,9 +254,10 @@ def allocate(self, size: int, stream: Stream = None) -> Buffer:
256254
----------
257255
size : int
258256
The size of the buffer to allocate, in bytes.
259-
stream : object, optional
257+
stream : Stream, optional
260258
The stream on which to perform the allocation asynchronously.
261-
If None, allocation is synchronous.
259+
If None, it is up to each memory resource implementation to decide
260+
and document the behavior.
262261
263262
Returns
264263
-------
@@ -274,13 +273,14 @@ def deallocate(self, ptr: DevicePointerT, size: int, stream: Stream = None):
274273
275274
Parameters
276275
----------
277-
ptr : object
276+
ptr : :obj:`~_memory.DevicePointerT`
278277
The pointer or handle to the buffer to deallocate.
279278
size : int
280279
The size of the buffer to deallocate, in bytes.
281-
stream : object, optional
280+
stream : Stream, optional
282281
The stream on which to perform the deallocation asynchronously.
283-
If None, deallocation is synchronous.
282+
If None, it is up to each memory resource implementation to decide
283+
and document the behavior.
284284
"""
285285
...
286286

@@ -309,68 +309,144 @@ def device_id(self) -> int:
309309
...
310310

311311

312-
class _DefaultAsyncMempool(MemoryResource):
312+
class DeviceMemoryResource(MemoryResource):
313+
"""Create a device memory resource that uses the driver's stream-ordered memory pool.
314+
315+
Parameters
316+
----------
317+
device_id : int
318+
Device ordinal for which a memory resource is constructed. The mempool that is
319+
set to *current* on ``device_id`` is used. If no mempool is set to current yet,
320+
the driver would use the *default* mempool on the device.
321+
"""
322+
313323
__slots__ = ("_dev_id",)
314324

315-
def __init__(self, dev_id: int):
316-
self._handle = handle_return(driver.cuDeviceGetMemPool(dev_id))
317-
self._dev_id = dev_id
325+
def __init__(self, device_id: int):
326+
self._handle = handle_return(driver.cuDeviceGetMemPool(device_id))
327+
self._dev_id = device_id
318328

319329
def allocate(self, size: int, stream: Stream = None) -> Buffer:
330+
"""Allocate a buffer of the requested size.
331+
332+
Parameters
333+
----------
334+
size : int
335+
The size of the buffer to allocate, in bytes.
336+
stream : Stream, optional
337+
The stream on which to perform the allocation asynchronously.
338+
If None, an internal stream is used.
339+
340+
Returns
341+
-------
342+
Buffer
343+
The allocated buffer object, which is accessible on the device that this memory
344+
resource was created for.
345+
"""
320346
if stream is None:
321347
stream = default_stream()
322348
ptr = handle_return(driver.cuMemAllocFromPoolAsync(size, self._handle, stream.handle))
323349
return Buffer._init(ptr, size, self)
324350

325351
def deallocate(self, ptr: DevicePointerT, size: int, stream: Stream = None):
352+
"""Deallocate a buffer previously allocated by this resource.
353+
354+
Parameters
355+
----------
356+
ptr : :obj:`~_memory.DevicePointerT`
357+
The pointer or handle to the buffer to deallocate.
358+
size : int
359+
The size of the buffer to deallocate, in bytes.
360+
stream : Stream, optional
361+
The stream on which to perform the deallocation asynchronously.
362+
If None, an internal stream is used.
363+
"""
326364
if stream is None:
327365
stream = default_stream()
328366
handle_return(driver.cuMemFreeAsync(ptr, stream.handle))
329367

330368
@property
331369
def is_device_accessible(self) -> bool:
370+
"""bool: this memory resource provides device-accessible buffers."""
332371
return True
333372

334373
@property
335374
def is_host_accessible(self) -> bool:
375+
"""bool: this memory resource does not provides host-accessible buffers."""
336376
return False
337377

338378
@property
339379
def device_id(self) -> int:
380+
"""int: the associated device ordinal."""
340381
return self._dev_id
341382

342383

343-
class _DefaultPinnedMemorySource(MemoryResource):
384+
class LegacyPinnedMemoryResource(MemoryResource):
385+
"""Create a pinned memory resource that uses legacy cuMemAllocHost/cudaMallocHost
386+
APIs.
387+
"""
388+
344389
def __init__(self):
345390
# TODO: support flags from cuMemHostAlloc?
346391
self._handle = None
347392

348393
def allocate(self, size: int, stream: Stream = None) -> Buffer:
394+
"""Allocate a buffer of the requested size.
395+
396+
Parameters
397+
----------
398+
size : int
399+
The size of the buffer to allocate, in bytes.
400+
stream : Stream, optional
401+
Currently ignored
402+
403+
Returns
404+
-------
405+
Buffer
406+
The allocated buffer object, which is accessible on both host and device.
407+
"""
349408
ptr = handle_return(driver.cuMemAllocHost(size))
350409
return Buffer._init(ptr, size, self)
351410

352411
def deallocate(self, ptr: DevicePointerT, size: int, stream: Stream = None):
412+
"""Deallocate a buffer previously allocated by this resource.
413+
414+
Parameters
415+
----------
416+
ptr : :obj:`~_memory.DevicePointerT`
417+
The pointer or handle to the buffer to deallocate.
418+
size : int
419+
The size of the buffer to deallocate, in bytes.
420+
stream : Stream, optional
421+
The stream on which to perform the deallocation asynchronously.
422+
If None, no synchronization would happen.
423+
"""
424+
if stream:
425+
stream.sync()
353426
handle_return(driver.cuMemFreeHost(ptr))
354427

355428
@property
356429
def is_device_accessible(self) -> bool:
430+
"""bool: this memory resource provides device-accessible buffers."""
357431
return True
358432

359433
@property
360434
def is_host_accessible(self) -> bool:
435+
"""bool: this memory resource provides host-accessible buffers."""
361436
return True
362437

363438
@property
364439
def device_id(self) -> int:
440+
"""This memory resource is not bound to any GPU."""
365441
raise RuntimeError("a pinned memory resource is not bound to any GPU")
366442

367443

368444
class _SynchronousMemoryResource(MemoryResource):
369445
__slots__ = ("_dev_id",)
370446

371-
def __init__(self, dev_id):
447+
def __init__(self, device_id):
372448
self._handle = None
373-
self._dev_id = dev_id
449+
self._dev_id = device_id
374450

375451
def allocate(self, size, stream=None) -> Buffer:
376452
ptr = handle_return(driver.cuMemAlloc(size))
@@ -393,7 +469,3 @@ def is_host_accessible(self) -> bool:
393469
@property
394470
def device_id(self) -> int:
395471
return self._dev_id
396-
397-
398-
DeviceMemoryResource = _DefaultAsyncMempool
399-
LegacyPinnedMemoryResource = _DefaultPinnedMemorySource

cuda_core/docs/source/release/0.3.0-notes.rst

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,10 @@ Highlights
1717
Breaking Changes
1818
----------------
1919

20-
- The internal ``Buffer`` object's ``__init__()`` method is removed, see below.
20+
- The :class:`Buffer` object's ``__init__()`` method is removed, see below.
21+
- The :class:`Buffer` object's :meth:`~Buffer.close` method and destructor now always defer to the underlying memory resource implementation
22+
to decide the behavior if a stream is not explicitly passed. Previously, in this case it always uses the default stream, which could
23+
interfere with the memory resource's assumptions.
2124

2225

2326
New features

0 commit comments

Comments
 (0)