Skip to content

Commit 9ca8bd6

Browse files
committed
add event timing
1 parent 03001e5 commit 9ca8bd6

File tree

3 files changed

+44
-4
lines changed

3 files changed

+44
-4
lines changed

cuda_core/cuda/core/experimental/_event.py

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,25 @@ class Event:
4747
the last recorded stream.
4848
4949
Events can be used to monitor device's progress, query completion
50-
of work up to event's record, and help establish dependencies
51-
between GPU work submissions.
50+
of work up to event's record, help establish dependencies
51+
between GPU work submissions, and record the elapsed time on GPU:
52+
53+
.. code-block:: python
54+
55+
# To create events and record the timing:
56+
s = Device(0).create_stream()
57+
e1 = s.record(options={"enable_timing": True})
58+
# ... run some GPU work ...
59+
e2 = s.record(options={"enable_timing": True})
60+
e2.sync()
61+
print(f"time = {e2 - e1}")
62+
63+
# Or, if events are already created:
64+
s.record(e1)
65+
# ... run some more GPU work ...
66+
s.record(e2)
67+
e2.sync()
68+
print(f"time = {e2 - e1}")
5269
5370
Directly creating an :obj:`~_event.Event` is not supported due to ambiguity,
5471
and they should instead be created through a :obj:`~_stream.Stream` object.
@@ -96,6 +113,17 @@ def close(self):
96113
"""Destroy the event."""
97114
self._mnff.close()
98115

116+
def __isub__(self, other):
117+
return NotImplemented
118+
119+
def __rsub__(self, other):
120+
return NotImplemented
121+
122+
def __sub__(self, other):
    """Return the elapsed time between two events as ``self - other``.

    ``other`` is passed to the driver as the start event and ``self`` as the
    end event, so ``e2 - e1`` measures the time from ``e1`` to ``e2``.

    Parameters
    ----------
    other : Event
        The earlier (start) event.

    Returns
    -------
    float
        Elapsed time as reported by ``cuEventElapsedTime`` (milliseconds
        according to the CUDA driver API documentation). ``NotImplemented``
        is returned when ``other`` is not an ``Event``, which makes Python
        raise its standard ``TypeError`` for unsupported operands.

    Raises
    ------
    Whatever ``handle_return`` raises when the driver call reports an error.
    NOTE(review): the driver is expected to reject events created without
    timing enabled or not yet recorded/completed -- confirm against the CUDA
    driver documentation.
    """
    # Reject foreign operands up front rather than failing with an
    # AttributeError on ``other.handle`` below.
    if not isinstance(other, Event):
        return NotImplemented
    return handle_return(driver.cuEventElapsedTime(other.handle, self.handle))
126+
99127
@property
100128
def is_timing_disabled(self) -> bool:
101129
"""Return True if the event does not record timing data, otherwise False."""

cuda_core/docs/source/release/0.2.0-notes.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ New features
2525
- Expose :class:`ObjectCode` as a public API, which allows loading cubins from memory or disk. For loading other kinds of code types, please continue using :class:`Program`.
2626
- A C++ helper function ``get_cuda_native_handle()`` is provided in the new ``include/utility.cuh`` header to retrieve the underlying CUDA C objects (ex: ``CUstream``) from a Python object returned by the ``.handle`` attribute (ex: :attr:`Stream.handle`).
2727
- For objects such as :class:`Program` and :class:`Linker` that could dispatch to different backends, a new ``.backend`` attribute is provided to query this information.
28+
- Support CUDA event timing.
2829

2930
Limitations
3031
-----------

cuda_core/tests/test_event.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
# this software and related documentation outside the terms of the EULA
77
# is strictly prohibited.
88

9+
import time
10+
911
import pytest
1012

1113
from cuda.core.experimental import Device, EventOptions
@@ -15,8 +17,17 @@
1517
def test_timing(init_cuda, enable_timing):
    """Check the timing flag of recorded events and, if enabled, ``e2 - e1``.

    Two events are recorded on the same stream with a host-side sleep in
    between. Both events must report ``is_timing_disabled`` consistently with
    the requested option, and when timing is enabled their difference must be
    a float close to the sleep duration in milliseconds.

    ``enable_timing`` comes from the test parametrization, which is outside
    this block; the ``None`` case is handled explicitly below.
    """
    options = EventOptions(enable_timing=enable_timing)
    stream = Device().create_stream()
    delay_seconds = 0.5
    e1 = stream.record(options=options)
    time.sleep(delay_seconds)
    e2 = stream.record(options=options)

    # Both ``None`` and ``False`` are expected to yield a timing-disabled event.
    expect_timing_disabled = True if enable_timing is None else not enable_timing
    for e in (e1, e2):
        assert e.is_timing_disabled == expect_timing_disabled

    if enable_timing:
        # The elapsed time is only queried after the later event completed.
        e2.sync()
        elapsed_time_ms = e2 - e1
        assert isinstance(elapsed_time_ms, float)
        # Use a generous, symmetric tolerance: the reference delay comes from
        # time.sleep() on the host (subject to OS timer resolution and
        # scheduling), while the measurement comes from GPU event timestamps.
        # The goal is to exercise Event.__sub__, not to benchmark either clock.
        delay_ms = delay_seconds * 1000
        tolerance_ms = 100
        assert delay_ms - tolerance_ms <= elapsed_time_ms < delay_ms + tolerance_ms
2031

2132

2233
def test_is_sync_busy_waited(init_cuda):

0 commit comments

Comments
 (0)