1
+ # FILE: test_memory.py
2
+
3
+ from cuda .core .experimental ._memory import Buffer , MemoryResource
4
+ from cuda .core .experimental ._device import Device
5
+ from cuda import cuda
6
+ from cuda .core .experimental ._utils import handle_return
7
+ import ctypes
8
+
9
@pytest.fixture(scope="module")
def init_cuda():
    """Module-scoped fixture that makes the default ``Device()`` current."""
    # NOTE(review): `pytest` is not among the imports visible at the top of
    # this file -- confirm it is imported before this decorator runs.
    default_device = Device()
    default_device.set_current()
12
+
13
class DummyDeviceMemoryResource(MemoryResource):
    """Test memory resource that allocates device memory via the CUDA driver.

    Memory is obtained with ``cuMemAlloc`` and released with ``cuMemFree``.
    The resource reports itself as device-accessible and not host-accessible.
    """

    def __init__(self, device):
        # Stored only for reference; no method of this class reads it.
        self.device = device

    def allocate(self, size, stream=None) -> Buffer:
        """Allocate ``size`` bytes with ``cuMemAlloc`` and wrap them in a ``Buffer``.

        ``stream`` is accepted but not used.
        """
        ptr = handle_return(cuda.cuMemAlloc(size))
        return Buffer(ptr=ptr, size=size, mr=self)

    def deallocate(self, ptr, size, stream=None):
        """Release ``ptr`` with ``cuMemFree``.

        The driver result goes through ``handle_return``, exactly as in
        ``allocate``, so the status of the free is checked rather than
        discarded. ``size`` and ``stream`` are accepted but not used.
        """
        handle_return(cuda.cuMemFree(ptr))

    @property
    def is_device_accessible(self) -> bool:
        """Always ``True`` for this resource."""
        return True

    @property
    def is_host_accessible(self) -> bool:
        """Always ``False`` for this resource."""
        return False

    @property
    def device_id(self) -> int:
        """Always ``0``; the value is hard-coded, not taken from ``self.device``."""
        return 0
36
+
37
class DummyHostMemoryResource(MemoryResource):
    """Test memory resource backed by a plain ``ctypes`` byte array on the host."""

    def __init__(self):
        # Nothing to set up: this resource keeps no state.
        pass

    def allocate(self, size, stream=None) -> Buffer:
        """Create a ``ctypes`` byte array of ``size`` elements wrapped in a ``Buffer``.

        The array object itself is passed as ``ptr``. ``stream`` is accepted
        but not used.
        """
        ptr = (ctypes.c_byte * size)()
        return Buffer(ptr=ptr, size=size, mr=self)

    def deallocate(self, ptr, size, stream=None):
        """Do nothing.

        The ctypes array is reclaimed by Python's garbage collection, so there
        is no explicit release step.
        """

    @property
    def is_device_accessible(self) -> bool:
        """Always ``False`` for this resource."""
        return False

    @property
    def is_host_accessible(self) -> bool:
        """Always ``True`` for this resource."""
        return True

    @property
    def device_id(self) -> int:
        """Always raises ``RuntimeError``: this resource has no device id."""
        # The message names the host resource (not the pinned one) so a
        # failure points at the class that actually raised it.
        raise RuntimeError("the host memory resource is not bound to any GPU")
61
+
62
class DummyUnifiedMemoryResource(MemoryResource):
    """Test memory resource that allocates managed memory via the CUDA driver.

    Memory is obtained with ``cuMemAllocManaged`` (``CU_MEM_ATTACH_GLOBAL``)
    and released with ``cuMemFree``. The resource reports itself as both
    device- and host-accessible.
    """

    def __init__(self, device):
        # Stored only for reference; no method of this class reads it.
        self.device = device

    def allocate(self, size, stream=None) -> Buffer:
        """Allocate ``size`` bytes of managed memory and wrap them in a ``Buffer``.

        ``stream`` is accepted but not used.
        """
        attach_flag = cuda.CUmemAttach_flags.CU_MEM_ATTACH_GLOBAL.value
        ptr = handle_return(cuda.cuMemAllocManaged(size, attach_flag))
        return Buffer(ptr=ptr, size=size, mr=self)

    def deallocate(self, ptr, size, stream=None):
        """Release ``ptr`` with ``cuMemFree``.

        The driver result goes through ``handle_return``, exactly as in
        ``allocate``, so the status of the free is checked rather than
        discarded. ``size`` and ``stream`` are accepted but not used.
        """
        handle_return(cuda.cuMemFree(ptr))

    @property
    def is_device_accessible(self) -> bool:
        """Always ``True`` for this resource."""
        return True

    @property
    def is_host_accessible(self) -> bool:
        """Always ``True`` for this resource."""
        return True

    @property
    def device_id(self) -> int:
        """Always ``0``; the value is hard-coded, not taken from ``self.device``."""
        return 0
85
+
86
class DummyPinnedMemoryResource(MemoryResource):
    """Test memory resource that allocates host memory via the CUDA driver.

    Memory is obtained with ``cuMemAllocHost`` and released with
    ``cuMemFreeHost``. The resource reports itself as both device- and
    host-accessible and has no device id.
    """

    def __init__(self, device):
        # Stored only for reference; no method of this class reads it.
        self.device = device

    def allocate(self, size, stream=None) -> Buffer:
        """Allocate ``size`` bytes with ``cuMemAllocHost`` and wrap them in a ``Buffer``.

        ``stream`` is accepted but not used.
        """
        ptr = handle_return(cuda.cuMemAllocHost(size))
        return Buffer(ptr=ptr, size=size, mr=self)

    def deallocate(self, ptr, size, stream=None):
        """Release ``ptr`` with ``cuMemFreeHost``.

        The driver result goes through ``handle_return``, exactly as in
        ``allocate``, so the status of the free is checked rather than
        discarded. ``size`` and ``stream`` are accepted but not used.
        """
        handle_return(cuda.cuMemFreeHost(ptr))

    @property
    def is_device_accessible(self) -> bool:
        """Always ``True`` for this resource."""
        return True

    @property
    def is_host_accessible(self) -> bool:
        """Always ``True`` for this resource."""
        return True

    @property
    def device_id(self) -> int:
        """Always raises ``RuntimeError``: this resource has no device id."""
        raise RuntimeError("the pinned memory resource is not bound to any GPU")
109
+
110
def buffer_initialization(dummy_mr: MemoryResource):
    """Allocate a 1024-byte buffer from ``dummy_mr`` and check its reported state.

    Verifies that the buffer has a non-zero handle, the requested size, the
    allocating resource as ``memory_resource``, and the same device/host
    accessibility flags as ``dummy_mr``.
    """
    buffer = dummy_mr.allocate(size=1024)
    try:
        assert buffer.handle != 0
        assert buffer.size == 1024
        assert buffer.memory_resource == dummy_mr
        assert buffer.is_device_accessible == dummy_mr.is_device_accessible
        assert buffer.is_host_accessible == dummy_mr.is_host_accessible
    finally:
        # Release through the Buffer itself (even when a check fails) so it
        # does not keep a stale handle once the memory has been returned.
        # NOTE(review): presumably close() hands the memory back to the
        # owning resource -- confirm against Buffer.close().
        buffer.close()
118
+
119
def test_buffer_initialization():
    """Run ``buffer_initialization`` against every dummy memory resource."""
    dev = Device()
    dev.set_current()
    resources = (
        DummyDeviceMemoryResource(dev),
        DummyHostMemoryResource(),
        DummyUnifiedMemoryResource(dev),
        DummyPinnedMemoryResource(dev),
    )
    for resource in resources:
        buffer_initialization(resource)
126
+
127
def buffer_copy_to(dummy_mr: MemoryResource, device: Device, check=False):
    """Copy one 1024-byte buffer into another with ``Buffer.copy_to``.

    Both buffers are allocated from ``dummy_mr``. The copy is issued on a
    stream created from ``device`` and followed by ``device.sync()``.

    When ``check`` is true, the source is filled through a ``ctypes`` pointer
    before the copy and every byte of the destination is compared with it
    afterwards. This dereferences the buffer handles from Python, so it
    should only be enabled for memory the host can read and write.
    """
    nbytes = 1024
    src_buffer = dummy_mr.allocate(size=nbytes)
    dst_buffer = dummy_mr.allocate(size=nbytes)
    try:
        stream = device.create_stream()

        if check:
            src_ptr = ctypes.cast(src_buffer.handle, ctypes.POINTER(ctypes.c_byte))
            for i in range(nbytes):
                # c_byte does no overflow checking, so indices above 127 wrap
                # around and the fill is a repeating byte pattern.
                src_ptr[i] = ctypes.c_byte(i)

        src_buffer.copy_to(dst_buffer, stream=stream)
        device.sync()

        if check:
            dst_ptr = ctypes.cast(dst_buffer.handle, ctypes.POINTER(ctypes.c_byte))
            # Compare the whole buffer, not just a prefix, so a truncated
            # copy is detected.
            for i in range(nbytes):
                assert dst_ptr[i] == src_ptr[i]
    finally:
        # Release through the Buffers themselves (even when a check fails)
        # so they do not keep stale handles after the memory is returned.
        src_buffer.close()
        dst_buffer.close()
148
+
149
def test_buffer_copy_to():
    """Run ``buffer_copy_to`` for the device, unified and pinned resources.

    Only the pinned resource has its copied contents verified.
    """
    dev = Device()
    dev.set_current()
    cases = (
        (DummyDeviceMemoryResource(dev), False),
        (DummyUnifiedMemoryResource(dev), False),
        (DummyPinnedMemoryResource(dev), True),
    )
    for resource, verify in cases:
        buffer_copy_to(resource, dev, check=verify)
155
+
156
def buffer_copy_from(dummy_mr: MemoryResource, device, check=False):
    """Fill one 1024-byte buffer from another with ``Buffer.copy_from``.

    Both buffers are allocated from ``dummy_mr``. The copy is issued on a
    stream created from ``device`` and followed by ``device.sync()``.

    When ``check`` is true, the source is filled through a ``ctypes`` pointer
    before the copy and every byte of the destination is compared with it
    afterwards. This dereferences the buffer handles from Python, so it
    should only be enabled for memory the host can read and write.
    """
    nbytes = 1024
    src_buffer = dummy_mr.allocate(size=nbytes)
    dst_buffer = dummy_mr.allocate(size=nbytes)
    try:
        stream = device.create_stream()

        if check:
            src_ptr = ctypes.cast(src_buffer.handle, ctypes.POINTER(ctypes.c_byte))
            for i in range(nbytes):
                # c_byte does no overflow checking, so indices above 127 wrap
                # around and the fill is a repeating byte pattern.
                src_ptr[i] = ctypes.c_byte(i)

        dst_buffer.copy_from(src_buffer, stream=stream)
        device.sync()

        if check:
            dst_ptr = ctypes.cast(dst_buffer.handle, ctypes.POINTER(ctypes.c_byte))
            # Compare the whole buffer, not just a prefix, so a truncated
            # copy is detected.
            for i in range(nbytes):
                assert dst_ptr[i] == src_ptr[i]
    finally:
        # Release through the Buffers themselves (even when a check fails)
        # so they do not keep stale handles after the memory is returned.
        src_buffer.close()
        dst_buffer.close()
177
+
178
def test_buffer_copy_from():
    """Run ``buffer_copy_from`` for the device, unified and pinned resources.

    Only the pinned resource has its copied contents verified.
    """
    dev = Device()
    dev.set_current()
    cases = (
        (DummyDeviceMemoryResource(dev), False),
        (DummyUnifiedMemoryResource(dev), False),
        (DummyPinnedMemoryResource(dev), True),
    )
    for resource, verify in cases:
        buffer_copy_from(resource, dev, check=verify)
184
+
185
def buffer_close(dummy_mr: MemoryResource):
    """Check that closing a buffer allocated from ``dummy_mr`` resets it.

    After ``close()`` the buffer must report a handle of ``0`` and no memory
    resource.
    """
    buffer = dummy_mr.allocate(size=1024)
    buffer.close()
    assert buffer.handle == 0
    # Identity check: only the ``None`` singleton is acceptable here, and
    # ``is`` cannot be fooled by a custom ``__eq__``.
    assert buffer.memory_resource is None
190
+
191
def test_buffer_close():
    """Run ``buffer_close`` against every dummy memory resource."""
    dev = Device()
    dev.set_current()
    resources = (
        DummyDeviceMemoryResource(dev),
        DummyHostMemoryResource(),
        DummyUnifiedMemoryResource(dev),
        DummyPinnedMemoryResource(dev),
    )
    for resource in resources:
        buffer_close(resource)
198
+
199
# Run the copy-to check only when this file is executed directly. Without the
# guard the call runs on every import of the module, including test
# collection, where it would execute ahead of (and in addition to) the
# collected test.
if __name__ == "__main__":
    test_buffer_copy_to()
0 commit comments