5
5
# this software. Any use, reproduction, disclosure, or distribution of
6
6
# this software and related documentation outside the terms of the EULA
7
7
# is strictly prohibited.
8
- import pytest
9
8
import ctypes
10
9
10
+ import pytest
11
+
11
12
# Always skip, since cupy does not support CTK 12.x yet
12
13
# The benchmarks in this module are gated on ``skip_tests``. It is forced to
# True here, so the import probe below is currently never executed; it is
# kept so the gate can be re-enabled by flipping the flag.
skip_tests = True
if not skip_tests:
    try:
        import cupy
    except ImportError:
        # cupy is unavailable: keep every benchmark skipped.
        skip_tests = True
    else:
        skip_tests = False
19
21
20
22
from .kernels import kernel_string
21
23
24
+
22
25
def launch(kernel, args=()):
    """Invoke ``kernel`` once with ``(1,)`` for both launch dimensions.

    This is the callable the tests in this file hand to ``benchmark``, so it
    is deliberately a single call with no extra work around it.

    Args:
        kernel: Callable invoked as ``kernel((1,), (1,), args)``. The tests
            pass functions obtained from ``cupy.RawModule.get_function``,
            for which the two tuples are the grid and block dimensions.
        args: Kernel arguments, forwarded unchanged as the third positional
            argument. Defaults to an empty tuple (no kernel parameters).
    """
    kernel((1,), (1,), args)
24
27
28
+
25
29
# Measure launch latency with no parameters
26
30
@pytest.mark.skipif(skip_tests, reason="cupy is not installed")
@pytest.mark.benchmark(group="cupy")
def test_launch_latency_empty_kernel(benchmark):
    """Benchmark ``launch`` on ``empty_kernel``, passing no kernel arguments.

    The kernel is looked up in a ``cupy.RawModule`` built from
    ``kernel_string`` and launched through the ``benchmark`` fixture while a
    non-blocking stream is active.

    Args:
        benchmark: The pytest-benchmark fixture; it is called with ``launch``
            and the kernel, so ``launch`` is the code being timed.
    """
    module = cupy.RawModule(code=kernel_string)
    kernel = module.get_function("empty_kernel")

    stream = cupy.cuda.stream.Stream(non_blocking=True)

    # Entering the stream makes it cupy's current stream for the launches
    # issued by ``benchmark``.
    with stream:
        benchmark(launch, kernel)
    # Wait for the work queued on the stream before the test returns.
    stream.synchronize()
37
41
42
+
38
43
# Measure launch latency with a single parameter
39
44
@pytest .mark .skipif (skip_tests , reason = "cupy is not installed" )
40
45
@pytest .mark .benchmark (group = "cupy" )
41
46
def test_launch_latency_small_kernel (benchmark ):
42
47
module = cupy .RawModule (code = kernel_string )
43
- kernel = module .get_function (' small_kernel' )
48
+ kernel = module .get_function (" small_kernel" )
44
49
cupy .cuda .set_allocator ()
45
50
arg = cupy .cuda .alloc (ctypes .sizeof (ctypes .c_float ))
46
51
@@ -50,12 +55,13 @@ def test_launch_latency_small_kernel(benchmark):
50
55
benchmark (launch , kernel , (arg ,))
51
56
stream .synchronize ()
52
57
58
+
53
59
# Measure launch latency with many parameters using builtin parameter packing
54
60
@pytest .mark .skipif (skip_tests , reason = "cupy is not installed" )
55
61
@pytest .mark .benchmark (group = "cupy" )
56
62
def test_launch_latency_small_kernel_512_args (benchmark ):
57
63
module = cupy .RawModule (code = kernel_string )
58
- kernel = module .get_function (' small_kernel_512_args' )
64
+ kernel = module .get_function (" small_kernel_512_args" )
59
65
cupy .cuda .set_allocator ()
60
66
61
67
args = []
@@ -69,12 +75,13 @@ def test_launch_latency_small_kernel_512_args(benchmark):
69
75
benchmark (launch , kernel , args )
70
76
stream .synchronize ()
71
77
78
+
72
79
# Measure launch latency with 512 bool parameters using builtin parameter packing
73
80
@pytest .mark .skipif (skip_tests , reason = "cupy is not installed" )
74
81
@pytest .mark .benchmark (group = "cupy" )
75
82
def test_launch_latency_small_kernel_512_bools (benchmark ):
76
83
module = cupy .RawModule (code = kernel_string )
77
- kernel = module .get_function (' small_kernel_512_bools' )
84
+ kernel = module .get_function (" small_kernel_512_bools" )
78
85
cupy .cuda .set_allocator ()
79
86
80
87
args = [True ] * 512
@@ -86,12 +93,13 @@ def test_launch_latency_small_kernel_512_bools(benchmark):
86
93
benchmark (launch , kernel , args )
87
94
stream .synchronize ()
88
95
96
+
89
97
# Measure launch latency with 512 double parameters using builtin parameter packing
90
98
@pytest .mark .skipif (skip_tests , reason = "cupy is not installed" )
91
99
@pytest .mark .benchmark (group = "cupy" )
92
100
def test_launch_latency_small_kernel_512_doubles (benchmark ):
93
101
module = cupy .RawModule (code = kernel_string )
94
- kernel = module .get_function (' small_kernel_512_doubles' )
102
+ kernel = module .get_function (" small_kernel_512_doubles" )
95
103
cupy .cuda .set_allocator ()
96
104
97
105
args = [1.2345 ] * 512
@@ -103,12 +111,13 @@ def test_launch_latency_small_kernel_512_doubles(benchmark):
103
111
benchmark (launch , kernel , args )
104
112
stream .synchronize ()
105
113
114
+
106
115
# Measure launch latency with 512 int parameters using builtin parameter packing
107
116
@pytest .mark .skipif (skip_tests , reason = "cupy is not installed" )
108
117
@pytest .mark .benchmark (group = "cupy" )
109
118
def test_launch_latency_small_kernel_512_ints (benchmark ):
110
119
module = cupy .RawModule (code = kernel_string )
111
- kernel = module .get_function (' small_kernel_512_ints' )
120
+ kernel = module .get_function (" small_kernel_512_ints" )
112
121
cupy .cuda .set_allocator ()
113
122
114
123
args = [123 ] * 512
@@ -120,12 +129,13 @@ def test_launch_latency_small_kernel_512_ints(benchmark):
120
129
benchmark (launch , kernel , args )
121
130
stream .synchronize ()
122
131
132
+
123
133
# Measure launch latency with 512 byte (char) parameters using builtin parameter packing
124
134
@pytest .mark .skipif (skip_tests , reason = "cupy is not installed" )
125
135
@pytest .mark .benchmark (group = "cupy" )
126
136
def test_launch_latency_small_kernel_512_bytes (benchmark ):
127
137
module = cupy .RawModule (code = kernel_string )
128
- kernel = module .get_function (' small_kernel_512_chars' )
138
+ kernel = module .get_function (" small_kernel_512_chars" )
129
139
cupy .cuda .set_allocator ()
130
140
131
141
args = [127 ] * 512
@@ -137,12 +147,13 @@ def test_launch_latency_small_kernel_512_bytes(benchmark):
137
147
benchmark (launch , kernel , args )
138
148
stream .synchronize ()
139
149
150
+
140
151
# Measure launch latency with 512 long long parameters using builtin parameter packing
141
152
@pytest .mark .skipif (skip_tests , reason = "cupy is not installed" )
142
153
@pytest .mark .benchmark (group = "cupy" )
143
154
def test_launch_latency_small_kernel_512_longlongs (benchmark ):
144
155
module = cupy .RawModule (code = kernel_string )
145
- kernel = module .get_function (' small_kernel_512_longlongs' )
156
+ kernel = module .get_function (" small_kernel_512_longlongs" )
146
157
cupy .cuda .set_allocator ()
147
158
148
159
args = [9223372036854775806 ] * 512
@@ -154,12 +165,13 @@ def test_launch_latency_small_kernel_512_longlongs(benchmark):
154
165
benchmark (launch , kernel , args )
155
166
stream .synchronize ()
156
167
168
+
157
169
# Measure launch latency with 256 parameters using builtin parameter packing
158
170
@pytest .mark .skipif (skip_tests , reason = "cupy is not installed" )
159
171
@pytest .mark .benchmark (group = "cupy" )
160
172
def test_launch_latency_small_kernel_256_args (benchmark ):
161
173
module = cupy .RawModule (code = kernel_string )
162
- kernel = module .get_function (' small_kernel_256_args' )
174
+ kernel = module .get_function (" small_kernel_256_args" )
163
175
cupy .cuda .set_allocator ()
164
176
165
177
args = []
@@ -173,12 +185,13 @@ def test_launch_latency_small_kernel_256_args(benchmark):
173
185
benchmark (launch , kernel , args )
174
186
stream .synchronize ()
175
187
188
+
176
189
# Measure launch latency with 16 parameters using builtin parameter packing
177
190
@pytest .mark .skipif (skip_tests , reason = "cupy is not installed" )
178
191
@pytest .mark .benchmark (group = "cupy" )
179
192
def test_launch_latency_small_kernel_16_args (benchmark ):
180
193
module = cupy .RawModule (code = kernel_string )
181
- kernel = module .get_function (' small_kernel_16_args' )
194
+ kernel = module .get_function (" small_kernel_16_args" )
182
195
cupy .cuda .set_allocator ()
183
196
184
197
args = []
0 commit comments