Skip to content

Commit 28537dd

Browse files
committed
re-run linter and formatter before submit
1 parent e36241a commit 28537dd

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

64 files changed

+3949
-2503
lines changed

.pre-commit-config.yaml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
# Copyright (c) 2024, NVIDIA CORPORATION.
2+
3+
repos:
4+
- repo: https://github.com/astral-sh/ruff-pre-commit
5+
rev: v0.6.4
6+
hooks:
7+
- id: ruff
8+
args: [--fix, --show-fixes]
9+
- id: ruff-format
10+
11+
default_language_version:
12+
python: python3

continuous_integration/scripts/render-template.py

Lines changed: 27 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,9 @@
33
import argparse
44
import json
55
from jinja2 import Environment, FileSystemLoader
6-
import os
76
import re
87

8+
99
# TODO: make this work for arbitrary context, i.e. implement replace_using_context()
1010
def replace_placeholder(source_str, variable_name, variable_value):
1111
# Escaping any regex special characters in variable_name
@@ -14,39 +14,49 @@ def replace_placeholder(source_str, variable_name, variable_value):
1414
# Using regular expression to replace ${variable_name} with actual variable_value
1515
# \s* means any amount of whitespace (including none)
1616
# pattern = rf'\$\{{\s*\{{\s*{variable_name_escaped}\s*\}}\s*\}}'
17-
pattern = rf'<<\s*{variable_name_escaped}\s*>>'
17+
pattern = rf"<<\s*{variable_name_escaped}\s*>>"
1818
return re.sub(pattern, variable_value.strip(), source_str)
1919

20+
2021
# Setup command-line argument parsing
21-
parser = argparse.ArgumentParser(description='Render a Jinja2 template using a JSON context.')
22-
parser.add_argument('template_file', type=str, help='Path to the Jinja2 template file (with .j2 extension).')
23-
parser.add_argument('json_file', type=str, help='Path to the JSON file to use as the rendering context.')
24-
parser.add_argument('output_file', type=str, help='Path to the output file.')
22+
parser = argparse.ArgumentParser(
23+
description="Render a Jinja2 template using a JSON context."
24+
)
25+
parser.add_argument(
26+
"template_file",
27+
type=str,
28+
help="Path to the Jinja2 template file (with .j2 extension).",
29+
)
30+
parser.add_argument(
31+
"json_file", type=str, help="Path to the JSON file to use as the rendering context."
32+
)
33+
parser.add_argument("output_file", type=str, help="Path to the output file.")
2534

2635
args = parser.parse_args()
2736

2837
# Load JSON file as the rendering context
29-
with open(args.json_file, 'r') as file:
38+
with open(args.json_file, "r") as file:
3039
context = json.load(file)
3140

3241
# Setup Jinja2 environment and load the template
3342
env = Environment(
34-
loader=FileSystemLoader(searchpath='./'),
35-
variable_start_string='<<',
36-
variable_end_string='>>',
37-
block_start_string='<%',
38-
block_end_string='%>',
39-
comment_start_string='<#',
40-
comment_end_string='#>')
41-
env.filters['replace_placeholder'] = replace_placeholder
43+
loader=FileSystemLoader(searchpath="./"),
44+
variable_start_string="<<",
45+
variable_end_string=">>",
46+
block_start_string="<%",
47+
block_end_string="%>",
48+
comment_start_string="<#",
49+
comment_end_string="#>",
50+
)
51+
env.filters["replace_placeholder"] = replace_placeholder
4252

4353
template = env.get_template(args.template_file)
4454

4555
# Render the template with the context
4656
rendered_content = template.render(context)
4757
# print(rendered_content)
4858

49-
with open(args.output_file, 'w') as file:
59+
with open(args.output_file, "w") as file:
5060
file.write(rendered_content)
5161

52-
print(f'Template rendered successfully. Output saved to {args.output_file}')
62+
print(f"Template rendered successfully. Output saved to {args.output_file}")

cuda_bindings/benchmarks/kernels.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,9 @@
55
# this software. Any use, reproduction, disclosure, or distribution of
66
# this software and related documentation outside the terms of the EULA
77
# is strictly prohibited.
8-
kernel_string = '''\
8+
kernel_string = """\
99
#define ITEM_PARAM(x, T) T x
10-
#define REP1(x, T) , ITEM_PARAM(x, T)
10+
#define REP1(x, T) , ITEM_PARAM(x, T)
1111
#define REP2(x, T) REP1(x##0, T) REP1(x##1, T)
1212
#define REP4(x, T) REP2(x##0, T) REP2(x##1, T)
1313
#define REP8(x, T) REP4(x##0, T) REP4(x##1, T)
@@ -160,4 +160,4 @@
160160
// Do not touch param to prevent compiler from copying
161161
// the whole structure from const bank to lmem.
162162
}
163-
'''
163+
"""

cuda_bindings/benchmarks/perf_test_utils.py

Lines changed: 27 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -5,27 +5,30 @@
55
# this software. Any use, reproduction, disclosure, or distribution of
66
# this software and related documentation outside the terms of the EULA
77
# is strictly prohibited.
8+
import numpy as np
89
import pytest
10+
911
from cuda import cuda, cudart, nvrtc
10-
import numpy as np
12+
1113

1214
def ASSERT_DRV(err):
1315
if isinstance(err, cuda.CUresult):
1416
if err != cuda.CUresult.CUDA_SUCCESS:
15-
raise RuntimeError('Cuda Error: {}'.format(err))
17+
raise RuntimeError(f"Cuda Error: {err}")
1618
elif isinstance(err, cudart.cudaError_t):
1719
if err != cudart.cudaError_t.cudaSuccess:
18-
raise RuntimeError('Cudart Error: {}'.format(err))
20+
raise RuntimeError(f"Cudart Error: {err}")
1921
elif isinstance(err, nvrtc.nvrtcResult):
2022
if err != nvrtc.nvrtcResult.NVRTC_SUCCESS:
21-
raise RuntimeError('Nvrtc Error: {}'.format(err))
23+
raise RuntimeError(f"Nvrtc Error: {err}")
2224
else:
23-
raise RuntimeError('Unknown error type: {}'.format(err))
25+
raise RuntimeError(f"Unknown error type: {err}")
26+
2427

2528
@pytest.fixture
2629
def init_cuda():
2730
# Initialize
28-
err, = cuda.cuInit(0)
31+
(err,) = cuda.cuInit(0)
2932
ASSERT_DRV(err)
3033
err, device = cuda.cuDeviceGet(0)
3134
ASSERT_DRV(err)
@@ -38,31 +41,37 @@ def init_cuda():
3841

3942
yield device, ctx, stream
4043

41-
err, = cuda.cuStreamDestroy(stream)
44+
(err,) = cuda.cuStreamDestroy(stream)
4245
ASSERT_DRV(err)
43-
err, = cuda.cuCtxDestroy(ctx)
46+
(err,) = cuda.cuCtxDestroy(ctx)
4447
ASSERT_DRV(err)
4548

49+
4650
@pytest.fixture
4751
def load_module():
4852
module = None
53+
4954
def _load_module(kernel_string, device):
5055
nonlocal module
5156
# Get module
52-
err, major = cuda.cuDeviceGetAttribute(cuda.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, device)
57+
err, major = cuda.cuDeviceGetAttribute(
58+
cuda.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, device
59+
)
5360
ASSERT_DRV(err)
54-
err, minor = cuda.cuDeviceGetAttribute(cuda.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, device)
61+
err, minor = cuda.cuDeviceGetAttribute(
62+
cuda.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, device
63+
)
5564
ASSERT_DRV(err)
5665

57-
err, prog = nvrtc.nvrtcCreateProgram(str.encode(kernel_string), b'kernelString.cu', 0, [], [])
66+
err, prog = nvrtc.nvrtcCreateProgram(str.encode(kernel_string), b"kernelString.cu", 0, [], [])
5867
ASSERT_DRV(err)
59-
opts = [b'--fmad=false', bytes('--gpu-architecture=sm_' + str(major) + str(minor), 'ascii')]
60-
err, = nvrtc.nvrtcCompileProgram(prog, 2, opts)
68+
opts = [b"--fmad=false", bytes("--gpu-architecture=sm_" + str(major) + str(minor), "ascii")]
69+
(err,) = nvrtc.nvrtcCompileProgram(prog, 2, opts)
6170

6271
err_log, logSize = nvrtc.nvrtcGetProgramLogSize(prog)
6372
ASSERT_DRV(err_log)
64-
log = b' ' * logSize
65-
err_log, = nvrtc.nvrtcGetProgramLog(prog, log)
73+
log = b" " * logSize
74+
(err_log,) = nvrtc.nvrtcGetProgramLog(prog, log)
6675
ASSERT_DRV(err_log)
6776
result = log.decode()
6877
if len(result) > 1:
@@ -71,8 +80,8 @@ def _load_module(kernel_string, device):
7180
ASSERT_DRV(err)
7281
err, cubinSize = nvrtc.nvrtcGetCUBINSize(prog)
7382
ASSERT_DRV(err)
74-
cubin = b' ' * cubinSize
75-
err, = nvrtc.nvrtcGetCUBIN(prog, cubin)
83+
cubin = b" " * cubinSize
84+
(err,) = nvrtc.nvrtcGetCUBIN(prog, cubin)
7685
ASSERT_DRV(err)
7786
cubin = np.char.array(cubin)
7887
err, module = cuda.cuModuleLoadData(cubin)
@@ -82,5 +91,5 @@ def _load_module(kernel_string, device):
8291

8392
yield _load_module
8493

85-
err, = cuda.cuModuleUnload(module)
94+
(err,) = cuda.cuModuleUnload(module)
8695
ASSERT_DRV(err)

cuda_bindings/benchmarks/test_cupy.py

Lines changed: 24 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -5,42 +5,47 @@
55
# this software. Any use, reproduction, disclosure, or distribution of
66
# this software and related documentation outside the terms of the EULA
77
# is strictly prohibited.
8-
import pytest
98
import ctypes
109

10+
import pytest
11+
1112
# Always skip since cupy is not CTK 12.x yet
1213
skip_tests = True
1314
if not skip_tests:
1415
try:
1516
import cupy
17+
1618
skip_tests = False
1719
except ImportError:
1820
skip_tests = True
1921

2022
from .kernels import kernel_string
2123

24+
2225
def launch(kernel, args=()):
2326
kernel((1,), (1,), args)
2427

28+
2529
# Measure launch latency with no parameters
2630
@pytest.mark.skipif(skip_tests, reason="cupy is not installed")
2731
@pytest.mark.benchmark(group="cupy")
2832
def test_launch_latency_empty_kernel(benchmark):
2933
module = cupy.RawModule(code=kernel_string)
30-
kernel = module.get_function('empty_kernel')
34+
kernel = module.get_function("empty_kernel")
3135

3236
stream = cupy.cuda.stream.Stream(non_blocking=True)
3337

3438
with stream:
3539
benchmark(launch, kernel)
3640
stream.synchronize()
3741

42+
3843
# Measure launch latency with a single parameter
3944
@pytest.mark.skipif(skip_tests, reason="cupy is not installed")
4045
@pytest.mark.benchmark(group="cupy")
4146
def test_launch_latency_small_kernel(benchmark):
4247
module = cupy.RawModule(code=kernel_string)
43-
kernel = module.get_function('small_kernel')
48+
kernel = module.get_function("small_kernel")
4449
cupy.cuda.set_allocator()
4550
arg = cupy.cuda.alloc(ctypes.sizeof(ctypes.c_float))
4651

@@ -50,12 +55,13 @@ def test_launch_latency_small_kernel(benchmark):
5055
benchmark(launch, kernel, (arg,))
5156
stream.synchronize()
5257

58+
5359
# Measure launch latency with many parameters using builtin parameter packing
5460
@pytest.mark.skipif(skip_tests, reason="cupy is not installed")
5561
@pytest.mark.benchmark(group="cupy")
5662
def test_launch_latency_small_kernel_512_args(benchmark):
5763
module = cupy.RawModule(code=kernel_string)
58-
kernel = module.get_function('small_kernel_512_args')
64+
kernel = module.get_function("small_kernel_512_args")
5965
cupy.cuda.set_allocator()
6066

6167
args = []
@@ -69,12 +75,13 @@ def test_launch_latency_small_kernel_512_args(benchmark):
6975
benchmark(launch, kernel, args)
7076
stream.synchronize()
7177

78+
7279
# Measure launch latency with many parameters using builtin parameter packing
7380
@pytest.mark.skipif(skip_tests, reason="cupy is not installed")
7481
@pytest.mark.benchmark(group="cupy")
7582
def test_launch_latency_small_kernel_512_bools(benchmark):
7683
module = cupy.RawModule(code=kernel_string)
77-
kernel = module.get_function('small_kernel_512_bools')
84+
kernel = module.get_function("small_kernel_512_bools")
7885
cupy.cuda.set_allocator()
7986

8087
args = [True] * 512
@@ -86,12 +93,13 @@ def test_launch_latency_small_kernel_512_bools(benchmark):
8693
benchmark(launch, kernel, args)
8794
stream.synchronize()
8895

96+
8997
# Measure launch latency with many parameters using builtin parameter packing
9098
@pytest.mark.skipif(skip_tests, reason="cupy is not installed")
9199
@pytest.mark.benchmark(group="cupy")
92100
def test_launch_latency_small_kernel_512_doubles(benchmark):
93101
module = cupy.RawModule(code=kernel_string)
94-
kernel = module.get_function('small_kernel_512_doubles')
102+
kernel = module.get_function("small_kernel_512_doubles")
95103
cupy.cuda.set_allocator()
96104

97105
args = [1.2345] * 512
@@ -103,12 +111,13 @@ def test_launch_latency_small_kernel_512_doubles(benchmark):
103111
benchmark(launch, kernel, args)
104112
stream.synchronize()
105113

114+
106115
# Measure launch latency with many parameters using builtin parameter packing
107116
@pytest.mark.skipif(skip_tests, reason="cupy is not installed")
108117
@pytest.mark.benchmark(group="cupy")
109118
def test_launch_latency_small_kernel_512_ints(benchmark):
110119
module = cupy.RawModule(code=kernel_string)
111-
kernel = module.get_function('small_kernel_512_ints')
120+
kernel = module.get_function("small_kernel_512_ints")
112121
cupy.cuda.set_allocator()
113122

114123
args = [123] * 512
@@ -120,12 +129,13 @@ def test_launch_latency_small_kernel_512_ints(benchmark):
120129
benchmark(launch, kernel, args)
121130
stream.synchronize()
122131

132+
123133
# Measure launch latency with many parameters using builtin parameter packing
124134
@pytest.mark.skipif(skip_tests, reason="cupy is not installed")
125135
@pytest.mark.benchmark(group="cupy")
126136
def test_launch_latency_small_kernel_512_bytes(benchmark):
127137
module = cupy.RawModule(code=kernel_string)
128-
kernel = module.get_function('small_kernel_512_chars')
138+
kernel = module.get_function("small_kernel_512_chars")
129139
cupy.cuda.set_allocator()
130140

131141
args = [127] * 512
@@ -137,12 +147,13 @@ def test_launch_latency_small_kernel_512_bytes(benchmark):
137147
benchmark(launch, kernel, args)
138148
stream.synchronize()
139149

150+
140151
# Measure launch latency with many parameters using builtin parameter packing
141152
@pytest.mark.skipif(skip_tests, reason="cupy is not installed")
142153
@pytest.mark.benchmark(group="cupy")
143154
def test_launch_latency_small_kernel_512_longlongs(benchmark):
144155
module = cupy.RawModule(code=kernel_string)
145-
kernel = module.get_function('small_kernel_512_longlongs')
156+
kernel = module.get_function("small_kernel_512_longlongs")
146157
cupy.cuda.set_allocator()
147158

148159
args = [9223372036854775806] * 512
@@ -154,12 +165,13 @@ def test_launch_latency_small_kernel_512_longlongs(benchmark):
154165
benchmark(launch, kernel, args)
155166
stream.synchronize()
156167

168+
157169
# Measure launch latency with many parameters using builtin parameter packing
158170
@pytest.mark.skipif(skip_tests, reason="cupy is not installed")
159171
@pytest.mark.benchmark(group="cupy")
160172
def test_launch_latency_small_kernel_256_args(benchmark):
161173
module = cupy.RawModule(code=kernel_string)
162-
kernel = module.get_function('small_kernel_256_args')
174+
kernel = module.get_function("small_kernel_256_args")
163175
cupy.cuda.set_allocator()
164176

165177
args = []
@@ -173,12 +185,13 @@ def test_launch_latency_small_kernel_256_args(benchmark):
173185
benchmark(launch, kernel, args)
174186
stream.synchronize()
175187

188+
176189
# Measure launch latency with many parameters using builtin parameter packing
177190
@pytest.mark.skipif(skip_tests, reason="cupy is not installed")
178191
@pytest.mark.benchmark(group="cupy")
179192
def test_launch_latency_small_kernel_16_args(benchmark):
180193
module = cupy.RawModule(code=kernel_string)
181-
kernel = module.get_function('small_kernel_16_args')
194+
kernel = module.get_function("small_kernel_16_args")
182195
cupy.cuda.set_allocator()
183196

184197
args = []

0 commit comments

Comments
 (0)