Skip to content

Commit fb7b109

Browse files
add the polybench testcases to the test folder, with golden files and a bash script for a user to generate the so files
1 parent beb22b8 commit fb7b109

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

62 files changed

+14675
-0
lines changed

tests/polybench/README.md

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
# Overview
2+
3+
This test suite implements the [PolyBench](https://sourceforge.net/projects/polybench/) test cases in PyDSL.
4+
PolyBench comprises 30 tests designed to serve as benchmarks for various application domains.
5+
Currently, this test suite does not have a nussinov implementation, since LLVM-19
6+
does not have MLIR python bindings for the affine.if op.
7+
8+
## Usage
9+
10+
These test cases run as part of the normal test suite whenever a user runs `hatch test`.
11+
In this case it will run the PyDSL implementation of each benchmark, and then
12+
check it against the hardcoded expected output found in `golden_files`. If you
13+
want to run a particular benchmark yourself to see the output, you can simply
14+
run `python benchmark_name.py` in the `benchmarks` directory,
15+
where `benchmark_name` is the name of the benchmark you want to run, and it
16+
will print the SMALL_DATASET output array as well as the amount of time the
17+
benchmark took to run.
18+
19+
### Testing against PolyBench/C
20+
21+
These testcases can also check against the original PolyBench C implementation.
22+
In order to do so, first download the [PolyBench/C](https://sourceforge.net/projects/polybench/) test suite, and then
23+
place the `PolyBenchC-4.2.1-master` folder in the `polybench` directory (make sure that it has
24+
`utilities`, `linear-algebra`, etc. as immediate subdirectories).
25+
Then you can simply run `bash generate_polybench_folder.sh PolyBenchC-4.2.1-master`
26+
and the script will automatically compile all the required .so files and store them in
27+
a directory called `polybench_so_files`. Now simply by calling `hatch test` the
28+
test cases will be checked against the actual C implementations of PolyBench,
29+
instead of simply against the hardcoded golden files.

tests/polybench/__init__.py

Whitespace-only changes.

tests/polybench/benchmarks/2mm.py

Lines changed: 167 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,167 @@
1+
import sys
2+
3+
4+
from pydsl.type import Index, F32
5+
from pydsl.memref import MemRefFactory, DYNAMIC
6+
from pydsl.frontend import compile
7+
from pydsl.affine import (
8+
affine_range as arange,
9+
)
10+
import numpy as np
11+
from timeit import timeit
12+
import ctypes
13+
14+
# One two-dimensional F32 memref type per kernel operand, with both extents
# left as DYNAMIC. The two-letter suffix names the pair of size parameters
# that bound the operand (e.g. IJ is the ni x nj matrix). Each alias comes
# from its own factory call so the types stay distinct objects.
_DYNAMIC_2D = (DYNAMIC, DYNAMIC)

MemrefF32_IJ = MemRefFactory(_DYNAMIC_2D, F32)
MemrefF32_IK = MemRefFactory(_DYNAMIC_2D, F32)
MemrefF32_KJ = MemRefFactory(_DYNAMIC_2D, F32)
MemrefF32_JL = MemRefFactory(_DYNAMIC_2D, F32)
MemrefF32_IL = MemRefFactory(_DYNAMIC_2D, F32)
19+
20+
21+
# PyDSL 2mm kernel: overwrites `tmp` with alpha * A * B, then updates
# `D_arr` in place to beta * D_arr + tmp * C.
@compile()
def Twomm(
    ni: Index,
    nj: Index,
    nk: Index,
    nl: Index,
    alpha: F32,
    beta: F32,
    tmp: MemrefF32_IJ,
    A: MemrefF32_IK,
    B: MemrefF32_KJ,
    C: MemrefF32_JL,
    D_arr: MemrefF32_IL,
) -> None:
    zero: F32 = 0.0

    # First product: tmp[r, c] = sum over k of alpha * A[r, k] * B[k, c].
    for r in arange(ni):
        for c in arange(nj):
            tmp[r, c] = zero
            for k in arange(nk):
                tmp[r, c] = tmp[r, c] + alpha * A[r, k] * B[k, c]

    # Second product: scale D_arr by beta, then accumulate tmp * C into it.
    for r in arange(ni):
        for c in arange(nl):
            D_arr[r, c] = D_arr[r, c] * beta
            for k in arange(nj):
                D_arr[r, c] = D_arr[r, c] + tmp[r, k] * C[k, c]
46+
47+
48+
def main(
    current_dataset: str, output_array: bool, c_test: bool, ctest_obj: str
):
    """Run the PyDSL 2mm benchmark and optionally compare it with a C build.

    Args:
        current_dataset: Dataset name such as ``"SMALL_DATASET"``. Unknown
            names fall back to the SMALL_DATASET sizes.
        output_array: When true, the output matrix ``D`` is rendered as a
            text dump and stored under ``results["array"]``.
        c_test: When true, ``kernel_2mm`` is loaded from ``ctest_obj``, run
            on copies of the same inputs, and its output compared with the
            PyDSL output.
        ctest_obj: Path of the shared object to load; only used when
            ``c_test`` is true.

    Returns:
        A dict with the keys ``"array"`` (dump text, or ``""``), ``"perf"``
        (seconds taken by one PyDSL run), ``"c_correctness"`` (verdict
        string, or ``""``) and ``"c_perf"`` (seconds taken by one C run, or
        ``-1.0``).

    Raises:
        OSError: If ``c_test`` is true and ``ctest_obj`` cannot be loaded.
        AttributeError: If the library does not export ``kernel_2mm``.
    """
    datasets = {
        "MINI_DATASET": (16, 18, 22, 24),
        "SMALL_DATASET": (40, 50, 70, 80),
        "MEDIUM_DATASET": (180, 190, 210, 220),
        "LARGE_DATASET": (800, 900, 1100, 1200),
        "EXTRALARGE_DATASET": (1600, 1800, 2200, 2400),
    }
    ni, nj, nk, nl = datasets.get(current_dataset, datasets["SMALL_DATASET"])

    results = {
        "array": "",
        "perf": -1.0,
        "c_correctness": "",
        "c_perf": -1.0,
    }

    float_ptr = ctypes.POINTER(ctypes.c_float)
    if c_test:
        # Load the library up front so a bad path fails before any work.
        lib = ctypes.CDLL(ctest_obj)
        Twomm_c = lib.kernel_2mm
        Twomm_c.argtypes = [
            ctypes.c_int,  # ni
            ctypes.c_int,  # nj
            ctypes.c_int,  # nk
            ctypes.c_int,  # nl
            ctypes.c_float,  # alpha
            ctypes.c_float,  # beta
            float_ptr,  # tmp
            float_ptr,  # A
            float_ptr,  # B
            float_ptr,  # C
            float_ptr,  # D
        ]
        # The kernel's result is never read; without this ctypes would
        # interpret whatever is in the return register as a C int.
        Twomm_c.restype = None

    tmp = np.zeros((ni, nj), dtype=np.float32)
    a = np.zeros((ni, nk), dtype=np.float32)
    b = np.zeros((nk, nj), dtype=np.float32)
    c = np.zeros((nj, nl), dtype=np.float32)
    d = np.zeros((ni, nl), dtype=np.float32)

    # init array
    alpha = 1.5
    beta = 1.2
    for i in range(ni):
        for j in range(nk):
            a[i, j] = ((i * j + 1) % ni) / ni
    for i in range(nk):
        for j in range(nj):
            b[i, j] = (i * (j + 1) % nj) / nj
    for i in range(nj):
        for j in range(nl):
            c[i, j] = ((i * (j + 3) + 1) % nl) / nl
    for i in range(ni):
        for j in range(nl):
            d[i, j] = (i * (j + 2) % nk) / nk

    # Snapshot the inputs before the PyDSL run: the kernel updates `d` in
    # place, and the C run must start from the same initial values.
    a_copy = a.copy()
    b_copy = b.copy()
    c_copy = c.copy()
    d_copy = d.copy()

    perf = timeit(
        lambda: Twomm(ni, nj, nk, nl, alpha, beta, tmp, a, b, c, d), number=1
    )
    if output_array:
        pieces = ["==BEGIN DUMP_ARRAYS==\n", "begin dump: D"]
        for i in range(ni):
            for j in range(nl):
                # A line break precedes every element whose i * ni + j is
                # a multiple of 20.
                if (i * ni + j) % 20 == 0:
                    pieces.append("\n")
                pieces.append(f"{d[i, j]:.2f} ")
        pieces.append("\nend dump: D\n")
        pieces.append("==END DUMP_ARRAYS==\n")
        results["array"] = "".join(pieces)
    results["perf"] = perf

    if c_test:
        a_ptr = a_copy.ctypes.data_as(float_ptr)
        b_ptr = b_copy.ctypes.data_as(float_ptr)
        c_ptr = c_copy.ctypes.data_as(float_ptr)
        d_ptr = d_copy.ctypes.data_as(float_ptr)
        # `tmp` is reused as scratch space for the C run.
        tmp_ptr = tmp.ctypes.data_as(float_ptr)

        perf = timeit(
            lambda: Twomm_c(
                ni,
                nj,
                nk,
                nl,
                alpha,
                beta,
                tmp_ptr,
                a_ptr,
                b_ptr,
                c_ptr,
                d_ptr,
            ),
            number=1,
        )
        # ndarray.max propagates NaN, so a NaN in either output makes the
        # check fail. A Python-level max(acc, x) loop would silently skip
        # NaN differences and report a broken kernel as correct.
        difference = np.abs(d_copy - d)
        max_difference = difference.max() if difference.size else 0.0
        results["c_perf"] = perf
        if max_difference < 0.001:
            results["c_correctness"] = "results are correct."
        else:
            results["c_correctness"] = (
                f"results incorrect! Max value difference is {max_difference:2f}"
            )
    return results
163+
164+
if __name__ == "__main__":
    # Stand-alone run: SMALL_DATASET, dump the output array, no C comparison.
    outcome = main(
        current_dataset="SMALL_DATASET",
        output_array=True,
        c_test=False,
        ctest_obj="",
    )
    for key in ("array", "perf"):
        print(outcome[key])

tests/polybench/benchmarks/3mm.py

Lines changed: 185 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,185 @@
1+
import sys
2+
3+
4+
from pydsl.type import Index, F32
5+
from pydsl.memref import MemRefFactory, DYNAMIC
6+
from pydsl.frontend import compile
7+
from pydsl.affine import (
8+
affine_range as arange,
9+
)
10+
import numpy as np
11+
from timeit import timeit
12+
import ctypes
13+
14+
# One two-dimensional F32 memref type per kernel operand, with both extents
# left as DYNAMIC. The two-letter suffix names the pair of size parameters
# that bound the operand (e.g. JM is the nj x nm matrix). Each alias comes
# from its own factory call so the types stay distinct objects.
_DYNAMIC_2D = (DYNAMIC, DYNAMIC)

MemrefF32_IJ = MemRefFactory(_DYNAMIC_2D, F32)
MemrefF32_IK = MemRefFactory(_DYNAMIC_2D, F32)
MemrefF32_KJ = MemRefFactory(_DYNAMIC_2D, F32)
MemrefF32_JL = MemRefFactory(_DYNAMIC_2D, F32)
MemrefF32_JM = MemRefFactory(_DYNAMIC_2D, F32)
MemrefF32_ML = MemRefFactory(_DYNAMIC_2D, F32)
MemrefF32_IL = MemRefFactory(_DYNAMIC_2D, F32)
21+
22+
23+
# PyDSL 3mm kernel: E = A * B, F = C * D_arr, G = E * F. All three result
# matrices are overwritten; the returned F32 is just the zero constant.
@compile()
def Threemm(
    ni: Index,
    nj: Index,
    nk: Index,
    nl: Index,
    nm: Index,
    E: MemrefF32_IJ,
    A: MemrefF32_IK,
    B: MemrefF32_KJ,
    F: MemrefF32_JL,
    C: MemrefF32_JM,
    D_arr: MemrefF32_ML,
    G: MemrefF32_IL,
) -> F32:
    zero: F32 = 0.0

    # E[r, c] = sum over k of A[r, k] * B[k, c]
    for r in arange(ni):
        for c in arange(nj):
            E[r, c] = zero
            for k in arange(nk):
                E[r, c] = E[r, c] + A[r, k] * B[k, c]

    # F[r, c] = sum over k of C[r, k] * D_arr[k, c]
    for r in arange(nj):
        for c in arange(nl):
            F[r, c] = zero
            for k in arange(nm):
                F[r, c] = F[r, c] + C[r, k] * D_arr[k, c]

    # G[r, c] = sum over k of E[r, k] * F[k, c]
    for r in arange(ni):
        for c in arange(nl):
            G[r, c] = zero
            for k in arange(nj):
                G[r, c] = G[r, c] + E[r, k] * F[k, c]

    return zero
56+
57+
58+
def main(
    current_dataset: str, output_array: bool, c_test: bool, ctest_obj: str
):
    """Run the PyDSL 3mm benchmark and optionally compare it with a C build.

    Args:
        current_dataset: Dataset name such as ``"SMALL_DATASET"``. Unknown
            names fall back to the SMALL_DATASET sizes.
        output_array: When true, the output matrix ``G`` is rendered as a
            text dump and stored under ``results["array"]``.
        c_test: When true, ``kernel_3mm`` is loaded from ``ctest_obj``, run
            on copies of the same inputs, and its output compared with the
            PyDSL output.
        ctest_obj: Path of the shared object to load; only used when
            ``c_test`` is true.

    Returns:
        A dict with the keys ``"array"`` (dump text, or ``""``), ``"perf"``
        (seconds taken by one PyDSL run), ``"c_correctness"`` (verdict
        string, or ``""``) and ``"c_perf"`` (seconds taken by one C run, or
        ``-1.0``).

    Raises:
        OSError: If ``c_test`` is true and ``ctest_obj`` cannot be loaded.
        AttributeError: If the library does not export ``kernel_3mm``.
    """
    datasets = {
        "MINI_DATASET": (16, 18, 20, 22, 24),
        "SMALL_DATASET": (40, 50, 60, 70, 80),
        "MEDIUM_DATASET": (180, 190, 200, 210, 220),
        "LARGE_DATASET": (800, 900, 1000, 1100, 1200),
        "EXTRALARGE_DATASET": (1600, 1800, 2000, 2200, 2400),
    }
    ni, nj, nk, nl, nm = datasets.get(
        current_dataset, datasets["SMALL_DATASET"]
    )

    results = {
        "array": "",
        "perf": -1.0,
        "c_correctness": "",
        "c_perf": -1.0,
    }

    float_ptr = ctypes.POINTER(ctypes.c_float)
    if c_test:
        # Load the library up front so a bad path fails before any work.
        lib = ctypes.CDLL(ctest_obj)
        Threemm_c = lib.kernel_3mm
        Threemm_c.argtypes = [
            ctypes.c_int,  # ni
            ctypes.c_int,  # nj
            ctypes.c_int,  # nk
            ctypes.c_int,  # nl
            ctypes.c_int,  # nm
            float_ptr,  # E
            float_ptr,  # A
            float_ptr,  # B
            float_ptr,  # F
            float_ptr,  # C
            float_ptr,  # D
            float_ptr,  # G
        ]
        # The kernel's result is never read; without this ctypes would
        # interpret whatever is in the return register as a C int.
        Threemm_c.restype = None

    e = np.zeros((ni, nj), dtype=np.float32)
    a = np.zeros((ni, nk), dtype=np.float32)
    b = np.zeros((nk, nj), dtype=np.float32)
    f = np.zeros((nj, nl), dtype=np.float32)
    c = np.zeros((nj, nm), dtype=np.float32)
    d = np.zeros((nm, nl), dtype=np.float32)
    g = np.zeros((ni, nl), dtype=np.float32)

    # init array
    for i in range(ni):
        for j in range(nk):
            a[i, j] = ((i * j + 1) % ni) / (5 * ni)
    for i in range(nk):
        for j in range(nj):
            b[i, j] = ((i * (j + 1) + 2) % nj) / (5 * nj)
    for i in range(nj):
        for j in range(nm):
            c[i, j] = ((i * (j + 3)) % nl) / (5 * nl)
    for i in range(nm):
        for j in range(nl):
            d[i, j] = ((i * (j + 2) + 2) % nk) / (5 * nk)

    # Separate buffers for the C run so each implementation writes its own
    # E, F and G and the two outputs can be compared afterwards.
    e_copy = e.copy()
    a_copy = a.copy()
    b_copy = b.copy()
    f_copy = f.copy()
    c_copy = c.copy()
    d_copy = d.copy()
    g_copy = g.copy()

    perf = timeit(
        lambda: Threemm(ni, nj, nk, nl, nm, e, a, b, f, c, d, g), number=1
    )
    if output_array:
        pieces = ["==BEGIN DUMP_ARRAYS==\n", "begin dump: G"]
        for i in range(ni):
            for j in range(nl):
                # A line break precedes every element whose i * ni + j is
                # a multiple of 20.
                if (i * ni + j) % 20 == 0:
                    pieces.append("\n")
                pieces.append(f"{g[i, j]:.2f} ")
        pieces.append("\nend dump: G\n")
        pieces.append("==END DUMP_ARRAYS==\n")
        results["array"] = "".join(pieces)
    results["perf"] = perf

    if c_test:
        a_ptr = a_copy.ctypes.data_as(float_ptr)
        b_ptr = b_copy.ctypes.data_as(float_ptr)
        c_ptr = c_copy.ctypes.data_as(float_ptr)
        d_ptr = d_copy.ctypes.data_as(float_ptr)
        e_ptr = e_copy.ctypes.data_as(float_ptr)
        f_ptr = f_copy.ctypes.data_as(float_ptr)
        g_ptr = g_copy.ctypes.data_as(float_ptr)

        perf = timeit(
            lambda: Threemm_c(
                ni,
                nj,
                nk,
                nl,
                nm,
                e_ptr,
                a_ptr,
                b_ptr,
                f_ptr,
                c_ptr,
                d_ptr,
                g_ptr,
            ),
            number=1,
        )
        # ndarray.max propagates NaN, so a NaN in either output makes the
        # check fail. A Python-level max(acc, x) loop would silently skip
        # NaN differences and report a broken kernel as correct.
        difference = np.abs(g_copy - g)
        max_difference = difference.max() if difference.size else 0.0
        results["c_perf"] = perf
        if max_difference < 0.001:
            results["c_correctness"] = "results are correct."
        else:
            results["c_correctness"] = (
                f"results incorrect! Max value difference is {max_difference:2f}"
            )
    return results
181+
182+
if __name__ == "__main__":
    # Stand-alone run: SMALL_DATASET, dump the output array, no C comparison.
    outcome = main(
        current_dataset="SMALL_DATASET",
        output_array=True,
        c_test=False,
        ctest_obj="",
    )
    for key in ("array", "perf"):
        print(outcome[key])

0 commit comments

Comments
 (0)