Skip to content

Commit 92d41c2

Browse files
Add the PolyBench test cases to the tests folder, with golden files and a bash script that lets a user generate the `.so` (shared library) files
1 parent eb6e432 commit 92d41c2

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

62 files changed

+14740
-0
lines changed

tests/polybench/__init__.py

Whitespace-only changes.

tests/polybench/benchmarks/2mm.py

Lines changed: 172 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,172 @@
1+
import sys
2+
3+
from helpers import parse_args
4+
from pydsl.type import Index, F32
5+
from pydsl.memref import MemRefFactory, DYNAMIC
6+
from pydsl.frontend import compile, CTarget
7+
from pydsl.affine import (
8+
affine_range as arange,
9+
)
10+
import numpy as np
11+
from timeit import timeit
12+
import ctypes
13+
14+
# Placeholder 2-D F32 memref types, one per kernel operand; the suffix names
# the two index dimensions of the operand. Both extents are DYNAMIC here, and
# the __main__ section rebinds each name to a fixed shape before compiling.
_DYNAMIC_2D = (DYNAMIC, DYNAMIC)
MemrefF32_IJ = MemRefFactory(_DYNAMIC_2D, F32)
MemrefF32_IK = MemRefFactory(_DYNAMIC_2D, F32)
MemrefF32_KJ = MemRefFactory(_DYNAMIC_2D, F32)
MemrefF32_JL = MemRefFactory(_DYNAMIC_2D, F32)
MemrefF32_IL = MemRefFactory(_DYNAMIC_2D, F32)
19+
20+
21+
# 2mm kernel: tmp = alpha * A * B, followed by D_arr = beta * D_arr + tmp * C
# (matrix products). Results are written in place into `tmp` and `D_arr`.
#
# ni, nj, nk, nl are the loop extents: tmp is indexed [ni, nj], A [ni, nk],
# B [nk, nj], C [nj, nl] and D_arr [ni, nl].
#
# Documentation is kept in `#` comments rather than a docstring so that the
# function body holds nothing but the kernel statements.
def Twomm(
    ni: Index,
    nj: Index,
    nk: Index,
    nl: Index,
    alpha: F32,
    beta: F32,
    tmp: MemrefF32_IJ,
    A: MemrefF32_IK,
    B: MemrefF32_KJ,
    C: MemrefF32_JL,
    D_arr: MemrefF32_IL,
) -> None:
    zero: F32 = 0.0
    # First product: tmp[i, j] = sum over k of alpha * A[i, k] * B[k, j].
    for i in arange(ni):
        for j in arange(nj):
            tmp[i, j] = zero
            for k in arange(nk):
                tmp[i, j] = tmp[i, j] + alpha * A[i, k] * B[k, j]
    # Second product: scale D_arr by beta, then accumulate tmp * C into it.
    for i in arange(ni):
        for j in arange(nl):
            D_arr[i, j] = D_arr[i, j] * beta
            for k in arange(nj):
                D_arr[i, j] = D_arr[i, j] + tmp[i, k] * C[k, j]
45+
46+
47+
if __name__ == "__main__":
    # Problem sizes (ni, nj, nk, nl) keyed by dataset name.
    datasets = {
        "MINI_DATASET": (16, 18, 22, 24),
        "SMALL_DATASET": (40, 50, 70, 80),
        "MEDIUM_DATASET": (180, 190, 210, 220),
        "LARGE_DATASET": (800, 900, 1100, 1200),
        "EXTRALARGE_DATASET": (1600, 1800, 2200, 2400),
    }
    # parse_args returns a dict of run options; only these keys are read.
    parsed_args = parse_args(sys.argv, datasets)
    c_test = parsed_args["c_test"]
    compilation_target = parsed_args["compilation_target"]
    current_dataset = parsed_args["current_dataset"]
    output_array = parsed_args["output_array"]
    ctest_obj = parsed_args["ctest_obj"]

    # An unknown dataset name falls back to the SMALL_DATASET sizes.
    ni, nj, nk, nl = datasets.get(current_dataset, (40, 50, 70, 80))

    # Rebind the memref types to the concrete shapes of the chosen dataset
    # before the kernel is compiled.
    MemrefF32_IJ = MemRefFactory((ni, nj), F32)
    MemrefF32_IK = MemRefFactory((ni, nk), F32)
    MemrefF32_KJ = MemRefFactory((nk, nj), F32)
    MemrefF32_JL = MemRefFactory((nj, nl), F32)
    MemrefF32_IL = MemRefFactory((ni, nl), F32)

    # Replace the Python function with what pydsl's compile decorator returns.
    Twomm = compile(
        locals(),
        dump_mlir=False,
        auto_build=True,
        target_class=compilation_target,
        dataset=current_dataset,
    )(Twomm)

    if c_test:
        # Reference C kernel loaded from the user-supplied shared object.
        lib = ctypes.CDLL(ctest_obj)
        Twomm_c = lib.kernel_2mm

        Twomm_c.argtypes = [
            ctypes.c_int,  # ni
            ctypes.c_int,  # nj
            ctypes.c_int,  # nk
            ctypes.c_int,  # nl
            ctypes.c_float,  # alpha
            ctypes.c_float,  # beta
            ctypes.POINTER(ctypes.c_float),  # tmp
            ctypes.POINTER(ctypes.c_float),  # A
            ctypes.POINTER(ctypes.c_float),  # B
            ctypes.POINTER(ctypes.c_float),  # C
            ctypes.POINTER(ctypes.c_float),  # D
        ]

    # init array
    alpha = 1.5
    beta = 1.2
    # Allocate float32 directly instead of building float64 and converting.
    tmp = np.zeros((ni, nj), dtype=np.float32)
    # Each input is filled from its (row, col) indices. np.ogrid yields a
    # column of row indices and a row of column indices that broadcast to the
    # full matrix; the arithmetic is done in integers / float64 and rounded
    # to float32 once, exactly as the element-wise assignment did.
    row, col = np.ogrid[0:ni, 0:nk]
    a = (((row * col + 1) % ni) / ni).astype(np.float32)
    row, col = np.ogrid[0:nk, 0:nj]
    b = ((row * (col + 1) % nj) / nj).astype(np.float32)
    row, col = np.ogrid[0:nj, 0:nl]
    c = (((row * (col + 3) + 1) % nl) / nl).astype(np.float32)
    row, col = np.ogrid[0:ni, 0:nl]
    d = ((row * (col + 2) % nk) / nk).astype(np.float32)

    # Pristine copies of the inputs for the C reference run, taken before the
    # compiled kernel overwrites `d` in place.
    a_copy = a.copy()
    b_copy = b.copy()
    c_copy = c.copy()
    d_copy = d.copy()

    perf = timeit(
        lambda: Twomm(ni, nj, nk, nl, alpha, beta, tmp, a, b, c, d), number=1
    )
    if output_array:
        print("==BEGIN DUMP_ARRAYS==", file=sys.stderr)
        print("begin dump: D", end="", file=sys.stderr)
        for i in range(ni):
            for j in range(nl):
                # NOTE(review): the line-break stride uses ni although each
                # row holds nl values; presumably this mirrors the reference
                # dump format the golden files were produced with -- confirm
                # before changing.
                if (i * ni + j) % 20 == 0:
                    print("", file=sys.stderr)
                print(f"{d[i, j]:.2f} ", end="", file=sys.stderr)
        print("\nend dump: D", file=sys.stderr)
        print("==END DUMP_ARRAYS==", file=sys.stderr)
    print(perf)

    if c_test:
        a_ptr = a_copy.ctypes.data_as(ctypes.POINTER(ctypes.c_float))
        b_ptr = b_copy.ctypes.data_as(ctypes.POINTER(ctypes.c_float))
        c_ptr = c_copy.ctypes.data_as(ctypes.POINTER(ctypes.c_float))
        d_ptr = d_copy.ctypes.data_as(ctypes.POINTER(ctypes.c_float))
        tmp_ptr = tmp.ctypes.data_as(ctypes.POINTER(ctypes.c_float))

        perf = timeit(
            lambda: Twomm_c(
                ni,
                nj,
                nk,
                nl,
                alpha,
                beta,
                tmp_ptr,
                a_ptr,
                b_ptr,
                c_ptr,
                d_ptr,
            ),
            number=1,
        )
        # np.max propagates NaN, so a NaN in either result makes the check
        # below fail instead of being skipped by pairwise max() comparisons.
        max_difference = float(np.max(np.abs(d_copy - d), initial=0.0))
        print(perf)
        if max_difference < 0.001:
            print("results are correct.")
        else:
            print(
                f"results incorrect! Max value difference is {max_difference:2f}"
            )
            # sys.exit rather than the site-provided exit(), which is meant
            # for interactive use and is not always defined.
            sys.exit(1)

tests/polybench/benchmarks/3mm.py

Lines changed: 193 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,193 @@
1+
import sys
2+
3+
from helpers import parse_args
4+
from pydsl.type import Index, F32
5+
from pydsl.memref import MemRefFactory, DYNAMIC
6+
from pydsl.frontend import compile
7+
from pydsl.affine import (
8+
affine_range as arange,
9+
)
10+
import numpy as np
11+
from timeit import timeit
12+
import ctypes
13+
14+
# Placeholder 2-D F32 memref types, one per kernel operand; the suffix names
# the two index dimensions of the operand. Both extents are DYNAMIC here, and
# the __main__ section rebinds each name to a fixed shape before compiling.
_DYNAMIC_2D = (DYNAMIC, DYNAMIC)
MemrefF32_IJ = MemRefFactory(_DYNAMIC_2D, F32)
MemrefF32_IK = MemRefFactory(_DYNAMIC_2D, F32)
MemrefF32_KJ = MemRefFactory(_DYNAMIC_2D, F32)
MemrefF32_JL = MemRefFactory(_DYNAMIC_2D, F32)
MemrefF32_JM = MemRefFactory(_DYNAMIC_2D, F32)
MemrefF32_ML = MemRefFactory(_DYNAMIC_2D, F32)
MemrefF32_IL = MemRefFactory(_DYNAMIC_2D, F32)
21+
22+
23+
# 3mm kernel: E = A * B, F = C * D_arr, then G = E * F (matrix products).
# Results are written in place into E, F and G.
#
# ni, nj, nk, nl, nm are the loop extents: E is indexed [ni, nj], A [ni, nk],
# B [nk, nj], F [nj, nl], C [nj, nm], D_arr [nm, nl] and G [ni, nl].
#
# Documentation is kept in `#` comments rather than a docstring so that the
# function body holds nothing but the kernel statements.
def Threemm(
    ni: Index,
    nj: Index,
    nk: Index,
    nl: Index,
    nm: Index,
    E: MemrefF32_IJ,
    A: MemrefF32_IK,
    B: MemrefF32_KJ,
    F: MemrefF32_JL,
    C: MemrefF32_JM,
    D_arr: MemrefF32_ML,
    G: MemrefF32_IL,
) -> F32:
    zero: F32 = 0.0
    # E[i, j] = sum over k of A[i, k] * B[k, j].
    for i in arange(ni):
        for j in arange(nj):
            E[i, j] = zero
            for k in arange(nk):
                E[i, j] = E[i, j] + A[i, k] * B[k, j]
    # F[i, j] = sum over k of C[i, k] * D_arr[k, j].
    for i in arange(nj):
        for j in arange(nl):
            F[i, j] = zero
            for k in arange(nm):
                F[i, j] = F[i, j] + C[i, k] * D_arr[k, j]
    # G[i, j] = sum over k of E[i, k] * F[k, j].
    for i in arange(ni):
        for j in arange(nl):
            G[i, j] = zero
            for k in arange(nj):
                G[i, j] = G[i, j] + E[i, k] * F[k, j]

    # The results live in E, F and G; the value returned is the 0.0 constant.
    return zero
55+
56+
57+
if __name__ == "__main__":
    # Problem sizes (ni, nj, nk, nl, nm) keyed by dataset name.
    datasets = {
        "MINI_DATASET": (16, 18, 20, 22, 24),
        "SMALL_DATASET": (40, 50, 60, 70, 80),
        "MEDIUM_DATASET": (180, 190, 200, 210, 220),
        "LARGE_DATASET": (800, 900, 1000, 1100, 1200),
        "EXTRALARGE_DATASET": (1600, 1800, 2000, 2200, 2400),
    }
    # parse_args returns a dict of run options; only these keys are read.
    parsed_args = parse_args(sys.argv, datasets)
    c_test = parsed_args["c_test"]
    compilation_target = parsed_args["compilation_target"]
    current_dataset = parsed_args["current_dataset"]
    output_array = parsed_args["output_array"]
    ctest_obj = parsed_args["ctest_obj"]

    # An unknown dataset name falls back to the SMALL_DATASET sizes.
    ni, nj, nk, nl, nm = datasets.get(current_dataset, (40, 50, 60, 70, 80))

    # Rebind the memref types to the concrete shapes of the chosen dataset
    # before the kernel is compiled.
    MemrefF32_IJ = MemRefFactory((ni, nj), F32)
    MemrefF32_IK = MemRefFactory((ni, nk), F32)
    MemrefF32_KJ = MemRefFactory((nk, nj), F32)
    MemrefF32_JL = MemRefFactory((nj, nl), F32)
    MemrefF32_JM = MemRefFactory((nj, nm), F32)
    MemrefF32_ML = MemRefFactory((nm, nl), F32)
    MemrefF32_IL = MemRefFactory((ni, nl), F32)

    # Replace the Python function with what pydsl's compile decorator returns.
    Threemm = compile(
        locals(),
        dump_mlir=False,
        auto_build=True,
        target_class=compilation_target,
        dataset=current_dataset,
    )(Threemm)

    if c_test:
        # Reference C kernel loaded from the user-supplied shared object.
        lib = ctypes.CDLL(ctest_obj)
        Threemm_c = lib.kernel_3mm

        Threemm_c.argtypes = [
            ctypes.c_int,  # ni
            ctypes.c_int,  # nj
            ctypes.c_int,  # nk
            ctypes.c_int,  # nl
            ctypes.c_int,  # nm
            ctypes.POINTER(ctypes.c_float),  # E
            ctypes.POINTER(ctypes.c_float),  # A
            ctypes.POINTER(ctypes.c_float),  # B
            ctypes.POINTER(ctypes.c_float),  # F
            ctypes.POINTER(ctypes.c_float),  # C
            ctypes.POINTER(ctypes.c_float),  # D
            ctypes.POINTER(ctypes.c_float),  # G
        ]

    # init array
    # Output/intermediate matrices start at zero; allocate float32 directly
    # instead of building float64 and converting.
    e = np.zeros((ni, nj), dtype=np.float32)
    f = np.zeros((nj, nl), dtype=np.float32)
    g = np.zeros((ni, nl), dtype=np.float32)
    # Each input is filled from its (row, col) indices. np.ogrid yields a
    # column of row indices and a row of column indices that broadcast to the
    # full matrix; the arithmetic is done in integers / float64 and rounded
    # to float32 once, exactly as the element-wise assignment did.
    row, col = np.ogrid[0:ni, 0:nk]
    a = (((row * col + 1) % ni) / (5 * ni)).astype(np.float32)
    row, col = np.ogrid[0:nk, 0:nj]
    b = (((row * (col + 1) + 2) % nj) / (5 * nj)).astype(np.float32)
    row, col = np.ogrid[0:nj, 0:nm]
    c = (((row * (col + 3)) % nl) / (5 * nl)).astype(np.float32)
    row, col = np.ogrid[0:nm, 0:nl]
    d = (((row * (col + 2) + 2) % nk) / (5 * nk)).astype(np.float32)

    # Separate buffers for the C reference run, copied before the compiled
    # kernel writes its results into e, f and g.
    e_copy = e.copy()
    a_copy = a.copy()
    b_copy = b.copy()
    f_copy = f.copy()
    c_copy = c.copy()
    d_copy = d.copy()
    g_copy = g.copy()

    perf = timeit(
        lambda: Threemm(ni, nj, nk, nl, nm, e, a, b, f, c, d, g), number=1
    )
    if output_array:
        print("==BEGIN DUMP_ARRAYS==", file=sys.stderr)
        print("begin dump: G", end="", file=sys.stderr)
        for i in range(ni):
            for j in range(nl):
                # NOTE(review): the line-break stride uses ni although each
                # row holds nl values; presumably this mirrors the reference
                # dump format the golden files were produced with -- confirm
                # before changing.
                if (i * ni + j) % 20 == 0:
                    print("", file=sys.stderr)
                print(f"{g[i, j]:.2f} ", end="", file=sys.stderr)
        print("\nend dump: G", file=sys.stderr)
        print("==END DUMP_ARRAYS==", file=sys.stderr)
    print(perf)

    if c_test:
        a_ptr = a_copy.ctypes.data_as(ctypes.POINTER(ctypes.c_float))
        b_ptr = b_copy.ctypes.data_as(ctypes.POINTER(ctypes.c_float))
        c_ptr = c_copy.ctypes.data_as(ctypes.POINTER(ctypes.c_float))
        d_ptr = d_copy.ctypes.data_as(ctypes.POINTER(ctypes.c_float))
        e_ptr = e_copy.ctypes.data_as(ctypes.POINTER(ctypes.c_float))
        f_ptr = f_copy.ctypes.data_as(ctypes.POINTER(ctypes.c_float))
        g_ptr = g_copy.ctypes.data_as(ctypes.POINTER(ctypes.c_float))

        perf = timeit(
            lambda: Threemm_c(
                ni,
                nj,
                nk,
                nl,
                nm,
                e_ptr,
                a_ptr,
                b_ptr,
                f_ptr,
                c_ptr,
                d_ptr,
                g_ptr,
            ),
            number=1,
        )
        # np.max propagates NaN, so a NaN in either result makes the check
        # below fail instead of being skipped by pairwise max() comparisons.
        max_difference = float(np.max(np.abs(g_copy - g), initial=0.0))
        print(perf)
        if max_difference < 0.001:
            print("results are correct.")
        else:
            print(
                f"results incorrect! Max value difference is {max_difference:2f}"
            )
            # sys.exit rather than the site-provided exit(), which is meant
            # for interactive use and is not always defined.
            sys.exit(1)

0 commit comments

Comments
 (0)