Commit fabf4fe

committed
collector script for cpuv8 report
1 parent 0fa469d commit fabf4fe

File tree

1 file changed: +212 -0 lines changed

cpuv8_collector.py

Lines changed: 212 additions & 0 deletions
@@ -0,0 +1,212 @@
#!/usr/bin/env python3
import os, argparse, glob, csv, sys, re, subprocess
from collections import defaultdict

# Extracted from inrate/fprate/intspeed/fpspeed.bset in cpu2017/benchspec/CPU.
# The class field is kept for future use.
workload_class = '''\
workload,class

706.stockfish_r,int_rate
707.ntest_r,int_rate
708.sqlite_r,int_rate
710.omnetpp_r,int_rate
714.cpython_r,int_rate
721.gcc_r,int_rate
723.llvm_r,int_rate
727.cppcheck_r,int_rate
729.abc_r,int_rate
734.vpr_r,int_rate
735.gem5_r,int_rate
750.sealcrypto_r,int_rate
753.ns3_r,int_rate
760.rocksdb_r,int_rate
777.zstd_r,int_rate

709.cactus_r,fp_rate
722.palm_r,fp_rate
731.astcenc_r,fp_rate
736.ocio_r,fp_rate
737.gmsh_r,fp_rate
748.flightdm_r,fp_rate
749.fotonik3d_r,fp_rate
752.whisper_r,fp_rate
765.roms_r,fp_rate
766.femflow_r,fp_rate
767.nest_r,fp_rate
772.marian_r,fp_rate
782.lbm_r,fp_rate

801.xz_s,int_speed
807.ntest_s,int_speed
817.flac_s,int_speed
821.gcc_s,int_speed
823.llvm_s,int_speed
827.cppcheck_s,int_speed
829.abc_s,int_speed
834.vpr_s,int_speed
835.gem5_s,int_speed
838.diamond_s,int_speed
846.minizinc_s,int_speed
853.ns3_s,int_speed
854.graph500_s,int_speed

800.pot3d_s,fp_speed
803.sph_exa_s,fp_speed
809.cactus_s,fp_speed
811.tealeaf_s,fp_speed
816.nab_s,fp_speed
820.cloverleaf_s,fp_speed
822.palm_s,fp_speed
849.fotonik3d_s,fp_speed
852.whisper_s,fp_speed
857.namd_s,fp_speed
865.roms_s,fp_speed
867.nest_s,fp_speed
872.marian_s,fp_speed
881.neutron_s,fp_speed
'''

# Map each workload name to its class (blank rows are skipped by DictReader).
reader = csv.DictReader(workload_class.split('\n'))
workloads_classes = {}
for line in reader:
    workloads_classes[line['workload']] = line['class']

all_workloads = workloads_classes.keys()
wl_error_state = {}

def get_workload_status(workload, log_dir, label):
    # Find the result logs that were produced with this label.
    label_filter = f"Label.*=.*{label}"
    filter_grep = f'grep -r --include="*.log" {label_filter} {log_dir}'
    filter_res = subprocess.run(filter_grep, shell=True, capture_output=True, text=True)
    filenames_str = None
    if filter_res.stderr:
        print(filter_res.stderr)
        return None
    if filter_res.stdout:
        filenames = [line.split(':')[0] for line in filter_res.stdout.splitlines()]
        filenames_str = ' '.join(filenames)
    if not filenames_str:
        # No result log matched the label, so the status cannot be determined.
        return None

    # Search the matching logs for SPEC error lines.
    command = f'echo {filenames_str} | xargs grep -r --include="*.log" "Error.*"'
    res = subprocess.run(command, shell=True, capture_output=True, text=True)

    if res.stderr:
        print(res.stderr)
        return None

    if res.stdout:
        bench_pattern = r'[78]\d{2}\.[a-zA-Z0-9]+_[rs]'
        BE_reg = f'Error building.*({bench_pattern})'
        SE_reg = rf'Error during.*setup for\s*({bench_pattern})'
        RE_reg = f'Error ({bench_pattern}).*errorcode=RE'
        VE_reg = f'Error ({bench_pattern}).*errorcode=VE'

        BE_benches = list({match for match in re.findall(BE_reg, res.stdout)})
        SE_benches = list({match for match in re.findall(SE_reg, res.stdout)})
        RE_benches = list({match for match in re.findall(RE_reg, res.stdout)})
        VE_benches = list({match for match in re.findall(VE_reg, res.stdout)})

        for b in BE_benches:
            wl_error_state[b] = 'BE (build error)'
        for b in SE_benches:
            wl_error_state[b] = 'SE (setup error)'
        for b in RE_benches:
            wl_error_state[b] = 'RE (runtime error)'
        for b in VE_benches:
            wl_error_state[b] = 'VE (validation error)'

    if workload in wl_error_state:
        return wl_error_state[workload]

    return 'Success'

def get_path(directory, size, label, num, classes, workloads):
    cpu_dir = os.path.join(directory, 'benchspec/CPU')
    run_dir = f'run_base_{size}_{label}.{num}'
    log_dir = f'{directory}/result'
    speccmds_pattern = f'run/{run_dir}/speccmds.cmd'
    csv_dict_list = []

    if not os.path.exists(log_dir):
        print(f'Error: cannot find last run result for {label}', file=sys.stderr)
        return None

    for workload in workloads:
        assert workload in all_workloads, f'unsupported workload {workload}'

        workload_dict = defaultdict(str)
        workload_dict['name'] = workload

        wl_state = get_workload_status(workload, log_dir, label)
        workload_dict['status'] = wl_state
        if classes:
            workload_dict['class'] = workloads_classes[workload]
        if not wl_state:
            print(f'warning: error searching run logs for {workload} {label}')
        elif wl_state != 'Success':
            print(f'warning: benchmark {workload} {label} failed: {wl_state}',
                  file=sys.stderr)
            csv_dict_list.append(workload_dict)
            continue

        speccmds_path = os.path.join(cpu_dir, workload + '*', speccmds_pattern)
        speccmds_files = glob.glob(speccmds_path)
        if not speccmds_files:
            print(f'warning: cannot find speccmds.cmd for {workload} with input:{size}, label:{label}', file=sys.stderr)
            continue
        speccmds_abspath = os.path.abspath(speccmds_files[0])
        run_abs_dir = os.path.dirname(speccmds_abspath)

        with open(speccmds_abspath, 'r') as speccmds_file:
            exe = None
            err_files = []
            # Assume the SDE profiling data is written to the stderr files
            # named by the -e option in speccmds.cmd.
            file_regex = re.compile(r'.*\s-e\s([\w\.-]+)\s.*' + run_dir + r'/([\w\.-]+)')
            for line in speccmds_file:
                if matches := file_regex.match(line):
                    new_exe = os.path.basename(matches.group(2))
                    if exe:
                        assert new_exe == exe, 'more than 1 exe'
                    else:
                        exe = new_exe
                    err_files.append(os.path.join(run_abs_dir, os.path.basename(matches.group(1))))

        assert exe, 'exe not found'
        assert err_files, 'stderr files not found'
        workload_dict['exe'] = os.path.join(run_abs_dir, exe)
        workload_dict['sim_files'] = ','.join(err_files)

        csv_dict_list.append(workload_dict)

    return csv_dict_list

if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='Get the paths of the binaries and the SDE perf data for cpu2017 (version 1.1.8), '
                    'assuming the perf data is written to stderr, e.g. for SDE, '
                    '"-omix /dev/stderr -top_blocks -1 -dynamic_stats_per_block" is used in the submit command')
    parser.add_argument('dir', help='directory of cpu2017')
    parser.add_argument('--size', choices=['test', 'train', 'ref'])
    parser.add_argument('--label', required=True, help='label used in the cpu2017 config file')
    parser.add_argument('--num', default='0000', help='run number')
    parser.add_argument('--workloads', help='interesting workloads, which can be a subset of: {}'.format(','.join(all_workloads)))
    parser.add_argument('--filter', choices=['speed', 'rate'])
    parser.add_argument('--classes', action='store_true', help='add class info: int_rate, fp_rate, int_speed, fp_speed')
    parser.add_argument('-o', '--output', required=True, help='output CSV for the paths')
    args = parser.parse_args()

    workloads = args.workloads.split(',') if args.workloads else all_workloads
    if args.filter == 'speed':
        workloads = [workload for workload in workloads if workload.endswith('_s')]
    elif args.filter == 'rate':
        workloads = [workload for workload in workloads if workload.endswith('_r')]

    csv_dict_list = get_path(args.dir, args.size, args.label, args.num, args.classes, workloads)
    if csv_dict_list:
        with open(args.output, 'w', newline='') as csv_file:
            header = csv_dict_list[0].keys()
            csv_writer = csv.DictWriter(csv_file, fieldnames=header)
            csv_writer.writeheader()
            for row in csv_dict_list:
                csv_writer.writerow(row)
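
Example usage, as a sketch only: the cpu2017 install path and the label "mylabel" below are placeholders, the flags are the ones defined by the argparse block above, and the submit line is just one plausible form of the SDE options mentioned in the description.

    # Hypothetical submit line in the cpu2017 config, sending the SDE mix output to stderr:
    #   submit = sde -omix /dev/stderr -top_blocks -1 -dynamic_stats_per_block -- $command

    ./cpuv8_collector.py /path/to/cpu2017 --size ref --label mylabel --filter rate --classes -o cpuv8_paths.csv

The output CSV then contains one row per workload with the columns name, status, class (when --classes is given), exe, and sim_files, where sim_files is the comma-separated list of stderr files holding the SDE data.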
