1
+ #!/usr/bin/env python3
2
+ import os , argparse , glob , csv , sys , re , subprocess
3
+ from collections import defaultdict
4
+
5
# Workload/class table extracted from inrate/fprate/intspeed/fpspeed.bset in
# cpu2017/benchspec/CPU.  The 'class' field is kept for future usage
# (e.g. the --classes option); blank separator lines are skipped by
# csv.DictReader.
workload_class = '''\
workload,class

706.stockfish_r,int_rate
707.ntest_r,int_rate
708.sqlite_r,int_rate
710.omnetpp_r,int_rate
714.cpython_r,int_rate
721.gcc_r,int_rate
723.llvm_r,int_rate
727.cppcheck_r,int_rate
729.abc_r,int_rate
734.vpr_r,int_rate
735.gem5_r,int_rate
750.sealcrypto_r,int_rate
753.ns3_r,int_rate
760.rocksdb_r,int_rate
777.zstd_r,int_rate

709.cactus_r,fp_rate
722.palm_r,fp_rate
731.astcenc_r,fp_rate
736.ocio_r,fp_rate
737.gmsh_r,fp_rate
748.flightdm_r,fp_rate
749.fotonik3d_r,fp_rate
752.whisper_r,fp_rate
765.roms_r,fp_rate
766.femflow_r,fp_rate
767.nest_r,fp_rate
772.marian_r,fp_rate
782.lbm_r,fp_rate

801.xz_s,int_speed
807.ntest_s,int_speed
817.flac_s,int_speed
821.gcc_s,int_speed
823.llvm_s,int_speed
827.cppcheck_s,int_speed
829.abc_s,int_speed
834.vpr_s,int_speed
835.gem5_s,int_speed
838.diamond_s,int_speed
846.minizinc_s,int_speed
853.ns3_s,int_speed
854.graph500_s,int_speed

800.pot3d_s,fp_speed
803.sph_exa_s,fp_speed
809.cactus_s,fp_speed
811.tealeaf_s,fp_speed
816.nab_s,fp_speed
820.cloverleaf_s,fp_speed
822.palm_s,fp_speed
849.fotonik3d_s,fp_speed
852.whisper_s,fp_speed
857.namd_s,fp_speed
865.roms_s,fp_speed
867.nest_s,fp_speed
872.marian_s,fp_speed
881.neutron_s,fp_speed
'''

# Parse the embedded CSV into {workload_name: class_name}.
# Fixed: the 749.fotonik3d_r row was garbled ('749.fotonik3d_r"' with no
# class), which corrupted its key and mapped it to None.
reader = csv.DictReader(workload_class.splitlines())
workloads_classes = {}
for line in reader:
    workloads_classes[line['workload']] = line['class']

all_workloads = workloads_classes.keys()
# Global cache: benchmark name -> error description, filled lazily by
# get_workload_status() while scanning the run logs.
wl_error_state = {}
+
78
def get_workload_status(workload, log_dir, label):
    """Search the run logs under *log_dir* for errors of the given *label*.

    Returns 'Success', an error description string ('BE (build error)',
    'SE (setup error)', 'RE (runtime error)', 'VE (validation error)'),
    or None when the logs could not be searched.

    Side effect: every failing benchmark found in the logs is recorded in
    the module-level wl_error_state cache, not just *workload*.
    """
    # NOTE(review): label and log_dir are interpolated into a shell command
    # (shell=True); they are expected to come from trusted CLI arguments.
    # The pattern is quoted so its glob characters are not expanded by the
    # shell.
    label_filter = f"Label.*=.*{label}"
    filter_grep = f'grep -r --include="*.log" "{label_filter}" {log_dir}'
    filter_res = subprocess.run(filter_grep, shell=True, capture_output=True, text=True)
    if filter_res.stderr:
        print(filter_res.stderr)
        return None
    if not filter_res.stdout:
        # No log mentions this label; previously this fell through and ran
        # `echo None | xargs grep`, printing a spurious grep error before
        # returning None.
        return None

    # Keep only the file names from grep's "file:match" output lines.
    filenames = [line.split(':')[0] for line in filter_res.stdout.splitlines()]
    filenames_str = ' '.join(filenames)

    # Search only this label's log files for SPEC error lines.
    command = f'echo {filenames_str} | xargs grep -r --include="*.log" "Error.*"'
    res = subprocess.run(command, shell=True, capture_output=True, text=True)

    if res.stderr:
        print(res.stderr)
        return None

    if res.stdout:
        # Benchmark name, e.g. 706.stockfish_r or 881.neutron_s.
        bench_pattern = r'[78]\d{2}\.[a-zA-Z0-9]+_[rs]'
        # One (message regex, error state) pair per SPEC error category.
        # Raw strings avoid the invalid-escape warning on '\d'; the setup
        # regex uses '\s*' because the benchmark name follows "setup for"
        # after whitespace (the old 'for(' form could never match).
        error_patterns = (
            (rf'Error building.*({bench_pattern})', 'BE (build error)'),
            (rf'Error during.*setup for\s*({bench_pattern})', 'SE (setup error)'),
            (rf'Error ({bench_pattern}).*errorcode=RE', 'RE (runtime error)'),
            (rf'Error ({bench_pattern}).*errorcode=VE', 'VE (validation error)'),
        )
        # Later categories overwrite earlier ones, matching the original
        # BE -> SE -> RE -> VE precedence.
        for regex, state in error_patterns:
            for bench in set(re.findall(regex, res.stdout)):
                wl_error_state[bench] = state

    return wl_error_state.get(workload, 'Success')
126
+
127
def get_path(directory, size, label, num, classes, workloads):
    """Collect binary and SDE profile paths for each requested workload.

    Args:
        directory: root directory of the cpu2017 installation.
        size: input size ('test', 'train' or 'ref'); part of the run dir name.
        label: label used in the cpu2017 config file.
        num: run number suffix of the run directory (e.g. '0000').
        classes: if True, add the workload class to each row.
        workloads: workload names; each must be listed in all_workloads.

    Returns:
        A list of per-workload dicts with keys name/status[/class]/exe/
        sim_files, or None when the installation has no result directory.
    """
    cpu_dir = os.path.join(directory, 'benchspec/CPU')
    run_dir = f'run_base_{size}_{label}.{num}'
    log_dir = f'{directory}/result'
    speccmds_pattern = f'run/{run_dir}/speccmds.cmd'
    # Fixed: was `list(defaultdict(str))`, an obscure spelling of [].
    csv_dict_list = []

    if not os.path.exists(log_dir):
        print(f'Error: cannot find last run result for {label}', file=sys.stderr)
        return None

    for workload in workloads:
        assert workload in all_workloads, f'unsupported workload {workload}'

        workload_dict = defaultdict(str)
        workload_dict['name'] = workload

        wl_state = get_workload_status(workload, log_dir, label)
        workload_dict['status'] = wl_state
        if classes:
            workload_dict['class'] = workloads_classes[workload]
        if not wl_state:
            # Log search failed; still try to locate the run directory below.
            print(f'warning: error searching run logs for {workload} {label}')
        elif wl_state != 'Success':
            print(f'warning: benchmark {workload} {label} failed: {wl_state}',
                  file=sys.stderr)
            # Failed workloads are reported without exe/sim_files paths.
            csv_dict_list.append(workload_dict)
            continue
        speccmds_path = os.path.join(cpu_dir, workload + '*', speccmds_pattern)
        speccmds_files = glob.glob(speccmds_path)
        if not speccmds_files:
            print(f'warning: cannot find speccmds.cmd for {workload} with input:{size}, label:{label}', file=sys.stderr)
            continue
        speccmds_abspath = os.path.abspath(speccmds_files[0])
        # Use a fresh name instead of clobbering the 'directory' parameter:
        # later iterations still rely on cpu_dir/log_dir derived from the
        # original root, and reusing the name made that easy to break.
        run_abs_dir = os.path.dirname(speccmds_abspath)

        with open(speccmds_abspath, 'r') as speccmds_file:
            exe = None
            err_files = []
            # Assume SDE profiling data is written to the stderr files given
            # by the '-e <file>' option of each run command; group(1) is the
            # stderr file, group(2) the executable inside run_dir.
            file_regex = re.compile(r'.*\s-e\s([\w\.-]+)\s.*' + run_dir + r'/([\w\.-]+)')
            for line in speccmds_file:
                if matches := file_regex.match(line):
                    new_exe = os.path.basename(matches.group(2))
                    if exe:
                        # All commands of one workload must run the same binary.
                        assert new_exe == exe, 'more than 1 exe'
                    else:
                        exe = new_exe
                    err_files.append(os.path.join(run_abs_dir, os.path.basename(matches.group(1))))

        assert exe, 'not found exe'
        assert err_files, 'not found err files'
        workload_dict['exe'] = os.path.join(run_abs_dir, exe)
        workload_dict['sim_files'] = ','.join(err_files)

        csv_dict_list.append(workload_dict)

    return csv_dict_list
185
+
186
if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='Get paths of binaries and SDE perf data for cpu2017 (version 1.1.8), assuming perf data is written to stderr, e.g. for sde, "-omix /dev/stderr -top_blocks -1 -dynamic_stats_per_block" is used in the submit')
    parser.add_argument('dir', help='directory of cpu2017')
    # Default to 'ref': without a default, omitting --size built the run
    # directory name as 'run_base_None_...', which never matches a real run.
    parser.add_argument('--size', choices=['test', 'train', 'ref'], default='ref')
    parser.add_argument('--label', required=True, help='label used in cpu2017 config file')
    parser.add_argument('--num', default='0000', help='run number')
    parser.add_argument('--workloads', help='interesting workloads, which can be a subset {}'.format(','.join(all_workloads)))
    parser.add_argument('--filter', choices=['speed', 'rate'])
    parser.add_argument('--classes', action='store_true', help='add class info: int_rate, fp_rate, int_speed, fp_speed')
    parser.add_argument('-o', '--output', required=True, help='output CSV for the paths')
    args = parser.parse_args()

    workloads = args.workloads.split(',') if args.workloads else all_workloads
    if args.filter == 'speed':
        workloads = [workload for workload in workloads if workload.endswith('_s')]
    elif args.filter == 'rate':
        workloads = [workload for workload in workloads if workload.endswith('_r')]

    csv_dict_list = get_path(args.dir, args.size, args.label, args.num, args.classes, workloads)
    if csv_dict_list:
        # Rows may differ in keys (failed workloads lack exe/sim_files), so
        # build the header as the ordered union of all row keys; using only
        # the first row's keys made DictWriter raise ValueError whenever a
        # later row carried extra fields.
        header = []
        for row in csv_dict_list:
            for key in row:
                if key not in header:
                    header.append(key)
        # newline='' per the csv module docs, so rows are not double-spaced
        # on platforms with CRLF line endings.
        with open(args.output, 'w', newline='') as csv_file:
            csv_writer = csv.DictWriter(csv_file, fieldnames=header)
            csv_writer.writeheader()
            csv_writer.writerows(csv_dict_list)