Skip to content

Commit 47afab5

Browse files
authored
Add extra_column to benchcomp markdown visualizer (rust-lang#2415)
1 parent d045764 commit 47afab5

File tree

3 files changed

+181
-18
lines changed

3 files changed

+181
-18
lines changed

tools/benchcomp/benchcomp/visualizers/__init__.py

Lines changed: 97 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -82,20 +82,84 @@ def __call__(self, results):
8282

8383

8484
class dump_markdown_results_table:
85-
"""Print a Markdown-formatted table displaying benchmark results
85+
"""Print Markdown-formatted tables displaying benchmark results
86+
87+
For each metric, this visualization prints out a table of benchmarks,
88+
showing the value of the metric for each variant.
8689
8790
The 'out_file' key is mandatory; specify '-' to print to stdout.
8891
92+
'extra_columns' can be an empty dict. The sample configuration below assumes
93+
that each benchmark result has a 'success' and 'runtime' metric for both
94+
variants, 'variant_1' and 'variant_2'. It adds a 'ratio' column to the table
95+
for the 'runtime' metric, and a 'change' column to the table for the
96+
'success' metric. The 'text' lambda is called once for each benchmark. The
97+
'text' lambda accepts a single argument---a dict---that maps variant
98+
names to the value of that variant for a particular metric. The lambda
99+
returns a string that is rendered in the benchmark's row in the new column.
100+
This allows you to emit arbitrary text or markdown formatting in response to
101+
particular combinations of values for different variants, such as
102+
regressions or performance improvements.
103+
89104
Sample configuration:
90105
106+
```
91107
visualize:
92108
- type: dump_markdown_results_table
93-
out_file: '-'
109+
out_file: "-"
110+
extra_columns:
111+
runtime:
112+
- column_name: ratio
113+
text: >
114+
lambda b: str(b["variant_2"]/b["variant_1"])
115+
if b["variant_2"] < (1.5 * b["variant_1"])
116+
else "**" + str(b["variant_2"]/b["variant_1"]) + "**"
117+
success:
118+
- column_name: change
119+
text: >
120+
lambda b: "" if b["variant_2"] == b["variant_1"]
121+
else "newly passing" if b["variant_2"]
122+
else "regressed"
123+
```
124+
125+
Example output:
126+
127+
```
128+
## runtime
129+
130+
| Benchmark | variant_1 | variant_2 | ratio |
131+
| --- | --- | --- | --- |
132+
| bench_1 | 5 | 10 | **2.0** |
133+
| bench_2 | 10 | 5 | 0.5 |
134+
135+
## success
136+
137+
| Benchmark | variant_1 | variant_2 | change |
138+
| --- | --- | --- | --- |
139+
| bench_1 | True | True | |
140+
| bench_2 | True | False | regressed |
141+
| bench_3 | False | True | newly passing |
142+
```
94143
"""
95144

96145

97-
def __init__(self, out_file):
146+
def __init__(self, out_file, extra_columns=None):
98147
self.get_out_file = benchcomp.Outfile(out_file)
148+
self.extra_columns = self._eval_column_text(extra_columns or {})
149+
150+
151+
@staticmethod
152+
def _eval_column_text(column_spec):
153+
for columns in column_spec.values():
154+
for column in columns:
155+
try:
156+
column["text"] = eval(column["text"])
157+
except SyntaxError:
158+
logging.error(
159+
"This column text is not a valid python program: '%s'",
160+
column["text"])
161+
sys.exit(1)
162+
return column_spec
99163

100164

101165
@staticmethod
@@ -104,10 +168,10 @@ def _get_template():
104168
{% for metric, benchmarks in d["metrics"].items() %}
105169
## {{ metric }}
106170
107-
| Benchmark | {% for variant in d["variants"] %} {{ variant }} |{% endfor %}
108-
| --- | {% for variant in d["variants"] %}--- |{% endfor -%}
171+
| Benchmark | {% for variant in d["variants"][metric] %} {{ variant }} |{% endfor %}
172+
| --- |{% for variant in d["variants"][metric] %} --- |{% endfor -%}
109173
{% for bench_name, bench_variants in benchmarks.items () %}
110-
| {{ bench_name }} {% for variant in d["variants"] -%}
174+
| {{ bench_name }} {% for variant in d["variants"][metric] -%}
111175
| {{ bench_variants[variant] }} {% endfor %}|
112176
{%- endfor %}
113177
{% endfor -%}
@@ -134,10 +198,35 @@ def _organize_results_into_metrics(results):
134198
return ret
135199

136200

201+
def _add_extra_columns(self, metrics):
202+
for metric, benches in metrics.items():
203+
try:
204+
columns = self.extra_columns[metric]
205+
except KeyError:
206+
continue
207+
for bench, variants in benches.items():
208+
tmp_variants = dict(variants)
209+
for column in columns:
210+
variants[column["column_name"]] = column["text"](tmp_variants)
211+
212+
213+
@staticmethod
214+
def _get_variants(metrics):
215+
ret = {}
216+
for metric, benches in metrics.items():
217+
for bench, variants in benches.items():
218+
ret[metric] = list(variants.keys())
219+
break
220+
return ret
221+
222+
137223
def __call__(self, results):
224+
metrics = self._organize_results_into_metrics(results)
225+
self._add_extra_columns(metrics)
226+
138227
data = {
139-
"metrics": self._organize_results_into_metrics(results),
140-
"variants": list(results["benchmarks"].values())[0]["variants"],
228+
"metrics": metrics,
229+
"variants": self._get_variants(metrics),
141230
}
142231

143232
env = jinja2.Environment(

tools/benchcomp/configs/perf-regression.yaml

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,61 @@ visualize:
3333

3434
- type: dump_markdown_results_table
3535
out_file: '-'
36+
extra_columns:
37+
38+
# For these two metrics, display the difference between old and new and
39+
# embolden if the absolute difference is more than 10% of the old value
40+
number_vccs:
41+
- column_name: diff old → new
42+
text: >
43+
lambda b: "" if b["kani_new"] == b["kani_old"]
44+
else ("**" if abs((b["kani_new"]-b["kani_old"])/b["kani_old"]) > 0.1 else "")
45+
+ ("+" if b["kani_new"] > b["kani_old"] else "")
46+
+ str(b["kani_new"] - b["kani_old"])
47+
+ ("**" if abs((b["kani_new"]-b["kani_old"])/b["kani_old"]) > 0.1 else "")
48+
number_program_steps:
49+
- column_name: diff old → new
50+
text: >
51+
lambda b: "" if b["kani_new"] == b["kani_old"]
52+
else ("**" if abs((b["kani_new"]-b["kani_old"])/b["kani_old"]) > 0.1 else "")
53+
+ ("+" if b["kani_new"] > b["kani_old"] else "")
54+
+ str(b["kani_new"] - b["kani_old"])
55+
+ ("**" if abs((b["kani_new"]-b["kani_old"])/b["kani_old"]) > 0.1 else "")
56+
57+
# For 'runtime' metrics, display the % change from old to new, emboldening
58+
# cells whose absolute change is >50%
59+
solver_runtime:
60+
- column_name: "% change old → new"
61+
text: >
62+
lambda b: "" if b["kani_new"] == b["kani_old"]
63+
else ("**" if abs((b["kani_new"]-b["kani_old"])/b["kani_old"]) > 0.5 else "")
64+
+ ("+" if b["kani_new"] > b["kani_old"] else "")
65+
+ "%.3f%%" % ((b["kani_new"] - b["kani_old"]) * 100 / b["kani_old"])
66+
+ ("**" if abs((b["kani_new"]-b["kani_old"])/b["kani_old"]) > 0.5 else "")
67+
verification_time:
68+
- column_name: "% change old → new"
69+
text: >
70+
lambda b: "" if b["kani_new"] == b["kani_old"]
71+
else ("**" if abs((b["kani_new"]-b["kani_old"])/b["kani_old"]) > 0.5 else "")
72+
+ ("+" if b["kani_new"] > b["kani_old"] else "")
73+
+ "%.3f%%" % ((b["kani_new"] - b["kani_old"]) * 100 / b["kani_old"])
74+
+ ("**" if abs((b["kani_new"]-b["kani_old"])/b["kani_old"]) > 0.5 else "")
75+
symex_runtime:
76+
- column_name: "% change old → new"
77+
text: >
78+
lambda b: "" if b["kani_new"] == b["kani_old"]
79+
else ("**" if abs((b["kani_new"]-b["kani_old"])/b["kani_old"]) > 0.5 else "")
80+
+ ("+" if b["kani_new"] > b["kani_old"] else "")
81+
+ "%.3f%%" % ((b["kani_new"] - b["kani_old"]) * 100 / b["kani_old"])
82+
+ ("**" if abs((b["kani_new"]-b["kani_old"])/b["kani_old"]) > 0.5 else "")
83+
84+
# For success metric, display some text if success has changed
85+
success:
86+
- column_name: change
87+
text: >
88+
lambda b: "" if b["kani_new"] == b["kani_old"]
89+
else "❌ newly failing" if b["kani_old"]
90+
else "✅ newly passing"
3691
3792
- type: error_on_regression
3893
variant_pairs: [[kani_old, kani_new]]

tools/benchcomp/test/test_regression.py

Lines changed: 29 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -402,9 +402,10 @@ def test_markdown_results_table(self):
402402
"config": {
403403
"directory": str(tmp),
404404
"command_line":
405-
"mkdir bench_1 bench_2"
405+
"mkdir bench_1 bench_2 bench_3"
406406
"&& echo true > bench_1/success"
407407
"&& echo true > bench_2/success"
408+
"&& echo false > bench_3/success"
408409
"&& echo 5 > bench_1/runtime"
409410
"&& echo 10 > bench_2/runtime"
410411
},
@@ -413,9 +414,10 @@ def test_markdown_results_table(self):
413414
"config": {
414415
"directory": str(tmp),
415416
"command_line":
416-
"mkdir bench_1 bench_2"
417+
"mkdir bench_1 bench_2 bench_3"
417418
"&& echo true > bench_1/success"
418419
"&& echo false > bench_2/success"
420+
"&& echo true > bench_3/success"
419421
"&& echo 10 > bench_1/runtime"
420422
"&& echo 5 > bench_2/runtime"
421423
}
@@ -432,6 +434,22 @@ def test_markdown_results_table(self):
432434
"visualize": [{
433435
"type": "dump_markdown_results_table",
434436
"out_file": "-",
437+
"extra_columns": {
438+
"runtime": [{
439+
"column_name": "ratio",
440+
"text":
441+
"lambda b: str(b['variant_2']/b['variant_1'])"
442+
"if b['variant_2'] < 1.5 * b['variant_1'] "
443+
"else '**' + str(b['variant_2']/b['variant_1']) + '**'"
444+
}],
445+
"success": [{
446+
"column_name": "notes",
447+
"text":
448+
"lambda b: '' if b['variant_2'] == b['variant_1']"
449+
"else 'newly passing' if b['variant_2'] "
450+
"else 'regressed'"
451+
}]
452+
}
435453
}]
436454
})
437455
run_bc()
@@ -441,17 +459,18 @@ def test_markdown_results_table(self):
441459
run_bc.stdout, textwrap.dedent("""
442460
## runtime
443461
444-
| Benchmark | variant_1 | variant_2 |
445-
| --- | --- |--- |
446-
| bench_1 | 5 | 10 |
447-
| bench_2 | 10 | 5 |
462+
| Benchmark | variant_1 | variant_2 | ratio |
463+
| --- | --- | --- | --- |
464+
| bench_1 | 5 | 10 | **2.0** |
465+
| bench_2 | 10 | 5 | 0.5 |
448466
449467
## success
450468
451-
| Benchmark | variant_1 | variant_2 |
452-
| --- | --- |--- |
453-
| bench_1 | True | True |
454-
| bench_2 | True | False |
469+
| Benchmark | variant_1 | variant_2 | notes |
470+
| --- | --- | --- | --- |
471+
| bench_1 | True | True | |
472+
| bench_2 | True | False | regressed |
473+
| bench_3 | False | True | newly passing |
455474
"""))
456475

457476

0 commit comments

Comments
 (0)