Skip to content

Commit 94a26d1

Browse files
authored
Add 'benchcomp visualize' & error on regression (rust-lang#2348)
This commit adds an implementation for the `benchcomp visualize` command. Currently, there is one visualization, "error_on_regression", which causes `benchcomp` or `benchcomp visualize` to terminate with a return code of 1 if there was a regression in any of the metrics. Users can specify the following in their config file:

    visualize:
    - type: error_on_regression
      variant_pairs:
      - [variant_1, variant_2]
      - [variant_1, variant_3]
      checks:
      - metric: runtime
        test: "lambda old, new: new / old > 1.1"
      - metric: passed
        test: "lambda old, new: False if not old else not new"

This says to check whether any benchmark regressed when run under variant_2 compared to variant_1. A benchmark is considered to have regressed if the value of the 'runtime' metric under variant_2 is 10% higher than the value under variant_1. Furthermore, the benchmark is also considered to have regressed if it was previously passing, but is now failing. These same checks are performed on all benchmarks run under variant_3 compared to variant_1. If any of those lambda functions returns True, then benchcomp will terminate with a return code of 1. This commit fixes rust-lang#2338.
1 parent 5230d62 commit 94a26d1

File tree

6 files changed

+461
-2
lines changed

6 files changed

+461
-2
lines changed

tools/benchcomp/benchcomp/entry/benchcomp.py

+2
Original file line numberDiff line numberDiff line change
@@ -15,3 +15,5 @@ def main(args):
1515

1616
args.suites_dir = run_result.out_prefix / run_result.out_symlink
1717
results = benchcomp.entry.collate.main(args)
18+
19+
benchcomp.entry.visualize.main(args)

tools/benchcomp/benchcomp/entry/visualize.py

+13-2
Original file line numberDiff line numberDiff line change
@@ -4,5 +4,16 @@
44
# Entrypoint for `benchcomp visualize`
55

66

7-
import sys

import yaml

import benchcomp.visualizers.utils


def main(args):
    """Entry point for `benchcomp visualize`.

    Loads the collated results file named by ``args.result_file`` and runs
    every visualization listed in ``args.config``, then exits with the
    process-wide exit code that the visualizers may have set.
    """
    with open(args.result_file, encoding="utf-8") as result_fh:
        collated_results = yaml.safe_load(result_fh)

    # Generator reads the "visualize" section of the config and dispatches
    # each entry to the matching visualizer class.
    run_visualizations = benchcomp.visualizers.utils.Generator(args.config)
    run_visualizations(collated_results)

    sys.exit(benchcomp.visualizers.utils.EXIT_CODE)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# Copyright Kani Contributors
2+
# SPDX-License-Identifier: Apache-2.0 OR MIT
3+
#
4+
# This parser is used by the test suite. It reads files in a directory of
5+
# directories. Each directory represents a benchmark name; inside that
6+
# directory, every file name is the name of a metric and the contents of the
7+
# file are the metric value. This is to allow writing ad-hoc regression tests
8+
# without actually running a real benchmark suite.
9+
10+
11+
import json
12+
import pathlib
13+
14+
15+
def main(root_dir):
    """Parse a directory-of-directories benchmark layout into a result dict.

    Each subdirectory of *root_dir* is a benchmark name; every file inside a
    benchmark directory is a metric name whose contents are a JSON literal
    giving the metric value. Returns a dict with two keys: "metrics" (the set
    of metric names seen, each mapped to an empty dict) and "benchmarks"
    (benchmark name -> {"metrics": {metric name: value}}).
    """
    ret = {
        "metrics": {},
        "benchmarks": {},
    }
    for benchmark in pathlib.Path(root_dir).iterdir():
        ret["benchmarks"][benchmark.name] = {"metrics": {}}
        # benchmark is already a pathlib.Path; no need to re-wrap it
        for metric in benchmark.iterdir():
            ret["metrics"][metric.name] = {}
            # read_text closes the file for us and fixes the text encoding;
            # values are JSON literals such as 42, true, "a string"
            value = json.loads(metric.read_text(encoding="utf-8").strip())
            ret["benchmarks"][benchmark.name]["metrics"][metric.name] = value
    return ret
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
# Copyright Kani Contributors
2+
# SPDX-License-Identifier: Apache-2.0 OR MIT
3+
4+
5+
import dataclasses
6+
7+
import benchcomp.visualizers.utils as viz_utils
8+
9+
10+
# TODO The doc comment should appear in the help output, which should list all
11+
# available checks.
12+
13+
@dataclasses.dataclass
class error_on_regression:
    """Terminate benchcomp with a return code of 1 if any benchmark regressed.

    This visualization checks whether any benchmark regressed from one variant
    to another. Sample configuration:

    visualize:
    - type: error_on_regression
      variant_pairs:
      - [variant_1, variant_2]
      - [variant_1, variant_3]
      checks:
      - metric: runtime
        test: "lambda old, new: new / old > 1.1"
      - metric: passed
        test: "lambda old, new: False if not old else not new"

    This says to check whether any benchmark regressed when run under variant_2
    compared to variant_1. A benchmark is considered to have regressed if the
    value of the 'runtime' metric under variant_2 is 10% higher than the value
    under variant_1. Furthermore, the benchmark is also considered to have
    regressed if it was previously passing, but is now failing. These same
    checks are performed on all benchmarks run under variant_3 compared to
    variant_1. If any of those lambda functions returns True, then benchcomp
    will terminate with a return code of 1.
    """

    # One dict per check: {"metric": ..., "test": ...}
    checks: list
    # Pairs of [old_variant, new_variant] names to compare
    variant_pairs: list

    def __call__(self, results):
        # Run every configured check over every variant pair; a single
        # regression anywhere flips the process-wide exit code to 1.
        for single_check in self.checks:
            regressed = viz_utils.AnyBenchmarkRegressedChecker(
                self.variant_pairs, **single_check)
            if regressed(results):
                viz_utils.EXIT_CODE = 1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
# Copyright Kani Contributors
2+
# SPDX-License-Identifier: Apache-2.0 OR MIT
3+
4+
5+
import dataclasses
import logging
import sys
import typing

import benchcomp.visualizers
9+
10+
11+
EXIT_CODE = 0
12+
13+
14+
class SingleRegressionCheck:
    """Check whether a single benchmark has regressed on a single metric.

    Instances of this class are constructed with the name of a metric to check,
    and a test function that figures out whether that metric has
    regressed. Instances of this class can then be called on pairs of
    benchmark values. The instance returns true if the second benchmark
    regressed compared to the first.
    """

    metric: str
    test: typing.Callable

    def __init__(self, metric, test_program):
        self.metric = metric
        try:
            # SECURITY NOTE: test_program is eval'd as arbitrary Python. It
            # comes from the user's own benchcomp config file, so this is
            # equivalent to running the user's code -- but never feed an
            # untrusted config through here.
            self.test = eval(test_program)
        except SyntaxError:
            # Requires `logging` and `sys` to be imported at module level;
            # the original module omitted both, so this path raised NameError.
            logging.error(
                "This test program is not valid Python: '%s'", test_program)
            logging.error(
                "Regression test programs should be Python lambda functions that "
                "take two arguments (the value of a metric when run under two "
                "variants) and returns true if the second value regressed with "
                "respect to the first.")
            sys.exit(1)

    def __call__(self, old_value, new_value):
        # Delegate to the user-supplied comparison: True means "regressed"
        return self.test(old_value, new_value)
45+
46+
47+
48+
class AnyBenchmarkRegressedChecker:
    """Check whether any benchmark has regressed on a particular metric.

    Instances of this class are constructed with the name of a metric to check,
    and the name of a comparison function that figures out whether one variant
    of a benchmark has regressed compared to another variant.

    When called, instances of this class return True iff any of the benchmarks
    regressed.
    """

    def __init__(self, variant_pairs, metric, test, **test_args):
        self.variant_pairs = variant_pairs
        self.metric = metric
        self.test = test
        # NOTE(review): these extra args are forwarded to
        # SingleRegressionCheck, whose __init__ takes no arguments beyond
        # (metric, test_program); any non-empty test_args would raise
        # TypeError there -- confirm the intended API.
        self.test_args = test_args

    def __call__(self, results):
        ret = False
        has_regressed = SingleRegressionCheck(
            self.metric, self.test, **self.test_args)

        for bench_name, bench in results["benchmarks"].items():
            for old_variant, new_variant in self.variant_pairs:
                # Skip the whole pair when either side lacks this metric.
                # (Previously `continue` only skipped the inner variant loop,
                # so the lookups below still raised KeyError.)
                missing = [
                    v for v in (old_variant, new_variant)
                    if v not in bench["variants"]
                    or self.metric not in bench["variants"][v]["metrics"]
                ]
                if missing:
                    for variant in missing:
                        logging.warning(
                            "benchmark '%s' did not have a value for metric '%s' "
                            "when run under variant '%s'",
                            bench_name, self.metric, variant)
                    continue

                old = bench["variants"][old_variant]["metrics"][self.metric]
                new = bench["variants"][new_variant]["metrics"][self.metric]

                if has_regressed(old, new):
                    # Fixed typo: was `logging.warining`, which raised
                    # AttributeError exactly when a regression was found.
                    logging.warning(
                        "Benchmark '%s' regressed on metric '%s' (%s -> %s)",
                        bench_name, self.metric, old, new)
                    ret = True
        return ret
90+
91+
92+
93+
@dataclasses.dataclass
class Generator:
    """Generate all visualizations in a config file given a dict of results."""

    config: benchcomp.ConfigFile

    def __call__(self, results):
        for viz in self.config["visualize"]:
            # Pop "type" from a copy: mutating the config entry in place
            # would make a second invocation fail with KeyError("type").
            params = dict(viz)
            viz_type = params.pop("type")
            # The "type" value names a class in benchcomp.visualizers; the
            # remaining keys are that class's constructor arguments.
            klass = getattr(benchcomp.visualizers, viz_type)
            visualize = klass(**params)
            visualize(results)

0 commit comments

Comments
 (0)