Commit 3336996
Add benchcomp parser for kani perf tests (rust-lang#2327)
This commit adds a parser that emits CBMC-generated metrics to benchcomp. It is
intended to be run in a kani checkout after running `kani-perf.sh`. A minimal
working example would be to run `benchcomp` in a directory containing the
following benchcomp.yaml:

variants:
  kani_0.17:
    config:
      directory: ~/src/kani
      command_line: git checkout .; git checkout kani-0.17.0 && rm -rf target && cargo build-dev && ./scripts/kani-perf.sh ; true
  kani_0.24:
    config:
      directory: ~/src/kani
      command_line: git checkout .; git checkout main; rm -rf target && cargo build-dev && ./scripts/kani-perf.sh ; true

run:
  suites:
    kani_perf:
      parser:
        module: kani_perf
      variants:
        - kani_0.17
        - kani_0.24

This commit also contains minor fixes to ensure that the test suite runs to
completion even if it exits with a non-zero return code.
1 parent 1449e62 commit 3336996
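For orientation, the snippet below is a sketch (not part of the commit) of the shape of data a suite parser such as kani_perf hands back to benchcomp, based on the main() return value in the parser diff further down; the benchmark name and metric values are hypothetical.

# Illustrative sketch only; benchmark name and values are made up.
example_parser_output = {
    "metrics": {
        "verification_time": {},
        "solver_runtime": {},
        "symex_runtime": {},
        "success": {},
    },
    "benchmarks": {
        "Arbitrary/some_harness": {  # "<test dir>/<harness name>"
            "metrics": {
                "verification_time": 0.25,  # seconds
                "solver_runtime": 0.01,
                "symex_runtime": 0.05,
                "success": True,
            },
        },
    },
}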

4 files changed: +182 −4 lines

tools/benchcomp/benchcomp/entry/run.py

Lines changed: 5 additions & 4 deletions
@@ -44,6 +44,7 @@ class _SingleInvocation:
     patches: list = dataclasses.field(default_factory=list)

     def __post_init__(self):
+        self.directory = pathlib.Path(self.directory).expanduser()
         if self.copy_benchmarks_dir:
             self.working_copy = pathlib.Path(
                 f"/tmp/benchcomp/suites/{uuid.uuid4()}")
@@ -55,7 +56,8 @@ def __call__(self):
         env.update(self.env)

         if self.copy_benchmarks_dir:
-            shutil.copytree(self.directory, self.working_copy)
+            shutil.copytree(
+                self.directory, self.working_copy, ignore_dangling_symlinks=True)

         try:
             subprocess.run(
@@ -65,12 +67,10 @@ def __call__(self):
             logging.warning(
                 "Invocation of suite %s with variant %s exited with code %d",
                 self.suite_id, self.variant_id, exc.returncode)
-            return
         except (OSError, subprocess.SubprocessError):
             logging.error(
                 "Invocation of suite %s with variant %s failed", self.suite_id,
                 self.variant_id)
-            return

         parser_mod_name = f"benchcomp.parsers.{self.parser}"
         parser = importlib.import_module(parser_mod_name)
@@ -115,7 +115,8 @@ def __call__(self):

         # Atomically symlink the symlink dir to the output dir, even if
         # there is already an existing symlink with that name
-        tmp_symlink = self.out_symlink.with_suffix(f".{uuid.uuid4()}")
+        tmp_symlink = pathlib.Path(
+            self.out_symlink).with_suffix(f".{uuid.uuid4()}")
         tmp_symlink.parent.mkdir(exist_ok=True)
         tmp_symlink.symlink_to(out_path)
         tmp_symlink.rename(self.out_symlink)
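The run.py changes are small robustness fixes: configured directories are now ~-expanded, dangling symlinks no longer abort the benchmark-directory copy, and the parser still runs when a suite exits non-zero. The sketch below illustrates that last behaviour; it is not the benchcomp source, and the helper name is made up.

import importlib
import logging
import pathlib
import subprocess

def run_suite_and_parse(command, directory, parser="kani_perf"):
    # Run the suite; log a failure but fall through so partial output is parsed.
    try:
        subprocess.run(command, shell=True, cwd=directory, check=True)
    except subprocess.CalledProcessError as exc:
        logging.warning("suite exited with code %d", exc.returncode)
    parser_mod = importlib.import_module(f"benchcomp.parsers.{parser}")
    return parser_mod.main(pathlib.Path(directory))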
tools/benchcomp/benchcomp/parsers/kani_perf.py

Lines changed: 82 additions & 0 deletions
@@ -0,0 +1,82 @@
+# Copyright Kani Contributors
+# SPDX-License-Identifier: Apache-2.0 OR MIT
+
+
+import pathlib
+import textwrap
+import re
+
+
+def get_description():
+    return textwrap.dedent("""\
+        Read Kani and CBMC statistics from the expected.out files of the kani
+        perf regression suite.
+        """)
+
+
+def _get_metrics():
+    return {
+        "verification_time": {
+            # Letter 'e' and hyphen handle scientific notation
+            "pat": re.compile(r"Verification Time: (?P<value>[-e\d\.]+)s"),
+            "parse": float,
+        },
+        "solver_runtime": {
+            "pat": re.compile(r"Runtime Solver: (?P<value>[-e\d\.]+)s"),
+            "parse": float,
+        },
+        "symex_runtime": {
+            "pat": re.compile(r"Runtime Symex: (?P<value>[-e\d\.]+)s"),
+            "parse": float,
+        },
+        "success": {
+            "pat": re.compile(r"VERIFICATION:- (?P<value>\w+)"),
+            "parse": lambda v: v == "SUCCESSFUL",
+        },
+    }
+
+
+def get_metrics():
+    metrics = dict(_get_metrics())
+    for metric, info in metrics.items():
+        for field in ("pat", "parse"):
+            info.pop(field)
+    return metrics
+
+
+def main(root_dir):
+    benchmarks = {}
+    test_out_dir = root_dir / "build" / "tests" / "perf"
+    harness_pat = re.compile(r"Checking harness (?P<name>.+)\.\.\.")
+
+    metrics = _get_metrics()
+    for out_file in pathlib.Path(test_out_dir).rglob("expected.out"):
+        test_name = str(out_file.parent.parent.relative_to(test_out_dir))
+        with open(out_file) as handle:
+            for line in handle:
+                # Each outfile contains output from multiple harnesses
+                m = harness_pat.match(line)
+                if m:
+                    bench_name = f"{test_name}/{m['name']}"
+                    benchmarks[bench_name] = {"metrics": {}}
+                    continue
+
+                for metric, metric_info in metrics.items():
+                    m = metric_info["pat"].match(line)
+                    if not m:
+                        continue
+
+                    parse = metric_info["parse"]
+                    try:
+                        # CBMC prints out some metrics more than once, e.g.
+                        # "Solver" and "decision procedure". Add those
+                        # values together
+                        benchmarks[bench_name]["metrics"][metric] += parse(m["value"])
+                    except (KeyError, TypeError):
+                        benchmarks[bench_name]["metrics"][metric] = parse(m["value"])
+                    break
+
+    return {
+        "metrics": get_metrics(),
+        "benchmarks": benchmarks,
+    }
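To make the patterns above concrete, the sketch below runs one of them over a hypothetical expected.out fragment; the exact wording and spacing of kani/CBMC output may differ between versions.

import re

# Hypothetical expected.out fragment (real files contain many more lines).
sample = """\
Checking harness my_module::my_harness...
Runtime Symex: 0.0123s
Runtime Solver: 4.5e-05s
Verification Time: 0.2s
VERIFICATION:- SUCCESSFUL
"""

# Same style of pattern as the parser's _get_metrics() table.
verification_time = re.compile(r"Verification Time: (?P<value>[-e\d\.]+)s")
for line in sample.splitlines():
    m = verification_time.match(line)
    if m:
        print(float(m["value"]))  # prints 0.2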

tools/benchcomp/benchcomp/visualizers/__init__.py

Lines changed: 17 additions & 0 deletions
@@ -4,6 +4,8 @@

 import dataclasses

+import yaml
+
 import benchcomp.visualizers.utils as viz_utils


@@ -49,3 +51,18 @@ def __call__(self, results):

         if any_benchmark_regressed(results):
             viz_utils.EXIT_CODE = 1
+
+
+
+@dataclasses.dataclass
+class dump_yaml:
+    """Print the YAML-formatted results to stdout
+
+    Sample configuration:
+
+    visualize:
+    - type: dump_yaml
+    """
+
+    def __call__(self, results):
+        print(yaml.dump(results, default_flow_style=False))
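As a rough sketch of what dump_yaml prints: the results dictionary below is made up, but mirrors the per-variant structure that the regression test further down expects.

import yaml  # PyYAML

# Hypothetical results; real runs have one entry per benchmark and variant.
results = {
    "benchmarks": {
        "Arbitrary/some_harness": {
            "variants": {
                "run_1": {"metrics": {"success": True, "verification_time": 0.2}},
            },
        },
    },
}
print(yaml.dump(results, default_flow_style=False))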

tools/benchcomp/test/test_regression.py

Lines changed: 78 additions & 0 deletions
@@ -44,6 +44,84 @@ def __call__(self, subcommand=None, default_flags=None, *flags):


 class RegressionTests(unittest.TestCase):
+    def setUp(self):
+        self.kani_dir = pathlib.Path(__file__).parent.parent.parent.parent
+
+    def test_kani_perf_fail(self):
+        cmd = (
+            "rm -rf build target &&"
+            "mkdir -p build/tests/perf/Unwind-Attribute/expected &&"
+            "kani tests/kani/Unwind-Attribute/fixme_lib.rs > "
+            "build/tests/perf/Unwind-Attribute/expected/expected.out"
+        )
+        self._run_kani_perf_test(cmd, False)
+
+    def test_kani_perf_success(self):
+        cmd = (
+            "rm -rf build target &&"
+            "mkdir -p build/tests/perf/Arbitrary/expected &&"
+            "kani tests/kani/Arbitrary/arbitrary_impls.rs > "
+            "build/tests/perf/Arbitrary/expected/expected.out"
+        )
+        self._run_kani_perf_test(cmd, True)
+
+    def _run_kani_perf_test(self, command, expected_pass):
+        """Ensure that the kani_perf parser can parse the output of a perf test"""
+
+        # The two variants are identical; we're not actually checking the
+        # returned metrics in this test, only checking that the parser works
+        run_bc = Benchcomp({
+            "variants": {
+                "run_1": {
+                    "config": {
+                        "directory": str(self.kani_dir),
+                        "command_line": command,
+                    },
+                },
+                "run_2": {
+                    "config": {
+                        "directory": str(self.kani_dir),
+                        "command_line": command,
+                    },
+                },
+            },
+            "run": {
+                "suites": {
+                    "suite_1": {
+                        "parser": { "module": "kani_perf" },
+                        "variants": ["run_1", "run_2"]
+                    }
+                }
+            },
+            "visualize": [{"type": "dump_yaml"}],
+        })
+        run_bc()
+        self.assertEqual(run_bc.proc.returncode, 0, msg=run_bc.stderr)
+
+        results = yaml.safe_load(run_bc.stdout)
+
+        expected_types = {
+            "solver_runtime": float,
+            "symex_runtime": float,
+            "verification_time": float,
+            "success": bool,
+        }
+
+        all_succeeded = True
+
+        for _, bench in results["benchmarks"].items():
+            for _, variant in bench["variants"].items():
+
+                all_succeeded &= variant["metrics"]["success"]
+
+                for metric, ttype in expected_types.items():
+                    self.assertIn(metric, variant["metrics"], msg=run_bc.stdout)
+                    self.assertTrue(
+                        isinstance(variant["metrics"][metric], ttype),
+                        msg=run_bc.stdout)
+
+        self.assertEqual(expected_pass, all_succeeded, msg=run_bc.stdout)
+
     def test_error_on_regression_two_benchmarks_previously_failed(self):
         """Ensure that benchcomp terminates with exit of 0 when the "error_on_regression" visualization is configured and one of the benchmarks continues to fail (no regression)."""
