Commit 3336996
Add benchcomp parser for kani perf tests (rust-lang#2327)
This commit adds a parser that emits CBMC-generated metrics to benchcomp. It is
intended to be run in a kani checkout after running `kani-perf.sh`. A minimal
working example would be to run `benchcomp` in a directory containing the
following benchcomp.yaml:

variants:
  kani_0.17:
    config:
      directory: ~/src/kani
      command_line: git checkout .; git checkout kani-0.17.0 && rm -rf target && cargo build-dev && ./scripts/kani-perf.sh ; true
  kani_0.24:
    config:
      directory: ~/src/kani
      command_line: git checkout .; git checkout main; rm -rf target && cargo build-dev && ./scripts/kani-perf.sh ; true

run:
  suites:
    kani_perf:
      parser:
        module: kani_perf
      variants:
        - kani_0.17
        - kani_0.24

This commit also contains minor fixes to ensure that the test suite runs to
completion even if it exits with a non-zero return code.
1 parent 1449e62 commit 3336996
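For orientation, the snippet below is a sketch (not part of the commit) of the shape of data a suite parser such as kani_perf hands back to benchcomp, based on the main() return value in the parser diff further down; the benchmark name and metric values are hypothetical.

# Illustrative sketch only; benchmark name and values are made up.
example_parser_output = {
    "metrics": {
        "verification_time": {},
        "solver_runtime": {},
        "symex_runtime": {},
        "success": {},
    },
    "benchmarks": {
        "Arbitrary/some_harness": {  # "<test dir>/<harness name>"
            "metrics": {
                "verification_time": 0.25,  # seconds
                "solver_runtime": 0.01,
                "symex_runtime": 0.05,
                "success": True,
            },
        },
    },
}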

4 files changed: +182 −4 lines

tools/benchcomp/benchcomp/entry/run.py

Lines changed: 5 additions & 4 deletions
@@ -44,6 +44,7 @@ class _SingleInvocation:
     patches: list = dataclasses.field(default_factory=list)

     def __post_init__(self):
+        self.directory = pathlib.Path(self.directory).expanduser()
         if self.copy_benchmarks_dir:
             self.working_copy = pathlib.Path(
                 f"/tmp/benchcomp/suites/{uuid.uuid4()}")
@@ -55,7 +56,8 @@ def __call__(self):
         env.update(self.env)

         if self.copy_benchmarks_dir:
-            shutil.copytree(self.directory, self.working_copy)
+            shutil.copytree(
+                self.directory, self.working_copy, ignore_dangling_symlinks=True)

         try:
             subprocess.run(
@@ -65,12 +67,10 @@ def __call__(self):
             logging.warning(
                 "Invocation of suite %s with variant %s exited with code %d",
                 self.suite_id, self.variant_id, exc.returncode)
-            return
         except (OSError, subprocess.SubprocessError):
             logging.error(
                 "Invocation of suite %s with variant %s failed", self.suite_id,
                 self.variant_id)
-            return

         parser_mod_name = f"benchcomp.parsers.{self.parser}"
         parser = importlib.import_module(parser_mod_name)
@@ -115,7 +115,8 @@ def __call__(self):

         # Atomically symlink the symlink dir to the output dir, even if
         # there is already an existing symlink with that name
-        tmp_symlink = self.out_symlink.with_suffix(f".{uuid.uuid4()}")
+        tmp_symlink = pathlib.Path(
+            self.out_symlink).with_suffix(f".{uuid.uuid4()}")
         tmp_symlink.parent.mkdir(exist_ok=True)
         tmp_symlink.symlink_to(out_path)
         tmp_symlink.rename(self.out_symlink)
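The run.py changes are small robustness fixes: configured directories are now ~-expanded, dangling symlinks no longer abort the benchmark-directory copy, and the parser still runs when a suite exits non-zero. The sketch below illustrates that last behaviour; it is not the benchcomp source, and the helper name is made up.

import importlib
import logging
import pathlib
import subprocess

def run_suite_and_parse(command, directory, parser="kani_perf"):
    # Run the suite; log a failure but fall through so partial output is parsed.
    try:
        subprocess.run(command, shell=True, cwd=directory, check=True)
    except subprocess.CalledProcessError as exc:
        logging.warning("suite exited with code %d", exc.returncode)
    parser_mod = importlib.import_module(f"benchcomp.parsers.{parser}")
    return parser_mod.main(pathlib.Path(directory))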
tools/benchcomp/benchcomp/parsers/kani_perf.py

Lines changed: 82 additions & 0 deletions
@@ -0,0 +1,82 @@
+# Copyright Kani Contributors
+# SPDX-License-Identifier: Apache-2.0 OR MIT
+
+
+import pathlib
+import textwrap
+import re
+
+
+def get_description():
+    return textwrap.dedent("""\
+        Read Kani and CBMC statistics from the expected.out files of the kani
+        perf regression suite.
+        """)
+
+
+def _get_metrics():
+    return {
+        "verification_time": {
+            # Letter 'e' and hyphen handle scientific notation
+            "pat": re.compile(r"Verification Time: (?P<value>[-e\d\.]+)s"),
+            "parse": float,
+        },
+        "solver_runtime": {
+            "pat": re.compile(r"Runtime Solver: (?P<value>[-e\d\.]+)s"),
+            "parse": float,
+        },
+        "symex_runtime": {
+            "pat": re.compile(r"Runtime Symex: (?P<value>[-e\d\.]+)s"),
+            "parse": float,
+        },
+        "success": {
+            "pat": re.compile(r"VERIFICATION:- (?P<value>\w+)"),
+            "parse": lambda v: v == "SUCCESSFUL",
+        },
+    }
+
+
+def get_metrics():
+    metrics = dict(_get_metrics())
+    for metric, info in metrics.items():
+        for field in ("pat", "parse"):
+            info.pop(field)
+    return metrics
+
+
+def main(root_dir):
+    benchmarks = {}
+    test_out_dir = root_dir / "build" / "tests" / "perf"
+    harness_pat = re.compile(r"Checking harness (?P<name>.+)\.\.\.")
+
+    metrics = _get_metrics()
+    for out_file in pathlib.Path(test_out_dir).rglob("expected.out"):
+        test_name = str(out_file.parent.parent.relative_to(test_out_dir))
+        with open(out_file) as handle:
+            for line in handle:
+                # Each outfile contains output from multiple harnesses
+                m = harness_pat.match(line)
+                if m:
+                    bench_name = f"{test_name}/{m['name']}"
+                    benchmarks[bench_name] = {"metrics": {}}
+                    continue
+
+                for metric, metric_info in metrics.items():
+                    m = metric_info["pat"].match(line)
+                    if not m:
+                        continue
+
+                    parse = metric_info["parse"]
+                    try:
+                        # CBMC prints out some metrics more than once, e.g.
+                        # "Solver" and "decision procedure". Add those
+                        # values together
+                        benchmarks[bench_name]["metrics"][metric] += parse(m["value"])
+                    except (KeyError, TypeError):
+                        benchmarks[bench_name]["metrics"][metric] = parse(m["value"])
+                    break
+
+    return {
+        "metrics": get_metrics(),
+        "benchmarks": benchmarks,
+    }
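To make the patterns above concrete, the sketch below runs one of them over a hypothetical expected.out fragment; the exact wording and spacing of kani/CBMC output may differ between versions.

import re

# Hypothetical expected.out fragment (real files contain many more lines).
sample = """\
Checking harness my_module::my_harness...
Runtime Symex: 0.0123s
Runtime Solver: 4.5e-05s
Verification Time: 0.2s
VERIFICATION:- SUCCESSFUL
"""

# Same style of pattern as the parser's _get_metrics() table.
verification_time = re.compile(r"Verification Time: (?P<value>[-e\d\.]+)s")
for line in sample.splitlines():
    m = verification_time.match(line)
    if m:
        print(float(m["value"]))  # prints 0.2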

tools/benchcomp/benchcomp/visualizers/__init__.py

Lines changed: 17 additions & 0 deletions
@@ -4,6 +4,8 @@

 import dataclasses

+import yaml
+
 import benchcomp.visualizers.utils as viz_utils


@@ -49,3 +51,18 @@ def __call__(self, results):

         if any_benchmark_regressed(results):
             viz_utils.EXIT_CODE = 1
+
+
+
+@dataclasses.dataclass
+class dump_yaml:
+    """Print the YAML-formatted results to stdout
+
+    Sample configuration:
+
+    visualize:
+    - type: dump_yaml
+    """
+
+    def __call__(self, results):
+        print(yaml.dump(results, default_flow_style=False))
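As a rough sketch of what dump_yaml prints: the results dictionary below is made up, but mirrors the per-variant structure that the regression test further down expects.

import yaml  # PyYAML

# Hypothetical results; real runs have one entry per benchmark and variant.
results = {
    "benchmarks": {
        "Arbitrary/some_harness": {
            "variants": {
                "run_1": {"metrics": {"success": True, "verification_time": 0.2}},
            },
        },
    },
}
print(yaml.dump(results, default_flow_style=False))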

tools/benchcomp/test/test_regression.py

Lines changed: 78 additions & 0 deletions
@@ -44,6 +44,84 @@ def __call__(self, subcommand=None, default_flags=None, *flags):


 class RegressionTests(unittest.TestCase):
+    def setUp(self):
+        self.kani_dir = pathlib.Path(__file__).parent.parent.parent.parent
+
+    def test_kani_perf_fail(self):
+        cmd = (
+            "rm -rf build target &&"
+            "mkdir -p build/tests/perf/Unwind-Attribute/expected &&"
+            "kani tests/kani/Unwind-Attribute/fixme_lib.rs > "
+            "build/tests/perf/Unwind-Attribute/expected/expected.out"
+        )
+        self._run_kani_perf_test(cmd, False)
+
+    def test_kani_perf_success(self):
+        cmd = (
+            "rm -rf build target &&"
+            "mkdir -p build/tests/perf/Arbitrary/expected &&"
+            "kani tests/kani/Arbitrary/arbitrary_impls.rs > "
+            "build/tests/perf/Arbitrary/expected/expected.out"
+        )
+        self._run_kani_perf_test(cmd, True)
+
+    def _run_kani_perf_test(self, command, expected_pass):
+        """Ensure that the kani_perf parser can parse the output of a perf test"""
+
+        # The two variants are identical; we're not actually checking the
+        # returned metrics in this test, only checking that the parser works
+        run_bc = Benchcomp({
+            "variants": {
+                "run_1": {
+                    "config": {
+                        "directory": str(self.kani_dir),
+                        "command_line": command,
+                    },
+                },
+                "run_2": {
+                    "config": {
+                        "directory": str(self.kani_dir),
+                        "command_line": command,
+                    },
+                },
+            },
+            "run": {
+                "suites": {
+                    "suite_1": {
+                        "parser": { "module": "kani_perf" },
+                        "variants": ["run_1", "run_2"]
+                    }
+                }
+            },
+            "visualize": [{"type": "dump_yaml"}],
+        })
+        run_bc()
+        self.assertEqual(run_bc.proc.returncode, 0, msg=run_bc.stderr)
+
+        results = yaml.safe_load(run_bc.stdout)
+
+        expected_types = {
+            "solver_runtime": float,
+            "symex_runtime": float,
+            "verification_time": float,
+            "success": bool,
+        }
+
+        all_succeeded = True
+
+        for _, bench in results["benchmarks"].items():
+            for _, variant in bench["variants"].items():
+
+                all_succeeded &= variant["metrics"]["success"]
+
+                for metric, ttype in expected_types.items():
+                    self.assertIn(metric, variant["metrics"], msg=run_bc.stdout)
+                    self.assertTrue(
+                        isinstance(variant["metrics"][metric], ttype),
+                        msg=run_bc.stdout)
+
+        self.assertEqual(expected_pass, all_succeeded, msg=run_bc.stdout)
+
     def test_error_on_regression_two_benchmarks_previously_failed(self):
         """Ensure that benchcomp terminates with exit of 0 when the "error_on_regression" visualization is configured and one of the benchmarks continues to fail (no regression)."""
