|
| 1 | +# Copyright Kani Contributors |
| 2 | +# SPDX-License-Identifier: Apache-2.0 OR MIT |
| 3 | + |
| 4 | + |
import dataclasses
import logging
import sys
import typing

import benchcomp.visualizers
| 9 | + |
| 10 | + |
# Module-level exit status, initialised to 0.
# NOTE(review): nothing in this file reads or updates this value -- presumably
# code elsewhere sets it to signal failure; confirm against callers.
EXIT_CODE = 0
| 12 | + |
| 13 | + |
class SingleRegressionCheck:
    """Check whether a single benchmark has regressed on a single metric

    Instances of this class are constructed with the name of a metric to check,
    and a test program: the source text of a Python expression (normally a
    lambda) that evaluates to a callable taking two arguments. Instances of
    this class can then be called on pairs of benchmark values. The instance
    returns whatever the test callable returns, which should be true if the
    second value regressed compared to the first.

    If the test program is not valid Python, or does not evaluate to a
    callable, an error is logged and the process exits with status 1.
    """

    metric: str
    test: typing.Callable

    def __init__(self, metric, test_program):
        """Compile test_program into the callable used by __call__.

        Args:
            metric: name of the metric this check applies to.
            test_program: source text of a Python expression evaluating to a
                two-argument callable.

        Raises:
            SystemExit: with code 1 if test_program has a syntax error or
                does not evaluate to a callable.
        """
        self.metric = metric
        try:
            # NOTE(security): eval() executes arbitrary code. This is only
            # acceptable if test programs come from a trusted source; never
            # feed it untrusted input.
            test = eval(test_program)
        except SyntaxError:
            self._reject(
                "This test program is not valid Python: '%s'", test_program)

        # Fail at construction time rather than with an opaque TypeError the
        # first time the check is called.
        if not callable(test):
            self._reject(
                "This test program does not evaluate to a callable: '%s'",
                test_program)
        self.test = test

    @staticmethod
    def _reject(message, test_program):
        """Log why test_program is unusable, then exit with status 1."""
        logging.error(message, test_program)
        logging.error(
            "Regression test programs should be Python lambda functions that "
            "take two arguments (the value of a metric when run under two "
            "variants) and returns true if the second value regressed with "
            "respect to the first.")
        sys.exit(1)

    def __call__(self, old_value, new_value):
        """Return the result of applying the test to (old_value, new_value)."""
        return self.test(old_value, new_value)
| 45 | + |
| 46 | + |
| 47 | + |
class AnyBenchmarkRegressedChecker:
    """Check whether any benchmark has regressed on a particular metric

    Instances of this class are constructed with pairs of variant names
    (old, new), the name of a metric to check, and a test program that figures
    out whether one variant of a benchmark has regressed compared to another
    variant.

    When called, instances of this class return True iff any of the benchmarks
    regressed. A benchmark that lacks a value for the metric under either
    variant of a pair is skipped for that pair, with a warning.
    """

    def __init__(self, variant_pairs, metric, test, **test_args):
        self.variant_pairs = variant_pairs
        self.metric = metric
        self.test = test
        self.test_args = test_args

    def _has_value(self, bench, variant):
        """Return whether bench has a value for self.metric under variant."""
        metrics = bench["variants"].get(variant, {}).get("metrics", {})
        return self.metric in metrics

    def __call__(self, results):
        """Return True iff any benchmark in results regressed.

        Args:
            results: dict with a "benchmarks" key mapping benchmark names to
                dicts of the form {"variants": {name: {"metrics": {...}}}}.
        """
        any_regressed = False
        # NOTE(review): test_args are forwarded as keyword arguments, but
        # SingleRegressionCheck.__init__ as defined in this file accepts none,
        # so a non-empty test_args raises TypeError here -- confirm intent.
        has_regressed = SingleRegressionCheck(
            self.metric, self.test, **self.test_args)

        for bench_name, bench in results["benchmarks"].items():
            for old_variant, new_variant in self.variant_pairs:
                missing = [
                    variant for variant in (old_variant, new_variant)
                    if not self._has_value(bench, variant)]
                for variant in missing:
                    logging.warning(
                        "benchmark '%s' did not have a value for metric '%s' "
                        "when run under variant '%s'",
                        bench_name, self.metric, variant)
                if missing:
                    # Nothing to compare for this pair; looking the values up
                    # anyway would raise KeyError.
                    continue

                old = bench["variants"][old_variant]["metrics"][self.metric]
                new = bench["variants"][new_variant]["metrics"][self.metric]

                if has_regressed(old, new):
                    logging.warning(
                        "Benchmark '%s' regressed on metric '%s' (%s -> %s)",
                        bench_name, self.metric, old, new)
                    any_regressed = True
        return any_regressed
| 90 | + |
| 91 | + |
| 92 | + |
@dataclasses.dataclass
class Generator:
    """Generate all visualizations in a config file given a dict of results

    Each entry of config["visualize"] names a class in benchcomp.visualizers
    under its "type" key; the remaining keys are passed to that class as
    keyword arguments. The resulting object is then called with the results.
    """

    config: benchcomp.ConfigFile

    def __call__(self, results):
        """Instantiate and run every configured visualization on results."""
        for viz in self.config["visualize"]:
            # Pop "type" from a copy so the config is left intact; popping
            # from the entry itself makes a second call fail with KeyError.
            options = dict(viz)
            viz_type = options.pop("type")
            klass = getattr(benchcomp.visualizers, viz_type)
            visualize = klass(**options)
            visualize(results)
0 commit comments