
Commit 345011d

Updated requirements, extract_ir perf improvements, benchmark reporting

* Using precise pip package versions in requirements.txt, to avoid unwanted upgrades.
* Fix in extract_ir (authored by [email protected]): speeds up extraction by a few orders of magnitude.
* Tools to post-process json benchmark reports when benchmarks collect perf counters - this helps validate hypotheses about improvements/regressions (benchmarks: https://github.com/google/benchmark). A sketch of the expected report shape follows the file stats below.
1 parent 9bf0460 commit 345011d

5 files changed: +392 -12 lines changed

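For orientation, here is a minimal sketch of the report shape the new tooling consumes. This is an assumption inferred from _load_values and the test data in this commit, not the full google/benchmark schema: a JSON report with a top-level 'benchmarks' list, each entry carrying the benchmark name, its iteration count, and one field per collected perf counter. Counter names and values below are illustrative only.

# A hypothetical results.json, shown as the Python dict json.load() would
# return. Only the fields the report tooling reads are included; counter
# names (INSTRUCTIONS, CYCLES) and all values are made up for illustration.
example_report = {
    'benchmarks': [
        {'name': 'BM_A', 'iterations': 10, 'INSTRUCTIONS': 1200.0, 'CYCLES': 900.0},
        {'name': 'BM_A', 'iterations': 11, 'INSTRUCTIONS': 1180.0, 'CYCLES': 890.0},
        {'name': 'BM_B', 'iterations': 15, 'INSTRUCTIONS': 5400.0, 'CYCLES': 4100.0},
    ]
}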
Lines changed: 187 additions & 0 deletions
@@ -0,0 +1,187 @@
# coding=utf-8
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Analysis for benchmark results.json."""

import collections
import math
import statistics

from typing import Any
from typing import Dict
from typing import Iterable
from typing import List
from typing import Tuple

# For each benchmark, and for each counter, capture the recorded values.
PerBenchmarkResults = Dict[str, Dict[str, List[float]]]

# Benchmark data, as captured by the benchmark json output: a dictionary from
# benchmark names to a list of run results. Each run result is a dictionary of
# key-value pairs, e.g. counter name - value.
BenchmarkRunResults = Dict[str, List[Dict[str, Any]]]

# A comparison per benchmark, per counter, capturing the geomean and the stdev
# of the base and experiment values.
ABComparison = Dict[str, Dict[str, Tuple[float, float, float]]]


def _geomean(data: List[float]):
  return math.exp(sum([math.log(x) for x in data]) / len(data))


def _stdev(data: List[float]):
  assert data
  return 0.0 if len(data) == 1 else statistics.stdev(data)


class BenchmarkReport:
  """The counter values collected for benchmarks in a benchmark suite."""

  def __init__(self, suite_name: str, json_data: BenchmarkRunResults,
               counter_names: Iterable[str]):
    self._suite_name = suite_name
    self._load_values(json_data, counter_names)

  def suite_name(self):
    return self._suite_name

  def values(self):
    return self._values

  def names(self):
    return self._names

  def counters(self):
    return self._counters

  def raw_measurements(self):
    return self._raw_measurements

  def counter_means(self, benchmark: str, counter: str) -> Tuple[float, float]:
    if counter not in self.counters():
      raise ValueError('unknown counter')
    if benchmark not in self.names():
      raise ValueError('unknown benchmark')
    return (_geomean(self._values[benchmark][counter]),
            _stdev(self._values[benchmark][counter]))

  def zero_counters(self):
    ret = set()
    for name in self.names():
      for counter in self.values()[name]:
        if 0.0 in self.values()[name][counter]:
          ret.add((name, counter))
    return frozenset(ret)

  def large_variation_counters(self, variation: float):
    ret = set()
    for name in self.names():
      for counter in self.values()[name]:
        vals = self.values()[name][counter]
        swing = _stdev(vals) / _geomean(vals)
        if swing > variation:
          ret.add((name, counter, swing))
    return frozenset(ret)

  def _load_values(self, data: BenchmarkRunResults,
                   names: Iterable[str]) -> None:
    """Organize json values per-benchmark, per-counter.

    Populates self._values, self._raw_measurements, self._counters and
    self._names from the raw json data.

    Args:
      data: json data
      names: perf counter names
    """
    runs = data['benchmarks']
    self._values = collections.defaultdict(
        lambda: collections.defaultdict(list))
    self._raw_measurements = collections.defaultdict(
        lambda: collections.defaultdict(list))
    self._counters = set()
    self._names = set()

    for r in runs:
      benchmark_name = r['name']
      for counter in names:
        value = float(r[counter])
        iters = float(r['iterations'])
        self._raw_measurements[benchmark_name][counter].append(value * iters)
        self._values[benchmark_name][counter].append(value)
        self._counters.add(counter)
      self._names.add(benchmark_name)
    self._counters = frozenset(self._counters)
    self._names = frozenset(self._names)


class BenchmarkComparison:
  """Analysis of 2 benchmark runs."""

  def __init__(self, base_report: BenchmarkReport, exp_report: BenchmarkReport):
    if base_report.suite_name() != exp_report.suite_name():
      raise ValueError('cannot compare different suites')
    if set(base_report.names()) != set(exp_report.names()):
      raise ValueError('suite runs have different benchmark names')
    if set(base_report.counters()) != set(exp_report.counters()):
      raise ValueError(
          'counter names are different between base and experiment')

    self._base = base_report
    self._exp = exp_report

  def suite_name(self):
    return self._base.suite_name()

  def summarize(self) -> ABComparison:
    """Summarize the results from two runs (base/experiment).

    Returns:
      A per-benchmark, per-counter summary of the improvement/regression
      between the 2 runs, as fractions (e.g. 0.05 means 5%), together with
      the relative standard deviation of the base and experiment values.
    """
    base_results = self._base.values()
    exp_results = self._exp.values()

    ret = {}
    for bname in base_results:
      ret[bname] = {}
      for counter in base_results[bname]:
        base_vals = base_results[bname][counter]
        exp_vals = exp_results[bname][counter]
        base_geomean = _geomean(base_vals)
        exp_geomean = _geomean(exp_vals)
        improvement = 1 - exp_geomean / base_geomean
        base_stdev = _stdev(base_vals)
        exp_stdev = _stdev(exp_vals)
        ret[bname][counter] = (improvement, base_stdev / base_geomean,
                               exp_stdev / exp_geomean)
    return ret

  def names(self):
    return self._base.names()

  def counters(self):
    return self._base.counters()

  def total_improvement(self, counter: str):
    assert counter in self.counters()
    logsum = 0
    # We look at the geomean of the improvement for each benchmark.
    for bname in self.names():
      b_geomean, _ = self._base.counter_means(bname, counter)
      e_geomean, _ = self._exp.counter_means(bname, counter)
      logsum += math.log(e_geomean / b_geomean)
    return 1.0 - math.exp(logsum / len(self.names()))
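A short usage sketch of the module above, mirroring what the converter script in the next file does. Paths, suite name, and counter names are placeholders; the API calls are the ones defined above.

import json

from compiler_opt.tools import benchmark_report

counters = ['INSTRUCTIONS', 'CYCLES']  # must match what the benchmark collected
with open('/tmp/base_report.json') as f:
  base = benchmark_report.BenchmarkReport('my_suite', json.load(f), counters)
with open('/tmp/exp_report.json') as f:
  exp = benchmark_report.BenchmarkReport('my_suite', json.load(f), counters)

# Sanity checks before drawing conclusions from the comparison.
noisy = base.large_variation_counters(0.05)  # (name, counter, stdev/geomean) triples
zeros = base.zero_counters()                 # counters that recorded a 0.0 value

comparison = benchmark_report.BenchmarkComparison(base, exp)
summary = comparison.summarize()  # {benchmark: {counter: (improvement, base noise, exp noise)}}
print(comparison.total_improvement('CYCLES'))  # geomean improvement across benchmarks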
Lines changed: 79 additions & 0 deletions
@@ -0,0 +1,79 @@
# coding=utf-8
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

r"""Convert benchmark results.json to csv.

To run:
python3 compiler_opt/tools/benchmark_report_converter.py \
  --base=/tmp/base_report.json \
  --exp=/tmp/exp_report.json \
  --counters=INSTRUCTIONS \
  --counters=CYCLES \
  --output=/tmp/summary.csv

Optionally, add --suite_name=<name of benchmark>, if batch-processing multiple
benchmarks' reports.

This assumes /tmp/{base|exp}_report.json were produced by benchmark runs that
were asked to collect the counters named INSTRUCTIONS and CYCLES.
"""

import csv
import json

from typing import Sequence

from absl import app
from absl import flags

import tensorflow.compat.v2 as tf

from compiler_opt.tools import benchmark_report

flags.DEFINE_string('suite_name', 'benchmark_suite',
                    'The name of the benchmark suite (for reporting).')
flags.DEFINE_string('base', None,
                    'JSON report produced by the base benchmark run.')
flags.DEFINE_string('exp', None,
                    'JSON report produced by the experiment benchmark run.')
flags.DEFINE_string('output', 'reports.csv', 'CSV output path.')
flags.DEFINE_multi_string(
    'counters', None,
    'Counter names. Should match exactly the names used when running the '
    'benchmark.')

FLAGS = flags.FLAGS


def main(argv: Sequence[str]) -> None:
  if len(argv) > 1:
    raise app.UsageError('Too many command-line arguments.')
  with tf.io.gfile.GFile(FLAGS.base, 'r') as b:
    with tf.io.gfile.GFile(FLAGS.exp, 'r') as e:
      b = benchmark_report.BenchmarkReport(FLAGS.suite_name, json.load(b),
                                           FLAGS.counters)
      e = benchmark_report.BenchmarkReport(FLAGS.suite_name, json.load(e),
                                           FLAGS.counters)
      comparison = benchmark_report.BenchmarkComparison(b, e)
      summary = comparison.summarize()
  with tf.io.gfile.GFile(FLAGS.output, 'w+') as o:
    co = csv.writer(o)
    for bm in summary:
      for c in summary[bm]:
        co.writerow([bm, c] + list(summary[bm][c]))


if __name__ == '__main__':
  app.run(main)
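The CSV written above has no header row; each row is the benchmark name, the counter name, and the three summarize() values: relative improvement (positive means the experiment is better), base-run noise (stdev/geomean), and experiment-run noise. A small sketch of consuming it, assuming the output path from the example invocation in the docstring:

import csv

with open('/tmp/summary.csv') as f:
  for benchmark, counter, improvement, base_noise, exp_noise in csv.reader(f):
    print(f'{benchmark:30} {counter:15} {float(improvement):+8.2%}  '
          f'(noise: base {float(base_noise):.2%}, exp {float(exp_noise):.2%})')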
Lines changed: 117 additions & 0 deletions
@@ -0,0 +1,117 @@
# coding=utf-8
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Tests for compiler_opt.tools.benchmark_report_converter."""

from absl.testing import absltest

from compiler_opt.tools import benchmark_report


base_data = {
    'benchmarks': [
        {
            'PerfCounter_0': 10,
            'PerfCounter_1': 20,
            'iterations': 10,
            'name': 'BM_A',
        },
        {
            'PerfCounter_0': 11,
            'PerfCounter_1': 19,
            'iterations': 11,
            'name': 'BM_A',
        },
        {
            'PerfCounter_0': 60,
            'PerfCounter_1': 50,
            'iterations': 15,
            'name': 'BM_B',
        },
    ]
}

exp_data = {
    'benchmarks': [
        {
            'PerfCounter_0': 9,
            'PerfCounter_1': 11,
            'iterations': 11,
            'name': 'BM_A',
        },
        {
            'PerfCounter_0': 8,
            'PerfCounter_1': 10,
            'iterations': 8,
            'name': 'BM_A',
        },
        {
            'PerfCounter_0': 62,
            'PerfCounter_1': 54,
            'iterations': 14,
            'name': 'BM_B',
        },
    ]
}


class BenchmarkReportConverterTest(absltest.TestCase):

  def test_loading(self):
    report = benchmark_report.BenchmarkReport(
        'foo', base_data, ['PerfCounter_0', 'PerfCounter_1'])
    self.assertEqual(
        report.values(), {
            'BM_A': {
                'PerfCounter_0': [10, 11],
                'PerfCounter_1': [20, 19]
            },
            'BM_B': {
                'PerfCounter_0': [60],
                'PerfCounter_1': [50],
            }
        })
    self.assertSetEqual(report.names(), set(['BM_A', 'BM_B']))
    self.assertSetEqual(report.counters(),
                        set(['PerfCounter_0', 'PerfCounter_1']))
    self.assertEqual(
        report.counter_means('BM_A', 'PerfCounter_0'),
        (10.488088481701517, 0.7071067811865476))

  def test_summarize_results(self):
    b_values = benchmark_report.BenchmarkReport(
        'foo', base_data, ['PerfCounter_0', 'PerfCounter_1'])
    e_values = benchmark_report.BenchmarkReport(
        'foo', exp_data, ['PerfCounter_0', 'PerfCounter_1'])
    summary = benchmark_report.BenchmarkComparison(b_values, e_values)
    self.assertDictEqual(
        summary.summarize(), {
            'BM_A': {
                'PerfCounter_0': (0.19096016504410973, 0.0674199862463242,
                                  0.08333333333333334),
                'PerfCounter_1':
                    (0.4619724131510293, 0.0362738125055006, 0.0674199862463242)
            },
            'BM_B': {
                'PerfCounter_0': (-0.03333333333333366, 0.0, 0.0),
                'PerfCounter_1': (-0.0800000000000003, 0.0, 0.0)
            }
        })
    self.assertEqual(
        summary.total_improvement('PerfCounter_0'), 0.08566536243319522)


if __name__ == '__main__':
  absltest.main()
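As a sanity check on the expected values above, the BM_A / PerfCounter_0 numbers can be reproduced by hand with plain Python, independent of the module under test; the arithmetic below is illustrative only.

import math
import statistics

# Base BM_A PerfCounter_0 values are [10, 11]; experiment values are [9, 8].
base_geomean = math.sqrt(10 * 11)                    # ~10.488088481701517
base_stdev = statistics.stdev([10, 11])              # ~0.7071067811865476
improvement = 1 - math.sqrt(9 * 8) / base_geomean    # ~0.19096016504410973

# total_improvement is 1 minus the geomean, across benchmarks, of the
# exp/base geomean ratios for the chosen counter.
bm_a_ratio = math.sqrt(9 * 8) / math.sqrt(10 * 11)
bm_b_ratio = 62 / 60            # single measurement per run, geomean == value
total = 1 - math.sqrt(bm_a_ratio * bm_b_ratio)       # ~0.08566536243319522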
