Skip to content

Commit 74d018e

Browse files
authored
Rollup merge of #107733 - Kobzol:opt-build-metrics, r=Mark-Simulacrum
Store metrics from `metrics.json` to CI PGO timer With this change, we'll be able to easily see how long does it take to compile LLVM vs `rustc`. r? ```@Mark-Simulacrum```
2 parents 3549e42 + afe9553 commit 74d018e

File tree

1 file changed

+124
-34
lines changed

1 file changed

+124
-34
lines changed

src/ci/stage-build.py

+124-34
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import contextlib
77
import getpass
88
import glob
9+
import json
910
import logging
1011
import os
1112
import pprint
@@ -17,7 +18,8 @@
1718
import urllib.request
1819
from io import StringIO
1920
from pathlib import Path
20-
from typing import Callable, Dict, Iterable, Iterator, List, Optional, Tuple, Union
21+
from typing import Callable, ContextManager, Dict, Iterable, Iterator, List, Optional, \
22+
Tuple, Union
2123

2224
PGO_HOST = os.environ["PGO_HOST"]
2325

@@ -115,6 +117,9 @@ def supports_bolt(self) -> bool:
115117
def llvm_bolt_profile_merged_file(self) -> Path:
116118
return self.opt_artifacts() / "bolt.profdata"
117119

120+
def metrics_path(self) -> Path:
121+
return self.build_root() / "build" / "metrics.json"
122+
118123

119124
class LinuxPipeline(Pipeline):
120125
def checkout_path(self) -> Path:
@@ -208,32 +213,27 @@ def get_timestamp() -> float:
208213

209214

210215
Duration = float
211-
TimerSection = Union[Duration, "Timer"]
212216

213217

214-
def iterate_sections(section: TimerSection, name: str, level: int = 0) -> Iterator[
218+
def iterate_timers(timer: "Timer", name: str, level: int = 0) -> Iterator[
215219
Tuple[int, str, Duration]]:
216220
"""
217-
Hierarchically iterate the sections of a timer, in a depth-first order.
221+
Hierarchically iterate the children of a timer, in a depth-first order.
218222
"""
219-
if isinstance(section, Duration):
220-
yield (level, name, section)
221-
elif isinstance(section, Timer):
222-
yield (level, name, section.total_duration())
223-
for (child_name, child_section) in section.sections:
224-
yield from iterate_sections(child_section, child_name, level=level + 1)
225-
else:
226-
assert False
223+
yield (level, name, timer.total_duration())
224+
for (child_name, child_timer) in timer.children:
225+
yield from iterate_timers(child_timer, child_name, level=level + 1)
227226

228227

229228
class Timer:
230229
def __init__(self, parent_names: Tuple[str, ...] = ()):
231-
self.sections: List[Tuple[str, TimerSection]] = []
230+
self.children: List[Tuple[str, Timer]] = []
232231
self.section_active = False
233232
self.parent_names = parent_names
233+
self.duration_excluding_children: Duration = 0
234234

235235
@contextlib.contextmanager
236-
def section(self, name: str) -> "Timer":
236+
def section(self, name: str) -> ContextManager["Timer"]:
237237
assert not self.section_active
238238
self.section_active = True
239239

@@ -252,33 +252,26 @@ def section(self, name: str) -> "Timer":
252252
end = get_timestamp()
253253
duration = end - start
254254

255-
if child_timer.has_children():
256-
self.sections.append((name, child_timer))
257-
else:
258-
self.sections.append((name, duration))
255+
child_timer.duration_excluding_children = duration - child_timer.total_duration()
256+
self.add_child(name, child_timer)
259257
if exc is None:
260258
LOGGER.info(f"Section `{full_name}` ended: OK ({duration:.2f}s)")
261259
else:
262260
LOGGER.info(f"Section `{full_name}` ended: FAIL ({duration:.2f}s)")
263261
self.section_active = False
264262

265263
def total_duration(self) -> Duration:
266-
duration = 0
267-
for (_, section) in self.sections:
268-
if isinstance(section, Duration):
269-
duration += section
270-
else:
271-
duration += section.total_duration()
272-
return duration
264+
return self.duration_excluding_children + sum(
265+
c.total_duration() for (_, c) in self.children)
273266

274267
def has_children(self) -> bool:
275-
return len(self.sections) > 0
268+
return len(self.children) > 0
276269

277270
def print_stats(self):
278271
rows = []
279-
for (child_name, child_section) in self.sections:
280-
for (level, name, duration) in iterate_sections(child_section, child_name, level=0):
281-
label = f"{' ' * level}{name}:"
272+
for (child_name, child_timer) in self.children:
273+
for (level, name, duration) in iterate_timers(child_timer, child_name, level=0):
274+
label = f"{' ' * level}{name}:"
282275
rows.append((label, duration))
283276

284277
# Empty row
@@ -306,6 +299,60 @@ def print_stats(self):
306299
print(divider, file=output, end="")
307300
LOGGER.info(f"Timer results\n{output.getvalue()}")
308301

302+
def add_child(self, name: str, timer: "Timer"):
303+
self.children.append((name, timer))
304+
305+
def add_duration(self, name: str, duration: Duration):
306+
timer = Timer(parent_names=self.parent_names + (name,))
307+
timer.duration_excluding_children = duration
308+
self.add_child(name, timer)
309+
310+
311+
class BuildStep:
312+
def __init__(self, type: str, children: List["BuildStep"], duration: float):
313+
self.type = type
314+
self.children = children
315+
self.duration = duration
316+
317+
def find_all_by_type(self, type: str) -> Iterator["BuildStep"]:
318+
if type == self.type:
319+
yield self
320+
for child in self.children:
321+
yield from child.find_all_by_type(type)
322+
323+
def __repr__(self):
324+
return f"BuildStep(type={self.type}, duration={self.duration}, children={len(self.children)})"
325+
326+
327+
def load_last_metrics(path: Path) -> BuildStep:
328+
"""
329+
Loads the metrics of the most recent bootstrap execution from a metrics.json file.
330+
"""
331+
with open(path, "r") as f:
332+
metrics = json.load(f)
333+
invocation = metrics["invocations"][-1]
334+
335+
def parse(entry) -> Optional[BuildStep]:
336+
if "kind" not in entry or entry["kind"] != "rustbuild_step":
337+
return None
338+
type = entry.get("type", "")
339+
duration = entry.get("duration_excluding_children_sec", 0)
340+
children = []
341+
342+
for child in entry.get("children", ()):
343+
step = parse(child)
344+
if step is not None:
345+
children.append(step)
346+
duration += step.duration
347+
return BuildStep(type=type, children=children, duration=duration)
348+
349+
children = [parse(child) for child in invocation.get("children", ())]
350+
return BuildStep(
351+
type="root",
352+
children=children,
353+
duration=invocation.get("duration_including_children_sec", 0)
354+
)
355+
309356

310357
@contextlib.contextmanager
311358
def change_cwd(dir: Path):
@@ -645,7 +692,7 @@ def print_binary_sizes(pipeline: Pipeline):
645692
with StringIO() as output:
646693
for path in paths:
647694
path_str = f"{path.name}:"
648-
print(f"{path_str:<30}{format_bytes(path.stat().st_size):>14}", file=output)
695+
print(f"{path_str:<50}{format_bytes(path.stat().st_size):>14}", file=output)
649696
LOGGER.info(f"Rustc binary size\n{output.getvalue()}")
650697

651698

@@ -659,6 +706,44 @@ def print_free_disk_space(pipeline: Pipeline):
659706
f"Free disk space: {format_bytes(free)} out of total {format_bytes(total)} ({(used / total) * 100:.2f}% used)")
660707

661708

709+
def log_metrics(step: BuildStep):
710+
substeps: List[Tuple[int, BuildStep]] = []
711+
712+
def visit(step: BuildStep, level: int):
713+
substeps.append((level, step))
714+
for child in step.children:
715+
visit(child, level=level + 1)
716+
717+
visit(step, 0)
718+
719+
output = StringIO()
720+
for (level, step) in substeps:
721+
label = f"{'.' * level}{step.type}"
722+
print(f"{label:<65}{step.duration:>8.2f}s", file=output)
723+
logging.info(f"Build step durations\n{output.getvalue()}")
724+
725+
726+
def record_metrics(pipeline: Pipeline, timer: Timer):
727+
metrics = load_last_metrics(pipeline.metrics_path())
728+
if metrics is None:
729+
return
730+
llvm_steps = tuple(metrics.find_all_by_type("bootstrap::native::Llvm"))
731+
assert len(llvm_steps) > 0
732+
llvm_duration = sum(step.duration for step in llvm_steps)
733+
734+
rustc_steps = tuple(metrics.find_all_by_type("bootstrap::compile::Rustc"))
735+
assert len(rustc_steps) > 0
736+
rustc_duration = sum(step.duration for step in rustc_steps)
737+
738+
# The LLVM step is part of the Rustc step
739+
rustc_duration -= llvm_duration
740+
741+
timer.add_duration("LLVM", llvm_duration)
742+
timer.add_duration("Rustc", rustc_duration)
743+
744+
log_metrics(metrics)
745+
746+
662747
def execute_build_pipeline(timer: Timer, pipeline: Pipeline, final_build_args: List[str]):
663748
# Clear and prepare tmp directory
664749
shutil.rmtree(pipeline.opt_artifacts(), ignore_errors=True)
@@ -668,12 +753,13 @@ def execute_build_pipeline(timer: Timer, pipeline: Pipeline, final_build_args: L
668753

669754
# Stage 1: Build rustc + PGO instrumented LLVM
670755
with timer.section("Stage 1 (LLVM PGO)") as stage1:
671-
with stage1.section("Build rustc and LLVM"):
756+
with stage1.section("Build rustc and LLVM") as rustc_build:
672757
build_rustc(pipeline, args=[
673758
"--llvm-profile-generate"
674759
], env=dict(
675760
LLVM_PROFILE_DIR=str(pipeline.llvm_profile_dir_root() / "prof-%p")
676761
))
762+
record_metrics(pipeline, rustc_build)
677763

678764
with stage1.section("Gather profiles"):
679765
gather_llvm_profiles(pipeline)
@@ -687,11 +773,12 @@ def execute_build_pipeline(timer: Timer, pipeline: Pipeline, final_build_args: L
687773

688774
# Stage 2: Build PGO instrumented rustc + LLVM
689775
with timer.section("Stage 2 (rustc PGO)") as stage2:
690-
with stage2.section("Build rustc and LLVM"):
776+
with stage2.section("Build rustc and LLVM") as rustc_build:
691777
build_rustc(pipeline, args=[
692778
"--rust-profile-generate",
693779
pipeline.rustc_profile_dir_root()
694780
])
781+
record_metrics(pipeline, rustc_build)
695782

696783
with stage2.section("Gather profiles"):
697784
gather_rustc_profiles(pipeline)
@@ -706,12 +793,14 @@ def execute_build_pipeline(timer: Timer, pipeline: Pipeline, final_build_args: L
706793
# Stage 3: Build rustc + BOLT instrumented LLVM
707794
if pipeline.supports_bolt():
708795
with timer.section("Stage 3 (LLVM BOLT)") as stage3:
709-
with stage3.section("Build rustc and LLVM"):
796+
with stage3.section("Build rustc and LLVM") as rustc_build:
710797
build_rustc(pipeline, args=[
711798
"--llvm-profile-use",
712799
pipeline.llvm_profile_merged_file(),
713800
"--llvm-bolt-profile-generate",
714801
])
802+
record_metrics(pipeline, rustc_build)
803+
715804
with stage3.section("Gather profiles"):
716805
gather_llvm_bolt_profiles(pipeline)
717806

@@ -723,8 +812,9 @@ def execute_build_pipeline(timer: Timer, pipeline: Pipeline, final_build_args: L
723812
]
724813

725814
# Stage 4: Build PGO optimized rustc + PGO/BOLT optimized LLVM
726-
with timer.section("Stage 4 (final build)"):
815+
with timer.section("Stage 4 (final build)") as stage4:
727816
cmd(final_build_args)
817+
record_metrics(pipeline, stage4)
728818

729819

730820
if __name__ == "__main__":

0 commit comments

Comments
 (0)