From 1191bea6ab6402bd10ea69cd2aa837331cd452b3 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Thu, 1 Jun 2023 10:03:27 +1000 Subject: [PATCH] Improve CGU debug printing. - Add more total and per-CGU measurements. - Ensure CGUs are sorted by name before the first `debug_dump` calls, for deterministic output. - Print items within CGUs in sorted-by-name order, for deterministic output. - Add some assertions and comments clarifying sortedness of CGUs at various points. An example, before: ``` INITIAL PARTITIONING (5 CodegenUnits, max=29, min=1, max/min=29.0): CodegenUnit scev95ysd7g4b0z estimated size 2: - fn <() as std::process::Termination>::report [(External, Hidden)] [h082b15a6d07338dcE] estimated size 2 CodegenUnit 1j0frgtl72rsz24q estimated size 29: - fn std::rt::lang_start::<()>::{closure#0} [(External, Hidden)] [h695c7b5d6a212565E] estimated size 17 - fn std::rt::lang_start::<()> [(External, Hidden)] [h4ca942948e9cb931E] estimated size 12 CodegenUnit 5dbzi1e5qm0d7kj2 estimated size 4: - fn <[closure@std::rt::lang_start<()>::{closure#0}] as std::ops::FnOnce<()>>::call_once - shim [(External, Hidden)] [h24eaa44f03b2b233E] estimated size 1 - fn >::call_once - shim(fn()) [(External, Hidden)] [hf338f5339c3711acE] estimated size 1 - fn <[closure@std::rt::lang_start<()>::{closure#0}] as std::ops::FnOnce<()>>::call_once - shim(vtable) [(External, Hidden)] [h595d414cbb7651d5E] estimated size 1 - fn std::ptr::drop_in_place::<[closure@std::rt::lang_start<()>::{closure#0}]> - shim(None) [(External, Hidden)] [h17a19dcdb40600daE] estimated size 1 CodegenUnit 220m1mqa2mlbg7r3 estimated size 1: - fn main [(External, Hidden)] [hb29587cdb6db5f42E] estimated size 1 CodegenUnit 4ulbh241f7tvyn7x estimated size 6: - fn std::sys_common::backtrace::__rust_begin_short_backtrace:: [(External, Hidden)] [h41dada2c21a1259dE] estimated size 6 ``` and after: ``` INITIAL PARTITIONING (9 items, total_size=42; 5 CGUs, max_size=29, min_size=1, max_size/min_size=29.0): - CGU[0] 1j0frgtl72rsz24q (2 items, size=29): - fn std::rt::lang_start::<()> [(External, Hidden)] [h4ca942948e9cb931E] (size=12) - fn std::rt::lang_start::<()>::{closure#0} [(External, Hidden)] [h695c7b5d6a212565E] (size=17) - CGU[1] 220m1mqa2mlbg7r3 (1 items, size=1): - fn main [(External, Hidden)] [hb29587cdb6db5f42E] (size=1) - CGU[2] 4ulbh241f7tvyn7x (1 items, size=6): - fn std::sys_common::backtrace::__rust_begin_short_backtrace:: [(External, Hidden)] [h41dada2c21a1259dE] (size=6) - CGU[3] 5dbzi1e5qm0d7kj2 (4 items, size=4): - fn <[closure@std::rt::lang_start<()>::{closure#0}] as std::ops::FnOnce<()>>::call_once - shim(vtable) [(External, Hidden)] [h595d414cbb7651d5E] (size=1) - fn <[closure@std::rt::lang_start<()>::{closure#0}] as std::ops::FnOnce<()>>::call_once - shim [(External, Hidden)] [h24eaa44f03b2b233E] (size=1) - fn >::call_once - shim(fn()) [(External, Hidden)] [hf338f5339c3711acE] (size=1) - fn std::ptr::drop_in_place::<[closure@std::rt::lang_start<()>::{closure#0}]> - shim(None) [(External, Hidden)] [h17a19dcdb40600daE] (size=1) - CGU[4] scev95ysd7g4b0z (1 items, size=2): - fn <() as std::process::Termination>::report [(External, Hidden)] [h082b15a6d07338dcE] (size=2) ``` --- compiler/rustc_middle/src/mir/mono.rs | 2 + compiler/rustc_monomorphize/src/lib.rs | 1 + .../rustc_monomorphize/src/partitioning.rs | 68 +++++++++++-------- 3 files changed, 43 insertions(+), 28 deletions(-) diff --git a/compiler/rustc_middle/src/mir/mono.rs b/compiler/rustc_middle/src/mir/mono.rs index f31b343c94704..1511e25523559 100644 --- a/compiler/rustc_middle/src/mir/mono.rs +++ b/compiler/rustc_middle/src/mir/mono.rs @@ -291,10 +291,12 @@ impl<'tcx> CodegenUnit<'tcx> { self.primary = true; } + /// The order of these items is non-determinstic. pub fn items(&self) -> &FxHashMap, (Linkage, Visibility)> { &self.items } + /// The order of these items is non-determinstic. pub fn items_mut(&mut self) -> &mut FxHashMap, (Linkage, Visibility)> { &mut self.items } diff --git a/compiler/rustc_monomorphize/src/lib.rs b/compiler/rustc_monomorphize/src/lib.rs index ecc50c3f664fd..89dadc782f2da 100644 --- a/compiler/rustc_monomorphize/src/lib.rs +++ b/compiler/rustc_monomorphize/src/lib.rs @@ -1,4 +1,5 @@ #![feature(array_windows)] +#![feature(is_sorted)] #![recursion_limit = "256"] #![allow(rustc::potential_query_instability)] #![deny(rustc::untranslatable_diagnostic)] diff --git a/compiler/rustc_monomorphize/src/partitioning.rs b/compiler/rustc_monomorphize/src/partitioning.rs index be9c349c38416..1b872484eb1e2 100644 --- a/compiler/rustc_monomorphize/src/partitioning.rs +++ b/compiler/rustc_monomorphize/src/partitioning.rs @@ -126,11 +126,14 @@ struct PartitioningCx<'a, 'tcx> { } struct PlacedRootMonoItems<'tcx> { + /// The codegen units, sorted by name to make things deterministic. codegen_units: Vec>, + roots: FxHashSet>, internalization_candidates: FxHashSet>, } +// The output CGUs are sorted by name. fn partition<'tcx, I>( tcx: TyCtxt<'tcx>, mono_items: &mut I, @@ -143,6 +146,7 @@ where let _prof_timer = tcx.prof.generic_activity("cgu_partitioning"); let cx = &PartitioningCx { tcx, target_cgu_count: max_cgu_count, inlining_map }; + // In the first step, we place all regular monomorphizations into their // respective 'home' codegen unit. Regular monomorphizations are all // functions and statics defined in the local crate. @@ -225,8 +229,8 @@ where dead_code_cgu.make_code_coverage_dead_code_cgu(); } - // Finally, sort by codegen unit name, so that we get deterministic results. - codegen_units.sort_by(|a, b| a.name().as_str().cmp(b.name().as_str())); + // Ensure CGUs are sorted by name, so that we get deterministic results. + assert!(codegen_units.is_sorted_by(|a, b| Some(a.name().as_str().cmp(b.name().as_str())))); debug_dump(tcx, "FINAL", &codegen_units); @@ -301,27 +305,22 @@ where codegen_units.insert(codegen_unit_name, CodegenUnit::new(codegen_unit_name)); } - let codegen_units = codegen_units.into_values().collect(); + let mut codegen_units: Vec<_> = codegen_units.into_values().collect(); + codegen_units.sort_by(|a, b| a.name().as_str().cmp(b.name().as_str())); + PlacedRootMonoItems { codegen_units, roots, internalization_candidates } } +// This function requires the CGUs to be sorted by name on input, and ensures +// they are sorted by name on return, for deterministic behaviour. fn merge_codegen_units<'tcx>( cx: &PartitioningCx<'_, 'tcx>, codegen_units: &mut Vec>, ) { assert!(cx.target_cgu_count >= 1); - // Note that at this point in time the `codegen_units` here may not be - // in a deterministic order (but we know they're deterministically the - // same set). We want this merging to produce a deterministic ordering - // of codegen units from the input. - // - // Due to basically how we've implemented the merging below (merge the - // two smallest into each other) we're sure to start off with a - // deterministic order (sorted by name). This'll mean that if two cgus - // have the same size the stable sort below will keep everything nice - // and deterministic. - codegen_units.sort_by(|a, b| a.name().as_str().cmp(b.name().as_str())); + // A sorted order here ensures merging is deterministic. + assert!(codegen_units.is_sorted_by(|a, b| Some(a.name().as_str().cmp(b.name().as_str())))); // This map keeps track of what got merged into what. let mut cgu_contents: FxHashMap> = @@ -400,6 +399,9 @@ fn merge_codegen_units<'tcx>( cgu.set_name(numbered_codegen_unit_name); } } + + // A sorted order here ensures what follows can be deterministic. + codegen_units.sort_by(|a, b| a.name().as_str().cmp(b.name().as_str())); } /// For symbol internalization, we need to know whether a symbol/mono-item is @@ -858,36 +860,46 @@ fn default_visibility(tcx: TyCtxt<'_>, id: DefId, is_generic: bool) -> Visibilit _ => Visibility::Hidden, } } + fn debug_dump<'a, 'tcx: 'a>(tcx: TyCtxt<'tcx>, label: &str, cgus: &[CodegenUnit<'tcx>]) { let dump = move || { use std::fmt::Write; let num_cgus = cgus.len(); - let max = cgus.iter().map(|cgu| cgu.size_estimate()).max().unwrap(); - let min = cgus.iter().map(|cgu| cgu.size_estimate()).min().unwrap(); - let ratio = max as f64 / min as f64; + let num_items: usize = cgus.iter().map(|cgu| cgu.items().len()).sum(); + let total_size: usize = cgus.iter().map(|cgu| cgu.size_estimate()).sum(); + let max_size = cgus.iter().map(|cgu| cgu.size_estimate()).max().unwrap(); + let min_size = cgus.iter().map(|cgu| cgu.size_estimate()).min().unwrap(); + let max_min_size_ratio = max_size as f64 / min_size as f64; let s = &mut String::new(); let _ = writeln!( s, - "{label} ({num_cgus} CodegenUnits, max={max}, min={min}, max/min={ratio:.1}):" + "{label} ({num_items} items, total_size={total_size}; {num_cgus} CGUs, \ + max_size={max_size}, min_size={min_size}, max_size/min_size={max_min_size_ratio:.1}):" ); - for cgu in cgus { - let _ = - writeln!(s, "CodegenUnit {} estimated size {}:", cgu.name(), cgu.size_estimate()); + for (i, cgu) in cgus.iter().enumerate() { + let num_items = cgu.items().len(); + let _ = writeln!( + s, + "- CGU[{i}] {} ({num_items} items, size={}):", + cgu.name(), + cgu.size_estimate() + ); - for (mono_item, linkage) in cgu.items() { - let symbol_name = mono_item.symbol_name(tcx).name; + // The order of `cgu.items()` is non-deterministic; sort it by name + // to give deterministic output. + let mut items: Vec<_> = cgu.items().iter().collect(); + items.sort_by_key(|(item, _)| item.symbol_name(tcx).name); + for (item, linkage) in items { + let symbol_name = item.symbol_name(tcx).name; let symbol_hash_start = symbol_name.rfind('h'); let symbol_hash = symbol_hash_start.map_or("", |i| &symbol_name[i..]); + let size = item.size_estimate(tcx); let _ = with_no_trimmed_paths!(writeln!( s, - " - {} [{:?}] [{}] estimated size {}", - mono_item, - linkage, - symbol_hash, - mono_item.size_estimate(tcx) + " - {item} [{linkage:?}] [{symbol_hash}] (size={size})" )); }