Skip to content

Commit 47dcf23

Browse files
authored
Merge pull request #1931 from Kobzol/binary-stats-improve
Improve symbol normalization in `binary_stats`
2 parents d5055e7 + c1216b5 commit 47dcf23

File tree

2 files changed

+114
-8
lines changed

2 files changed

+114
-8
lines changed

collector/src/artifact_stats.rs

+112-7
Original file line numberDiff line numberDiff line change
@@ -102,16 +102,44 @@ impl ArtifactStats {
102102
}
103103
}
104104

105-
/// Tries to match hashes produced by rustc in mangled symbol names.
106-
static RUSTC_HASH_REGEX: OnceLock<Regex> = OnceLock::new();
107-
108105
/// Demangle the symbol and remove rustc mangling hashes.
106+
///
107+
/// Normalizes the following things, in the following order:
108+
/// - Demangles the symbol.
109+
/// - Removes `.cold` and `.warm` from the end of the symbol, to merge cold and hot parts of a function
110+
/// into the same symbol.
111+
/// - Removes rustc hashes from the symbol, e.g. `foo::[abcdef]` -> `foo::[]` or
112+
/// `foo::abcd` -> `foo`.
113+
/// - Removes suffixes after a dot from the symbol, e.g. `anon.abcdef.123` -> `anon` or
114+
/// `foo.llvm.123` -> `foo`.
115+
///
116+
/// These modifications should remove things added by LLVM in the LTO/PGO phase.
117+
/// See more information here: https://rust-lang.github.io/rfcs/2603-rust-symbol-name-mangling-v0.html#vendor-specific-suffix
109118
fn normalize_symbol_name(symbol: &str) -> String {
110-
let regex =
111-
RUSTC_HASH_REGEX.get_or_init(|| Regex::new(r"(::)?\b[a-z0-9]{15,17}\b(\.\d+)?").unwrap());
119+
/// Tries to match hashes in brackets produced by rustc in mangled symbol names.
120+
static RUSTC_BRACKET_HASH_REGEX: OnceLock<Regex> = OnceLock::new();
121+
/// Tries to match hashes without brackets after :: produced by rustc in mangled symbol names.
122+
static RUSTC_HASH_REGEX: OnceLock<Regex> = OnceLock::new();
123+
/// Tries to match suffixes after a dot.
124+
static DOT_SUFFIX_REGEX: OnceLock<Regex> = OnceLock::new();
125+
126+
let bracket_hash_regex =
127+
RUSTC_BRACKET_HASH_REGEX.get_or_init(|| Regex::new(r"\[[a-z0-9]{13,17}\]").unwrap());
128+
let hash_regex = RUSTC_HASH_REGEX.get_or_init(|| Regex::new(r"::[a-z0-9]{15,17}$").unwrap());
129+
let dot_suffix_regex = DOT_SUFFIX_REGEX.get_or_init(|| Regex::new(r"\.[a-z0-9]+\b").unwrap());
130+
131+
let mut symbol = rustc_demangle::demangle(symbol).to_string();
112132

113-
let symbol = rustc_demangle::demangle(symbol).to_string();
114-
regex.replace_all(&symbol, "").to_string()
133+
if let Some(stripped) = symbol.strip_suffix(".cold") {
134+
symbol = stripped.to_string();
135+
}
136+
if let Some(stripped) = symbol.strip_suffix(".warm") {
137+
symbol = stripped.to_string();
138+
}
139+
let symbol = bracket_hash_regex.replace_all(&symbol, "");
140+
let symbol = hash_regex.replace_all(&symbol, "");
141+
let symbol = dot_suffix_regex.replace_all(&symbol, "");
142+
symbol.to_string()
115143
}
116144

117145
/// Simple heuristic that tries to normalize section names.
@@ -229,3 +257,80 @@ pub fn compile_and_get_stats(
229257

230258
Ok(archives)
231259
}
260+
261+
#[cfg(test)]
mod tests {
    use super::*;
    use rustc_demangle::demangle;

    /// Shared assertion helper for the normalization tests.
    ///
    /// Takes the mangled symbol together with the demangled and the normalized form that are
    /// expected for it. The demangled form is verified first; it is passed in mainly so that
    /// each test case documents what the symbol looks like before normalization.
    fn check(mangled: &str, demangled_expected: &str, normalized_expected: &str) {
        let demangled_actual = demangle(mangled).to_string();
        assert_eq!(demangled_actual, demangled_expected);

        let normalized_actual = normalize_symbol_name(mangled);
        assert_eq!(normalized_actual, normalized_expected.to_string());
    }

    #[test]
    fn normalize_remove_cold_annotation() {
        // A trailing `.cold` annotation must be dropped.
        let mangled = "_RNvNtNtNtCs1WKcaCLTok2_16rustc_query_impl10query_impl23specialization_graph_of14get_query_incr26___rust_end_short_backtrace.cold";
        let demangled = "rustc_query_impl[16af0aa4f1d40934]::query_impl::specialization_graph_of::get_query_incr::__rust_end_short_backtrace.cold";
        let normalized = "rustc_query_impl::query_impl::specialization_graph_of::get_query_incr::__rust_end_short_backtrace";
        check(mangled, demangled, normalized);
    }

    #[test]
    fn normalize_remove_numeric_suffix() {
        // A trailing numeric suffix must be dropped.
        // Here the demangler already takes care of it, as the expected demangled form shows.
        let mangled = "_RNvMs3_NtNtCs6gyBaxODSsO_12regex_syntax3ast5parseINtB5_7ParserIQNtB5_6ParserE19parse_with_commentsB9_.llvm.5849848722809994645";
        let demangled = "<regex_syntax[48ff133cf18e629c]::ast::parse::ParserI<&mut regex_syntax[48ff133cf18e629c]::ast::parse::Parser>>::parse_with_comments";
        let normalized = "<regex_syntax::ast::parse::ParserI<&mut regex_syntax::ast::parse::Parser>>::parse_with_comments";
        check(mangled, demangled, normalized);
    }

    #[test]
    fn normalize_remove_numeric_suffix_with_cold() {
        // A numeric suffix followed by `.cold` must be dropped as a whole.
        let mangled = "_RNvMs_NtNtCs60zRYs2wPJS_11rustc_parse6parser2tyNtB6_6Parser15parse_ty_common.llvm.13047176952295404880.cold";
        let demangled = "<rustc_parse[45fe911b13bda40a]::parser::Parser>::parse_ty_common.llvm.13047176952295404880.cold";
        let normalized = "<rustc_parse::parser::Parser>::parse_ty_common";
        check(mangled, demangled, normalized);
    }

    #[test]
    fn normalize_hash_at_end() {
        // Dot-separated hash and counter at the end of the symbol must be dropped.
        // This symbol is not mangled, so demangling leaves it untouched.
        let mangled = "anon.58936091071a36b1b82cf536b463328b.3488";
        let demangled = "anon.58936091071a36b1b82cf536b463328b.3488";
        let normalized = "anon";
        check(mangled, demangled, normalized);
    }

    #[test]
    fn normalize_short_hash() {
        // Bracketed hashes that are shorter than usual must be dropped as well.
        let mangled = "_RNvNtCsifRNxopDi_20rustc_builtin_macros6format16make_format_args";
        let demangled = "rustc_builtin_macros[e293f6447c7da]::format::make_format_args";
        let normalized = "rustc_builtin_macros::format::make_format_args";
        check(mangled, demangled, normalized);
    }

    #[test]
    fn normalize_hash_without_brackets() {
        // A trailing `::h...` hash that is not wrapped in brackets must be dropped.
        let mangled =
            "_ZN10proc_macro5quote5quote28_$u7b$$u7b$closure$u7d$$u7d$17h90045007b0e69fc9E";
        let demangled = "proc_macro::quote::quote::{{closure}}::h90045007b0e69fc9";
        let normalized = "proc_macro::quote::quote::{{closure}}";
        check(mangled, demangled, normalized);
    }
}

collector/src/bin/collector.rs

+2-1
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ use humansize::{format_size, BINARY};
2323
use rayon::iter::{IndexedParallelIterator, IntoParallelRefIterator, ParallelIterator};
2424
use tabled::builder::Builder;
2525
use tabled::settings::object::{Columns, Rows};
26-
use tabled::settings::{Alignment, Border, Color, Modify};
26+
use tabled::settings::{Alignment, Border, Color, Modify, Width};
2727
use tokio::runtime::Runtime;
2828

2929
use collector::api::next_artifact::NextArtifact;
@@ -1486,6 +1486,7 @@ fn print_binary_stats(
14861486
}
14871487
}
14881488

1489+
table.with(Modify::new(Columns::first()).with(Width::wrap(80)));
14891490
table.with(Modify::new(Columns::new(1..)).with(Alignment::right()));
14901491
table.with(tabled::settings::Style::sharp());
14911492
table.with(

0 commit comments

Comments
 (0)