Skip to content

Improve symbol normalization in binary_stats #1931

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Aug 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
119 changes: 112 additions & 7 deletions collector/src/artifact_stats.rs
Original file line number Diff line number Diff line change
Expand Up @@ -102,16 +102,44 @@ impl ArtifactStats {
}
}

/// Tries to match hashes produced by rustc in mangled symbol names.
static RUSTC_HASH_REGEX: OnceLock<Regex> = OnceLock::new();

/// Demangle the symbol and remove rustc mangling hashes.
///
/// Normalizes the following things, in the following order:
/// - Demangles the symbol.
/// - Removes `.cold` and `.warm` from the end of the symbol, to merge cold and hot parts of a function
/// into the same symbol.
/// - Removes rustc hashes from the symbol: a bracketed hash anywhere in the symbol, e.g.
/// `foo[e293f6447c7da]::bar` -> `foo::bar` (the brackets are removed together with the hash),
/// or a `::`-prefixed hash at the very end of the symbol, e.g. `foo::h90045007b0e69fc9` -> `foo`.
/// - Removes suffixes after a dot from the symbol, e.g. `anon.abcdef.123` -> `anon` or
/// `foo.llvm.123` -> `foo`.
///
/// These modifications should remove things added by LLVM in the LTO/PGO phase.
/// See more information here: https://rust-lang.github.io/rfcs/2603-rust-symbol-name-mangling-v0.html#vendor-specific-suffix
///
/// Takes the raw (possibly mangled) symbol name and returns the normalized name as a new `String`.
fn normalize_symbol_name(symbol: &str) -> String {
let regex =
RUSTC_HASH_REGEX.get_or_init(|| Regex::new(r"(::)?\b[a-z0-9]{15,17}\b(\.\d+)?").unwrap());
/// Tries to match hashes in brackets produced by rustc in mangled symbol names.
static RUSTC_BRACKET_HASH_REGEX: OnceLock<Regex> = OnceLock::new();
/// Tries to match hashes without brackets after :: produced by rustc in mangled symbol names.
static RUSTC_HASH_REGEX: OnceLock<Regex> = OnceLock::new();
/// Tries to match suffixes after a dot.
static DOT_SUFFIX_REGEX: OnceLock<Regex> = OnceLock::new();

// Each pattern is compiled on first use and then cached in its `OnceLock`.
// The patterns are constant literals, so the `unwrap()` calls only guard against a typo in them.
let bracket_hash_regex =
RUSTC_BRACKET_HASH_REGEX.get_or_init(|| Regex::new(r"\[[a-z0-9]{13,17}\]").unwrap());
let hash_regex = RUSTC_HASH_REGEX.get_or_init(|| Regex::new(r"::[a-z0-9]{15,17}$").unwrap());
let dot_suffix_regex = DOT_SUFFIX_REGEX.get_or_init(|| Regex::new(r"\.[a-z0-9]+\b").unwrap());

let mut symbol = rustc_demangle::demangle(symbol).to_string();

let symbol = rustc_demangle::demangle(symbol).to_string();
regex.replace_all(&symbol, "").to_string()
// Only a suffix at the very end of the demangled symbol is stripped here;
// `.cold` is checked first, then `.warm`.
if let Some(stripped) = symbol.strip_suffix(".cold") {
symbol = stripped.to_string();
}
if let Some(stripped) = symbol.strip_suffix(".warm") {
symbol = stripped.to_string();
}
// Every match of each pattern is replaced with the empty string, in this order:
// bracketed hashes, then the trailing `::hash`, then dot suffixes.
// NOTE(review): the trailing-hash pattern is anchored with `$` and runs before the dot
// suffixes are removed, so a `::hash` that is still followed by a dot suffix at this point
// would not be matched - confirm whether such symbols occur in practice.
let symbol = bracket_hash_regex.replace_all(&symbol, "");
let symbol = hash_regex.replace_all(&symbol, "");
let symbol = dot_suffix_regex.replace_all(&symbol, "");
symbol.to_string()
}

/// Simple heuristic that tries to normalize section names.
Expand Down Expand Up @@ -229,3 +257,80 @@ pub fn compile_and_get_stats(

Ok(archives)
}

#[cfg(test)]
mod tests {
    use super::*;
    use rustc_demangle::demangle;

    /// A trailing `.cold` annotation must be dropped from the normalized symbol.
    #[test]
    fn normalize_remove_cold_annotation() {
        let mangled = "_RNvNtNtNtCs1WKcaCLTok2_16rustc_query_impl10query_impl23specialization_graph_of14get_query_incr26___rust_end_short_backtrace.cold";
        let demangled = "rustc_query_impl[16af0aa4f1d40934]::query_impl::specialization_graph_of::get_query_incr::__rust_end_short_backtrace.cold";
        let normalized = "rustc_query_impl::query_impl::specialization_graph_of::get_query_incr::__rust_end_short_backtrace";

        check(mangled, demangled, normalized);
    }

    /// A trailing numeric `.llvm.<digits>` suffix must not survive normalization.
    /// For this input the demangler already drops it on its own.
    #[test]
    fn normalize_remove_numeric_suffix() {
        let mangled = "_RNvMs3_NtNtCs6gyBaxODSsO_12regex_syntax3ast5parseINtB5_7ParserIQNtB5_6ParserE19parse_with_commentsB9_.llvm.5849848722809994645";
        let demangled = "<regex_syntax[48ff133cf18e629c]::ast::parse::ParserI<&mut regex_syntax[48ff133cf18e629c]::ast::parse::Parser>>::parse_with_comments";
        let normalized = "<regex_syntax::ast::parse::ParserI<&mut regex_syntax::ast::parse::Parser>>::parse_with_comments";

        check(mangled, demangled, normalized);
    }

    /// A numeric suffix followed by `.cold` must be removed as a whole.
    #[test]
    fn normalize_remove_numeric_suffix_with_cold() {
        let mangled = "_RNvMs_NtNtCs60zRYs2wPJS_11rustc_parse6parser2tyNtB6_6Parser15parse_ty_common.llvm.13047176952295404880.cold";
        let demangled = "<rustc_parse[45fe911b13bda40a]::parser::Parser>::parse_ty_common.llvm.13047176952295404880.cold";
        let normalized = "<rustc_parse::parser::Parser>::parse_ty_common";

        check(mangled, demangled, normalized);
    }

    /// Dot-separated hash and counter suffixes at the end of a symbol must be removed.
    /// The input is not a mangled Rust symbol, so demangling leaves it untouched.
    #[test]
    fn normalize_hash_at_end() {
        let mangled = "anon.58936091071a36b1b82cf536b463328b.3488";
        let demangled = "anon.58936091071a36b1b82cf536b463328b.3488";
        let normalized = "anon";

        check(mangled, demangled, normalized);
    }

    /// Bracketed hashes shorter than the usual 16 characters must be removed as well.
    #[test]
    fn normalize_short_hash() {
        let mangled = "_RNvNtCsifRNxopDi_20rustc_builtin_macros6format16make_format_args";
        let demangled = "rustc_builtin_macros[e293f6447c7da]::format::make_format_args";
        let normalized = "rustc_builtin_macros::format::make_format_args";

        check(mangled, demangled, normalized);
    }

    /// A trailing `::h<hash>` component without brackets must be removed.
    #[test]
    fn normalize_hash_without_brackets() {
        let mangled = "_ZN10proc_macro5quote5quote28_$u7b$$u7b$closure$u7d$$u7d$17h90045007b0e69fc9E";
        let demangled = "proc_macro::quote::quote::{{closure}}::h90045007b0e69fc9";
        let normalized = "proc_macro::quote::quote::{{closure}}";

        check(mangled, demangled, normalized);
    }

    /// Shared assertion helper for the normalization tests.
    ///
    /// Takes the mangled `symbol`, the demangled form it is expected to produce and the
    /// expected result of normalization. The demangled form is verified first; it is
    /// mostly there to keep each test case readable.
    fn check(symbol: &str, expect_demangled: &str, expect_normalized: &str) {
        assert_eq!(demangle(symbol).to_string(), expect_demangled);

        let normalized = normalize_symbol_name(symbol);
        assert_eq!(normalized, expect_normalized);
    }
}
3 changes: 2 additions & 1 deletion collector/src/bin/collector.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ use humansize::{format_size, BINARY};
use rayon::iter::{IndexedParallelIterator, IntoParallelRefIterator, ParallelIterator};
use tabled::builder::Builder;
use tabled::settings::object::{Columns, Rows};
use tabled::settings::{Alignment, Border, Color, Modify};
use tabled::settings::{Alignment, Border, Color, Modify, Width};
use tokio::runtime::Runtime;

use collector::api::next_artifact::NextArtifact;
Expand Down Expand Up @@ -1486,6 +1486,7 @@ fn print_binary_stats(
}
}

table.with(Modify::new(Columns::first()).with(Width::wrap(80)));
table.with(Modify::new(Columns::new(1..)).with(Alignment::right()));
table.with(tabled::settings::Style::sharp());
table.with(
Expand Down
Loading