Skip to content

Commit 87347af

Browse files
committed
Improve, document and test symbol normalization
1 parent 291bd7b commit 87347af

File tree

1 file changed

+109
-7
lines changed

1 file changed

+109
-7
lines changed

collector/src/artifact_stats.rs

+109-7
Original file line numberDiff line numberDiff line change
@@ -102,16 +102,41 @@ impl ArtifactStats {
102102
}
103103
}
104104

105-
/// Tries to match hashes produces by rustc in mangled symbol names.
106-
static RUSTC_HASH_REGEX: OnceLock<Regex> = OnceLock::new();
107-
108105
/// Demangle the symbol and remove rustc mangling hashes.
106+
///
107+
/// Normalizes the following things, in the following order:
108+
/// - Demangles the symbol.
109+
/// - Removes `.cold` and `.warm` from the end of the symbol, to merge cold and hot parts of a function
110+
/// into the same symbol.
111+
/// - Removes rustc hashes from the symbol, e.g. `foo[abcdef]::bar` -> `foo::bar` or
112+
/// `foo::abcd` -> `foo`.
113+
/// - Removes suffixes after a dot from the symbol, e.g. `anon.abcdef.123` -> `anon` or
114+
/// `foo.llvm.123` -> `foo`.
109115
fn normalize_symbol_name(symbol: &str) -> String {
110-
let regex =
111-
RUSTC_HASH_REGEX.get_or_init(|| Regex::new(r"(::)?\b[a-z0-9]{15,17}\b(\.\d+)?").unwrap());
116+
/// Tries to match hashes in brackets produced by rustc in mangled symbol names.
117+
static RUSTC_BRACKET_HASH_REGEX: OnceLock<Regex> = OnceLock::new();
118+
/// Tries to match hashes without brackets after `::` produced by rustc in mangled symbol names.
119+
static RUSTC_HASH_REGEX: OnceLock<Regex> = OnceLock::new();
120+
/// Tries to match suffixes after a dot.
121+
static DOT_SUFFIX_REGEX: OnceLock<Regex> = OnceLock::new();
122+
123+
let bracket_hash_regex =
124+
RUSTC_BRACKET_HASH_REGEX.get_or_init(|| Regex::new(r"\[[a-z0-9]{13,17}\]").unwrap());
125+
let hash_regex = RUSTC_HASH_REGEX.get_or_init(|| Regex::new(r"::[a-z0-9]{15,17}$").unwrap());
126+
let dot_suffix_regex = DOT_SUFFIX_REGEX.get_or_init(|| Regex::new(r"\.[a-z0-9]+\b").unwrap());
127+
128+
let mut symbol = rustc_demangle::demangle(symbol).to_string();
112129

113-
let symbol = rustc_demangle::demangle(symbol).to_string();
114-
regex.replace_all(&symbol, "").to_string()
130+
if let Some(stripped) = symbol.strip_suffix(".cold") {
131+
symbol = stripped.to_string();
132+
}
133+
if let Some(stripped) = symbol.strip_suffix(".warm") {
134+
symbol = stripped.to_string();
135+
}
136+
let symbol = bracket_hash_regex.replace_all(&symbol, "");
137+
let symbol = hash_regex.replace_all(&symbol, "");
138+
let symbol = dot_suffix_regex.replace_all(&symbol, "");
139+
symbol.to_string()
115140
}
116141

117142
/// Simple heuristic that tries to normalize section names.
@@ -229,3 +254,80 @@ pub fn compile_and_get_stats(
229254

230255
Ok(archives)
231256
}
257+
258+
#[cfg(test)]
259+
mod tests {
260+
use super::*;
261+
use rustc_demangle::demangle;
262+
263+
#[test]
264+
fn normalize_remove_cold_annotation() {
265+
// Check that .cold at the end is removed
266+
check(
267+
"_RNvNtNtNtCs1WKcaCLTok2_16rustc_query_impl10query_impl23specialization_graph_of14get_query_incr26___rust_end_short_backtrace.cold",
268+
"rustc_query_impl[16af0aa4f1d40934]::query_impl::specialization_graph_of::get_query_incr::__rust_end_short_backtrace.cold",
269+
"rustc_query_impl::query_impl::specialization_graph_of::get_query_incr::__rust_end_short_backtrace",
270+
);
271+
}
272+
273+
#[test]
274+
fn normalize_remove_numeric_suffix() {
275+
// Check that numeric suffix at the end is removed.
276+
// In this case, it is removed by demangling itself.
277+
check(
278+
"_RNvMs3_NtNtCs6gyBaxODSsO_12regex_syntax3ast5parseINtB5_7ParserIQNtB5_6ParserE19parse_with_commentsB9_.llvm.5849848722809994645",
279+
"<regex_syntax[48ff133cf18e629c]::ast::parse::ParserI<&mut regex_syntax[48ff133cf18e629c]::ast::parse::Parser>>::parse_with_comments",
280+
"<regex_syntax::ast::parse::ParserI<&mut regex_syntax::ast::parse::Parser>>::parse_with_comments",
281+
);
282+
}
283+
284+
#[test]
285+
fn normalize_remove_numeric_suffix_with_cold() {
286+
// Check that a combination of the .cold suffix and a numeric suffix is removed.
287+
check(
288+
"_RNvMs_NtNtCs60zRYs2wPJS_11rustc_parse6parser2tyNtB6_6Parser15parse_ty_common.llvm.13047176952295404880.cold",
289+
"<rustc_parse[45fe911b13bda40a]::parser::Parser>::parse_ty_common.llvm.13047176952295404880.cold",
290+
"<rustc_parse::parser::Parser>::parse_ty_common",
291+
);
292+
}
293+
294+
#[test]
295+
fn normalize_hash_at_end() {
296+
// Check that hashes at the end of the symbol are removed.
297+
check(
298+
"anon.58936091071a36b1b82cf536b463328b.3488",
299+
"anon.58936091071a36b1b82cf536b463328b.3488",
300+
"anon",
301+
);
302+
}
303+
304+
#[test]
305+
fn normalize_short_hash() {
306+
// Check that short hashes in brackets are removed.
307+
check(
308+
"_RNvNtCsifRNxopDi_20rustc_builtin_macros6format16make_format_args",
309+
"rustc_builtin_macros[e293f6447c7da]::format::make_format_args",
310+
"rustc_builtin_macros::format::make_format_args",
311+
);
312+
}
313+
314+
#[test]
315+
fn normalize_hash_without_brackets() {
316+
// Check that hashes without brackets are removed.
317+
check(
318+
"_ZN10proc_macro5quote5quote28_$u7b$$u7b$closure$u7d$$u7d$17h90045007b0e69fc9E",
319+
"proc_macro::quote::quote::{{closure}}::h90045007b0e69fc9",
320+
"proc_macro::quote::quote::{{closure}}",
321+
);
322+
}
323+
324+
/// Checks the result of symbol normalization.
325+
/// The function receives the mangled symbol, and expects the correct demangled
326+
/// symbol and normalized symbol. The demangled version is passed mostly just to make
327+
/// the test more readable.
328+
fn check(symbol: &str, expect_demangled: &str, expect_normalized: &str) {
329+
let demangled = demangle(symbol).to_string();
330+
assert_eq!(demangled, expect_demangled);
331+
assert_eq!(normalize_symbol_name(symbol), expect_normalized.to_string());
332+
}
333+
}

0 commit comments

Comments
 (0)