@@ -102,16 +102,41 @@ impl ArtifactStats {
102
102
}
103
103
}
104
104
105
- /// Tries to match hashes produces by rustc in mangled symbol names.
106
- static RUSTC_HASH_REGEX : OnceLock < Regex > = OnceLock :: new ( ) ;
107
-
108
105
/// Demangle the symbol and remove rustc mangling hashes.
106
+ ///
107
+ /// Normalizes the following things, in the following order:
108
+ /// - Demangles the symbol.
109
+ /// - Removes `.cold` and `.warm` from the end of the symbol, to merge cold and hot parts of a function
110
+ /// into the same symbol.
111
+ /// - Removes rustc hashes from the symbol, e.g. `foo::[abcdef]` -> `foo::[]` or
112
+ /// `foo::abcd` -> `foo`.
113
+ /// - Removes suffixes after a dot from the symbol, e.g. `anon.abcdef.123` -> `anon` or
114
+ /// `foo.llvm.123` -> `foo`.
109
115
fn normalize_symbol_name ( symbol : & str ) -> String {
110
- let regex =
111
- RUSTC_HASH_REGEX . get_or_init ( || Regex :: new ( r"(::)?\b[a-z0-9]{15,17}\b(\.\d+)?" ) . unwrap ( ) ) ;
116
+ /// Tries to match hashes in brackets produces by rustc in mangled symbol names.
117
+ static RUSTC_BRACKET_HASH_REGEX : OnceLock < Regex > = OnceLock :: new ( ) ;
118
+ /// Tries to match hashes without brackets after :: produces by rustc in mangled symbol names.
119
+ static RUSTC_HASH_REGEX : OnceLock < Regex > = OnceLock :: new ( ) ;
120
+ /// Tries to match suffixes after a dot.
121
+ static DOT_SUFFIX_REGEX : OnceLock < Regex > = OnceLock :: new ( ) ;
122
+
123
+ let bracket_hash_regex =
124
+ RUSTC_BRACKET_HASH_REGEX . get_or_init ( || Regex :: new ( r"\[[a-z0-9]{13,17}\]" ) . unwrap ( ) ) ;
125
+ let hash_regex = RUSTC_HASH_REGEX . get_or_init ( || Regex :: new ( r"::[a-z0-9]{15,17}$" ) . unwrap ( ) ) ;
126
+ let dot_suffix_regex = DOT_SUFFIX_REGEX . get_or_init ( || Regex :: new ( r"\.[a-z0-9]+\b" ) . unwrap ( ) ) ;
127
+
128
+ let mut symbol = rustc_demangle:: demangle ( symbol) . to_string ( ) ;
112
129
113
- let symbol = rustc_demangle:: demangle ( symbol) . to_string ( ) ;
114
- regex. replace_all ( & symbol, "" ) . to_string ( )
130
+ if let Some ( stripped) = symbol. strip_suffix ( ".cold" ) {
131
+ symbol = stripped. to_string ( ) ;
132
+ }
133
+ if let Some ( stripped) = symbol. strip_suffix ( ".warm" ) {
134
+ symbol = stripped. to_string ( ) ;
135
+ }
136
+ let symbol = bracket_hash_regex. replace_all ( & symbol, "" ) ;
137
+ let symbol = hash_regex. replace_all ( & symbol, "" ) ;
138
+ let symbol = dot_suffix_regex. replace_all ( & symbol, "" ) ;
139
+ symbol. to_string ( )
115
140
}
116
141
117
142
/// Simple heuristic that tries to normalize section names.
@@ -229,3 +254,80 @@ pub fn compile_and_get_stats(
229
254
230
255
Ok ( archives)
231
256
}
257
+
258
#[cfg(test)]
mod tests {
    use super::*;
    use rustc_demangle::demangle;

    #[test]
    fn normalize_remove_cold_annotation() {
        // Check that .cold at the end is removed.
        check(
            "_RNvNtNtNtCs1WKcaCLTok2_16rustc_query_impl10query_impl23specialization_graph_of14get_query_incr26___rust_end_short_backtrace.cold",
            "rustc_query_impl[16af0aa4f1d40934]::query_impl::specialization_graph_of::get_query_incr::__rust_end_short_backtrace.cold",
            "rustc_query_impl::query_impl::specialization_graph_of::get_query_incr::__rust_end_short_backtrace",
        );
    }

    #[test]
    fn normalize_remove_warm_annotation() {
        // Check that .warm at the end is removed, the same way .cold is.
        check(
            "_RNvNtNtNtCs1WKcaCLTok2_16rustc_query_impl10query_impl23specialization_graph_of14get_query_incr26___rust_end_short_backtrace.warm",
            "rustc_query_impl[16af0aa4f1d40934]::query_impl::specialization_graph_of::get_query_incr::__rust_end_short_backtrace.warm",
            "rustc_query_impl::query_impl::specialization_graph_of::get_query_incr::__rust_end_short_backtrace",
        );
    }

    #[test]
    fn normalize_remove_numeric_suffix() {
        // Check that numeric suffix at the end is removed.
        // In this case, it is removed by demangling itself.
        check(
            "_RNvMs3_NtNtCs6gyBaxODSsO_12regex_syntax3ast5parseINtB5_7ParserIQNtB5_6ParserE19parse_with_commentsB9_.llvm.5849848722809994645",
            "<regex_syntax[48ff133cf18e629c]::ast::parse::ParserI<&mut regex_syntax[48ff133cf18e629c]::ast::parse::Parser>>::parse_with_comments",
            "<regex_syntax::ast::parse::ParserI<&mut regex_syntax::ast::parse::Parser>>::parse_with_comments",
        );
    }

    #[test]
    fn normalize_remove_numeric_suffix_with_cold() {
        // Check that a combination of the .cold suffix and a numeric suffix is removed.
        check(
            "_RNvMs_NtNtCs60zRYs2wPJS_11rustc_parse6parser2tyNtB6_6Parser15parse_ty_common.llvm.13047176952295404880.cold",
            "<rustc_parse[45fe911b13bda40a]::parser::Parser>::parse_ty_common.llvm.13047176952295404880.cold",
            "<rustc_parse::parser::Parser>::parse_ty_common",
        );
    }

    #[test]
    fn normalize_hash_at_end() {
        // Check that hashes at the end of the symbol are removed.
        check(
            "anon.58936091071a36b1b82cf536b463328b.3488",
            "anon.58936091071a36b1b82cf536b463328b.3488",
            "anon",
        );
    }

    #[test]
    fn normalize_short_hash() {
        // Check that short hashes in brackets are removed.
        check(
            "_RNvNtCsifRNxopDi_20rustc_builtin_macros6format16make_format_args",
            "rustc_builtin_macros[e293f6447c7da]::format::make_format_args",
            "rustc_builtin_macros::format::make_format_args",
        );
    }

    #[test]
    fn normalize_hash_without_brackets() {
        // Check that hashes without brackets are removed.
        check(
            "_ZN10proc_macro5quote5quote28_$u7b$$u7b$closure$u7d$$u7d$17h90045007b0e69fc9E",
            "proc_macro::quote::quote::{{closure}}::h90045007b0e69fc9",
            "proc_macro::quote::quote::{{closure}}",
        );
    }

    /// Checks the result of symbol normalization.
    ///
    /// The function receives the mangled symbol, and expects the correct demangled
    /// symbol and normalized symbol. The demangled version is passed mostly just to make
    /// the test more readable; asserting it also catches a wrong fixture before the
    /// normalization itself is checked.
    fn check(symbol: &str, expect_demangled: &str, expect_normalized: &str) {
        let demangled = demangle(symbol).to_string();
        assert_eq!(demangled, expect_demangled);
        assert_eq!(normalize_symbol_name(symbol), expect_normalized);
    }
}
0 commit comments