@@ -102,16 +102,44 @@ impl ArtifactStats {
102
102
}
103
103
}
104
104
105
- /// Tries to match hashes produces by rustc in mangled symbol names.
106
- static RUSTC_HASH_REGEX : OnceLock < Regex > = OnceLock :: new ( ) ;
107
-
108
105
/// Demangle the symbol and remove rustc mangling hashes.
106
+ ///
107
+ /// Normalizes the following things, in the following order:
108
+ /// - Demangles the symbol.
109
+ /// - Removes `.cold` and `.warm` from the end of the symbol, to merge cold and hot parts of a function
110
+ /// into the same symbol.
111
+ /// - Removes rustc hashes from the symbol, e.g. `foo::[abcdef]` -> `foo::[]` or
112
+ /// `foo::abcd` -> `foo`.
113
+ /// - Removes suffixes after a dot from the symbol, e.g. `anon.abcdef.123` -> `anon` or
114
+ /// `foo.llvm.123` -> `foo`.
115
+ ///
116
+ /// These modifications should remove things added by LLVM in the LTO/PGO phase.
117
+ /// See more information here: https://rust-lang.github.io/rfcs/2603-rust-symbol-name-mangling-v0.html#vendor-specific-suffix
109
118
fn normalize_symbol_name ( symbol : & str ) -> String {
110
- let regex =
111
- RUSTC_HASH_REGEX . get_or_init ( || Regex :: new ( r"(::)?\b[a-z0-9]{15,17}\b(\.\d+)?" ) . unwrap ( ) ) ;
119
+ /// Tries to match hashes in brackets produced by rustc in mangled symbol names.
120
+ static RUSTC_BRACKET_HASH_REGEX : OnceLock < Regex > = OnceLock :: new ( ) ;
121
+ /// Tries to match hashes without brackets after :: produced by rustc in mangled symbol names.
122
+ static RUSTC_HASH_REGEX : OnceLock < Regex > = OnceLock :: new ( ) ;
123
+ /// Tries to match suffixes after a dot.
124
+ static DOT_SUFFIX_REGEX : OnceLock < Regex > = OnceLock :: new ( ) ;
125
+
126
+ let bracket_hash_regex =
127
+ RUSTC_BRACKET_HASH_REGEX . get_or_init ( || Regex :: new ( r"\[[a-z0-9]{13,17}\]" ) . unwrap ( ) ) ;
128
+ let hash_regex = RUSTC_HASH_REGEX . get_or_init ( || Regex :: new ( r"::[a-z0-9]{15,17}$" ) . unwrap ( ) ) ;
129
+ let dot_suffix_regex = DOT_SUFFIX_REGEX . get_or_init ( || Regex :: new ( r"\.[a-z0-9]+\b" ) . unwrap ( ) ) ;
130
+
131
+ let mut symbol = rustc_demangle:: demangle ( symbol) . to_string ( ) ;
112
132
113
- let symbol = rustc_demangle:: demangle ( symbol) . to_string ( ) ;
114
- regex. replace_all ( & symbol, "" ) . to_string ( )
133
+ if let Some ( stripped) = symbol. strip_suffix ( ".cold" ) {
134
+ symbol = stripped. to_string ( ) ;
135
+ }
136
+ if let Some ( stripped) = symbol. strip_suffix ( ".warm" ) {
137
+ symbol = stripped. to_string ( ) ;
138
+ }
139
+ let symbol = bracket_hash_regex. replace_all ( & symbol, "" ) ;
140
+ let symbol = hash_regex. replace_all ( & symbol, "" ) ;
141
+ let symbol = dot_suffix_regex. replace_all ( & symbol, "" ) ;
142
+ symbol. to_string ( )
115
143
}
116
144
117
145
/// Simple heuristic that tries to normalize section names.
@@ -229,3 +257,80 @@ pub fn compile_and_get_stats(
229
257
230
258
Ok ( archives)
231
259
}
260
+
261
#[cfg(test)]
mod tests {
    use super::*;
    use rustc_demangle::demangle;

    /// Shared assertion helper for the symbol normalization tests.
    ///
    /// Takes the mangled `symbol`, the text it is expected to demangle to, and the text it is
    /// expected to normalize to. The demangled form is checked as well, mostly so that each
    /// test case documents what the normalization actually starts from.
    fn check(symbol: &str, expect_demangled: &str, expect_normalized: &str) {
        assert_eq!(demangle(symbol).to_string(), expect_demangled);
        assert_eq!(normalize_symbol_name(symbol), expect_normalized);
    }

    #[test]
    fn normalize_remove_cold_annotation() {
        // A trailing `.cold` survives demangling and has to be dropped by normalization.
        check(
            "_RNvNtNtNtCs1WKcaCLTok2_16rustc_query_impl10query_impl23specialization_graph_of14get_query_incr26___rust_end_short_backtrace.cold",
            "rustc_query_impl[16af0aa4f1d40934]::query_impl::specialization_graph_of::get_query_incr::__rust_end_short_backtrace.cold",
            "rustc_query_impl::query_impl::specialization_graph_of::get_query_incr::__rust_end_short_backtrace",
        );
    }

    #[test]
    fn normalize_remove_numeric_suffix() {
        // A trailing `.llvm.<number>` suffix must not show up in the normalized symbol.
        // Here the demangler already drops it, as the expected demangled text shows.
        check(
            "_RNvMs3_NtNtCs6gyBaxODSsO_12regex_syntax3ast5parseINtB5_7ParserIQNtB5_6ParserE19parse_with_commentsB9_.llvm.5849848722809994645",
            "<regex_syntax[48ff133cf18e629c]::ast::parse::ParserI<&mut regex_syntax[48ff133cf18e629c]::ast::parse::Parser>>::parse_with_comments",
            "<regex_syntax::ast::parse::ParserI<&mut regex_syntax::ast::parse::Parser>>::parse_with_comments",
        );
    }

    #[test]
    fn normalize_remove_numeric_suffix_with_cold() {
        // `.llvm.<number>` followed by `.cold` is kept by the demangler, so normalization
        // has to remove both parts.
        check(
            "_RNvMs_NtNtCs60zRYs2wPJS_11rustc_parse6parser2tyNtB6_6Parser15parse_ty_common.llvm.13047176952295404880.cold",
            "<rustc_parse[45fe911b13bda40a]::parser::Parser>::parse_ty_common.llvm.13047176952295404880.cold",
            "<rustc_parse::parser::Parser>::parse_ty_common",
        );
    }

    #[test]
    fn normalize_hash_at_end() {
        // A symbol that demangling leaves untouched: the dot-separated hash and number
        // at its end are removed.
        check(
            "anon.58936091071a36b1b82cf536b463328b.3488",
            "anon.58936091071a36b1b82cf536b463328b.3488",
            "anon",
        );
    }

    #[test]
    fn normalize_short_hash() {
        // Bracketed hashes shorter than 16 characters are removed too.
        check(
            "_RNvNtCsifRNxopDi_20rustc_builtin_macros6format16make_format_args",
            "rustc_builtin_macros[e293f6447c7da]::format::make_format_args",
            "rustc_builtin_macros::format::make_format_args",
        );
    }

    #[test]
    fn normalize_hash_without_brackets() {
        // A trailing `::h<hash>` path segment (no brackets) is removed.
        check(
            "_ZN10proc_macro5quote5quote28_$u7b$$u7b$closure$u7d$$u7d$17h90045007b0e69fc9E",
            "proc_macro::quote::quote::{{closure}}::h90045007b0e69fc9",
            "proc_macro::quote::quote::{{closure}}",
        );
    }
}
0 commit comments