@@ -102,16 +102,41 @@ impl ArtifactStats {
102102    } 
103103} 
104104
105- /// Tries to match hashes produces by rustc in mangled symbol names. 
106- static  RUSTC_HASH_REGEX :  OnceLock < Regex >  = OnceLock :: new ( ) ; 
107- 
108105/// Demangle the symbol and remove rustc mangling hashes. 
106+ /// 
107+ /// Normalizes the following things, in the following order: 
108+ /// - Demangles the symbol. 
109+ /// - Removes `.cold` and `.warm` from the end of the symbol, to merge cold and hot parts of a function 
110+ /// into the same symbol. 
111+ /// - Removes rustc hashes from the symbol, e.g. `foo::[abcdef]` -> `foo::[]` or 
112+ /// `foo::abcd` -> `foo`. 
113+ /// - Removes suffixes after a dot from the symbol, e.g. `anon.abcdef.123` -> `anon` or 
114+ /// `foo.llvm.123` -> `foo`. 
109115fn  normalize_symbol_name ( symbol :  & str )  -> String  { 
110-     let  regex =
111-         RUSTC_HASH_REGEX . get_or_init ( || Regex :: new ( r"(::)?\b[a-z0-9]{15,17}\b(\.\d+)?" ) . unwrap ( ) ) ; 
116+     /// Tries to match hashes in brackets produces by rustc in mangled symbol names. 
117+ static  RUSTC_BRACKET_HASH_REGEX :  OnceLock < Regex >  = OnceLock :: new ( ) ; 
118+     /// Tries to match hashes without brackets after :: produces by rustc in mangled symbol names. 
119+ static  RUSTC_HASH_REGEX :  OnceLock < Regex >  = OnceLock :: new ( ) ; 
120+     /// Tries to match suffixes after a dot. 
121+ static  DOT_SUFFIX_REGEX :  OnceLock < Regex >  = OnceLock :: new ( ) ; 
122+ 
123+     let  bracket_hash_regex =
124+         RUSTC_BRACKET_HASH_REGEX . get_or_init ( || Regex :: new ( r"\[[a-z0-9]{13,17}\]" ) . unwrap ( ) ) ; 
125+     let  hash_regex = RUSTC_HASH_REGEX . get_or_init ( || Regex :: new ( r"::[a-z0-9]{15,17}$" ) . unwrap ( ) ) ; 
126+     let  dot_suffix_regex = DOT_SUFFIX_REGEX . get_or_init ( || Regex :: new ( r"\.[a-z0-9]+\b" ) . unwrap ( ) ) ; 
127+ 
128+     let  mut  symbol = rustc_demangle:: demangle ( symbol) . to_string ( ) ; 
112129
113-     let  symbol = rustc_demangle:: demangle ( symbol) . to_string ( ) ; 
114-     regex. replace_all ( & symbol,  "" ) . to_string ( ) 
130+     if  let  Some ( stripped)  = symbol. strip_suffix ( ".cold" )  { 
131+         symbol = stripped. to_string ( ) ; 
132+     } 
133+     if  let  Some ( stripped)  = symbol. strip_suffix ( ".warm" )  { 
134+         symbol = stripped. to_string ( ) ; 
135+     } 
136+     let  symbol = bracket_hash_regex. replace_all ( & symbol,  "" ) ; 
137+     let  symbol = hash_regex. replace_all ( & symbol,  "" ) ; 
138+     let  symbol = dot_suffix_regex. replace_all ( & symbol,  "" ) ; 
139+     symbol. to_string ( ) 
115140} 
116141
117142/// Simple heuristic that tries to normalize section names. 
@@ -229,3 +254,80 @@ pub fn compile_and_get_stats(
229254
230255    Ok ( archives) 
231256} 
257+ 
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts both stages of symbol processing for one fixture.
    ///
    /// `mangled` is the raw symbol, `demangled` is what plain demangling is expected to
    /// yield, and `normalized` is what `normalize_symbol_name` is expected to return.
    /// The demangled form is checked mainly to keep each fixture readable.
    fn check(mangled: &str, demangled: &str, normalized: &str) {
        assert_eq!(rustc_demangle::demangle(mangled).to_string(), demangled);
        assert_eq!(normalize_symbol_name(mangled), normalized);
    }

    #[test]
    fn normalize_remove_cold_annotation() {
        // A trailing `.cold` must be dropped.
        let mangled = "_RNvNtNtNtCs1WKcaCLTok2_16rustc_query_impl10query_impl23specialization_graph_of14get_query_incr26___rust_end_short_backtrace.cold";
        let demangled = "rustc_query_impl[16af0aa4f1d40934]::query_impl::specialization_graph_of::get_query_incr::__rust_end_short_backtrace.cold";
        let normalized = "rustc_query_impl::query_impl::specialization_graph_of::get_query_incr::__rust_end_short_backtrace";
        check(mangled, demangled, normalized);
    }

    #[test]
    fn normalize_remove_numeric_suffix() {
        // A trailing numeric suffix must be dropped.
        // For this symbol the demangler already discards it.
        let mangled = "_RNvMs3_NtNtCs6gyBaxODSsO_12regex_syntax3ast5parseINtB5_7ParserIQNtB5_6ParserE19parse_with_commentsB9_.llvm.5849848722809994645";
        let demangled = "<regex_syntax[48ff133cf18e629c]::ast::parse::ParserI<&mut regex_syntax[48ff133cf18e629c]::ast::parse::Parser>>::parse_with_comments";
        let normalized = "<regex_syntax::ast::parse::ParserI<&mut regex_syntax::ast::parse::Parser>>::parse_with_comments";
        check(mangled, demangled, normalized);
    }

    #[test]
    fn normalize_remove_numeric_suffix_with_cold() {
        // A numeric suffix followed by `.cold` must be dropped as a whole.
        let mangled = "_RNvMs_NtNtCs60zRYs2wPJS_11rustc_parse6parser2tyNtB6_6Parser15parse_ty_common.llvm.13047176952295404880.cold";
        let demangled = "<rustc_parse[45fe911b13bda40a]::parser::Parser>::parse_ty_common.llvm.13047176952295404880.cold";
        let normalized = "<rustc_parse::parser::Parser>::parse_ty_common";
        check(mangled, demangled, normalized);
    }

    #[test]
    fn normalize_hash_at_end() {
        // Hash-like dot suffixes at the end of the symbol must be dropped.
        let mangled = "anon.58936091071a36b1b82cf536b463328b.3488";
        let demangled = "anon.58936091071a36b1b82cf536b463328b.3488";
        let normalized = "anon";
        check(mangled, demangled, normalized);
    }

    #[test]
    fn normalize_short_hash() {
        // Short hashes inside brackets must be dropped.
        let mangled = "_RNvNtCsifRNxopDi_20rustc_builtin_macros6format16make_format_args";
        let demangled = "rustc_builtin_macros[e293f6447c7da]::format::make_format_args";
        let normalized = "rustc_builtin_macros::format::make_format_args";
        check(mangled, demangled, normalized);
    }

    #[test]
    fn normalize_hash_without_brackets() {
        // Hashes without brackets must be dropped.
        let mangled = "_ZN10proc_macro5quote5quote28_$u7b$$u7b$closure$u7d$$u7d$17h90045007b0e69fc9E";
        let demangled = "proc_macro::quote::quote::{{closure}}::h90045007b0e69fc9";
        let normalized = "proc_macro::quote::quote::{{closure}}";
        check(mangled, demangled, normalized);
    }
}
0 commit comments