Skip to content

Commit ee7d1a7

Browse files
committed
Auto merge of rust-lang#128200 - estebank:normalize-whitespace, r=<try>
[perf] Change output normalization logic to be linear against size of output. I believe the previous code was accidentally quadratic. Let's perf it.
2 parents eb10639 + cd8ef40 commit ee7d1a7

File tree

1 file changed

+53
-55
lines changed

1 file changed

+53
-55
lines changed

compiler/rustc_errors/src/emitter.rs

+53-55
Original file line numberDiff line numberDiff line change
@@ -2557,62 +2557,60 @@ fn num_decimal_digits(num: usize) -> usize {
25572557
MAX_DIGITS
25582558
}
25592559

2560-
// We replace some characters so the CLI output is always consistent and underlines aligned.
2561-
// Keep the following list in sync with `rustc_span::char_width`.
2562-
const OUTPUT_REPLACEMENTS: &[(char, &str)] = &[
2563-
('\t', " "), // We do our own tab replacement
2564-
('\u{200D}', ""), // Replace ZWJ with nothing for consistent terminal output of grapheme clusters.
2565-
('\u{202A}', "�"), // The following unicode text flow control characters are inconsistently
2566-
('\u{202B}', "�"), // supported across CLIs and can cause confusion due to the bytes on disk
2567-
('\u{202D}', "�"), // not corresponding to the visible source code, so we replace them always.
2568-
('\u{202E}', "�"),
2569-
('\u{2066}', "�"),
2570-
('\u{2067}', "�"),
2571-
('\u{2068}', "�"),
2572-
('\u{202C}', "�"),
2573-
('\u{2069}', "�"),
2574-
// In terminals without Unicode support the following will be garbled, but in *all* terminals
2575-
// the underlying codepoint will be as well. We could gate this replacement behind a "unicode
2576-
// support" gate.
2577-
('\u{0000}', "␀"),
2578-
('\u{0001}', "␁"),
2579-
('\u{0002}', "␂"),
2580-
('\u{0003}', "␃"),
2581-
('\u{0004}', "␄"),
2582-
('\u{0005}', "␅"),
2583-
('\u{0006}', "␆"),
2584-
('\u{0007}', "␇"),
2585-
('\u{0008}', "␈"),
2586-
('\u{000B}', "␋"),
2587-
('\u{000C}', "␌"),
2588-
('\u{000D}', "␍"),
2589-
('\u{000E}', "␎"),
2590-
('\u{000F}', "␏"),
2591-
('\u{0010}', "␐"),
2592-
('\u{0011}', "␑"),
2593-
('\u{0012}', "␒"),
2594-
('\u{0013}', "␓"),
2595-
('\u{0014}', "␔"),
2596-
('\u{0015}', "␕"),
2597-
('\u{0016}', "␖"),
2598-
('\u{0017}', "␗"),
2599-
('\u{0018}', "␘"),
2600-
('\u{0019}', "␙"),
2601-
('\u{001A}', "␚"),
2602-
('\u{001B}', "␛"),
2603-
('\u{001C}', "␜"),
2604-
('\u{001D}', "␝"),
2605-
('\u{001E}', "␞"),
2606-
('\u{001F}', "␟"),
2607-
('\u{007F}', "␡"),
2608-
];
2609-
26102560
fn normalize_whitespace(str: &str) -> String {
2611-
let mut s = str.to_string();
2612-
for (c, replacement) in OUTPUT_REPLACEMENTS {
2613-
s = s.replace(*c, replacement);
2614-
}
2615-
s
2561+
// We replace some characters so the CLI output is always consistent and underlines aligned.
2562+
// Keep the following list in sync with `rustc_span::char_width`.
2563+
let output_replacements = FxHashMap::from_iter([
2564+
('\t', " "), // We do our own tab replacement
2565+
('\u{200D}', ""), // Replace ZWJ for consistent terminal output of grapheme clusters.
2566+
('\u{202A}', "�"), // The following unicode text flow control characters are inconsistently
2567+
('\u{202B}', "�"), // supported across CLIs and can cause confusion due to the bytes on disk
2568+
('\u{202D}', "�"), // not corresponding to the visible source code, so we replace them always.
2569+
('\u{202E}', "�"),
2570+
('\u{2066}', "�"),
2571+
('\u{2067}', "�"),
2572+
('\u{2068}', "�"),
2573+
('\u{202C}', "�"),
2574+
('\u{2069}', "�"),
2575+
// In terminals without Unicode support the following will be garbled, but in *all* terminals
2576+
// the underlying codepoint will be as well. We could gate this replacement behind a "unicode
2577+
// support" gate.
2578+
('\u{0000}', "␀"),
2579+
('\u{0001}', "␁"),
2580+
('\u{0002}', "␂"),
2581+
('\u{0003}', "␃"),
2582+
('\u{0004}', "␄"),
2583+
('\u{0005}', "␅"),
2584+
('\u{0006}', "␆"),
2585+
('\u{0007}', "␇"),
2586+
('\u{0008}', "␈"),
2587+
('\u{000B}', "␋"),
2588+
('\u{000C}', "␌"),
2589+
('\u{000D}', "␍"),
2590+
('\u{000E}', "␎"),
2591+
('\u{000F}', "␏"),
2592+
('\u{0010}', "␐"),
2593+
('\u{0011}', "␑"),
2594+
('\u{0012}', "␒"),
2595+
('\u{0013}', "␓"),
2596+
('\u{0014}', "␔"),
2597+
('\u{0015}', "␕"),
2598+
('\u{0016}', "␖"),
2599+
('\u{0017}', "␗"),
2600+
('\u{0018}', "␘"),
2601+
('\u{0019}', "␙"),
2602+
('\u{001A}', "␚"),
2603+
('\u{001B}', "␛"),
2604+
('\u{001C}', "␜"),
2605+
('\u{001D}', "␝"),
2606+
('\u{001E}', "␞"),
2607+
('\u{001F}', "␟"),
2608+
('\u{007F}', "␡"),
2609+
]);
2610+
2611+
str.chars()
2612+
.map(|c| output_replacements.get(&c).map_or(c.to_string(), |s| s.to_string()))
2613+
.collect()
26162614
}
26172615

26182616
fn draw_col_separator(buffer: &mut StyledBuffer, line: usize, col: usize) {

0 commit comments

Comments
 (0)