@@ -2399,26 +2399,68 @@ impl Display for bool {
2399
2399
impl Debug for str {
2400
2400
fn fmt ( & self , f : & mut Formatter < ' _ > ) -> Result {
2401
2401
f. write_char ( '"' ) ?;
2402
- let mut from = 0 ;
2403
- for ( i, c) in self . char_indices ( ) {
2404
- // a fast path for ASCII chars that do not need escapes:
2405
- if matches ! ( c, ' ' ..='~' ) && !matches ! ( c, '\\' | '\"' ) {
2406
- continue ;
2407
- }
2408
2402
2409
- let esc = c. escape_debug_ext ( EscapeDebugExtArgs {
2410
- escape_grapheme_extended : true ,
2411
- escape_single_quote : false ,
2412
- escape_double_quote : true ,
2413
- } ) ;
2414
- // If char needs escaping, flush backlog so far and write, else skip
2415
- if esc. len ( ) != 1 {
2416
- f. write_str ( & self [ from..i] ) ?;
2403
+ // substring we know is printable
2404
+ let mut printable_range = 0 ..0 ;
2405
+
2406
+ // the outer loop here splits the string into ASCII-only, and Unicode-only chunks,
2407
+ // which are then processed separately, to enable a fast path for the ASCII-only chunk.
2408
+ let mut rest = self . as_bytes ( ) ;
2409
+ while rest. len ( ) > 0 {
2410
+ let mut ascii_bytes: & [ u8 ] ;
2411
+ let unicode_bytes: & [ u8 ] ;
2412
+
2413
+ // first, handle an ascii-only prefix
2414
+ let non_ascii_position = rest. iter ( ) . position ( |& b| b >= 0x80 ) . unwrap_or ( rest. len ( ) ) ;
2415
+ // SAFETY: the position was derived from an iterator, so is known to be within bounds, and at a char boundary
2416
+ ( ascii_bytes, rest) = unsafe { rest. split_at_unchecked ( non_ascii_position) } ;
2417
+
2418
+ fn needs_escape ( b : u8 ) -> bool {
2419
+ b > 0x7E || b < 0x20 || b == b'\\' || b == b'"'
2420
+ }
2421
+ while let Some ( escape_position) = ascii_bytes. iter ( ) . position ( |& b| needs_escape ( b) ) {
2422
+ printable_range. end += escape_position;
2423
+ f. write_str ( & self [ printable_range. clone ( ) ] ) ?;
2424
+
2425
+ let c = ascii_bytes[ escape_position] as char ;
2426
+ let esc = c. escape_debug_ext ( EscapeDebugExtArgs {
2427
+ escape_grapheme_extended : true ,
2428
+ escape_single_quote : false ,
2429
+ escape_double_quote : true ,
2430
+ } ) ;
2417
2431
Display :: fmt ( & esc, f) ?;
2418
- from = i + c. len_utf8 ( ) ;
2432
+
2433
+ ascii_bytes = & ascii_bytes[ escape_position + 1 ..] ;
2434
+ printable_range = ( printable_range. end + 1 ) ..( printable_range. end + 1 ) ;
2435
+ }
2436
+ printable_range. end += ascii_bytes. len ( ) ;
2437
+
2438
+ // then, handle a unicode-only prefix
2439
+ let ascii_position = rest. iter ( ) . position ( |& b| b < 0x80 ) . unwrap_or ( rest. len ( ) ) ;
2440
+ // SAFETY: the position was derived from an iterator, so is known to be within bounds, and at a char boundary
2441
+ ( unicode_bytes, rest) = unsafe { rest. split_at_unchecked ( ascii_position) } ;
2442
+ // SAFETY: prefix is a valid utf8 sequence, and at a char boundary
2443
+ let unicode_prefix = unsafe { crate :: str:: from_utf8_unchecked ( unicode_bytes) } ;
2444
+
2445
+ for c in unicode_prefix. chars ( ) {
2446
+ // SAFETY: we know that our slice only contains unicode chars
2447
+ unsafe { crate :: hint:: assert_unchecked ( c as u32 >= 0x80 ) } ;
2448
+ let esc = c. escape_debug_ext ( EscapeDebugExtArgs {
2449
+ escape_grapheme_extended : true ,
2450
+ escape_single_quote : false ,
2451
+ escape_double_quote : true ,
2452
+ } ) ;
2453
+ if esc. len ( ) != 1 {
2454
+ f. write_str ( & self [ printable_range. clone ( ) ] ) ?;
2455
+ Display :: fmt ( & esc, f) ?;
2456
+ printable_range. start = printable_range. end + c. len_utf8 ( ) ;
2457
+ }
2458
+ printable_range. end += c. len_utf8 ( ) ;
2419
2459
}
2420
2460
}
2421
- f. write_str ( & self [ from..] ) ?;
2461
+
2462
+ f. write_str ( & self [ printable_range] ) ?;
2463
+
2422
2464
f. write_char ( '"' )
2423
2465
}
2424
2466
}
0 commit comments