1
1
//! Helper code for character escaping.
2
2
3
+ use crate :: ascii;
3
4
use crate :: num:: NonZeroUsize ;
4
5
use crate :: ops:: Range ;
5
6
6
// Lookup table of the sixteen lowercase hexadecimal digits, indexed by nibble
// value (0 through 15). The removed line stores raw bytes; the added line
// stores the same digits as `ascii::Char` values converted from the byte string.
- const HEX_DIGITS : [ u8 ; 16 ] = * b"0123456789abcdef" ;
7
+ const HEX_DIGITS : [ ascii :: Char ; 16 ] = * b"0123456789abcdef" . as_ascii ( ) . unwrap ( ) ;
7
8
8
9
/// Escapes a byte into the provided buffer; returns the range of `output`
9
10
/// (always starting at 0) that holds the escaped representation.
///
/// All four slots of `output` are overwritten; slots past the returned range
/// are filled with a null/zero placeholder.
10
- pub ( crate ) fn escape_ascii_into ( output : & mut [ u8 ; 4 ] , byte : u8 ) -> Range < u8 > {
11
+ pub ( crate ) fn escape_ascii_into ( output : & mut [ ascii:: Char ; 4 ] , byte : u8 ) -> Range < u8 > {
12
+ #[ inline]
13
// Local helper: builds the two-character form (backslash followed by `a`),
// padded with Null, together with its length of 2.
+ fn backslash ( a : ascii:: Char ) -> ( [ ascii:: Char ; 4 ] , u8 ) {
14
+ ( [ ascii:: Char :: ReverseSolidus , a, ascii:: Char :: Null , ascii:: Char :: Null ] , 2 )
15
+ }
16
+
11
17
// Three output shapes are produced below:
//   * tab, carriage return, line feed, backslash, apostrophe and quotation
//     mark become a backslash plus one character (length 2);
//   * any other ASCII byte that is not a control character is emitted as
//     itself (length 1); the removed code expressed this as 0x20..=0x7e;
//   * every remaining byte becomes backslash, x and two lowercase hex
//     digits (length 4).
let ( data, len) = match byte {
12
- b'\t' => ( [ b'\\' , b't' , 0 , 0 ] , 2 ) ,
13
- b'\r' => ( [ b'\\' , b'r' , 0 , 0 ] , 2 ) ,
14
- b'\n' => ( [ b'\\' , b'n' , 0 , 0 ] , 2 ) ,
15
- b'\\' => ( [ b'\\' , b'\\' , 0 , 0 ] , 2 ) ,
16
- b'\'' => ( [ b'\\' , b'\'' , 0 , 0 ] , 2 ) ,
17
- b'"' => ( [ b'\\' , b'"' , 0 , 0 ] , 2 ) ,
18
- b'\x20' ..=b'\x7e' => ( [ byte, 0 , 0 , 0 ] , 1 ) ,
19
- _ => {
18
+ b'\t' => backslash ( ascii:: Char :: SmallT ) ,
19
+ b'\r' => backslash ( ascii:: Char :: SmallR ) ,
20
+ b'\n' => backslash ( ascii:: Char :: SmallN ) ,
21
+ b'\\' => backslash ( ascii:: Char :: ReverseSolidus ) ,
22
+ b'\'' => backslash ( ascii:: Char :: Apostrophe ) ,
23
+ b'\"' => backslash ( ascii:: Char :: QuotationMark ) ,
24
+ _ => if let Some ( a) = byte. as_ascii ( ) && !byte. is_ascii_control ( ) {
25
+ ( [ a, ascii:: Char :: Null , ascii:: Char :: Null , ascii:: Char :: Null ] , 1 )
26
+ } else {
20
27
// High nibble first, then low nibble, each looked up in HEX_DIGITS.
let hi = HEX_DIGITS [ usize:: from ( byte >> 4 ) ] ;
21
28
let lo = HEX_DIGITS [ usize:: from ( byte & 0xf ) ] ;
22
- ( [ b'\\' , b'x' , hi, lo] , 4 )
29
+ ( [ ascii :: Char :: ReverseSolidus , ascii :: Char :: SmallX , hi, lo] , 4 )
23
30
}
24
31
} ;
25
32
// Overwrite the whole caller buffer, including the unused padding slots.
* output = data;
26
- 0 ..( len as u8 )
33
+ 0 ..len
27
34
}
28
35
29
36
/// Escapes a character into provided buffer using `\u{NNNN}` representation.
///
/// Returns the range `start..10` of `output` that holds the escape; the
/// closing brace always sits in the last slot and the text is right-aligned.
30
- pub ( crate ) fn escape_unicode_into ( output : & mut [ u8 ; 10 ] , ch : char ) -> Range < u8 > {
31
- output[ 9 ] = b'}' ;
37
+ pub ( crate ) fn escape_unicode_into ( output : & mut [ ascii :: Char ; 10 ] , ch : char ) -> Range < u8 > {
38
+ output[ 9 ] = ascii :: Char :: RightCurlyBracket ;
32
39
33
40
let ch = ch as u32 ;
34
41
// Slot 3 receives the hex digit for bits 20..=23 of the code point.
// NOTE(review): the lines between this statement and the hunk header below
// are not shown in this view; presumably they fill slots 4 through 8 with
// the remaining hex digits in the same way. Confirm against the full file.
output[ 3 ] = HEX_DIGITS [ ( ( ch >> 20 ) & 15 ) as usize ] ;
@@ -41,7 +48,8 @@ pub(crate) fn escape_unicode_into(output: &mut [u8; 10], ch: char) -> Range<u8>
41
48
// or-ing 1 ensures that for ch==0 the code computes that one digit should
42
49
// be printed.
43
50
// `start` is the index where the three-character prefix begins: the count of
// leading zero nibbles in the 32-bit value, minus 2. It is 5 when only one
// digit is needed (leading_zeros == 31) and 0 for a six-digit code point.
let start = ( ch | 1 ) . leading_zeros ( ) as usize / 4 - 2 ;
44
// NOTE(review): in this rendering the prefix literal shows a space after the
// two backslashes. The declared type is a 3-element array and the destination
// slice has length 3, so the real literal is presumably the three bytes
// backslash, u, left brace. Likely an extraction artifact; confirm.
- output[ start..start + 3 ] . copy_from_slice ( b"\\ u{" ) ;
51
+ const UNICODE_ESCAPE_PREFIX : & [ ascii:: Char ; 3 ] = b"\\ u{" . as_ascii ( ) . unwrap ( ) ;
52
+ output[ start..] [ ..3 ] . copy_from_slice ( UNICODE_ESCAPE_PREFIX ) ;
45
53
46
54
( start as u8 ) ..10
47
55
}
@@ -52,41 +60,46 @@ pub(crate) fn escape_unicode_into(output: &mut [u8; 10], ch: char) -> Range<u8>
52
60
/// limited to u8 to reduce size of the structure.
53
61
#[ derive( Clone , Debug ) ]
54
62
pub ( crate ) struct EscapeIterInner < const N : usize > {
55
- // Invariant: data[alive] is all ASCII.
56
- pub ( crate ) data : [ u8 ; N ] ,
63
+ // The element type ensures this is always ASCII, and thus also valid UTF-8.
64
+ pub ( crate ) data : [ ascii :: Char ; N ] ,
57
65
58
66
// Index range into `data` that has not been yielded yet; the accessor and
// iteration methods of this type only look at `data[alive.start..alive.end]`.
// Invariant: alive.start <= alive.end <= N.
59
67
pub ( crate ) alive : Range < u8 > ,
60
68
}
61
69
62
70
impl < const N : usize > EscapeIterInner < N > {
63
/// Builds the iterator state from a full buffer and the range of it that is
/// still to be yielded.
///
/// Fails to compile when `N` is 256 or larger, and in debug builds panics
/// when `alive` is reversed or extends past `N`.
- pub fn new ( data : [ u8 ; N ] , alive : Range < u8 > ) -> Self {
71
+ pub fn new ( data : [ ascii :: Char ; N ] , alive : Range < u8 > ) -> Self {
64
72
// Compile-time check: indices are stored as u8, so N must fit in that type.
const { assert ! ( N < 256 ) } ;
65
73
debug_assert ! ( alive. start <= alive. end && usize :: from( alive. end) <= N , "{alive:?}" ) ;
66
// The removed lines re-checked in debug builds that the alive bytes were
// ASCII; the added version has no such check, and the field comment on `data`
// attributes the ASCII guarantee to the element type.
- let this = Self { data, alive } ;
67
- debug_assert ! ( this. as_bytes( ) . is_ascii( ) , "Expected ASCII, got {:?}" , this. as_bytes( ) ) ;
68
- this
74
+ Self { data, alive }
75
+ }
76
+
77
// Added constructor: copies an M-element array into the front of a
// Null-filled N-element buffer and marks 0..M as alive.
+ pub fn from_array < const M : usize > ( array : [ ascii:: Char ; M ] ) -> Self {
78
+ const { assert ! ( M <= N ) } ;
79
+
80
+ let mut data = [ ascii:: Char :: Null ; N ] ;
81
+ data[ ..M ] . copy_from_slice ( & array) ;
82
// The cast of M to u8 cannot truncate: M <= N is asserted above and `new`
// asserts N < 256.
+ Self :: new ( data, 0 ..M as u8 )
69
83
}
70
84
71
/// Returns the not-yet-yielded part of the buffer, i.e. `data` sliced by the
/// `alive` range. The diff renames this accessor from `as_bytes` to
/// `as_ascii`, changes its element type, and makes it `pub`.
- fn as_bytes ( & self ) -> & [ u8 ] {
85
+ pub fn as_ascii ( & self ) -> & [ ascii :: Char ] {
72
86
& self . data [ usize:: from ( self . alive . start ) ..usize:: from ( self . alive . end ) ]
73
87
}
74
88
75
89
/// Returns the not-yet-yielded part of the buffer as a string slice.
///
/// The removed implementation used an unchecked UTF-8 conversion justified by
/// an ASCII invariant; the added one converts the `ascii::Char` slice directly
/// and contains no `unsafe` block.
pub fn as_str ( & self ) -> & str {
76
- // SAFETY: self.data[self.alive] is all ASCII characters.
77
- unsafe { crate :: str:: from_utf8_unchecked ( self . as_bytes ( ) ) }
90
+ self . as_ascii ( ) . as_str ( )
78
91
}
79
92
80
93
/// Returns how many elements remain to be yielded: the width of `alive`.
pub fn len ( & self ) -> usize {
81
94
// The struct invariant alive.start <= alive.end keeps this subtraction from
// underflowing.
usize:: from ( self . alive . end - self . alive . start )
82
95
}
83
96
84
97
/// Yields the byte at the front of the alive range and shrinks the range from
/// the front; returns `None` once the range is empty.
pub fn next ( & mut self ) -> Option < u8 > {
85
// The added line converts the stored `ascii::Char` back to a plain byte.
- self . alive . next ( ) . map ( |i| self . data [ usize:: from ( i) ] )
98
+ self . alive . next ( ) . map ( |i| self . data [ usize:: from ( i) ] . as_u8 ( ) )
86
99
}
87
100
88
101
/// Yields the byte at the back of the alive range and shrinks the range from
/// the back; returns `None` once the range is empty.
pub fn next_back ( & mut self ) -> Option < u8 > {
89
// The added line converts the stored `ascii::Char` back to a plain byte.
- self . alive . next_back ( ) . map ( |i| self . data [ usize:: from ( i) ] )
102
+ self . alive . next_back ( ) . map ( |i| self . data [ usize:: from ( i) ] . as_u8 ( ) )
90
103
}
91
104
92
105
pub fn advance_by ( & mut self , n : usize ) -> Result < ( ) , NonZeroUsize > {
0 commit comments