@@ -92,8 +92,8 @@ def compress_singletons(singletons):
92
92
93
93
94
94
def compress_normal (normal ):
95
- # lengths 0x00..0x7f are encoded as 00, 01, ..., 7e, 7f
96
- # lengths 0x80..0x7fff are encoded as 80 80, 80 81, ..., ff fe, ff ff
95
+ # lengths 0x00..0x7f are encoded as 0x00, 0x01, …, 0x7e, 0x7f
96
+ # lengths 0x80..0x7fff are encoded as 0x8080, 0x8081, …, 0xfffe, 0xffff
97
97
compressed = [] # [truelen, (truelenaux), falselen, (falselenaux)]
98
98
99
99
prev_start = 0
@@ -108,19 +108,19 @@ def compress_normal(normal):
108
108
entry .append (0x80 | (truelen >> 8 ))
109
109
entry .append (truelen & 0xFF )
110
110
else :
111
- entry .append (truelen & 0x7F )
111
+ entry .append (truelen )
112
112
if falselen > 0x7F :
113
113
entry .append (0x80 | (falselen >> 8 ))
114
114
entry .append (falselen & 0xFF )
115
115
else :
116
- entry .append (falselen & 0x7F )
116
+ entry .append (falselen )
117
117
118
118
compressed .append (entry )
119
119
120
120
return compressed
121
121
122
122
123
- def print_singletons (uppers , lowers , uppersname , lowersname ):
123
+ def print_singletons (name , uppers , lowers , uppersname , lowersname ):
124
124
print ("#[rustfmt::skip]" )
125
125
print ("const {}: &[(u8, u8)] = &[" .format (uppersname ))
126
126
for u , c in uppers :
@@ -134,13 +134,15 @@ def print_singletons(uppers, lowers, uppersname, lowersname):
134
134
)
135
135
print ("];" )
136
136
137
+ print (f"const { name } : Singletons = Singletons::new(&{ uppersname } , &{ lowersname } );" )
138
+
137
139
138
140
def print_normal(normal, normalname):
    """Print a compressed run-length table as a Rust `Normal` constant.

    The emitted item has the form::

        #[rustfmt::skip]
        const <normalname>: Normal = Normal::new(&[
            0x.., 0x..,
        ]);

    Args:
        normal: iterable of entries, each entry an iterable of integer byte
            values. One entry is printed per output row.
        normalname: identifier to use for the generated Rust constant.
    """
    # The table is laid out one entry per row; keep rustfmt from reflowing it.
    print("#[rustfmt::skip]")
    print("const {}: Normal = Normal::new(&[".format(normalname))
    for v in normal:
        # Every byte is rendered as a zero-padded two-digit hex literal with a
        # trailing comma, e.g. `0x05,`.
        print("    {}".format(" ".join("{:#04x},".format(i) for i in v)))
    print("]);")
144
146
145
147
146
148
def main ():
@@ -178,122 +180,49 @@ def main():
178
180
else :
179
181
normal0 .append ((a , b - a ))
180
182
181
- SINGLETONS0_UPPER , SINGLETONS0_LOWER = compress_singletons (singletons0 )
182
- SINGLETONS1_UPPER , SINGLETONS1_LOWER = compress_singletons (singletons1 )
183
+ singletons0_upper , singletons0_lower = compress_singletons (singletons0 )
184
+ singletons1_upper , singletons1_lower = compress_singletons (singletons1 )
183
185
normal0 = compress_normal (normal0 )
184
186
normal1 = compress_normal (normal1 )
185
187
186
188
print ("""\
187
189
// NOTE: The following code was generated by "library/core/src/unicode/printable.py",
188
190
// do not edit directly!
189
191
190
- /// # Safety
191
- ///
192
- /// - The sum of all lengths (i.e. the second field of each pair) in `singletons_upper` must be
193
- /// equal to the length of `singletons_lower`.
194
- /// - `normal` must be encoded such that lengths greater than `0x7f` consist of two bytes in big
195
- /// endian, with the highest bit set and the length contained in the remaining 15 bits.
196
- unsafe fn check(
197
- x: u16,
198
- singletons_upper: &[(u8, u8)],
199
- singletons_lower: &[u8],
200
- normal: &[u8],
201
- ) -> bool {
202
- let [x_upper, x_lower] = x.to_be_bytes();
203
- let mut lower_start = 0;
204
- for &(upper, lower_count) in singletons_upper {
205
- let lower_end = lower_start + lower_count as usize;
206
- if upper == x_upper {
207
- // SAFETY: The caller ensures that the sum of all lengths in `singletons_upper`
208
- // is equal to the length of `singletons_lower`, so `lower_end` is guaranteed to be
209
- // less than `singletons_lower.len()`.
210
- for &lower in unsafe { singletons_lower.get_unchecked(lower_start..lower_end) } {
211
- if lower == x_lower {
212
- return false;
213
- }
214
- }
215
- } else if x_upper < upper {
216
- break;
217
- }
218
- lower_start = lower_end;
219
- }
220
-
221
- let mut x = x as i32;
222
- let mut normal = normal.iter().cloned();
223
- let mut current = true;
224
- while let Some(v) = normal.next() {
225
- let len = if v & 0x80 != 0 {
226
- let upper = v & 0x7f;
227
- // SAFETY: The encoding of `normal` is guaranteed by the caller such that
228
- // if the length is greater than 0x7f, it consists of two bytes, so there
229
- // must be a next byte.
230
- let lower = unsafe { normal.next().unwrap_unchecked() };
231
- i32::from(u16::from_be_bytes([upper, lower]))
232
- } else {
233
- i32::from(v)
234
- };
235
- x -= len;
236
- if x < 0 {
237
- break;
238
- }
239
- current = !current;
240
- }
241
- current
242
- }
192
+ mod check;
193
+ use check::{Normal, Singletons};
243
194
244
195
pub(crate) fn is_printable(x: char) -> bool {
245
196
let x = x as u32;
246
197
let lower = x as u16;
247
198
248
199
match x {
249
- ..32 => false, // ASCII fast path
250
- ..127 => true, // ASCII fast path
251
- ..0x10000 => {
252
- const {
253
- let mut lower_count_total = 0;
254
- let mut i = 0;
255
- while i < SINGLETONS0_UPPER.len() {
256
- lower_count_total += SINGLETONS0_UPPER[i].1 as usize;
257
- i += 1;
258
- }
259
- assert!(lower_count_total == SINGLETONS0_LOWER.len());
260
- }
261
- // SAFETY: We just asserted that the sum of all lengths in `SINGLETONS0_UPPER` is equal
262
- // to the length of `SINGLETONS0_LOWER`, and `NORMAL0` is encoded such that lengths
263
- // greater than `0x7f` consist of two bytes in big endian, with the highest bit set and
264
- // the length contained in the remaining 15 bits.
265
- unsafe { check(lower, SINGLETONS0_UPPER, SINGLETONS0_LOWER, NORMAL0) }
266
- }
267
- ..0x20000 => {
268
- const {
269
- let mut lower_count_total = 0;
270
- let mut i = 0;
271
- while i < SINGLETONS1_UPPER.len() {
272
- lower_count_total += SINGLETONS1_UPPER[i].1 as usize;
273
- i += 1;
274
- }
275
- assert!(lower_count_total == SINGLETONS1_LOWER.len());
276
- }
277
- // SAFETY: We just asserted that the sum of all lengths in `SINGLETONS1_UPPER` is equal
278
- // to the length of `SINGLETONS1_LOWER`, and `NORMAL1` is encoded such that lengths
279
- // greater than `0x7f` consist of two bytes in big endian, with the highest bit set and
280
- // the length contained in the remaining 15 bits.
281
- unsafe { check(lower, SINGLETONS1_UPPER, SINGLETONS1_LOWER, NORMAL1) }
282
- }\
200
+ ..32 => false, // ASCII fast path.
201
+ ..127 => true, // ASCII fast path.
202
+ ..0x10000 => SINGLETONS0.check(lower) && NORMAL0.check(lower),
203
+ ..0x20000 => SINGLETONS1.check(lower) && NORMAL1.check(lower),\
283
204
""" )
284
205
for a , b in extra :
285
- print (" 0x{: x}..0x{: x} => false," .format (a , a + b ))
206
+ print (" {:# x}..{:# x} => false," .format (a , a + b ))
286
207
print ("""\
287
208
_ => true,
288
209
}
289
210
}\
290
211
""" )
291
212
print ()
292
213
print_singletons (
293
- SINGLETONS0_UPPER , SINGLETONS0_LOWER , "SINGLETONS0_UPPER" , "SINGLETONS0_LOWER"
214
+ "SINGLETONS0" ,
215
+ singletons0_upper ,
216
+ singletons0_lower ,
217
+ "SINGLETONS0_UPPER" ,
218
+ "SINGLETONS0_LOWER" ,
294
219
)
295
220
print_singletons (
296
- SINGLETONS1_UPPER , SINGLETONS1_LOWER , "SINGLETONS1_UPPER" , "SINGLETONS1_LOWER"
221
+ "SINGLETONS1" ,
222
+ singletons1_upper ,
223
+ singletons1_lower ,
224
+ "SINGLETONS1_UPPER" ,
225
+ "SINGLETONS1_LOWER" ,
297
226
)
298
227
print_normal (normal0 , "NORMAL0" )
299
228
print_normal (normal1 , "NORMAL1" )
0 commit comments