Skip to content

Commit 36cd81d

Browse files
committed
Reduce amount of generated unsafe code.
1 parent 32034e8 commit 36cd81d

File tree

3 files changed

+158
-190
lines changed

3 files changed

+158
-190
lines changed

library/core/src/unicode/printable.py

Lines changed: 28 additions & 99 deletions
Original file line number | Diff line number | Diff line change
@@ -92,8 +92,8 @@ def compress_singletons(singletons):
9292

9393

9494
def compress_normal(normal):
95-
# lengths 0x00..0x7f are encoded as 00, 01, ..., 7e, 7f
96-
# lengths 0x80..0x7fff are encoded as 80 80, 80 81, ..., ff fe, ff ff
95+
# lengths 0x00..0x7f are encoded as 0x00, 0x01, …, 0x7e, 0x7f
96+
# lengths 0x80..0x7fff are encoded as 0x8080, 0x8081, …, 0xfffe, 0xffff
9797
compressed = [] # [truelen, (truelenaux), falselen, (falselenaux)]
9898

9999
prev_start = 0
@@ -108,19 +108,19 @@ def compress_normal(normal):
108108
entry.append(0x80 | (truelen >> 8))
109109
entry.append(truelen & 0xFF)
110110
else:
111-
entry.append(truelen & 0x7F)
111+
entry.append(truelen)
112112
if falselen > 0x7F:
113113
entry.append(0x80 | (falselen >> 8))
114114
entry.append(falselen & 0xFF)
115115
else:
116-
entry.append(falselen & 0x7F)
116+
entry.append(falselen)
117117

118118
compressed.append(entry)
119119

120120
return compressed
121121

122122

123-
def print_singletons(uppers, lowers, uppersname, lowersname):
123+
def print_singletons(name, uppers, lowers, uppersname, lowersname):
124124
print("#[rustfmt::skip]")
125125
print("const {}: &[(u8, u8)] = &[".format(uppersname))
126126
for u, c in uppers:
@@ -134,13 +134,15 @@ def print_singletons(uppers, lowers, uppersname, lowersname):
134134
)
135135
print("];")
136136

137+
print(f"const {name}: Singletons = Singletons::new(&{uppersname}, &{lowersname});")
138+
137139

138140
def print_normal(normal, normalname):
139141
print("#[rustfmt::skip]")
140-
print("const {}: &[u8] = &[".format(normalname))
142+
print("const {}: Normal = Normal::new(&[".format(normalname))
141143
for v in normal:
142144
print(" {}".format(" ".join("{:#04x},".format(i) for i in v)))
143-
print("];")
145+
print("]);")
144146

145147

146148
def main():
@@ -178,122 +180,49 @@ def main():
178180
else:
179181
normal0.append((a, b - a))
180182

181-
SINGLETONS0_UPPER, SINGLETONS0_LOWER = compress_singletons(singletons0)
182-
SINGLETONS1_UPPER, SINGLETONS1_LOWER = compress_singletons(singletons1)
183+
singletons0_upper, singletons0_lower = compress_singletons(singletons0)
184+
singletons1_upper, singletons1_lower = compress_singletons(singletons1)
183185
normal0 = compress_normal(normal0)
184186
normal1 = compress_normal(normal1)
185187

186188
print("""\
187189
// NOTE: The following code was generated by "library/core/src/unicode/printable.py",
188190
// do not edit directly!
189191
190-
/// # Safety
191-
///
192-
/// - The sum of all lengths (i.e. the second field of each pair) in `singletons_upper` must be
193-
/// equal to the length of `singletons_lower`.
194-
/// - `normal` must be encoded such that lengths greater than `0x7f` consist of two bytes in big
195-
/// endian, with the highest bit set and the length contained in the remaining 15 bits.
196-
unsafe fn check(
197-
x: u16,
198-
singletons_upper: &[(u8, u8)],
199-
singletons_lower: &[u8],
200-
normal: &[u8],
201-
) -> bool {
202-
let [x_upper, x_lower] = x.to_be_bytes();
203-
let mut lower_start = 0;
204-
for &(upper, lower_count) in singletons_upper {
205-
let lower_end = lower_start + lower_count as usize;
206-
if upper == x_upper {
207-
// SAFETY: The caller ensures that the sum of all lengths in `singletons_upper`
208-
// is equal to the length of `singletons_lower`, so `lower_end` is guaranteed to be
209-
// less than `singletons_lower.len()`.
210-
for &lower in unsafe { singletons_lower.get_unchecked(lower_start..lower_end) } {
211-
if lower == x_lower {
212-
return false;
213-
}
214-
}
215-
} else if x_upper < upper {
216-
break;
217-
}
218-
lower_start = lower_end;
219-
}
220-
221-
let mut x = x as i32;
222-
let mut normal = normal.iter().cloned();
223-
let mut current = true;
224-
while let Some(v) = normal.next() {
225-
let len = if v & 0x80 != 0 {
226-
let upper = v & 0x7f;
227-
// SAFETY: The encoding of `normal` is guaranteed by the caller such that
228-
// if the length is greater than 0x7f, it consists of two bytes, so there
229-
// must be a next byte.
230-
let lower = unsafe { normal.next().unwrap_unchecked() };
231-
i32::from(u16::from_be_bytes([upper, lower]))
232-
} else {
233-
i32::from(v)
234-
};
235-
x -= len;
236-
if x < 0 {
237-
break;
238-
}
239-
current = !current;
240-
}
241-
current
242-
}
192+
mod check;
193+
use check::{Normal, Singletons};
243194
244195
pub(crate) fn is_printable(x: char) -> bool {
245196
let x = x as u32;
246197
let lower = x as u16;
247198
248199
match x {
249-
..32 => false, // ASCII fast path
250-
..127 => true, // ASCII fast path
251-
..0x10000 => {
252-
const {
253-
let mut lower_count_total = 0;
254-
let mut i = 0;
255-
while i < SINGLETONS0_UPPER.len() {
256-
lower_count_total += SINGLETONS0_UPPER[i].1 as usize;
257-
i += 1;
258-
}
259-
assert!(lower_count_total == SINGLETONS0_LOWER.len());
260-
}
261-
// SAFETY: We just asserted that the sum of all lengths in `SINGLETONS0_UPPER` is equal
262-
// to the length of `SINGLETONS0_LOWER`, and `NORMAL0` is encoded such that lengths
263-
// greater than `0x7f` consist of two bytes in big endian, with the highest bit set and
264-
// the length contained in the remaining 15 bits.
265-
unsafe { check(lower, SINGLETONS0_UPPER, SINGLETONS0_LOWER, NORMAL0) }
266-
}
267-
..0x20000 => {
268-
const {
269-
let mut lower_count_total = 0;
270-
let mut i = 0;
271-
while i < SINGLETONS1_UPPER.len() {
272-
lower_count_total += SINGLETONS1_UPPER[i].1 as usize;
273-
i += 1;
274-
}
275-
assert!(lower_count_total == SINGLETONS1_LOWER.len());
276-
}
277-
// SAFETY: We just asserted that the sum of all lengths in `SINGLETONS1_UPPER` is equal
278-
// to the length of `SINGLETONS1_LOWER`, and `NORMAL1` is encoded such that lengths
279-
// greater than `0x7f` consist of two bytes in big endian, with the highest bit set and
280-
// the length contained in the remaining 15 bits.
281-
unsafe { check(lower, SINGLETONS1_UPPER, SINGLETONS1_LOWER, NORMAL1) }
282-
}\
200+
..32 => false, // ASCII fast path.
201+
..127 => true, // ASCII fast path.
202+
..0x10000 => SINGLETONS0.check(lower) && NORMAL0.check(lower),
203+
..0x20000 => SINGLETONS1.check(lower) && NORMAL1.check(lower),\
283204
""")
284205
for a, b in extra:
285-
print(" 0x{:x}..0x{:x} => false,".format(a, a + b))
206+
print(" {:#x}..{:#x} => false,".format(a, a + b))
286207
print("""\
287208
_ => true,
288209
}
289210
}\
290211
""")
291212
print()
292213
print_singletons(
293-
SINGLETONS0_UPPER, SINGLETONS0_LOWER, "SINGLETONS0_UPPER", "SINGLETONS0_LOWER"
214+
"SINGLETONS0",
215+
singletons0_upper,
216+
singletons0_lower,
217+
"SINGLETONS0_UPPER",
218+
"SINGLETONS0_LOWER",
294219
)
295220
print_singletons(
296-
SINGLETONS1_UPPER, SINGLETONS1_LOWER, "SINGLETONS1_UPPER", "SINGLETONS1_LOWER"
221+
"SINGLETONS1",
222+
singletons1_upper,
223+
singletons1_lower,
224+
"SINGLETONS1_UPPER",
225+
"SINGLETONS1_LOWER",
297226
)
298227
print_normal(normal0, "NORMAL0")
299228
print_normal(normal1, "NORMAL1")

library/core/src/unicode/printable.rs

Lines changed: 12 additions & 91 deletions
Original file line number | Diff line number | Diff line change
@@ -1,99 +1,18 @@
11
// NOTE: The following code was generated by "library/core/src/unicode/printable.py",
22
// do not edit directly!
33

4-
/// # Safety
5-
///
6-
/// - The sum of all lengths (i.e. the second field of each pair) in `singletons_upper` must be
7-
/// equal to the length of `singletons_lower`.
8-
/// - `normal` must be encoded such that lengths greater than `0x7f` consist of two bytes in big
9-
/// endian, with the highest bit set and the length contained in the remaining 15 bits.
10-
unsafe fn check(
11-
x: u16,
12-
singletons_upper: &[(u8, u8)],
13-
singletons_lower: &[u8],
14-
normal: &[u8],
15-
) -> bool {
16-
let [x_upper, x_lower] = x.to_be_bytes();
17-
let mut lower_start = 0;
18-
for &(upper, lower_count) in singletons_upper {
19-
let lower_end = lower_start + lower_count as usize;
20-
if upper == x_upper {
21-
// SAFETY: The caller ensures that the sum of all lengths in `singletons_upper`
22-
// is equal to the length of `singletons_lower`, so `lower_end` is guaranteed to be
23-
// less than `singletons_lower.len()`.
24-
for &lower in unsafe { singletons_lower.get_unchecked(lower_start..lower_end) } {
25-
if lower == x_lower {
26-
return false;
27-
}
28-
}
29-
} else if x_upper < upper {
30-
break;
31-
}
32-
lower_start = lower_end;
33-
}
34-
35-
let mut x = x as i32;
36-
let mut normal = normal.iter().cloned();
37-
let mut current = true;
38-
while let Some(v) = normal.next() {
39-
let len = if v & 0x80 != 0 {
40-
let upper = v & 0x7f;
41-
// SAFETY: The encoding of `normal` is guaranteed by the caller such that
42-
// if the length is greater than 0x7f, it consists of two bytes, so there
43-
// must be a next byte.
44-
let lower = unsafe { normal.next().unwrap_unchecked() };
45-
i32::from(u16::from_be_bytes([upper, lower]))
46-
} else {
47-
i32::from(v)
48-
};
49-
x -= len;
50-
if x < 0 {
51-
break;
52-
}
53-
current = !current;
54-
}
55-
current
56-
}
4+
mod check;
5+
use check::{Normal, Singletons};
576

587
pub(crate) fn is_printable(x: char) -> bool {
598
let x = x as u32;
609
let lower = x as u16;
6110

6211
match x {
63-
..32 => false, // ASCII fast path
64-
..127 => true, // ASCII fast path
65-
..0x10000 => {
66-
const {
67-
let mut lower_count_total = 0;
68-
let mut i = 0;
69-
while i < SINGLETONS0_UPPER.len() {
70-
lower_count_total += SINGLETONS0_UPPER[i].1 as usize;
71-
i += 1;
72-
}
73-
assert!(lower_count_total == SINGLETONS0_LOWER.len());
74-
}
75-
// SAFETY: We just asserted that the sum of all lengths in `SINGLETONS0_UPPER` is equal
76-
// to the length of `SINGLETONS0_LOWER`, and `NORMAL0` is encoded such that lengths
77-
// greater than `0x7f` consist of two bytes in big endian, with the highest bit set and
78-
// the length contained in the remaining 15 bits.
79-
unsafe { check(lower, SINGLETONS0_UPPER, SINGLETONS0_LOWER, NORMAL0) }
80-
}
81-
..0x20000 => {
82-
const {
83-
let mut lower_count_total = 0;
84-
let mut i = 0;
85-
while i < SINGLETONS1_UPPER.len() {
86-
lower_count_total += SINGLETONS1_UPPER[i].1 as usize;
87-
i += 1;
88-
}
89-
assert!(lower_count_total == SINGLETONS1_LOWER.len());
90-
}
91-
// SAFETY: We just asserted that the sum of all lengths in `SINGLETONS1_UPPER` is equal
92-
// to the length of `SINGLETONS1_LOWER`, and `NORMAL1` is encoded such that lengths
93-
// greater than `0x7f` consist of two bytes in big endian, with the highest bit set and
94-
// the length contained in the remaining 15 bits.
95-
unsafe { check(lower, SINGLETONS1_UPPER, SINGLETONS1_LOWER, NORMAL1) }
96-
}
12+
..32 => false, // ASCII fast path.
13+
..127 => true, // ASCII fast path.
14+
..0x10000 => SINGLETONS0.check(lower) && NORMAL0.check(lower),
15+
..0x20000 => SINGLETONS1.check(lower) && NORMAL1.check(lower),
9716
0x2a6e0..0x2a700 => false,
9817
0x2b73a..0x2b740 => false,
9918
0x2b81e..0x2b820 => false,
@@ -191,6 +110,7 @@ const SINGLETONS0_LOWER: &[u8] = &[
191110
0x75, 0xc8, 0xc9, 0xd0, 0xd1, 0xd8, 0xd9, 0xe7,
192111
0xfe, 0xff,
193112
];
113+
const SINGLETONS0: Singletons = Singletons::new(&SINGLETONS0_UPPER, &SINGLETONS0_LOWER);
194114
#[rustfmt::skip]
195115
const SINGLETONS1_UPPER: &[(u8, u8)] = &[
196116
(0x00, 6),
@@ -267,8 +187,9 @@ const SINGLETONS1_LOWER: &[u8] = &[
267187
0x7d, 0x7f, 0x8a, 0xa4, 0xaa, 0xaf, 0xb0, 0xc0,
268188
0xd0, 0xae, 0xaf, 0x6e, 0x6f, 0xdd, 0xde, 0x93,
269189
];
190+
const SINGLETONS1: Singletons = Singletons::new(&SINGLETONS1_UPPER, &SINGLETONS1_LOWER);
270191
#[rustfmt::skip]
271-
const NORMAL0: &[u8] = &[
192+
const NORMAL0: Normal = Normal::new(&[
272193
0x00, 0x20,
273194
0x5f, 0x22,
274195
0x82, 0xdf, 0x04,
@@ -402,9 +323,9 @@ const NORMAL0: &[u8] = &[
402323
0x80, 0xbe, 0x03,
403324
0x1b, 0x03,
404325
0x0f, 0x0d,
405-
];
326+
]);
406327
#[rustfmt::skip]
407-
const NORMAL1: &[u8] = &[
328+
const NORMAL1: Normal = Normal::new(&[
408329
0x5e, 0x22,
409330
0x7b, 0x05,
410331
0x03, 0x04,
@@ -624,4 +545,4 @@ const NORMAL1: &[u8] = &[
624545
0x1c, 0x06,
625546
0x09, 0x07,
626547
0x80, 0xfa, 0x84, 0x06,
627-
];
548+
]);

0 commit comments

Comments
 (0)