Skip to content

Optimize core::unicode::printable. #139540

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
94 changes: 34 additions & 60 deletions library/core/src/unicode/printable.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,8 @@ def compress_singletons(singletons):


def compress_normal(normal):
# lengths 0x00..0x7f are encoded as 00, 01, ..., 7e, 7f
# lengths 0x80..0x7fff are encoded as 80 80, 80 81, ..., ff fe, ff ff
# lengths 0x00..0x7f are encoded as 0x00, 0x01, …, 0x7e, 0x7f
# lengths 0x80..0x7fff are encoded as 0x8080, 0x8081, …, 0xfffe, 0xffff
compressed = [] # [truelen, (truelenaux), falselen, (falselenaux)]

prev_start = 0
Expand All @@ -108,19 +108,19 @@ def compress_normal(normal):
entry.append(0x80 | (truelen >> 8))
entry.append(truelen & 0xFF)
else:
entry.append(truelen & 0x7F)
entry.append(truelen)
if falselen > 0x7F:
entry.append(0x80 | (falselen >> 8))
entry.append(falselen & 0xFF)
else:
entry.append(falselen & 0x7F)
entry.append(falselen)

compressed.append(entry)

return compressed


def print_singletons(uppers, lowers, uppersname, lowersname):
def print_singletons(name, uppers, lowers, uppersname, lowersname):
print("#[rustfmt::skip]")
print("const {}: &[(u8, u8)] = &[".format(uppersname))
for u, c in uppers:
Expand All @@ -134,13 +134,15 @@ def print_singletons(uppers, lowers, uppersname, lowersname):
)
print("];")

print(f"const {name}: Singletons = Singletons::new(&{uppersname}, &{lowersname});")


def print_normal(normal, normalname):
print("#[rustfmt::skip]")
print("const {}: &[u8] = &[".format(normalname))
print("const {}: Normal = Normal::new(&[".format(normalname))
for v in normal:
print(" {}".format(" ".join("{:#04x},".format(i) for i in v)))
print("];")
print("]);")


def main():
Expand Down Expand Up @@ -178,78 +180,50 @@ def main():
else:
normal0.append((a, b - a))

singletons0u, singletons0l = compress_singletons(singletons0)
singletons1u, singletons1l = compress_singletons(singletons1)
singletons0_upper, singletons0_lower = compress_singletons(singletons0)
singletons1_upper, singletons1_lower = compress_singletons(singletons1)
normal0 = compress_normal(normal0)
normal1 = compress_normal(normal1)

print("""\
// NOTE: The following code was generated by "library/core/src/unicode/printable.py",
// do not edit directly!

fn check(x: u16, singletonuppers: &[(u8, u8)], singletonlowers: &[u8], normal: &[u8]) -> bool {
let xupper = (x >> 8) as u8;
let mut lowerstart = 0;
for &(upper, lowercount) in singletonuppers {
let lowerend = lowerstart + lowercount as usize;
if xupper == upper {
for &lower in &singletonlowers[lowerstart..lowerend] {
if lower == x as u8 {
return false;
}
}
} else if xupper < upper {
break;
}
lowerstart = lowerend;
}

let mut x = x as i32;
let mut normal = normal.iter().cloned();
let mut current = true;
while let Some(v) = normal.next() {
let len = if v & 0x80 != 0 {
((v & 0x7f) as i32) << 8 | normal.next().unwrap() as i32
} else {
v as i32
};
x -= len;
if x < 0 {
break;
}
current = !current;
}
current
}
mod check;
use check::{Normal, Singletons};

pub(crate) fn is_printable(x: char) -> bool {
let x = x as u32;
let lower = x as u16;

if x < 32 {
// ASCII fast path
false
} else if x < 127 {
// ASCII fast path
true
} else if x < 0x10000 {
check(lower, SINGLETONS0U, SINGLETONS0L, NORMAL0)
} else if x < 0x20000 {
check(lower, SINGLETONS1U, SINGLETONS1L, NORMAL1)
} else {\
match x {
..32 => false, // ASCII fast path.
..127 => true, // ASCII fast path.
..0x10000 => SINGLETONS0.check(lower) && NORMAL0.check(lower),
..0x20000 => SINGLETONS1.check(lower) && NORMAL1.check(lower),\
""")
for a, b in extra:
print(" if 0x{:x} <= x && x < 0x{:x} {{".format(a, a + b))
print(" return false;")
print(" }")
print(" {:#x}..{:#x} => false,".format(a, a + b))
print("""\
true
_ => true,
}
}\
""")
print()
print_singletons(singletons0u, singletons0l, "SINGLETONS0U", "SINGLETONS0L")
print_singletons(singletons1u, singletons1l, "SINGLETONS1U", "SINGLETONS1L")
print_singletons(
"SINGLETONS0",
singletons0_upper,
singletons0_lower,
"SINGLETONS0_UPPER",
"SINGLETONS0_LOWER",
)
print_singletons(
"SINGLETONS1",
singletons1_upper,
singletons1_lower,
"SINGLETONS1_UPPER",
"SINGLETONS1_LOWER",
)
print_normal(normal0, "NORMAL0")
print_normal(normal1, "NORMAL1")

Expand Down
112 changes: 28 additions & 84 deletions library/core/src/unicode/printable.rs
Original file line number Diff line number Diff line change
@@ -1,92 +1,34 @@
// NOTE: The following code was generated by "library/core/src/unicode/printable.py",
// do not edit directly!

fn check(x: u16, singletonuppers: &[(u8, u8)], singletonlowers: &[u8], normal: &[u8]) -> bool {
let xupper = (x >> 8) as u8;
let mut lowerstart = 0;
for &(upper, lowercount) in singletonuppers {
let lowerend = lowerstart + lowercount as usize;
if xupper == upper {
for &lower in &singletonlowers[lowerstart..lowerend] {
if lower == x as u8 {
return false;
}
}
} else if xupper < upper {
break;
}
lowerstart = lowerend;
}

let mut x = x as i32;
let mut normal = normal.iter().cloned();
let mut current = true;
while let Some(v) = normal.next() {
let len = if v & 0x80 != 0 {
((v & 0x7f) as i32) << 8 | normal.next().unwrap() as i32
} else {
v as i32
};
x -= len;
if x < 0 {
break;
}
current = !current;
}
current
}
mod check;
use check::{Normal, Singletons};

pub(crate) fn is_printable(x: char) -> bool {
let x = x as u32;
let lower = x as u16;

if x < 32 {
// ASCII fast path
false
} else if x < 127 {
// ASCII fast path
true
} else if x < 0x10000 {
check(lower, SINGLETONS0U, SINGLETONS0L, NORMAL0)
} else if x < 0x20000 {
check(lower, SINGLETONS1U, SINGLETONS1L, NORMAL1)
} else {
if 0x2a6e0 <= x && x < 0x2a700 {
return false;
}
if 0x2b73a <= x && x < 0x2b740 {
return false;
}
if 0x2b81e <= x && x < 0x2b820 {
return false;
}
if 0x2cea2 <= x && x < 0x2ceb0 {
return false;
}
if 0x2ebe1 <= x && x < 0x2ebf0 {
return false;
}
if 0x2ee5e <= x && x < 0x2f800 {
return false;
}
if 0x2fa1e <= x && x < 0x30000 {
return false;
}
if 0x3134b <= x && x < 0x31350 {
return false;
}
if 0x323b0 <= x && x < 0xe0100 {
return false;
}
if 0xe01f0 <= x && x < 0x110000 {
return false;
}
true
match x {
..32 => false, // ASCII fast path.
..127 => true, // ASCII fast path.
..0x10000 => SINGLETONS0.check(lower) && NORMAL0.check(lower),
..0x20000 => SINGLETONS1.check(lower) && NORMAL1.check(lower),
0x2a6e0..0x2a700 => false,
0x2b73a..0x2b740 => false,
0x2b81e..0x2b820 => false,
0x2cea2..0x2ceb0 => false,
0x2ebe1..0x2ebf0 => false,
0x2ee5e..0x2f800 => false,
0x2fa1e..0x30000 => false,
0x3134b..0x31350 => false,
0x323b0..0xe0100 => false,
0xe01f0..0x110000 => false,
_ => true,
}
}

#[rustfmt::skip]
const SINGLETONS0U: &[(u8, u8)] = &[
const SINGLETONS0_UPPER: &[(u8, u8)] = &[
(0x00, 1),
(0x03, 5),
(0x05, 6),
Expand Down Expand Up @@ -129,7 +71,7 @@ const SINGLETONS0U: &[(u8, u8)] = &[
(0xff, 9),
];
#[rustfmt::skip]
const SINGLETONS0L: &[u8] = &[
const SINGLETONS0_LOWER: &[u8] = &[
0xad, 0x78, 0x79, 0x8b, 0x8d, 0xa2, 0x30, 0x57,
0x58, 0x8b, 0x8c, 0x90, 0x1c, 0xdd, 0x0e, 0x0f,
0x4b, 0x4c, 0xfb, 0xfc, 0x2e, 0x2f, 0x3f, 0x5c,
Expand Down Expand Up @@ -168,8 +110,9 @@ const SINGLETONS0L: &[u8] = &[
0x75, 0xc8, 0xc9, 0xd0, 0xd1, 0xd8, 0xd9, 0xe7,
0xfe, 0xff,
];
const SINGLETONS0: Singletons = Singletons::new(&SINGLETONS0_UPPER, &SINGLETONS0_LOWER);
#[rustfmt::skip]
const SINGLETONS1U: &[(u8, u8)] = &[
const SINGLETONS1_UPPER: &[(u8, u8)] = &[
(0x00, 6),
(0x01, 1),
(0x03, 1),
Expand Down Expand Up @@ -216,7 +159,7 @@ const SINGLETONS1U: &[(u8, u8)] = &[
(0xfb, 1),
];
#[rustfmt::skip]
const SINGLETONS1L: &[u8] = &[
const SINGLETONS1_LOWER: &[u8] = &[
0x0c, 0x27, 0x3b, 0x3e, 0x4e, 0x4f, 0x8f, 0x9e,
0x9e, 0x9f, 0x7b, 0x8b, 0x93, 0x96, 0xa2, 0xb2,
0xba, 0x86, 0xb1, 0x06, 0x07, 0x09, 0x36, 0x3d,
Expand Down Expand Up @@ -244,8 +187,9 @@ const SINGLETONS1L: &[u8] = &[
0x7d, 0x7f, 0x8a, 0xa4, 0xaa, 0xaf, 0xb0, 0xc0,
0xd0, 0xae, 0xaf, 0x6e, 0x6f, 0xdd, 0xde, 0x93,
];
const SINGLETONS1: Singletons = Singletons::new(&SINGLETONS1_UPPER, &SINGLETONS1_LOWER);
#[rustfmt::skip]
const NORMAL0: &[u8] = &[
const NORMAL0: Normal = Normal::new(&[
0x00, 0x20,
0x5f, 0x22,
0x82, 0xdf, 0x04,
Expand Down Expand Up @@ -379,9 +323,9 @@ const NORMAL0: &[u8] = &[
0x80, 0xbe, 0x03,
0x1b, 0x03,
0x0f, 0x0d,
];
]);
#[rustfmt::skip]
const NORMAL1: &[u8] = &[
const NORMAL1: Normal = Normal::new(&[
0x5e, 0x22,
0x7b, 0x05,
0x03, 0x04,
Expand Down Expand Up @@ -601,4 +545,4 @@ const NORMAL1: &[u8] = &[
0x1c, 0x06,
0x09, 0x07,
0x80, 0xfa, 0x84, 0x06,
];
]);
Loading
Loading