From 237a637889ba5f38f5e12fc5e0aa3694a22017d2 Mon Sep 17 00:00:00 2001
From: Markus Reiter <me@reitermark.us>
Date: Mon, 7 Apr 2025 01:10:59 +0200
Subject: [PATCH 1/3] Make `core::unicode::printable` more readable.

---
 library/core/src/unicode/printable.py | 48 +++++++--------
 library/core/src/unicode/printable.rs | 84 +++++++++------------------
 2 files changed, 49 insertions(+), 83 deletions(-)

diff --git a/library/core/src/unicode/printable.py b/library/core/src/unicode/printable.py
index 260fa9f9e6ad2..b21ad42067f37 100755
--- a/library/core/src/unicode/printable.py
+++ b/library/core/src/unicode/printable.py
@@ -178,8 +178,8 @@ def main():
             else:
                 normal0.append((a, b - a))
 
-    singletons0u, singletons0l = compress_singletons(singletons0)
-    singletons1u, singletons1l = compress_singletons(singletons1)
+    SINGLETONS0_UPPER, SINGLETONS0_LOWER = compress_singletons(singletons0)
+    SINGLETONS1_UPPER, SINGLETONS1_LOWER = compress_singletons(singletons1)
     normal0 = compress_normal(normal0)
     normal1 = compress_normal(normal1)
 
@@ -187,21 +187,21 @@ def main():
 // NOTE: The following code was generated by "library/core/src/unicode/printable.py",
 //       do not edit directly!
 
-fn check(x: u16, singletonuppers: &[(u8, u8)], singletonlowers: &[u8], normal: &[u8]) -> bool {
-    let xupper = (x >> 8) as u8;
-    let mut lowerstart = 0;
-    for &(upper, lowercount) in singletonuppers {
-        let lowerend = lowerstart + lowercount as usize;
-        if xupper == upper {
-            for &lower in &singletonlowers[lowerstart..lowerend] {
+fn check(x: u16, singletons_upper: &[(u8, u8)], singletons_lower: &[u8], normal: &[u8]) -> bool {
+    let x_upper = (x >> 8) as u8;
+    let mut lower_start = 0;
+    for &(upper, lower_count) in singletons_upper {
+        let lower_end = lower_start + lower_count as usize;
+        if x_upper == upper {
+            for &lower in &singletons_lower[lower_start..lower_end] {
                 if lower == x as u8 {
                     return false;
                 }
             }
-        } else if xupper < upper {
+        } else if x_upper < upper {
             break;
         }
-        lowerstart = lowerend;
+        lower_start = lower_end;
     }
 
     let mut x = x as i32;
@@ -226,30 +226,22 @@ def main():
     let x = x as u32;
     let lower = x as u16;
 
-    if x < 32 {
-        // ASCII fast path
-        false
-    } else if x < 127 {
-        // ASCII fast path
-        true
-    } else if x < 0x10000 {
-        check(lower, SINGLETONS0U, SINGLETONS0L, NORMAL0)
-    } else if x < 0x20000 {
-        check(lower, SINGLETONS1U, SINGLETONS1L, NORMAL1)
-    } else {\
+    match x {
+        ..32 => false, // ASCII fast path
+        ..127 => true, // ASCII fast path
+        ..0x10000 => check(lower, SINGLETONS0_UPPER, SINGLETONS0_LOWER, NORMAL0),
+        ..0x20000 => check(lower, SINGLETONS1_UPPER, SINGLETONS1_LOWER, NORMAL1),\
 """)
     for a, b in extra:
-        print("        if 0x{:x} <= x && x < 0x{:x} {{".format(a, a + b))
-        print("            return false;")
-        print("        }")
+        print("        0x{:x}..0x{:x} => false,".format(a, a + b))
     print("""\
-        true
+        _ => true,
     }
 }\
 """)
     print()
-    print_singletons(singletons0u, singletons0l, "SINGLETONS0U", "SINGLETONS0L")
-    print_singletons(singletons1u, singletons1l, "SINGLETONS1U", "SINGLETONS1L")
+    print_singletons(SINGLETONS0_UPPER, SINGLETONS0_LOWER, "SINGLETONS0_UPPER", "SINGLETONS0_LOWER")
+    print_singletons(SINGLETONS1_UPPER, SINGLETONS1_LOWER, "SINGLETONS1_UPPER", "SINGLETONS1_LOWER")
     print_normal(normal0, "NORMAL0")
     print_normal(normal1, "NORMAL1")
 
diff --git a/library/core/src/unicode/printable.rs b/library/core/src/unicode/printable.rs
index d8fb50e4ed296..be0295a13c2fa 100644
--- a/library/core/src/unicode/printable.rs
+++ b/library/core/src/unicode/printable.rs
@@ -1,21 +1,21 @@
 // NOTE: The following code was generated by "library/core/src/unicode/printable.py",
 //       do not edit directly!
 
-fn check(x: u16, singletonuppers: &[(u8, u8)], singletonlowers: &[u8], normal: &[u8]) -> bool {
-    let xupper = (x >> 8) as u8;
-    let mut lowerstart = 0;
-    for &(upper, lowercount) in singletonuppers {
-        let lowerend = lowerstart + lowercount as usize;
-        if xupper == upper {
-            for &lower in &singletonlowers[lowerstart..lowerend] {
+fn check(x: u16, singletons_upper: &[(u8, u8)], singletons_lower: &[u8], normal: &[u8]) -> bool {
+    let x_upper = (x >> 8) as u8;
+    let mut lower_start = 0;
+    for &(upper, lower_count) in singletons_upper {
+        let lower_end = lower_start + lower_count as usize;
+        if x_upper == upper {
+            for &lower in &singletons_lower[lower_start..lower_end] {
                 if lower == x as u8 {
                     return false;
                 }
             }
-        } else if xupper < upper {
+        } else if x_upper < upper {
             break;
         }
-        lowerstart = lowerend;
+        lower_start = lower_end;
     }
 
     let mut x = x as i32;
@@ -40,53 +40,27 @@ pub(crate) fn is_printable(x: char) -> bool {
     let x = x as u32;
     let lower = x as u16;
 
-    if x < 32 {
-        // ASCII fast path
-        false
-    } else if x < 127 {
-        // ASCII fast path
-        true
-    } else if x < 0x10000 {
-        check(lower, SINGLETONS0U, SINGLETONS0L, NORMAL0)
-    } else if x < 0x20000 {
-        check(lower, SINGLETONS1U, SINGLETONS1L, NORMAL1)
-    } else {
-        if 0x2a6e0 <= x && x < 0x2a700 {
-            return false;
-        }
-        if 0x2b73a <= x && x < 0x2b740 {
-            return false;
-        }
-        if 0x2b81e <= x && x < 0x2b820 {
-            return false;
-        }
-        if 0x2cea2 <= x && x < 0x2ceb0 {
-            return false;
-        }
-        if 0x2ebe1 <= x && x < 0x2ebf0 {
-            return false;
-        }
-        if 0x2ee5e <= x && x < 0x2f800 {
-            return false;
-        }
-        if 0x2fa1e <= x && x < 0x30000 {
-            return false;
-        }
-        if 0x3134b <= x && x < 0x31350 {
-            return false;
-        }
-        if 0x323b0 <= x && x < 0xe0100 {
-            return false;
-        }
-        if 0xe01f0 <= x && x < 0x110000 {
-            return false;
-        }
-        true
+    match x {
+        ..32 => false, // ASCII fast path
+        ..127 => true, // ASCII fast path
+        ..0x10000 => check(lower, SINGLETONS0_UPPER, SINGLETONS0_LOWER, NORMAL0),
+        ..0x20000 => check(lower, SINGLETONS1_UPPER, SINGLETONS1_LOWER, NORMAL1),
+        0x2a6e0..0x2a700 => false,
+        0x2b73a..0x2b740 => false,
+        0x2b81e..0x2b820 => false,
+        0x2cea2..0x2ceb0 => false,
+        0x2ebe1..0x2ebf0 => false,
+        0x2ee5e..0x2f800 => false,
+        0x2fa1e..0x30000 => false,
+        0x3134b..0x31350 => false,
+        0x323b0..0xe0100 => false,
+        0xe01f0..0x110000 => false,
+        _ => true,
     }
 }
 
 #[rustfmt::skip]
-const SINGLETONS0U: &[(u8, u8)] = &[
+const SINGLETONS0_UPPER: &[(u8, u8)] = &[
     (0x00, 1),
     (0x03, 5),
     (0x05, 6),
@@ -129,7 +103,7 @@ const SINGLETONS0U: &[(u8, u8)] = &[
     (0xff, 9),
 ];
 #[rustfmt::skip]
-const SINGLETONS0L: &[u8] = &[
+const SINGLETONS0_LOWER: &[u8] = &[
     0xad, 0x78, 0x79, 0x8b, 0x8d, 0xa2, 0x30, 0x57,
     0x58, 0x8b, 0x8c, 0x90, 0x1c, 0xdd, 0x0e, 0x0f,
     0x4b, 0x4c, 0xfb, 0xfc, 0x2e, 0x2f, 0x3f, 0x5c,
@@ -169,7 +143,7 @@ const SINGLETONS0L: &[u8] = &[
     0xfe, 0xff,
 ];
 #[rustfmt::skip]
-const SINGLETONS1U: &[(u8, u8)] = &[
+const SINGLETONS1_UPPER: &[(u8, u8)] = &[
     (0x00, 6),
     (0x01, 1),
     (0x03, 1),
@@ -216,7 +190,7 @@ const SINGLETONS1U: &[(u8, u8)] = &[
     (0xfb, 1),
 ];
 #[rustfmt::skip]
-const SINGLETONS1L: &[u8] = &[
+const SINGLETONS1_LOWER: &[u8] = &[
     0x0c, 0x27, 0x3b, 0x3e, 0x4e, 0x4f, 0x8f, 0x9e,
     0x9e, 0x9f, 0x7b, 0x8b, 0x93, 0x96, 0xa2, 0xb2,
     0xba, 0x86, 0xb1, 0x06, 0x07, 0x09, 0x36, 0x3d,

From 32034e8e7e391c620e241c9a6b5be79d8ca7d14e Mon Sep 17 00:00:00 2001
From: Markus Reiter <me@reitermark.us>
Date: Mon, 7 Apr 2025 01:16:30 +0200
Subject: [PATCH 2/3] Optimize `core::unicode::printable`.

---
 library/core/src/unicode/printable.py | 75 +++++++++++++++++++++++----
 library/core/src/unicode/printable.rs | 67 ++++++++++++++++++++----
 2 files changed, 122 insertions(+), 20 deletions(-)

diff --git a/library/core/src/unicode/printable.py b/library/core/src/unicode/printable.py
index b21ad42067f37..8ea82ded9b36f 100755
--- a/library/core/src/unicode/printable.py
+++ b/library/core/src/unicode/printable.py
@@ -187,14 +187,28 @@ def main():
 // NOTE: The following code was generated by "library/core/src/unicode/printable.py",
 //       do not edit directly!
 
-fn check(x: u16, singletons_upper: &[(u8, u8)], singletons_lower: &[u8], normal: &[u8]) -> bool {
-    let x_upper = (x >> 8) as u8;
+/// # Safety
+///
+/// - The sum of all lengths (i.e. the second field of each pair) in `singletons_upper` must be
+///   equal to the length of `singletons_lower`.
+/// - `normal` must be encoded such that lengths greater than `0x7f` consist of two bytes in big
+///   endian, with the highest bit set and the length contained in the remaining 15 bits.
+unsafe fn check(
+    x: u16,
+    singletons_upper: &[(u8, u8)],
+    singletons_lower: &[u8],
+    normal: &[u8],
+) -> bool {
+    let [x_upper, x_lower] = x.to_be_bytes();
     let mut lower_start = 0;
     for &(upper, lower_count) in singletons_upper {
         let lower_end = lower_start + lower_count as usize;
-        if x_upper == upper {
-            for &lower in &singletons_lower[lower_start..lower_end] {
-                if lower == x as u8 {
+        if upper == x_upper {
+            // SAFETY: The caller ensures that the sum of all lengths in `singletons_upper`
+            // is equal to the length of `singletons_lower`, so `lower_end` is guaranteed to be
+            // less than `singletons_lower.len()`.
+            for &lower in unsafe { singletons_lower.get_unchecked(lower_start..lower_end) } {
+                if lower == x_lower {
                     return false;
                 }
             }
@@ -209,9 +223,14 @@ def main():
     let mut current = true;
     while let Some(v) = normal.next() {
         let len = if v & 0x80 != 0 {
-            ((v & 0x7f) as i32) << 8 | normal.next().unwrap() as i32
+            let upper = v & 0x7f;
+            // SAFETY: The encoding of `normal` is guaranteed by the caller such that
+            // if the length is greater than 0x7f, it consists of two bytes, so there
+            // must be a next byte.
+            let lower = unsafe { normal.next().unwrap_unchecked() };
+            i32::from(u16::from_be_bytes([upper, lower]))
         } else {
-            v as i32
+            i32::from(v)
         };
         x -= len;
         if x < 0 {
@@ -229,8 +248,38 @@ def main():
     match x {
         ..32 => false, // ASCII fast path
         ..127 => true, // ASCII fast path
-        ..0x10000 => check(lower, SINGLETONS0_UPPER, SINGLETONS0_LOWER, NORMAL0),
-        ..0x20000 => check(lower, SINGLETONS1_UPPER, SINGLETONS1_LOWER, NORMAL1),\
+        ..0x10000 => {
+            const {
+                let mut lower_count_total = 0;
+                let mut i = 0;
+                while i < SINGLETONS0_UPPER.len() {
+                    lower_count_total += SINGLETONS0_UPPER[i].1 as usize;
+                    i += 1;
+                }
+                assert!(lower_count_total == SINGLETONS0_LOWER.len());
+            }
+            // SAFETY: We just asserted that the sum of all lengths in `SINGLETONS0_UPPER` is equal
+            // to the length of `SINGLETONS0_LOWER`, and `NORMAL0` is encoded such that lengths
+            // greater than `0x7f` consist of two bytes in big endian, with the highest bit set and
+            // the length contained in the remaining 15 bits.
+            unsafe { check(lower, SINGLETONS0_UPPER, SINGLETONS0_LOWER, NORMAL0) }
+        }
+        ..0x20000 => {
+            const {
+                let mut lower_count_total = 0;
+                let mut i = 0;
+                while i < SINGLETONS1_UPPER.len() {
+                    lower_count_total += SINGLETONS1_UPPER[i].1 as usize;
+                    i += 1;
+                }
+                assert!(lower_count_total == SINGLETONS1_LOWER.len());
+            }
+            // SAFETY: We just asserted that the sum of all lengths in `SINGLETONS1_UPPER` is equal
+            // to the length of `SINGLETONS1_LOWER`, and `NORMAL1` is encoded such that lengths
+            // greater than `0x7f` consist of two bytes in big endian, with the highest bit set and
+            // the length contained in the remaining 15 bits.
+            unsafe { check(lower, SINGLETONS1_UPPER, SINGLETONS1_LOWER, NORMAL1) }
+        }\
 """)
     for a, b in extra:
         print("        0x{:x}..0x{:x} => false,".format(a, a + b))
@@ -240,8 +289,12 @@ def main():
 }\
 """)
     print()
-    print_singletons(SINGLETONS0_UPPER, SINGLETONS0_LOWER, "SINGLETONS0_UPPER", "SINGLETONS0_LOWER")
-    print_singletons(SINGLETONS1_UPPER, SINGLETONS1_LOWER, "SINGLETONS1_UPPER", "SINGLETONS1_LOWER")
+    print_singletons(
+        SINGLETONS0_UPPER, SINGLETONS0_LOWER, "SINGLETONS0_UPPER", "SINGLETONS0_LOWER"
+    )
+    print_singletons(
+        SINGLETONS1_UPPER, SINGLETONS1_LOWER, "SINGLETONS1_UPPER", "SINGLETONS1_LOWER"
+    )
     print_normal(normal0, "NORMAL0")
     print_normal(normal1, "NORMAL1")
 
diff --git a/library/core/src/unicode/printable.rs b/library/core/src/unicode/printable.rs
index be0295a13c2fa..8cd891670cf0a 100644
--- a/library/core/src/unicode/printable.rs
+++ b/library/core/src/unicode/printable.rs
@@ -1,14 +1,28 @@
 // NOTE: The following code was generated by "library/core/src/unicode/printable.py",
 //       do not edit directly!
 
-fn check(x: u16, singletons_upper: &[(u8, u8)], singletons_lower: &[u8], normal: &[u8]) -> bool {
-    let x_upper = (x >> 8) as u8;
+/// # Safety
+///
+/// - The sum of all lengths (i.e. the second field of each pair) in `singletons_upper` must be
+///   equal to the length of `singletons_lower`.
+/// - `normal` must be encoded such that lengths greater than `0x7f` consist of two bytes in big
+///   endian, with the highest bit set and the length contained in the remaining 15 bits.
+unsafe fn check(
+    x: u16,
+    singletons_upper: &[(u8, u8)],
+    singletons_lower: &[u8],
+    normal: &[u8],
+) -> bool {
+    let [x_upper, x_lower] = x.to_be_bytes();
     let mut lower_start = 0;
     for &(upper, lower_count) in singletons_upper {
         let lower_end = lower_start + lower_count as usize;
-        if x_upper == upper {
-            for &lower in &singletons_lower[lower_start..lower_end] {
-                if lower == x as u8 {
+        if upper == x_upper {
+            // SAFETY: The caller ensures that the sum of all lengths in `singletons_upper`
+            // is equal to the length of `singletons_lower`, so `lower_end` is guaranteed to be
+            // less than `singletons_lower.len()`.
+            for &lower in unsafe { singletons_lower.get_unchecked(lower_start..lower_end) } {
+                if lower == x_lower {
                     return false;
                 }
             }
@@ -23,9 +37,14 @@ fn check(x: u16, singletons_upper: &[(u8, u8)], singletons_lower: &[u8], normal:
     let mut current = true;
     while let Some(v) = normal.next() {
         let len = if v & 0x80 != 0 {
-            ((v & 0x7f) as i32) << 8 | normal.next().unwrap() as i32
+            let upper = v & 0x7f;
+            // SAFETY: The encoding of `normal` is guaranteed by the caller such that
+            // if the length is greater than 0x7f, it consists of two bytes, so there
+            // must be a next byte.
+            let lower = unsafe { normal.next().unwrap_unchecked() };
+            i32::from(u16::from_be_bytes([upper, lower]))
         } else {
-            v as i32
+            i32::from(v)
         };
         x -= len;
         if x < 0 {
@@ -43,8 +62,38 @@ pub(crate) fn is_printable(x: char) -> bool {
     match x {
         ..32 => false, // ASCII fast path
         ..127 => true, // ASCII fast path
-        ..0x10000 => check(lower, SINGLETONS0_UPPER, SINGLETONS0_LOWER, NORMAL0),
-        ..0x20000 => check(lower, SINGLETONS1_UPPER, SINGLETONS1_LOWER, NORMAL1),
+        ..0x10000 => {
+            const {
+                let mut lower_count_total = 0;
+                let mut i = 0;
+                while i < SINGLETONS0_UPPER.len() {
+                    lower_count_total += SINGLETONS0_UPPER[i].1 as usize;
+                    i += 1;
+                }
+                assert!(lower_count_total == SINGLETONS0_LOWER.len());
+            }
+            // SAFETY: We just asserted that the sum of all lengths in `SINGLETONS0_UPPER` is equal
+            // to the length of `SINGLETONS0_LOWER`, and `NORMAL0` is encoded such that lengths
+            // greater than `0x7f` consist of two bytes in big endian, with the highest bit set and
+            // the length contained in the remaining 15 bits.
+            unsafe { check(lower, SINGLETONS0_UPPER, SINGLETONS0_LOWER, NORMAL0) }
+        }
+        ..0x20000 => {
+            const {
+                let mut lower_count_total = 0;
+                let mut i = 0;
+                while i < SINGLETONS1_UPPER.len() {
+                    lower_count_total += SINGLETONS1_UPPER[i].1 as usize;
+                    i += 1;
+                }
+                assert!(lower_count_total == SINGLETONS1_LOWER.len());
+            }
+            // SAFETY: We just asserted that the sum of all lengths in `SINGLETONS1_UPPER` is equal
+            // to the length of `SINGLETONS1_LOWER`, and `NORMAL1` is encoded such that lengths
+            // greater than `0x7f` consist of two bytes in big endian, with the highest bit set and
+            // the length contained in the remaining 15 bits.
+            unsafe { check(lower, SINGLETONS1_UPPER, SINGLETONS1_LOWER, NORMAL1) }
+        }
         0x2a6e0..0x2a700 => false,
         0x2b73a..0x2b740 => false,
         0x2b81e..0x2b820 => false,

From 66ada05d5410260f8b0c468d4887f072428117e4 Mon Sep 17 00:00:00 2001
From: Markus Reiter <me@reitermark.us>
Date: Fri, 30 May 2025 02:22:48 +0200
Subject: [PATCH 3/3] Reduce amount of generated `unsafe` code.

---
 library/core/src/unicode/printable.py       | 127 ++++-------------
 library/core/src/unicode/printable.rs       | 103 ++------------
 library/core/src/unicode/printable/check.rs | 147 ++++++++++++++++++++
 3 files changed, 187 insertions(+), 190 deletions(-)
 create mode 100644 library/core/src/unicode/printable/check.rs

diff --git a/library/core/src/unicode/printable.py b/library/core/src/unicode/printable.py
index 8ea82ded9b36f..d9d811cd95fee 100755
--- a/library/core/src/unicode/printable.py
+++ b/library/core/src/unicode/printable.py
@@ -92,8 +92,8 @@ def compress_singletons(singletons):
 
 
 def compress_normal(normal):
-    # lengths 0x00..0x7f are encoded as 00, 01, ..., 7e, 7f
-    # lengths 0x80..0x7fff are encoded as 80 80, 80 81, ..., ff fe, ff ff
+    # lengths 0x00..0x7f are encoded as 0x00, 0x01, …, 0x7e, 0x7f
+    # lengths 0x80..0x7fff are encoded as 0x8080, 0x8081, …, 0xfffe, 0xffff
     compressed = []  # [truelen, (truelenaux), falselen, (falselenaux)]
 
     prev_start = 0
@@ -108,19 +108,19 @@ def compress_normal(normal):
             entry.append(0x80 | (truelen >> 8))
             entry.append(truelen & 0xFF)
         else:
-            entry.append(truelen & 0x7F)
+            entry.append(truelen)
         if falselen > 0x7F:
             entry.append(0x80 | (falselen >> 8))
             entry.append(falselen & 0xFF)
         else:
-            entry.append(falselen & 0x7F)
+            entry.append(falselen)
 
         compressed.append(entry)
 
     return compressed
 
 
-def print_singletons(uppers, lowers, uppersname, lowersname):
+def print_singletons(name, uppers, lowers, uppersname, lowersname):
     print("#[rustfmt::skip]")
     print("const {}: &[(u8, u8)] = &[".format(uppersname))
     for u, c in uppers:
@@ -134,13 +134,15 @@ def print_singletons(uppers, lowers, uppersname, lowersname):
         )
     print("];")
 
+    print(f"const {name}: Singletons = Singletons::new(&{uppersname}, &{lowersname});")
+
 
 def print_normal(normal, normalname):
     print("#[rustfmt::skip]")
-    print("const {}: &[u8] = &[".format(normalname))
+    print("const {}: Normal = Normal::new(&[".format(normalname))
     for v in normal:
         print("    {}".format(" ".join("{:#04x},".format(i) for i in v)))
-    print("];")
+    print("]);")
 
 
 def main():
@@ -178,8 +180,8 @@ def main():
             else:
                 normal0.append((a, b - a))
 
-    SINGLETONS0_UPPER, SINGLETONS0_LOWER = compress_singletons(singletons0)
-    SINGLETONS1_UPPER, SINGLETONS1_LOWER = compress_singletons(singletons1)
+    singletons0_upper, singletons0_lower = compress_singletons(singletons0)
+    singletons1_upper, singletons1_lower = compress_singletons(singletons1)
     normal0 = compress_normal(normal0)
     normal1 = compress_normal(normal1)
 
@@ -187,102 +189,21 @@ def main():
 // NOTE: The following code was generated by "library/core/src/unicode/printable.py",
 //       do not edit directly!
 
-/// # Safety
-///
-/// - The sum of all lengths (i.e. the second field of each pair) in `singletons_upper` must be
-///   equal to the length of `singletons_lower`.
-/// - `normal` must be encoded such that lengths greater than `0x7f` consist of two bytes in big
-///   endian, with the highest bit set and the length contained in the remaining 15 bits.
-unsafe fn check(
-    x: u16,
-    singletons_upper: &[(u8, u8)],
-    singletons_lower: &[u8],
-    normal: &[u8],
-) -> bool {
-    let [x_upper, x_lower] = x.to_be_bytes();
-    let mut lower_start = 0;
-    for &(upper, lower_count) in singletons_upper {
-        let lower_end = lower_start + lower_count as usize;
-        if upper == x_upper {
-            // SAFETY: The caller ensures that the sum of all lengths in `singletons_upper`
-            // is equal to the length of `singletons_lower`, so `lower_end` is guaranteed to be
-            // less than `singletons_lower.len()`.
-            for &lower in unsafe { singletons_lower.get_unchecked(lower_start..lower_end) } {
-                if lower == x_lower {
-                    return false;
-                }
-            }
-        } else if x_upper < upper {
-            break;
-        }
-        lower_start = lower_end;
-    }
-
-    let mut x = x as i32;
-    let mut normal = normal.iter().cloned();
-    let mut current = true;
-    while let Some(v) = normal.next() {
-        let len = if v & 0x80 != 0 {
-            let upper = v & 0x7f;
-            // SAFETY: The encoding of `normal` is guaranteed by the caller such that
-            // if the length is greater than 0x7f, it consists of two bytes, so there
-            // must be a next byte.
-            let lower = unsafe { normal.next().unwrap_unchecked() };
-            i32::from(u16::from_be_bytes([upper, lower]))
-        } else {
-            i32::from(v)
-        };
-        x -= len;
-        if x < 0 {
-            break;
-        }
-        current = !current;
-    }
-    current
-}
+mod check;
+use check::{Normal, Singletons};
 
 pub(crate) fn is_printable(x: char) -> bool {
     let x = x as u32;
     let lower = x as u16;
 
     match x {
-        ..32 => false, // ASCII fast path
-        ..127 => true, // ASCII fast path
-        ..0x10000 => {
-            const {
-                let mut lower_count_total = 0;
-                let mut i = 0;
-                while i < SINGLETONS0_UPPER.len() {
-                    lower_count_total += SINGLETONS0_UPPER[i].1 as usize;
-                    i += 1;
-                }
-                assert!(lower_count_total == SINGLETONS0_LOWER.len());
-            }
-            // SAFETY: We just asserted that the sum of all lengths in `SINGLETONS0_UPPER` is equal
-            // to the length of `SINGLETONS0_LOWER`, and `NORMAL0` is encoded such that lengths
-            // greater than `0x7f` consist of two bytes in big endian, with the highest bit set and
-            // the length contained in the remaining 15 bits.
-            unsafe { check(lower, SINGLETONS0_UPPER, SINGLETONS0_LOWER, NORMAL0) }
-        }
-        ..0x20000 => {
-            const {
-                let mut lower_count_total = 0;
-                let mut i = 0;
-                while i < SINGLETONS1_UPPER.len() {
-                    lower_count_total += SINGLETONS1_UPPER[i].1 as usize;
-                    i += 1;
-                }
-                assert!(lower_count_total == SINGLETONS1_LOWER.len());
-            }
-            // SAFETY: We just asserted that the sum of all lengths in `SINGLETONS1_UPPER` is equal
-            // to the length of `SINGLETONS1_LOWER`, and `NORMAL1` is encoded such that lengths
-            // greater than `0x7f` consist of two bytes in big endian, with the highest bit set and
-            // the length contained in the remaining 15 bits.
-            unsafe { check(lower, SINGLETONS1_UPPER, SINGLETONS1_LOWER, NORMAL1) }
-        }\
+        ..32 => false, // ASCII fast path.
+        ..127 => true, // ASCII fast path.
+        ..0x10000 => SINGLETONS0.check(lower) && NORMAL0.check(lower),
+        ..0x20000 => SINGLETONS1.check(lower) && NORMAL1.check(lower),\
 """)
     for a, b in extra:
-        print("        0x{:x}..0x{:x} => false,".format(a, a + b))
+        print("        {:#x}..{:#x} => false,".format(a, a + b))
     print("""\
         _ => true,
     }
@@ -290,10 +211,18 @@ def main():
 """)
     print()
     print_singletons(
-        SINGLETONS0_UPPER, SINGLETONS0_LOWER, "SINGLETONS0_UPPER", "SINGLETONS0_LOWER"
+        "SINGLETONS0",
+        singletons0_upper,
+        singletons0_lower,
+        "SINGLETONS0_UPPER",
+        "SINGLETONS0_LOWER",
     )
     print_singletons(
-        SINGLETONS1_UPPER, SINGLETONS1_LOWER, "SINGLETONS1_UPPER", "SINGLETONS1_LOWER"
+        "SINGLETONS1",
+        singletons1_upper,
+        singletons1_lower,
+        "SINGLETONS1_UPPER",
+        "SINGLETONS1_LOWER",
     )
     print_normal(normal0, "NORMAL0")
     print_normal(normal1, "NORMAL1")
diff --git a/library/core/src/unicode/printable.rs b/library/core/src/unicode/printable.rs
index 8cd891670cf0a..878ecbc436384 100644
--- a/library/core/src/unicode/printable.rs
+++ b/library/core/src/unicode/printable.rs
@@ -1,99 +1,18 @@
 // NOTE: The following code was generated by "library/core/src/unicode/printable.py",
 //       do not edit directly!
 
-/// # Safety
-///
-/// - The sum of all lengths (i.e. the second field of each pair) in `singletons_upper` must be
-///   equal to the length of `singletons_lower`.
-/// - `normal` must be encoded such that lengths greater than `0x7f` consist of two bytes in big
-///   endian, with the highest bit set and the length contained in the remaining 15 bits.
-unsafe fn check(
-    x: u16,
-    singletons_upper: &[(u8, u8)],
-    singletons_lower: &[u8],
-    normal: &[u8],
-) -> bool {
-    let [x_upper, x_lower] = x.to_be_bytes();
-    let mut lower_start = 0;
-    for &(upper, lower_count) in singletons_upper {
-        let lower_end = lower_start + lower_count as usize;
-        if upper == x_upper {
-            // SAFETY: The caller ensures that the sum of all lengths in `singletons_upper`
-            // is equal to the length of `singletons_lower`, so `lower_end` is guaranteed to be
-            // less than `singletons_lower.len()`.
-            for &lower in unsafe { singletons_lower.get_unchecked(lower_start..lower_end) } {
-                if lower == x_lower {
-                    return false;
-                }
-            }
-        } else if x_upper < upper {
-            break;
-        }
-        lower_start = lower_end;
-    }
-
-    let mut x = x as i32;
-    let mut normal = normal.iter().cloned();
-    let mut current = true;
-    while let Some(v) = normal.next() {
-        let len = if v & 0x80 != 0 {
-            let upper = v & 0x7f;
-            // SAFETY: The encoding of `normal` is guaranteed by the caller such that
-            // if the length is greater than 0x7f, it consists of two bytes, so there
-            // must be a next byte.
-            let lower = unsafe { normal.next().unwrap_unchecked() };
-            i32::from(u16::from_be_bytes([upper, lower]))
-        } else {
-            i32::from(v)
-        };
-        x -= len;
-        if x < 0 {
-            break;
-        }
-        current = !current;
-    }
-    current
-}
+mod check;
+use check::{Normal, Singletons};
 
 pub(crate) fn is_printable(x: char) -> bool {
     let x = x as u32;
     let lower = x as u16;
 
     match x {
-        ..32 => false, // ASCII fast path
-        ..127 => true, // ASCII fast path
-        ..0x10000 => {
-            const {
-                let mut lower_count_total = 0;
-                let mut i = 0;
-                while i < SINGLETONS0_UPPER.len() {
-                    lower_count_total += SINGLETONS0_UPPER[i].1 as usize;
-                    i += 1;
-                }
-                assert!(lower_count_total == SINGLETONS0_LOWER.len());
-            }
-            // SAFETY: We just asserted that the sum of all lengths in `SINGLETONS0_UPPER` is equal
-            // to the length of `SINGLETONS0_LOWER`, and `NORMAL0` is encoded such that lengths
-            // greater than `0x7f` consist of two bytes in big endian, with the highest bit set and
-            // the length contained in the remaining 15 bits.
-            unsafe { check(lower, SINGLETONS0_UPPER, SINGLETONS0_LOWER, NORMAL0) }
-        }
-        ..0x20000 => {
-            const {
-                let mut lower_count_total = 0;
-                let mut i = 0;
-                while i < SINGLETONS1_UPPER.len() {
-                    lower_count_total += SINGLETONS1_UPPER[i].1 as usize;
-                    i += 1;
-                }
-                assert!(lower_count_total == SINGLETONS1_LOWER.len());
-            }
-            // SAFETY: We just asserted that the sum of all lengths in `SINGLETONS1_UPPER` is equal
-            // to the length of `SINGLETONS1_LOWER`, and `NORMAL1` is encoded such that lengths
-            // greater than `0x7f` consist of two bytes in big endian, with the highest bit set and
-            // the length contained in the remaining 15 bits.
-            unsafe { check(lower, SINGLETONS1_UPPER, SINGLETONS1_LOWER, NORMAL1) }
-        }
+        ..32 => false, // ASCII fast path.
+        ..127 => true, // ASCII fast path.
+        ..0x10000 => SINGLETONS0.check(lower) && NORMAL0.check(lower),
+        ..0x20000 => SINGLETONS1.check(lower) && NORMAL1.check(lower),
         0x2a6e0..0x2a700 => false,
         0x2b73a..0x2b740 => false,
         0x2b81e..0x2b820 => false,
@@ -191,6 +110,7 @@ const SINGLETONS0_LOWER: &[u8] = &[
     0x75, 0xc8, 0xc9, 0xd0, 0xd1, 0xd8, 0xd9, 0xe7,
     0xfe, 0xff,
 ];
+const SINGLETONS0: Singletons = Singletons::new(&SINGLETONS0_UPPER, &SINGLETONS0_LOWER);
 #[rustfmt::skip]
 const SINGLETONS1_UPPER: &[(u8, u8)] = &[
     (0x00, 6),
@@ -267,8 +187,9 @@ const SINGLETONS1_LOWER: &[u8] = &[
     0x7d, 0x7f, 0x8a, 0xa4, 0xaa, 0xaf, 0xb0, 0xc0,
     0xd0, 0xae, 0xaf, 0x6e, 0x6f, 0xdd, 0xde, 0x93,
 ];
+const SINGLETONS1: Singletons = Singletons::new(&SINGLETONS1_UPPER, &SINGLETONS1_LOWER);
 #[rustfmt::skip]
-const NORMAL0: &[u8] = &[
+const NORMAL0: Normal = Normal::new(&[
     0x00, 0x20,
     0x5f, 0x22,
     0x82, 0xdf, 0x04,
@@ -402,9 +323,9 @@ const NORMAL0: &[u8] = &[
     0x80, 0xbe, 0x03,
     0x1b, 0x03,
     0x0f, 0x0d,
-];
+]);
 #[rustfmt::skip]
-const NORMAL1: &[u8] = &[
+const NORMAL1: Normal = Normal::new(&[
     0x5e, 0x22,
     0x7b, 0x05,
     0x03, 0x04,
@@ -624,4 +545,4 @@ const NORMAL1: &[u8] = &[
     0x1c, 0x06,
     0x09, 0x07,
     0x80, 0xfa, 0x84, 0x06,
-];
+]);
diff --git a/library/core/src/unicode/printable/check.rs b/library/core/src/unicode/printable/check.rs
new file mode 100644
index 0000000000000..d62a57950448e
--- /dev/null
+++ b/library/core/src/unicode/printable/check.rs
@@ -0,0 +1,147 @@
+use crate::{iter, slice};
+
+/// A compact representation of Unicode singletons.
+///
+/// This is basically a `&[u16]`, but represented as `&[(u8, &[u8])]`,
+/// i.e. pairs of upper bytes and multiple corresponding lower bytes.
+///
+/// However, in order to reduce the pointer-sized overhead for each nested
+/// slice, it is compacted again into `&[(u8, u8)]`, where the second element
+/// of each pair is the number of lower bytes, and a separate, contiguous
+/// `&[u8]` that stores the lower bytes of all pairs back to back.
+pub(super) struct Singletons {
+    upper: &'static [(u8, u8)], // `(upper byte, number of lower bytes for it)` pairs
+    lower: &'static [u8], // lower bytes of all pairs, stored back to back
+}
+
+impl Singletons {
+    /// Creates a new `Singletons` instance from compacted upper and lower bytes.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the sum of all lengths (i.e. the second field of each pair) in `upper`
+    /// is not equal to the length of `lower`; `SingletonsIter` relies on this for soundness.
+    pub(super) const fn new(upper: &'static [(u8, u8)], lower: &'static [u8]) -> Self {
+        let mut lower_count_total = 0; // running sum of the lengths stored in `upper`
+        let mut i = 0; // manual index loop, presumably since `for` is unavailable in `const fn`
+        while i < upper.len() {
+            lower_count_total += upper[i].1 as usize;
+            i += 1;
+        }
+        assert!(
+            lower_count_total == lower.len(),
+            "Sum of lengths in `upper` does not match `lower` length."
+        );
+
+        Self { upper, lower }
+    }
+
+    #[inline]
+    fn iter(&self) -> SingletonsIter {
+        SingletonsIter { iter: self.upper.iter().cloned(), lower: self.lower, lower_start: 0 }
+    }
+
+    pub(super) fn check(&self, x: u16) -> bool {
+        let [x_upper, x_lower] = x.to_be_bytes();
+        for (upper, lowers) in self.iter() {
+            if upper == x_upper {
+                for &lower in lowers {
+                    if lower == x_lower {
+                        return false; // `x` is one of the stored singletons
+                    }
+                }
+            } else if x_upper < upper {
+                break; // assumes `upper` bytes are sorted ascending, so no later entry can match
+            }
+        }
+
+        true // `x` was not found among the stored singletons
+    }
+}
+
+struct SingletonsIter {
+    iter: iter::Cloned<slice::Iter<'static, (u8, u8)>>, // remaining `(upper, lower_count)` pairs
+    lower: &'static [u8], // the complete lower-byte table of the `Singletons`
+    lower_start: usize, // offset into `lower` where the next pair's lower bytes begin
+}
+
+impl Iterator for SingletonsIter {
+    type Item = (u8, &'static [u8]); // (upper byte, the lower bytes belonging to it)
+
+    fn next(&mut self) -> Option<Self::Item> {
+        let (upper, lower_count) = self.iter.next()?; // `None` once all pairs are consumed
+
+        let lower_start = self.lower_start;
+        let lower_end = lower_start + lower_count as usize;
+        self.lower_start = lower_end; // the next pair continues where this one ends
+
+        // SAFETY: `Singletons::new` asserts that the sum of all lengths in `upper` equals
+        // `lower.len()`. `lower_end` is a prefix sum of those lengths starting from 0, so
+        // `lower_start <= lower_end <= lower.len()` and the range is in bounds.
+        let lowers = unsafe { self.lower.get_unchecked(lower_start..lower_end) };
+
+        Some((upper, lowers))
+    }
+}
+
+/// A compact representation of lengths, each stored as one or two bytes (see `NormalIter`).
+pub(super) struct Normal(&'static [u8]);
+
+impl Normal {
+    pub(super) const fn new(normal: &'static [u8]) -> Self {
+        // Invariant: Lengths greater than `0x7f` must be encoded as two big-endian bytes with the
+        // highest bit set and the length in the remaining 15 bits, i.e. `len & 0x7fff`.
+        {
+            let mut i = 0;
+            while i < normal.len() {
+                if normal[i] & 0b1000_0000 != 0 {
+                    assert!(
+                        i + 1 < normal.len(), // the second byte must exist
+                        "Length greater than `0x7f` is not encoded as two bytes."
+                    );
+                    i += 2; // skip both bytes of the two-byte length
+                } else {
+                    i += 1; // one-byte length
+                }
+            }
+        }
+
+        Self(normal)
+    }
+
+    #[inline]
+    fn iter(&self) -> NormalIter {
+        NormalIter { iter: self.0.iter().cloned() }
+    }
+
+    pub(super) fn check(&self, mut x: u16) -> bool {
+        let mut current = true; // answer while `x` falls within the span being visited
+        for len in self.iter() {
+            x = if let Some(x) = x.checked_sub(len) { x } else { break }; // `x` is in this span
+            current = !current; // consecutive spans alternate between `true` and `false`
+        }
+        current // also reached when `x` lies beyond all encoded lengths
+    }
+}
+
+struct NormalIter {
+    iter: iter::Cloned<slice::Iter<'static, u8>>, // encoded bytes not yet decoded
+}
+
+impl Iterator for NormalIter {
+    type Item = u16; // one decoded length
+
+    fn next(&mut self) -> Option<Self::Item> {
+        let len = self.iter.next()?;
+
+        Some(if len & 0b1000_0000 != 0 {
+            let upper = len & 0b0111_1111; // drop the marker bit, keep the upper 7 length bits
+            // SAFETY: `Normal::new` asserts that every length byte with its highest bit set
+            // is followed by a second byte, so `next()` cannot return `None` here.
+            let lower = unsafe { self.iter.next().unwrap_unchecked() };
+            u16::from_be_bytes([upper, lower])
+        } else {
+            u16::from(len) // one-byte length, at most `0x7f`
+        })
+    }
+}