Skip to content

Commit 370d31b

Browse files
committed
Constify [u8]::is_ascii (unstably)
UTF-8 checking in `const fn` was stabilized back in 1.63, but ASCII checking was somehow never const-ified, despite being simpler.
1 parent eb7a743 commit 370d31b

File tree

4 files changed

+35
-15
lines changed

4 files changed

+35
-15
lines changed

library/core/src/array/ascii.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ impl<const N: usize> [u8; N] {
77
#[unstable(feature = "ascii_char", issue = "110998")]
88
#[must_use]
99
#[inline]
10-
pub fn as_ascii(&self) -> Option<&[ascii::Char; N]> {
10+
pub const fn as_ascii(&self) -> Option<&[ascii::Char; N]> {
1111
if self.is_ascii() {
1212
// SAFETY: Just checked that it's ASCII
1313
Some(unsafe { self.as_ascii_unchecked() })

library/core/src/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,7 @@
149149
#![feature(const_slice_from_raw_parts_mut)]
150150
#![feature(const_slice_from_ref)]
151151
#![feature(const_slice_index)]
152+
#![feature(const_slice_is_ascii)]
152153
#![feature(const_slice_ptr_len)]
153154
#![feature(const_slice_split_at_mut)]
154155
#![feature(const_str_from_utf8_unchecked_mut)]

library/core/src/slice/ascii.rs

+30-12
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,10 @@ use crate::ops;
1010
impl [u8] {
1111
/// Checks if all bytes in this slice are within the ASCII range.
1212
#[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
13+
#[rustc_const_unstable(feature = "const_slice_is_ascii", issue = "111090")]
1314
#[must_use]
1415
#[inline]
15-
pub fn is_ascii(&self) -> bool {
16+
pub const fn is_ascii(&self) -> bool {
1617
is_ascii(self)
1718
}
1819

@@ -21,7 +22,7 @@ impl [u8] {
2122
#[unstable(feature = "ascii_char", issue = "110998")]
2223
#[must_use]
2324
#[inline]
24-
pub fn as_ascii(&self) -> Option<&[ascii::Char]> {
25+
pub const fn as_ascii(&self) -> Option<&[ascii::Char]> {
2526
if self.is_ascii() {
2627
// SAFETY: Just checked that it's ASCII
2728
Some(unsafe { self.as_ascii_unchecked() })
@@ -262,7 +263,7 @@ impl<'a> fmt::Debug for EscapeAscii<'a> {
262263
/// Returns `true` if any byte in the word `v` is nonascii (>= 128). Snarfed
263264
/// from `../str/mod.rs`, which does something similar for utf8 validation.
264265
#[inline]
265-
fn contains_nonascii(v: usize) -> bool {
266+
const fn contains_nonascii(v: usize) -> bool {
266267
const NONASCII_MASK: usize = usize::repeat_u8(0x80);
267268
(NONASCII_MASK & v) != 0
268269
}
@@ -280,7 +281,7 @@ fn contains_nonascii(v: usize) -> bool {
280281
/// If any of these loads produces something for which `contains_nonascii`
281282
/// (above) returns true, then we know the answer is false.
282283
#[inline]
283-
fn is_ascii(s: &[u8]) -> bool {
284+
const fn is_ascii(s: &[u8]) -> bool {
284285
const USIZE_SIZE: usize = mem::size_of::<usize>();
285286

286287
let len = s.len();
@@ -292,7 +293,16 @@ fn is_ascii(s: &[u8]) -> bool {
292293
// We also do this for architectures where `size_of::<usize>()` isn't
293294
// sufficient alignment for `usize`, because it's a weird edge case.
294295
if len < USIZE_SIZE || len < align_offset || USIZE_SIZE < mem::align_of::<usize>() {
295-
return s.iter().all(|b| b.is_ascii());
296+
// FIXME: once iterators and closures can be used in `const fn`,
297+
// return s.iter().all(|b| b.is_ascii());
298+
let mut i = 0;
299+
while i < len {
300+
if !s[i].is_ascii() {
301+
return false;
302+
}
303+
i += 1;
304+
}
305+
return true;
296306
}
297307

298308
// We always read the first word unaligned, which means `align_offset` is
@@ -321,18 +331,26 @@ fn is_ascii(s: &[u8]) -> bool {
321331
// Paranoia check about alignment, since we're about to do a bunch of
322332
// aligned loads. In practice this should be impossible barring a bug in
323333
// `align_offset` though.
324-
debug_assert_eq!(word_ptr.addr() % mem::align_of::<usize>(), 0);
334+
// While this method is allowed to spuriously fail in CTFE, if it doesn't
335+
// have alignment information it should have given a `usize::MAX` for
336+
// `align_offset` earlier, sending things through the scalar path instead of
337+
// this one, so this check should pass if it's reachable.
338+
debug_assert!(word_ptr.is_aligned_to(mem::align_of::<usize>()));
325339

326340
// Read subsequent words until the last aligned word, excluding the last
327341
// aligned word by itself to be done in tail check later, to ensure that
328342
// tail is always one `usize` at most, avoiding an extra `byte_pos == len` branch.
329343
while byte_pos < len - USIZE_SIZE {
330-
debug_assert!(
331-
// Sanity check that the read is in bounds
332-
(word_ptr.addr() + USIZE_SIZE) <= start.addr().wrapping_add(len) &&
333-
// And that our assumptions about `byte_pos` hold.
334-
(word_ptr.addr() - start.addr()) == byte_pos
335-
);
344+
// Sanity check that the read is in bounds
345+
debug_assert!(byte_pos + USIZE_SIZE <= len);
346+
// And that our assumptions about `byte_pos` hold.
347+
debug_assert!(matches!(
348+
word_ptr.cast::<u8>().guaranteed_eq(start.wrapping_add(byte_pos)),
349+
// These are from the same allocation, so will hopefully always be
350+
// known to match even in CTFE, but if it refuses to compare them
351+
// that's ok since it's just a debug check anyway.
352+
None | Some(true),
353+
));
336354

337355
// SAFETY: We know `word_ptr` is properly aligned (because of
338356
// `align_offset`), and we know that we have enough bytes between `word_ptr` and the end

library/core/src/str/mod.rs

+3-2
Original file line numberDiff line numberDiff line change
@@ -2358,9 +2358,10 @@ impl str {
23582358
/// assert!(!non_ascii.is_ascii());
23592359
/// ```
23602360
#[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
2361+
#[rustc_const_unstable(feature = "const_slice_is_ascii", issue = "111090")]
23612362
#[must_use]
23622363
#[inline]
2363-
pub fn is_ascii(&self) -> bool {
2364+
pub const fn is_ascii(&self) -> bool {
23642365
// We can treat each byte as character here: all multibyte characters
23652366
// start with a byte that is not in the ASCII range, so we will stop
23662367
// there already.
@@ -2372,7 +2373,7 @@ impl str {
23722373
#[unstable(feature = "ascii_char", issue = "110998")]
23732374
#[must_use]
23742375
#[inline]
2375-
pub fn as_ascii(&self) -> Option<&[ascii::Char]> {
2376+
pub const fn as_ascii(&self) -> Option<&[ascii::Char]> {
23762377
// Like in `is_ascii`, we can work on the bytes directly.
23772378
self.as_bytes().as_ascii()
23782379
}

0 commit comments

Comments
 (0)