Skip to content

Commit cf6f64a

Browse files
committed
Make slice->str conversion and related functions const
This commit makes the following functions from `core::str` `const fn`:

- `from_utf8[_mut]` (`feature(const_str_from_utf8)`)
- `from_utf8_unchecked_mut` (`feature(const_str_from_utf8_unchecked_mut)`)
- `Utf8Error::{valid_up_to,error_len}` (`feature(const_str_from_utf8)`)
1 parent c9c4b5d commit cf6f64a

File tree

6 files changed, with 106 additions (+106) and 24 deletions (-24).

library/alloc/tests/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#![feature(const_btree_new)]
2626
#![feature(const_default_impls)]
2727
#![feature(const_trait_impl)]
28+
#![feature(const_str_from_utf8)]
2829

2930
use std::collections::hash_map::DefaultHasher;
3031
use std::hash::{Hash, Hasher};

library/alloc/tests/str.rs

+61-3
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
use std::assert_matches::assert_matches;
12
use std::borrow::Cow;
23
use std::cmp::Ordering::{Equal, Greater, Less};
34
use std::str::{from_utf8, from_utf8_unchecked};
@@ -883,6 +884,33 @@ fn test_is_utf8() {
883884
assert!(from_utf8(&[0xF4, 0x8F, 0xBF, 0xBF]).is_ok());
884885
}
885886

887+
#[test]
888+
fn test_const_is_utf8() {
889+
const _: () = {
890+
// deny overlong encodings
891+
assert!(from_utf8(&[0xc0, 0x80]).is_err());
892+
assert!(from_utf8(&[0xc0, 0xae]).is_err());
893+
assert!(from_utf8(&[0xe0, 0x80, 0x80]).is_err());
894+
assert!(from_utf8(&[0xe0, 0x80, 0xaf]).is_err());
895+
assert!(from_utf8(&[0xe0, 0x81, 0x81]).is_err());
896+
assert!(from_utf8(&[0xf0, 0x82, 0x82, 0xac]).is_err());
897+
assert!(from_utf8(&[0xf4, 0x90, 0x80, 0x80]).is_err());
898+
899+
// deny surrogates
900+
assert!(from_utf8(&[0xED, 0xA0, 0x80]).is_err());
901+
assert!(from_utf8(&[0xED, 0xBF, 0xBF]).is_err());
902+
903+
assert!(from_utf8(&[0xC2, 0x80]).is_ok());
904+
assert!(from_utf8(&[0xDF, 0xBF]).is_ok());
905+
assert!(from_utf8(&[0xE0, 0xA0, 0x80]).is_ok());
906+
assert!(from_utf8(&[0xED, 0x9F, 0xBF]).is_ok());
907+
assert!(from_utf8(&[0xEE, 0x80, 0x80]).is_ok());
908+
assert!(from_utf8(&[0xEF, 0xBF, 0xBF]).is_ok());
909+
assert!(from_utf8(&[0xF0, 0x90, 0x80, 0x80]).is_ok());
910+
assert!(from_utf8(&[0xF4, 0x8F, 0xBF, 0xBF]).is_ok());
911+
};
912+
}
913+
886914
#[test]
887915
fn from_utf8_mostly_ascii() {
888916
// deny invalid bytes embedded in long stretches of ascii
@@ -895,13 +923,43 @@ fn from_utf8_mostly_ascii() {
895923
}
896924
}
897925

926+
#[test]
927+
fn const_from_utf8_mostly_ascii() {
928+
const _: () = {
929+
// deny invalid bytes embedded in long stretches of ascii
930+
let mut i = 32;
931+
while i < 64 {
932+
let mut data = [0; 128];
933+
data[i] = 0xC0;
934+
assert!(from_utf8(&data).is_err());
935+
data[i] = 0xC2;
936+
assert!(from_utf8(&data).is_err());
937+
938+
i = i + 1;
939+
}
940+
};
941+
}
942+
898943
#[test]
899944
fn from_utf8_error() {
900945
macro_rules! test {
901-
($input: expr, $expected_valid_up_to: expr, $expected_error_len: expr) => {
946+
($input: expr, $expected_valid_up_to:pat, $expected_error_len:pat) => {
902947
let error = from_utf8($input).unwrap_err();
903-
assert_eq!(error.valid_up_to(), $expected_valid_up_to);
904-
assert_eq!(error.error_len(), $expected_error_len);
948+
assert_matches!(error.valid_up_to(), $expected_valid_up_to);
949+
assert_matches!(error.error_len(), $expected_error_len);
950+
951+
const _: () = {
952+
match from_utf8($input) {
953+
Err(error) => {
954+
let valid_up_to = error.valid_up_to();
955+
let error_len = error.error_len();
956+
957+
assert!(matches!(valid_up_to, $expected_valid_up_to));
958+
assert!(matches!(error_len, $expected_error_len));
959+
}
960+
Ok(_) => unreachable!(),
961+
}
962+
};
905963
};
906964
}
907965
test!(b"A\xC3\xA9 \xFF ", 4, Some(1));

library/core/src/lib.rs

+3
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,7 @@
9797
#![allow(explicit_outlives_requirements)]
9898
//
9999
// Library features for const fns:
100+
#![feature(const_align_offset)]
100101
#![feature(const_align_of_val)]
101102
#![feature(const_alloc_layout)]
102103
#![feature(const_arguments_as_str)]
@@ -130,6 +131,7 @@
130131
#![feature(const_size_of_val)]
131132
#![feature(const_slice_from_raw_parts)]
132133
#![feature(const_slice_ptr_len)]
134+
#![feature(const_str_from_utf8_unchecked_mut)]
133135
#![feature(const_swap)]
134136
#![feature(const_trait_impl)]
135137
#![feature(const_type_id)]
@@ -138,6 +140,7 @@
138140
#![feature(duration_consts_2)]
139141
#![feature(ptr_metadata)]
140142
#![feature(slice_ptr_get)]
143+
#![feature(str_internals)]
141144
#![feature(variant_count)]
142145
#![feature(const_array_from_ref)]
143146
#![feature(const_slice_from_ref)]

library/core/src/str/converts.rs

+22-9
Original file line numberDiff line numberDiff line change
@@ -82,10 +82,16 @@ use super::Utf8Error;
8282
/// assert_eq!("💖", sparkle_heart);
8383
/// ```
8484
#[stable(feature = "rust1", since = "1.0.0")]
85-
pub fn from_utf8(v: &[u8]) -> Result<&str, Utf8Error> {
86-
run_utf8_validation(v)?;
87-
// SAFETY: Just ran validation.
88-
Ok(unsafe { from_utf8_unchecked(v) })
85+
#[rustc_const_unstable(feature = "const_str_from_utf8", issue = "none")]
86+
pub const fn from_utf8(v: &[u8]) -> Result<&str, Utf8Error> {
87+
// This should use `?` again, once it's `const`
88+
match run_utf8_validation(v) {
89+
Ok(_) => {
90+
// SAFETY: validation succeeded.
91+
Ok(unsafe { from_utf8_unchecked(v) })
92+
}
93+
Err(err) => Err(err),
94+
}
8995
}
9096

9197
/// Converts a mutable slice of bytes to a mutable string slice.
@@ -119,10 +125,16 @@ pub fn from_utf8(v: &[u8]) -> Result<&str, Utf8Error> {
119125
/// See the docs for [`Utf8Error`] for more details on the kinds of
120126
/// errors that can be returned.
121127
#[stable(feature = "str_mut_extras", since = "1.20.0")]
122-
pub fn from_utf8_mut(v: &mut [u8]) -> Result<&mut str, Utf8Error> {
123-
run_utf8_validation(v)?;
124-
// SAFETY: Just ran validation.
125-
Ok(unsafe { from_utf8_unchecked_mut(v) })
128+
#[rustc_const_unstable(feature = "const_str_from_utf8", issue = "none")]
129+
pub const fn from_utf8_mut(v: &mut [u8]) -> Result<&mut str, Utf8Error> {
130+
// This should use `?` again, once it's `const`
131+
match run_utf8_validation(v) {
132+
Ok(_) => {
133+
// SAFETY: validation succeeded.
134+
Ok(unsafe { from_utf8_unchecked_mut(v) })
135+
}
136+
Err(err) => Err(err),
137+
}
126138
}
127139

128140
/// Converts a slice of bytes to a string slice without checking
@@ -184,7 +196,8 @@ pub const unsafe fn from_utf8_unchecked(v: &[u8]) -> &str {
184196
#[inline]
185197
#[must_use]
186198
#[stable(feature = "str_mut_extras", since = "1.20.0")]
187-
pub unsafe fn from_utf8_unchecked_mut(v: &mut [u8]) -> &mut str {
199+
#[rustc_const_unstable(feature = "const_str_from_utf8_unchecked_mut", issue = "none")]
200+
pub const unsafe fn from_utf8_unchecked_mut(v: &mut [u8]) -> &mut str {
188201
// SAFETY: the caller must guarantee that the bytes `v`
189202
// are valid UTF-8, thus the cast to `*mut str` is safe.
190203
// Also, the pointer dereference is safe because that pointer

library/core/src/str/error.rs

+9-3
Original file line numberDiff line numberDiff line change
@@ -72,9 +72,10 @@ impl Utf8Error {
7272
/// assert_eq!(1, error.valid_up_to());
7373
/// ```
7474
#[stable(feature = "utf8_error", since = "1.5.0")]
75+
#[rustc_const_unstable(feature = "const_str_from_utf8", issue = "none")]
7576
#[must_use]
7677
#[inline]
77-
pub fn valid_up_to(&self) -> usize {
78+
pub const fn valid_up_to(&self) -> usize {
7879
self.valid_up_to
7980
}
8081

@@ -94,10 +95,15 @@ impl Utf8Error {
9495
///
9596
/// [U+FFFD]: ../../std/char/constant.REPLACEMENT_CHARACTER.html
9697
#[stable(feature = "utf8_error_error_len", since = "1.20.0")]
98+
#[rustc_const_unstable(feature = "const_str_from_utf8", issue = "none")]
9799
#[must_use]
98100
#[inline]
99-
pub fn error_len(&self) -> Option<usize> {
100-
self.error_len.map(|len| len as usize)
101+
pub const fn error_len(&self) -> Option<usize> {
102+
// This should become `map` again, once it's `const`
103+
match self.error_len {
104+
Some(len) => Some(len as usize),
105+
None => None,
106+
}
101107
}
102108
}
103109

library/core/src/str/validations.rs

+10-9
Original file line numberDiff line numberDiff line change
@@ -8,25 +8,25 @@ use super::Utf8Error;
88
/// The first byte is special, only want bottom 5 bits for width 2, 4 bits
99
/// for width 3, and 3 bits for width 4.
1010
#[inline]
11-
fn utf8_first_byte(byte: u8, width: u32) -> u32 {
11+
const fn utf8_first_byte(byte: u8, width: u32) -> u32 {
1212
(byte & (0x7F >> width)) as u32
1313
}
1414

1515
/// Returns the value of `ch` updated with continuation byte `byte`.
1616
#[inline]
17-
fn utf8_acc_cont_byte(ch: u32, byte: u8) -> u32 {
17+
const fn utf8_acc_cont_byte(ch: u32, byte: u8) -> u32 {
1818
(ch << 6) | (byte & CONT_MASK) as u32
1919
}
2020

2121
/// Checks whether the byte is a UTF-8 continuation byte (i.e., starts with the
2222
/// bits `10`).
2323
#[inline]
24-
pub(super) fn utf8_is_cont_byte(byte: u8) -> bool {
24+
pub(super) const fn utf8_is_cont_byte(byte: u8) -> bool {
2525
(byte as i8) < -64
2626
}
2727

2828
#[inline]
29-
fn unwrap_or_0(opt: Option<&u8>) -> u8 {
29+
const fn unwrap_or_0(opt: Option<&u8>) -> u8 {
3030
match opt {
3131
Some(&byte) => byte,
3232
None => 0,
@@ -105,14 +105,15 @@ const NONASCII_MASK: usize = 0x80808080_80808080u64 as usize;
105105

106106
/// Returns `true` if any byte in the word `x` is nonascii (>= 128).
107107
#[inline]
108-
fn contains_nonascii(x: usize) -> bool {
108+
const fn contains_nonascii(x: usize) -> bool {
109109
(x & NONASCII_MASK) != 0
110110
}
111111

112112
/// Walks through `v` checking that it's a valid UTF-8 sequence,
113113
/// returning `Ok(())` in that case, or, if it is invalid, `Err(err)`.
114114
#[inline(always)]
115-
pub(super) fn run_utf8_validation(v: &[u8]) -> Result<(), Utf8Error> {
115+
#[rustc_const_unstable(feature = "str_internals", issue = "none")]
116+
pub(super) const fn run_utf8_validation(v: &[u8]) -> Result<(), Utf8Error> {
116117
let mut index = 0;
117118
let len = v.len();
118119

@@ -142,7 +143,7 @@ pub(super) fn run_utf8_validation(v: &[u8]) -> Result<(), Utf8Error> {
142143

143144
let first = v[index];
144145
if first >= 128 {
145-
let w = UTF8_CHAR_WIDTH[first as usize];
146+
let w = utf8_char_width(first);
146147
// 2-byte encoding is for codepoints \u{0080} to \u{07ff}
147148
// first C2 80 last DF BF
148149
// 3-byte encoding is for codepoints \u{0800} to \u{ffff}
@@ -230,7 +231,7 @@ pub(super) fn run_utf8_validation(v: &[u8]) -> Result<(), Utf8Error> {
230231
}
231232

232233
// https://tools.ietf.org/html/rfc3629
233-
static UTF8_CHAR_WIDTH: [u8; 256] = [
234+
const UTF8_CHAR_WIDTH: &[u8; 256] = &[
234235
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
235236
1, // 0x1F
236237
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@@ -253,7 +254,7 @@ static UTF8_CHAR_WIDTH: [u8; 256] = [
253254
#[unstable(feature = "str_internals", issue = "none")]
254255
#[must_use]
255256
#[inline]
256-
pub fn utf8_char_width(b: u8) -> usize {
257+
pub const fn utf8_char_width(b: u8) -> usize {
257258
UTF8_CHAR_WIDTH[b as usize] as usize
258259
}
259260

0 commit comments

Comments
 (0)