From 41b04653f3d2fd9cd6451a586354257d661385e2 Mon Sep 17 00:00:00 2001 From: Thalia Archibald Date: Sun, 23 Mar 2025 19:45:27 -0700 Subject: [PATCH 01/15] Trusty: Implement write_vectored for stdio Currently, `write` for stdout and stderr on Trusty is implemented with the semantics of `write_all`. Instead, call the underlying syscall only once in `write` and use the default implementation of `write_all` like other platforms. Also, implement `write_vectored` by adding support for `IoSlice`. Refactor stdin to reuse the unsupported type like #136769. --- library/std/src/sys/io/io_slice/iovec.rs | 2 +- library/std/src/sys/io/mod.rs | 2 +- library/std/src/sys/stdio/trusty.rs | 83 ++++++++++++++---------- 3 files changed, 49 insertions(+), 38 deletions(-) diff --git a/library/std/src/sys/io/io_slice/iovec.rs b/library/std/src/sys/io/io_slice/iovec.rs index 072191315f7c5..df56358969a39 100644 --- a/library/std/src/sys/io/io_slice/iovec.rs +++ b/library/std/src/sys/io/io_slice/iovec.rs @@ -1,6 +1,6 @@ #[cfg(target_os = "hermit")] use hermit_abi::iovec; -#[cfg(target_family = "unix")] +#[cfg(any(target_family = "unix", target_os = "trusty"))] use libc::iovec; use crate::ffi::c_void; diff --git a/library/std/src/sys/io/mod.rs b/library/std/src/sys/io/mod.rs index e00b479109f39..4d0365d42fd9b 100644 --- a/library/std/src/sys/io/mod.rs +++ b/library/std/src/sys/io/mod.rs @@ -2,7 +2,7 @@ mod io_slice { cfg_if::cfg_if! { - if #[cfg(any(target_family = "unix", target_os = "hermit", target_os = "solid_asp3"))] { + if #[cfg(any(target_family = "unix", target_os = "hermit", target_os = "solid_asp3", target_os = "trusty"))] { mod iovec; pub use iovec::*; } else if #[cfg(target_os = "windows")] { diff --git a/library/std/src/sys/stdio/trusty.rs b/library/std/src/sys/stdio/trusty.rs index d393e95394d1a..e05461aa44a73 100644 --- a/library/std/src/sys/stdio/trusty.rs +++ b/library/std/src/sys/stdio/trusty.rs @@ -1,21 +1,14 @@ -use crate::io; +#[expect(dead_code)] +#[path = "unsupported.rs"] +mod unsupported_stdio; -pub struct Stdin; +use crate::cmp; +use crate::io::{self, IoSlice}; + +pub type Stdin = unsupported_stdio::Stdin; pub struct Stdout; pub struct Stderr; -impl Stdin { - pub const fn new() -> Stdin { - Stdin - } -} - -impl io::Read for Stdin { - fn read(&mut self, _buf: &mut [u8]) -> io::Result { - Ok(0) - } -} - impl Stdout { pub const fn new() -> Stdout { Stdout @@ -24,7 +17,16 @@ impl Stdout { impl io::Write for Stdout { fn write(&mut self, buf: &[u8]) -> io::Result { - _write(libc::STDOUT_FILENO, buf) + write(libc::STDOUT_FILENO, buf) + } + + fn write_vectored(&mut self, bufs: &[IoSlice<'_>]) -> io::Result { + write_vectored(libc::STDOUT_FILENO, bufs) + } + + #[inline] + fn is_write_vectored(&self) -> bool { + true } fn flush(&mut self) -> io::Result<()> { @@ -40,7 +42,16 @@ impl Stderr { impl io::Write for Stderr { fn write(&mut self, buf: &[u8]) -> io::Result { - _write(libc::STDERR_FILENO, buf) + write(libc::STDERR_FILENO, buf) + } + + fn write_vectored(&mut self, bufs: &[IoSlice<'_>]) -> io::Result { + write_vectored(libc::STDERR_FILENO, bufs) + } + + #[inline] + fn is_write_vectored(&self) -> bool { + true } fn flush(&mut self) -> io::Result<()> { @@ -48,7 +59,7 @@ impl io::Write for Stderr { } } -pub const STDIN_BUF_SIZE: usize = 0; +pub const STDIN_BUF_SIZE: usize = unsupported_stdio::STDIN_BUF_SIZE; pub fn is_ebadf(_err: &io::Error) -> bool { true @@ -58,24 +69,24 @@ pub fn panic_output() -> Option { Some(Stderr) } -fn _write(fd: i32, message: &[u8]) -> io::Result { - let mut iov = libc::iovec { iov_base: message.as_ptr() as *mut _, iov_len: message.len() }; - loop { - // SAFETY: syscall, safe arguments. - let ret = unsafe { libc::writev(fd, &iov, 1) }; - if ret < 0 { - return Err(io::Error::last_os_error()); - } - let ret = ret as usize; - if ret > iov.iov_len { - return Err(io::Error::last_os_error()); - } - if ret == iov.iov_len { - return Ok(message.len()); - } - // SAFETY: ret has been checked to be less than the length of - // the buffer - iov.iov_base = unsafe { iov.iov_base.add(ret) }; - iov.iov_len -= ret; +fn write(fd: i32, buf: &[u8]) -> io::Result { + let iov = libc::iovec { iov_base: buf.as_ptr() as *mut _, iov_len: buf.len() }; + // SAFETY: syscall, safe arguments. + let ret = unsafe { libc::writev(fd, &iov, 1) }; + // This check includes ret < 0, since the length is at most isize::MAX. + if ret as usize > iov.iov_len { + return Err(io::Error::last_os_error()); + } + Ok(ret as usize) +} + +fn write_vectored(fd: i32, bufs: &[IoSlice<'_>]) -> io::Result { + let iov = bufs.as_ptr() as *const libc::iovec; + let len = cmp::min(bufs.len(), libc::c_int::MAX as usize) as libc::c_int; + // SAFETY: syscall, safe arguments. + let ret = unsafe { libc::writev(fd, iov, len) }; + if ret < 0 { + return Err(io::Error::last_os_error()); } + Ok(ret as usize) } From ed35b9be289ba91771d218bd9debc8e30ecd8d83 Mon Sep 17 00:00:00 2001 From: Nikolai Kuklin Date: Fri, 28 Mar 2025 18:12:18 +0100 Subject: [PATCH 02/15] Add `slice::align_to_uninit_mut` --- library/core/src/slice/mod.rs | 53 +++++++++++++++++++++++++++++++++-- 1 file changed, 51 insertions(+), 2 deletions(-) diff --git a/library/core/src/slice/mod.rs b/library/core/src/slice/mod.rs index 5bb7243c4491b..058515efdfd74 100644 --- a/library/core/src/slice/mod.rs +++ b/library/core/src/slice/mod.rs @@ -8,7 +8,7 @@ use crate::cmp::Ordering::{self, Equal, Greater, Less}; use crate::intrinsics::{exact_div, unchecked_sub}; -use crate::mem::{self, SizedTypeProperties}; +use crate::mem::{self, MaybeUninit, SizedTypeProperties}; use crate::num::NonZero; use crate::ops::{OneSidedRange, OneSidedRangeBound, Range, RangeBounds, RangeInclusive}; use crate::panic::const_panic; @@ -4589,7 +4589,7 @@ impl [T] { // or generate worse code otherwise. This is also why we need to go // through a raw pointer here. let slice: *mut [T] = self; - let mut arr: mem::MaybeUninit<[&mut I::Output; N]> = mem::MaybeUninit::uninit(); + let mut arr: MaybeUninit<[&mut I::Output; N]> = MaybeUninit::uninit(); let arr_ptr = arr.as_mut_ptr(); // SAFETY: We expect `indices` to contain disjunct values that are @@ -4774,6 +4774,55 @@ impl [T] { } } +impl [MaybeUninit] { + /// Transmutes the mutable uninitialized slice to a mutable uninitialized slice of + /// another type, ensuring alignment of the types is maintained. + /// + /// This is a safe wrapper around [`slice::align_to_mut`], so inherits the same + /// guarantees as that method. + /// + /// # Examples + /// + /// ``` + /// #![feature(align_to_uninit_mut)] + /// use std::mem::MaybeUninit; + /// + /// pub struct BumpAllocator<'scope> { + /// memory: &'scope mut [MaybeUninit], + /// } + /// + /// impl<'scope> BumpAllocator<'scope> { + /// pub fn new(memory: &'scope mut [MaybeUninit]) -> Self { + /// Self { memory } + /// } + /// pub fn try_alloc_uninit(&mut self) -> Option<&'scope mut MaybeUninit> { + /// let first_end = self.memory.as_ptr().align_offset(align_of::()) + size_of::(); + /// let prefix = self.memory.split_off_mut(..first_end)?; + /// Some(&mut prefix.align_to_uninit_mut::().1[0]) + /// } + /// pub fn try_alloc_u32(&mut self, value: u32) -> Option<&'scope mut u32> { + /// let uninit = self.try_alloc_uninit()?; + /// Some(uninit.write(value)) + /// } + /// } + /// + /// let mut memory = [MaybeUninit::::uninit(); 10]; + /// let mut allocator = BumpAllocator::new(&mut memory); + /// let v = allocator.try_alloc_u32(42); + /// assert_eq!(v, Some(&mut 42)); + /// ``` + #[unstable(feature = "align_to_uninit_mut", issue = "139062")] + #[inline] + #[must_use] + pub fn align_to_uninit_mut(&mut self) -> (&mut Self, &mut [MaybeUninit], &mut Self) { + // SAFETY: `MaybeUninit` is transparent. Correct size and alignment are guaranteed by + // `align_to_mut` itself. Therefore the only thing that we have to ensure for a safe + // `transmute` is that the values are valid for the types involved. But for `MaybeUninit` + // any values are valid, so this operation is safe. + unsafe { self.align_to_mut() } + } +} + impl [[T; N]] { /// Takes a `&[[T; N]]`, and flattens it to a `&[T]`. /// From aff2bc7a8815b89dd8dccf2564f5178118c1a3ec Mon Sep 17 00:00:00 2001 From: Guillaume Gomez Date: Fri, 4 Apr 2025 14:44:45 +0200 Subject: [PATCH 03/15] Replace `rustc_lexer/unescape` with `rustc-literal-escaper` crate --- Cargo.lock | 10 +- compiler/rustc_ast/Cargo.toml | 2 +- compiler/rustc_ast/src/util/literal.rs | 2 +- compiler/rustc_lexer/src/lib.rs | 1 - compiler/rustc_lexer/src/unescape.rs | 438 ------------------ compiler/rustc_lexer/src/unescape/tests.rs | 286 ------------ compiler/rustc_parse/Cargo.toml | 1 + compiler/rustc_parse/src/lexer/mod.rs | 10 +- .../src/lexer/unescape_error_reporting.rs | 2 +- compiler/rustc_parse/src/parser/expr.rs | 2 +- compiler/rustc_parse_format/Cargo.toml | 1 + compiler/rustc_parse_format/src/lib.rs | 10 +- 12 files changed, 22 insertions(+), 743 deletions(-) delete mode 100644 compiler/rustc_lexer/src/unescape.rs delete mode 100644 compiler/rustc_lexer/src/unescape/tests.rs diff --git a/Cargo.lock b/Cargo.lock index 96526f7e9e7da..0bdddcc7a64de 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3150,6 +3150,12 @@ version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" +[[package]] +name = "rustc-literal-escaper" +version = "0.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfdd0fcb1409d38cb2d940400497e2384a4a04b8685ee92a0a7a8986ccd72115" + [[package]] name = "rustc-main" version = "0.0.0" @@ -3242,10 +3248,10 @@ version = "0.0.0" dependencies = [ "bitflags", "memchr", + "rustc-literal-escaper", "rustc_ast_ir", "rustc_data_structures", "rustc_index", - "rustc_lexer", "rustc_macros", "rustc_serialize", "rustc_span", @@ -4200,6 +4206,7 @@ name = "rustc_parse" version = "0.0.0" dependencies = [ "bitflags", + "rustc-literal-escaper", "rustc_ast", "rustc_ast_pretty", "rustc_data_structures", @@ -4222,6 +4229,7 @@ dependencies = [ name = "rustc_parse_format" version = "0.0.0" dependencies = [ + "rustc-literal-escaper", "rustc_index", "rustc_lexer", ] diff --git a/compiler/rustc_ast/Cargo.toml b/compiler/rustc_ast/Cargo.toml index 902287d032802..73b5732720480 100644 --- a/compiler/rustc_ast/Cargo.toml +++ b/compiler/rustc_ast/Cargo.toml @@ -10,7 +10,7 @@ memchr = "2.7.4" rustc_ast_ir = { path = "../rustc_ast_ir" } rustc_data_structures = { path = "../rustc_data_structures" } rustc_index = { path = "../rustc_index" } -rustc_lexer = { path = "../rustc_lexer" } +rustc-literal-escaper = "0.0.1" rustc_macros = { path = "../rustc_macros" } rustc_serialize = { path = "../rustc_serialize" } rustc_span = { path = "../rustc_span" } diff --git a/compiler/rustc_ast/src/util/literal.rs b/compiler/rustc_ast/src/util/literal.rs index 6896ac723fa58..b8526cf9d9529 100644 --- a/compiler/rustc_ast/src/util/literal.rs +++ b/compiler/rustc_ast/src/util/literal.rs @@ -2,7 +2,7 @@ use std::{ascii, fmt, str}; -use rustc_lexer::unescape::{ +use rustc_literal_escaper::{ MixedUnit, Mode, byte_from_char, unescape_byte, unescape_char, unescape_mixed, unescape_unicode, }; use rustc_span::{Span, Symbol, kw, sym}; diff --git a/compiler/rustc_lexer/src/lib.rs b/compiler/rustc_lexer/src/lib.rs index 61638e45253fd..f9c71b2fa651c 100644 --- a/compiler/rustc_lexer/src/lib.rs +++ b/compiler/rustc_lexer/src/lib.rs @@ -26,7 +26,6 @@ // tidy-alphabetical-end mod cursor; -pub mod unescape; #[cfg(test)] mod tests; diff --git a/compiler/rustc_lexer/src/unescape.rs b/compiler/rustc_lexer/src/unescape.rs deleted file mode 100644 index d6ea4249247f3..0000000000000 --- a/compiler/rustc_lexer/src/unescape.rs +++ /dev/null @@ -1,438 +0,0 @@ -//! Utilities for validating string and char literals and turning them into -//! values they represent. - -use std::ops::Range; -use std::str::Chars; - -use Mode::*; - -#[cfg(test)] -mod tests; - -/// Errors and warnings that can occur during string unescaping. They mostly -/// relate to malformed escape sequences, but there are a few that are about -/// other problems. -#[derive(Debug, PartialEq, Eq)] -pub enum EscapeError { - /// Expected 1 char, but 0 were found. - ZeroChars, - /// Expected 1 char, but more than 1 were found. - MoreThanOneChar, - - /// Escaped '\' character without continuation. - LoneSlash, - /// Invalid escape character (e.g. '\z'). - InvalidEscape, - /// Raw '\r' encountered. - BareCarriageReturn, - /// Raw '\r' encountered in raw string. - BareCarriageReturnInRawString, - /// Unescaped character that was expected to be escaped (e.g. raw '\t'). - EscapeOnlyChar, - - /// Numeric character escape is too short (e.g. '\x1'). - TooShortHexEscape, - /// Invalid character in numeric escape (e.g. '\xz') - InvalidCharInHexEscape, - /// Character code in numeric escape is non-ascii (e.g. '\xFF'). - OutOfRangeHexEscape, - - /// '\u' not followed by '{'. - NoBraceInUnicodeEscape, - /// Non-hexadecimal value in '\u{..}'. - InvalidCharInUnicodeEscape, - /// '\u{}' - EmptyUnicodeEscape, - /// No closing brace in '\u{..}', e.g. '\u{12'. - UnclosedUnicodeEscape, - /// '\u{_12}' - LeadingUnderscoreUnicodeEscape, - /// More than 6 characters in '\u{..}', e.g. '\u{10FFFF_FF}' - OverlongUnicodeEscape, - /// Invalid in-bound unicode character code, e.g. '\u{DFFF}'. - LoneSurrogateUnicodeEscape, - /// Out of bounds unicode character code, e.g. '\u{FFFFFF}'. - OutOfRangeUnicodeEscape, - - /// Unicode escape code in byte literal. - UnicodeEscapeInByte, - /// Non-ascii character in byte literal, byte string literal, or raw byte string literal. - NonAsciiCharInByte, - - // `\0` in a C string literal. - NulInCStr, - - /// After a line ending with '\', the next line contains whitespace - /// characters that are not skipped. - UnskippedWhitespaceWarning, - - /// After a line ending with '\', multiple lines are skipped. - MultipleSkippedLinesWarning, -} - -impl EscapeError { - /// Returns true for actual errors, as opposed to warnings. - pub fn is_fatal(&self) -> bool { - !matches!( - self, - EscapeError::UnskippedWhitespaceWarning | EscapeError::MultipleSkippedLinesWarning - ) - } -} - -/// Takes the contents of a unicode-only (non-mixed-utf8) literal (without -/// quotes) and produces a sequence of escaped characters or errors. -/// -/// Values are returned by invoking `callback`. For `Char` and `Byte` modes, -/// the callback will be called exactly once. -pub fn unescape_unicode(src: &str, mode: Mode, callback: &mut F) -where - F: FnMut(Range, Result), -{ - match mode { - Char | Byte => { - let mut chars = src.chars(); - let res = unescape_char_or_byte(&mut chars, mode); - callback(0..(src.len() - chars.as_str().len()), res); - } - Str | ByteStr => unescape_non_raw_common(src, mode, callback), - RawStr | RawByteStr => check_raw_common(src, mode, callback), - RawCStr => check_raw_common(src, mode, &mut |r, mut result| { - if let Ok('\0') = result { - result = Err(EscapeError::NulInCStr); - } - callback(r, result) - }), - CStr => unreachable!(), - } -} - -/// Used for mixed utf8 string literals, i.e. those that allow both unicode -/// chars and high bytes. -pub enum MixedUnit { - /// Used for ASCII chars (written directly or via `\x00`..`\x7f` escapes) - /// and Unicode chars (written directly or via `\u` escapes). - /// - /// For example, if '¥' appears in a string it is represented here as - /// `MixedUnit::Char('¥')`, and it will be appended to the relevant byte - /// string as the two-byte UTF-8 sequence `[0xc2, 0xa5]` - Char(char), - - /// Used for high bytes (`\x80`..`\xff`). - /// - /// For example, if `\xa5` appears in a string it is represented here as - /// `MixedUnit::HighByte(0xa5)`, and it will be appended to the relevant - /// byte string as the single byte `0xa5`. - HighByte(u8), -} - -impl From for MixedUnit { - fn from(c: char) -> Self { - MixedUnit::Char(c) - } -} - -impl From for MixedUnit { - fn from(n: u8) -> Self { - if n.is_ascii() { MixedUnit::Char(n as char) } else { MixedUnit::HighByte(n) } - } -} - -/// Takes the contents of a mixed-utf8 literal (without quotes) and produces -/// a sequence of escaped characters or errors. -/// -/// Values are returned by invoking `callback`. -pub fn unescape_mixed(src: &str, mode: Mode, callback: &mut F) -where - F: FnMut(Range, Result), -{ - match mode { - CStr => unescape_non_raw_common(src, mode, &mut |r, mut result| { - if let Ok(MixedUnit::Char('\0')) = result { - result = Err(EscapeError::NulInCStr); - } - callback(r, result) - }), - Char | Byte | Str | RawStr | ByteStr | RawByteStr | RawCStr => unreachable!(), - } -} - -/// Takes a contents of a char literal (without quotes), and returns an -/// unescaped char or an error. -pub fn unescape_char(src: &str) -> Result { - unescape_char_or_byte(&mut src.chars(), Char) -} - -/// Takes a contents of a byte literal (without quotes), and returns an -/// unescaped byte or an error. -pub fn unescape_byte(src: &str) -> Result { - unescape_char_or_byte(&mut src.chars(), Byte).map(byte_from_char) -} - -/// What kind of literal do we parse. -#[derive(Debug, Clone, Copy, PartialEq)] -pub enum Mode { - Char, - - Byte, - - Str, - RawStr, - - ByteStr, - RawByteStr, - - CStr, - RawCStr, -} - -impl Mode { - pub fn in_double_quotes(self) -> bool { - match self { - Str | RawStr | ByteStr | RawByteStr | CStr | RawCStr => true, - Char | Byte => false, - } - } - - /// Are `\x80`..`\xff` allowed? - fn allow_high_bytes(self) -> bool { - match self { - Char | Str => false, - Byte | ByteStr | CStr => true, - RawStr | RawByteStr | RawCStr => unreachable!(), - } - } - - /// Are unicode (non-ASCII) chars allowed? - #[inline] - fn allow_unicode_chars(self) -> bool { - match self { - Byte | ByteStr | RawByteStr => false, - Char | Str | RawStr | CStr | RawCStr => true, - } - } - - /// Are unicode escapes (`\u`) allowed? - fn allow_unicode_escapes(self) -> bool { - match self { - Byte | ByteStr => false, - Char | Str | CStr => true, - RawByteStr | RawStr | RawCStr => unreachable!(), - } - } - - pub fn prefix_noraw(self) -> &'static str { - match self { - Char | Str | RawStr => "", - Byte | ByteStr | RawByteStr => "b", - CStr | RawCStr => "c", - } - } -} - -fn scan_escape + From>( - chars: &mut Chars<'_>, - mode: Mode, -) -> Result { - // Previous character was '\\', unescape what follows. - let res: char = match chars.next().ok_or(EscapeError::LoneSlash)? { - '"' => '"', - 'n' => '\n', - 'r' => '\r', - 't' => '\t', - '\\' => '\\', - '\'' => '\'', - '0' => '\0', - 'x' => { - // Parse hexadecimal character code. - - let hi = chars.next().ok_or(EscapeError::TooShortHexEscape)?; - let hi = hi.to_digit(16).ok_or(EscapeError::InvalidCharInHexEscape)?; - - let lo = chars.next().ok_or(EscapeError::TooShortHexEscape)?; - let lo = lo.to_digit(16).ok_or(EscapeError::InvalidCharInHexEscape)?; - - let value = (hi * 16 + lo) as u8; - - return if !mode.allow_high_bytes() && !value.is_ascii() { - Err(EscapeError::OutOfRangeHexEscape) - } else { - // This may be a high byte, but that will only happen if `T` is - // `MixedUnit`, because of the `allow_high_bytes` check above. - Ok(T::from(value)) - }; - } - 'u' => return scan_unicode(chars, mode.allow_unicode_escapes()).map(T::from), - _ => return Err(EscapeError::InvalidEscape), - }; - Ok(T::from(res)) -} - -fn scan_unicode(chars: &mut Chars<'_>, allow_unicode_escapes: bool) -> Result { - // We've parsed '\u', now we have to parse '{..}'. - - if chars.next() != Some('{') { - return Err(EscapeError::NoBraceInUnicodeEscape); - } - - // First character must be a hexadecimal digit. - let mut n_digits = 1; - let mut value: u32 = match chars.next().ok_or(EscapeError::UnclosedUnicodeEscape)? { - '_' => return Err(EscapeError::LeadingUnderscoreUnicodeEscape), - '}' => return Err(EscapeError::EmptyUnicodeEscape), - c => c.to_digit(16).ok_or(EscapeError::InvalidCharInUnicodeEscape)?, - }; - - // First character is valid, now parse the rest of the number - // and closing brace. - loop { - match chars.next() { - None => return Err(EscapeError::UnclosedUnicodeEscape), - Some('_') => continue, - Some('}') => { - if n_digits > 6 { - return Err(EscapeError::OverlongUnicodeEscape); - } - - // Incorrect syntax has higher priority for error reporting - // than unallowed value for a literal. - if !allow_unicode_escapes { - return Err(EscapeError::UnicodeEscapeInByte); - } - - break std::char::from_u32(value).ok_or({ - if value > 0x10FFFF { - EscapeError::OutOfRangeUnicodeEscape - } else { - EscapeError::LoneSurrogateUnicodeEscape - } - }); - } - Some(c) => { - let digit: u32 = c.to_digit(16).ok_or(EscapeError::InvalidCharInUnicodeEscape)?; - n_digits += 1; - if n_digits > 6 { - // Stop updating value since we're sure that it's incorrect already. - continue; - } - value = value * 16 + digit; - } - }; - } -} - -#[inline] -fn ascii_check(c: char, allow_unicode_chars: bool) -> Result { - if allow_unicode_chars || c.is_ascii() { Ok(c) } else { Err(EscapeError::NonAsciiCharInByte) } -} - -fn unescape_char_or_byte(chars: &mut Chars<'_>, mode: Mode) -> Result { - let c = chars.next().ok_or(EscapeError::ZeroChars)?; - let res = match c { - '\\' => scan_escape(chars, mode), - '\n' | '\t' | '\'' => Err(EscapeError::EscapeOnlyChar), - '\r' => Err(EscapeError::BareCarriageReturn), - _ => ascii_check(c, mode.allow_unicode_chars()), - }?; - if chars.next().is_some() { - return Err(EscapeError::MoreThanOneChar); - } - Ok(res) -} - -/// Takes a contents of a string literal (without quotes) and produces a -/// sequence of escaped characters or errors. -fn unescape_non_raw_common + From>(src: &str, mode: Mode, callback: &mut F) -where - F: FnMut(Range, Result), -{ - let mut chars = src.chars(); - let allow_unicode_chars = mode.allow_unicode_chars(); // get this outside the loop - - // The `start` and `end` computation here is complicated because - // `skip_ascii_whitespace` makes us to skip over chars without counting - // them in the range computation. - while let Some(c) = chars.next() { - let start = src.len() - chars.as_str().len() - c.len_utf8(); - let res = match c { - '\\' => { - match chars.clone().next() { - Some('\n') => { - // Rust language specification requires us to skip whitespaces - // if unescaped '\' character is followed by '\n'. - // For details see [Rust language reference] - // (https://doc.rust-lang.org/reference/tokens.html#string-literals). - skip_ascii_whitespace(&mut chars, start, &mut |range, err| { - callback(range, Err(err)) - }); - continue; - } - _ => scan_escape::(&mut chars, mode), - } - } - '"' => Err(EscapeError::EscapeOnlyChar), - '\r' => Err(EscapeError::BareCarriageReturn), - _ => ascii_check(c, allow_unicode_chars).map(T::from), - }; - let end = src.len() - chars.as_str().len(); - callback(start..end, res); - } -} - -fn skip_ascii_whitespace(chars: &mut Chars<'_>, start: usize, callback: &mut F) -where - F: FnMut(Range, EscapeError), -{ - let tail = chars.as_str(); - let first_non_space = tail - .bytes() - .position(|b| b != b' ' && b != b'\t' && b != b'\n' && b != b'\r') - .unwrap_or(tail.len()); - if tail[1..first_non_space].contains('\n') { - // The +1 accounts for the escaping slash. - let end = start + first_non_space + 1; - callback(start..end, EscapeError::MultipleSkippedLinesWarning); - } - let tail = &tail[first_non_space..]; - if let Some(c) = tail.chars().next() { - if c.is_whitespace() { - // For error reporting, we would like the span to contain the character that was not - // skipped. The +1 is necessary to account for the leading \ that started the escape. - let end = start + first_non_space + c.len_utf8() + 1; - callback(start..end, EscapeError::UnskippedWhitespaceWarning); - } - } - *chars = tail.chars(); -} - -/// Takes a contents of a string literal (without quotes) and produces a -/// sequence of characters or errors. -/// NOTE: Raw strings do not perform any explicit character escaping, here we -/// only produce errors on bare CR. -fn check_raw_common(src: &str, mode: Mode, callback: &mut F) -where - F: FnMut(Range, Result), -{ - let mut chars = src.chars(); - let allow_unicode_chars = mode.allow_unicode_chars(); // get this outside the loop - - // The `start` and `end` computation here matches the one in - // `unescape_non_raw_common` for consistency, even though this function - // doesn't have to worry about skipping any chars. - while let Some(c) = chars.next() { - let start = src.len() - chars.as_str().len() - c.len_utf8(); - let res = match c { - '\r' => Err(EscapeError::BareCarriageReturnInRawString), - _ => ascii_check(c, allow_unicode_chars), - }; - let end = src.len() - chars.as_str().len(); - callback(start..end, res); - } -} - -#[inline] -pub fn byte_from_char(c: char) -> u8 { - let res = c as u32; - debug_assert!(res <= u8::MAX as u32, "guaranteed because of ByteStr"); - res as u8 -} diff --git a/compiler/rustc_lexer/src/unescape/tests.rs b/compiler/rustc_lexer/src/unescape/tests.rs deleted file mode 100644 index 5b99495f47581..0000000000000 --- a/compiler/rustc_lexer/src/unescape/tests.rs +++ /dev/null @@ -1,286 +0,0 @@ -use super::*; - -#[test] -fn test_unescape_char_bad() { - fn check(literal_text: &str, expected_error: EscapeError) { - assert_eq!(unescape_char(literal_text), Err(expected_error)); - } - - check("", EscapeError::ZeroChars); - check(r"\", EscapeError::LoneSlash); - - check("\n", EscapeError::EscapeOnlyChar); - check("\t", EscapeError::EscapeOnlyChar); - check("'", EscapeError::EscapeOnlyChar); - check("\r", EscapeError::BareCarriageReturn); - - check("spam", EscapeError::MoreThanOneChar); - check(r"\x0ff", EscapeError::MoreThanOneChar); - check(r#"\"a"#, EscapeError::MoreThanOneChar); - check(r"\na", EscapeError::MoreThanOneChar); - check(r"\ra", EscapeError::MoreThanOneChar); - check(r"\ta", EscapeError::MoreThanOneChar); - check(r"\\a", EscapeError::MoreThanOneChar); - check(r"\'a", EscapeError::MoreThanOneChar); - check(r"\0a", EscapeError::MoreThanOneChar); - check(r"\u{0}x", EscapeError::MoreThanOneChar); - check(r"\u{1F63b}}", EscapeError::MoreThanOneChar); - - check(r"\v", EscapeError::InvalidEscape); - check(r"\💩", EscapeError::InvalidEscape); - check(r"\●", EscapeError::InvalidEscape); - check("\\\r", EscapeError::InvalidEscape); - - check(r"\x", EscapeError::TooShortHexEscape); - check(r"\x0", EscapeError::TooShortHexEscape); - check(r"\xf", EscapeError::TooShortHexEscape); - check(r"\xa", EscapeError::TooShortHexEscape); - check(r"\xx", EscapeError::InvalidCharInHexEscape); - check(r"\xы", EscapeError::InvalidCharInHexEscape); - check(r"\x🦀", EscapeError::InvalidCharInHexEscape); - check(r"\xtt", EscapeError::InvalidCharInHexEscape); - check(r"\xff", EscapeError::OutOfRangeHexEscape); - check(r"\xFF", EscapeError::OutOfRangeHexEscape); - check(r"\x80", EscapeError::OutOfRangeHexEscape); - - check(r"\u", EscapeError::NoBraceInUnicodeEscape); - check(r"\u[0123]", EscapeError::NoBraceInUnicodeEscape); - check(r"\u{0x}", EscapeError::InvalidCharInUnicodeEscape); - check(r"\u{", EscapeError::UnclosedUnicodeEscape); - check(r"\u{0000", EscapeError::UnclosedUnicodeEscape); - check(r"\u{}", EscapeError::EmptyUnicodeEscape); - check(r"\u{_0000}", EscapeError::LeadingUnderscoreUnicodeEscape); - check(r"\u{0000000}", EscapeError::OverlongUnicodeEscape); - check(r"\u{FFFFFF}", EscapeError::OutOfRangeUnicodeEscape); - check(r"\u{ffffff}", EscapeError::OutOfRangeUnicodeEscape); - check(r"\u{ffffff}", EscapeError::OutOfRangeUnicodeEscape); - - check(r"\u{DC00}", EscapeError::LoneSurrogateUnicodeEscape); - check(r"\u{DDDD}", EscapeError::LoneSurrogateUnicodeEscape); - check(r"\u{DFFF}", EscapeError::LoneSurrogateUnicodeEscape); - - check(r"\u{D800}", EscapeError::LoneSurrogateUnicodeEscape); - check(r"\u{DAAA}", EscapeError::LoneSurrogateUnicodeEscape); - check(r"\u{DBFF}", EscapeError::LoneSurrogateUnicodeEscape); -} - -#[test] -fn test_unescape_char_good() { - fn check(literal_text: &str, expected_char: char) { - assert_eq!(unescape_char(literal_text), Ok(expected_char)); - } - - check("a", 'a'); - check("ы", 'ы'); - check("🦀", '🦀'); - - check(r#"\""#, '"'); - check(r"\n", '\n'); - check(r"\r", '\r'); - check(r"\t", '\t'); - check(r"\\", '\\'); - check(r"\'", '\''); - check(r"\0", '\0'); - - check(r"\x00", '\0'); - check(r"\x5a", 'Z'); - check(r"\x5A", 'Z'); - check(r"\x7f", 127 as char); - - check(r"\u{0}", '\0'); - check(r"\u{000000}", '\0'); - check(r"\u{41}", 'A'); - check(r"\u{0041}", 'A'); - check(r"\u{00_41}", 'A'); - check(r"\u{4__1__}", 'A'); - check(r"\u{1F63b}", '😻'); -} - -#[test] -fn test_unescape_str_warn() { - fn check(literal: &str, expected: &[(Range, Result)]) { - let mut unescaped = Vec::with_capacity(literal.len()); - unescape_unicode(literal, Mode::Str, &mut |range, res| unescaped.push((range, res))); - assert_eq!(unescaped, expected); - } - - // Check we can handle escaped newlines at the end of a file. - check("\\\n", &[]); - check("\\\n ", &[]); - - check( - "\\\n \u{a0} x", - &[ - (0..5, Err(EscapeError::UnskippedWhitespaceWarning)), - (3..5, Ok('\u{a0}')), - (5..6, Ok(' ')), - (6..7, Ok('x')), - ], - ); - check("\\\n \n x", &[(0..7, Err(EscapeError::MultipleSkippedLinesWarning)), (7..8, Ok('x'))]); -} - -#[test] -fn test_unescape_str_good() { - fn check(literal_text: &str, expected: &str) { - let mut buf = Ok(String::with_capacity(literal_text.len())); - unescape_unicode(literal_text, Mode::Str, &mut |range, c| { - if let Ok(b) = &mut buf { - match c { - Ok(c) => b.push(c), - Err(e) => buf = Err((range, e)), - } - } - }); - assert_eq!(buf.as_deref(), Ok(expected)) - } - - check("foo", "foo"); - check("", ""); - check(" \t\n", " \t\n"); - - check("hello \\\n world", "hello world"); - check("thread's", "thread's") -} - -#[test] -fn test_unescape_byte_bad() { - fn check(literal_text: &str, expected_error: EscapeError) { - assert_eq!(unescape_byte(literal_text), Err(expected_error)); - } - - check("", EscapeError::ZeroChars); - check(r"\", EscapeError::LoneSlash); - - check("\n", EscapeError::EscapeOnlyChar); - check("\t", EscapeError::EscapeOnlyChar); - check("'", EscapeError::EscapeOnlyChar); - check("\r", EscapeError::BareCarriageReturn); - - check("spam", EscapeError::MoreThanOneChar); - check(r"\x0ff", EscapeError::MoreThanOneChar); - check(r#"\"a"#, EscapeError::MoreThanOneChar); - check(r"\na", EscapeError::MoreThanOneChar); - check(r"\ra", EscapeError::MoreThanOneChar); - check(r"\ta", EscapeError::MoreThanOneChar); - check(r"\\a", EscapeError::MoreThanOneChar); - check(r"\'a", EscapeError::MoreThanOneChar); - check(r"\0a", EscapeError::MoreThanOneChar); - - check(r"\v", EscapeError::InvalidEscape); - check(r"\💩", EscapeError::InvalidEscape); - check(r"\●", EscapeError::InvalidEscape); - - check(r"\x", EscapeError::TooShortHexEscape); - check(r"\x0", EscapeError::TooShortHexEscape); - check(r"\xa", EscapeError::TooShortHexEscape); - check(r"\xf", EscapeError::TooShortHexEscape); - check(r"\xx", EscapeError::InvalidCharInHexEscape); - check(r"\xы", EscapeError::InvalidCharInHexEscape); - check(r"\x🦀", EscapeError::InvalidCharInHexEscape); - check(r"\xtt", EscapeError::InvalidCharInHexEscape); - - check(r"\u", EscapeError::NoBraceInUnicodeEscape); - check(r"\u[0123]", EscapeError::NoBraceInUnicodeEscape); - check(r"\u{0x}", EscapeError::InvalidCharInUnicodeEscape); - check(r"\u{", EscapeError::UnclosedUnicodeEscape); - check(r"\u{0000", EscapeError::UnclosedUnicodeEscape); - check(r"\u{}", EscapeError::EmptyUnicodeEscape); - check(r"\u{_0000}", EscapeError::LeadingUnderscoreUnicodeEscape); - check(r"\u{0000000}", EscapeError::OverlongUnicodeEscape); - - check("ы", EscapeError::NonAsciiCharInByte); - check("🦀", EscapeError::NonAsciiCharInByte); - - check(r"\u{0}", EscapeError::UnicodeEscapeInByte); - check(r"\u{000000}", EscapeError::UnicodeEscapeInByte); - check(r"\u{41}", EscapeError::UnicodeEscapeInByte); - check(r"\u{0041}", EscapeError::UnicodeEscapeInByte); - check(r"\u{00_41}", EscapeError::UnicodeEscapeInByte); - check(r"\u{4__1__}", EscapeError::UnicodeEscapeInByte); - check(r"\u{1F63b}", EscapeError::UnicodeEscapeInByte); - check(r"\u{0}x", EscapeError::UnicodeEscapeInByte); - check(r"\u{1F63b}}", EscapeError::UnicodeEscapeInByte); - check(r"\u{FFFFFF}", EscapeError::UnicodeEscapeInByte); - check(r"\u{ffffff}", EscapeError::UnicodeEscapeInByte); - check(r"\u{ffffff}", EscapeError::UnicodeEscapeInByte); - check(r"\u{DC00}", EscapeError::UnicodeEscapeInByte); - check(r"\u{DDDD}", EscapeError::UnicodeEscapeInByte); - check(r"\u{DFFF}", EscapeError::UnicodeEscapeInByte); - check(r"\u{D800}", EscapeError::UnicodeEscapeInByte); - check(r"\u{DAAA}", EscapeError::UnicodeEscapeInByte); - check(r"\u{DBFF}", EscapeError::UnicodeEscapeInByte); -} - -#[test] -fn test_unescape_byte_good() { - fn check(literal_text: &str, expected_byte: u8) { - assert_eq!(unescape_byte(literal_text), Ok(expected_byte)); - } - - check("a", b'a'); - - check(r#"\""#, b'"'); - check(r"\n", b'\n'); - check(r"\r", b'\r'); - check(r"\t", b'\t'); - check(r"\\", b'\\'); - check(r"\'", b'\''); - check(r"\0", b'\0'); - - check(r"\x00", b'\0'); - check(r"\x5a", b'Z'); - check(r"\x5A", b'Z'); - check(r"\x7f", 127); - check(r"\x80", 128); - check(r"\xff", 255); - check(r"\xFF", 255); -} - -#[test] -fn test_unescape_byte_str_good() { - fn check(literal_text: &str, expected: &[u8]) { - let mut buf = Ok(Vec::with_capacity(literal_text.len())); - unescape_unicode(literal_text, Mode::ByteStr, &mut |range, c| { - if let Ok(b) = &mut buf { - match c { - Ok(c) => b.push(byte_from_char(c)), - Err(e) => buf = Err((range, e)), - } - } - }); - assert_eq!(buf.as_deref(), Ok(expected)) - } - - check("foo", b"foo"); - check("", b""); - check(" \t\n", b" \t\n"); - - check("hello \\\n world", b"hello world"); - check("thread's", b"thread's") -} - -#[test] -fn test_unescape_raw_str() { - fn check(literal: &str, expected: &[(Range, Result)]) { - let mut unescaped = Vec::with_capacity(literal.len()); - unescape_unicode(literal, Mode::RawStr, &mut |range, res| unescaped.push((range, res))); - assert_eq!(unescaped, expected); - } - - check("\r", &[(0..1, Err(EscapeError::BareCarriageReturnInRawString))]); - check("\rx", &[(0..1, Err(EscapeError::BareCarriageReturnInRawString)), (1..2, Ok('x'))]); -} - -#[test] -fn test_unescape_raw_byte_str() { - fn check(literal: &str, expected: &[(Range, Result)]) { - let mut unescaped = Vec::with_capacity(literal.len()); - unescape_unicode(literal, Mode::RawByteStr, &mut |range, res| unescaped.push((range, res))); - assert_eq!(unescaped, expected); - } - - check("\r", &[(0..1, Err(EscapeError::BareCarriageReturnInRawString))]); - check("🦀", &[(0..4, Err(EscapeError::NonAsciiCharInByte))]); - check("🦀a", &[(0..4, Err(EscapeError::NonAsciiCharInByte)), (4..5, Ok('a'))]); -} diff --git a/compiler/rustc_parse/Cargo.toml b/compiler/rustc_parse/Cargo.toml index c9dcab0c871dd..dec1e09d8dd83 100644 --- a/compiler/rustc_parse/Cargo.toml +++ b/compiler/rustc_parse/Cargo.toml @@ -15,6 +15,7 @@ rustc_fluent_macro = { path = "../rustc_fluent_macro" } rustc_index = { path = "../rustc_index" } rustc_lexer = { path = "../rustc_lexer" } rustc_macros = { path = "../rustc_macros" } +rustc-literal-escaper = "0.0.1" rustc_session = { path = "../rustc_session" } rustc_span = { path = "../rustc_span" } thin-vec = "0.2.12" diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs index 1d17290e1c706..4935fc03256f4 100644 --- a/compiler/rustc_parse/src/lexer/mod.rs +++ b/compiler/rustc_parse/src/lexer/mod.rs @@ -6,8 +6,8 @@ use rustc_ast::tokenstream::TokenStream; use rustc_ast::util::unicode::contains_text_flow_control_chars; use rustc_errors::codes::*; use rustc_errors::{Applicability, Diag, DiagCtxtHandle, StashKey}; -use rustc_lexer::unescape::{self, EscapeError, Mode}; use rustc_lexer::{Base, Cursor, DocStyle, LiteralKind, RawStrError}; +use rustc_literal_escaper::{EscapeError, Mode, unescape_mixed, unescape_unicode}; use rustc_session::lint::BuiltinLintDiag; use rustc_session::lint::builtin::{ RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX, RUST_2024_GUARDED_STRING_INCOMPATIBLE_SYNTAX, @@ -970,9 +970,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> { postfix_len: u32, ) -> (token::LitKind, Symbol) { self.cook_common(kind, mode, start, end, prefix_len, postfix_len, |src, mode, callback| { - unescape::unescape_unicode(src, mode, &mut |span, result| { - callback(span, result.map(drop)) - }) + unescape_unicode(src, mode, &mut |span, result| callback(span, result.map(drop))) }) } @@ -986,9 +984,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> { postfix_len: u32, ) -> (token::LitKind, Symbol) { self.cook_common(kind, mode, start, end, prefix_len, postfix_len, |src, mode, callback| { - unescape::unescape_mixed(src, mode, &mut |span, result| { - callback(span, result.map(drop)) - }) + unescape_mixed(src, mode, &mut |span, result| callback(span, result.map(drop))) }) } } diff --git a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs index 2e066f0179c3f..ec59a1a01314e 100644 --- a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs +++ b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs @@ -4,7 +4,7 @@ use std::iter::once; use std::ops::Range; use rustc_errors::{Applicability, DiagCtxtHandle, ErrorGuaranteed}; -use rustc_lexer::unescape::{EscapeError, Mode}; +use rustc_literal_escaper::{EscapeError, Mode}; use rustc_span::{BytePos, Span}; use tracing::debug; diff --git a/compiler/rustc_parse/src/parser/expr.rs b/compiler/rustc_parse/src/parser/expr.rs index 841d967d934be..9c457f150a327 100644 --- a/compiler/rustc_parse/src/parser/expr.rs +++ b/compiler/rustc_parse/src/parser/expr.rs @@ -21,7 +21,7 @@ use rustc_ast::{ }; use rustc_data_structures::stack::ensure_sufficient_stack; use rustc_errors::{Applicability, Diag, PResult, StashKey, Subdiagnostic}; -use rustc_lexer::unescape::unescape_char; +use rustc_literal_escaper::unescape_char; use rustc_macros::Subdiagnostic; use rustc_session::errors::{ExprParenthesesNeeded, report_lit_error}; use rustc_session::lint::BuiltinLintDiag; diff --git a/compiler/rustc_parse_format/Cargo.toml b/compiler/rustc_parse_format/Cargo.toml index 289e062fb5e2b..b0b70cefb596c 100644 --- a/compiler/rustc_parse_format/Cargo.toml +++ b/compiler/rustc_parse_format/Cargo.toml @@ -6,6 +6,7 @@ edition = "2024" [dependencies] # tidy-alphabetical-start rustc_lexer = { path = "../rustc_lexer" } +rustc-literal-escaper = "0.0.1" # tidy-alphabetical-end [target.'cfg(target_pointer_width = "64")'.dev-dependencies] diff --git a/compiler/rustc_parse_format/src/lib.rs b/compiler/rustc_parse_format/src/lib.rs index 97931742985ef..c59e6cb5c33f7 100644 --- a/compiler/rustc_parse_format/src/lib.rs +++ b/compiler/rustc_parse_format/src/lib.rs @@ -18,7 +18,7 @@ pub use Alignment::*; pub use Count::*; pub use Position::*; -use rustc_lexer::unescape; +use rustc_literal_escaper::{Mode, unescape_unicode}; // Note: copied from rustc_span /// Range inside of a `Span` used for diagnostics when we only have access to relative positions. @@ -1094,11 +1094,9 @@ fn find_width_map_from_snippet( fn unescape_string(string: &str) -> Option { let mut buf = String::new(); let mut ok = true; - unescape::unescape_unicode(string, unescape::Mode::Str, &mut |_, unescaped_char| { - match unescaped_char { - Ok(c) => buf.push(c), - Err(_) => ok = false, - } + unescape_unicode(string, Mode::Str, &mut |_, unescaped_char| match unescaped_char { + Ok(c) => buf.push(c), + Err(_) => ok = false, }); ok.then_some(buf) From bba0d57bb2773c0298e1b37456754610f069dd4a Mon Sep 17 00:00:00 2001 From: Guillaume Gomez Date: Fri, 4 Apr 2025 14:59:14 +0200 Subject: [PATCH 04/15] Remove usage of `rustc_lexer::unescape` in rust-analyzer --- src/tools/rust-analyzer/Cargo.lock | 8 ++++++++ src/tools/rust-analyzer/crates/parser/Cargo.toml | 1 + src/tools/rust-analyzer/crates/parser/src/lexed_str.rs | 10 +++++----- src/tools/rust-analyzer/crates/syntax/Cargo.toml | 1 + .../rust-analyzer/crates/syntax/src/ast/token_ext.rs | 2 +- src/tools/rust-analyzer/crates/syntax/src/lib.rs | 9 +-------- .../rust-analyzer/crates/syntax/src/validation.rs | 8 ++++---- 7 files changed, 21 insertions(+), 18 deletions(-) diff --git a/src/tools/rust-analyzer/Cargo.lock b/src/tools/rust-analyzer/Cargo.lock index 1e1d68f778247..2aa776b508782 100644 --- a/src/tools/rust-analyzer/Cargo.lock +++ b/src/tools/rust-analyzer/Cargo.lock @@ -1270,6 +1270,7 @@ dependencies = [ "edition", "expect-test", "ra-ap-rustc_lexer", + "rustc-literal-escaper", "stdx", "tracing", ] @@ -1743,6 +1744,12 @@ version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "583034fd73374156e66797ed8e5b0d5690409c9226b22d87cb7f19821c05d152" +[[package]] +name = "rustc-literal-escaper" +version = "0.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfdd0fcb1409d38cb2d940400497e2384a4a04b8685ee92a0a7a8986ccd72115" + [[package]] name = "rustc-stable-hash" version = "0.1.1" @@ -1982,6 +1989,7 @@ dependencies = [ "rayon", "rowan", "rustc-hash 2.0.0", + "rustc-literal-escaper", "rustc_apfloat", "smol_str", "stdx", diff --git a/src/tools/rust-analyzer/crates/parser/Cargo.toml b/src/tools/rust-analyzer/crates/parser/Cargo.toml index a36a39dbee6ce..292b7de255a86 100644 --- a/src/tools/rust-analyzer/crates/parser/Cargo.toml +++ b/src/tools/rust-analyzer/crates/parser/Cargo.toml @@ -14,6 +14,7 @@ rust-version.workspace = true [dependencies] drop_bomb = "0.1.5" ra-ap-rustc_lexer.workspace = true +rustc-literal-escaper = "0.0.1" tracing = { workspace = true, optional = true } edition.workspace = true diff --git a/src/tools/rust-analyzer/crates/parser/src/lexed_str.rs b/src/tools/rust-analyzer/crates/parser/src/lexed_str.rs index c97596d5097ec..b0bbc2fa5ff1f 100644 --- a/src/tools/rust-analyzer/crates/parser/src/lexed_str.rs +++ b/src/tools/rust-analyzer/crates/parser/src/lexed_str.rs @@ -10,7 +10,7 @@ use std::ops; -use rustc_lexer::unescape::{EscapeError, Mode}; +use rustc_literal_escaper::{EscapeError, Mode, unescape_byte, unescape_char, unescape_mixed, unescape_unicode}; use crate::{ Edition, @@ -282,7 +282,7 @@ impl<'a> Converter<'a> { let text = &self.res.text[self.offset + 1..][..len - 1]; let i = text.rfind('\'').unwrap(); let text = &text[..i]; - if let Err(e) = rustc_lexer::unescape::unescape_char(text) { + if let Err(e) = unescape_char(text) { err = error_to_diagnostic_message(e, Mode::Char); } } @@ -295,7 +295,7 @@ impl<'a> Converter<'a> { let text = &self.res.text[self.offset + 2..][..len - 2]; let i = text.rfind('\'').unwrap(); let text = &text[..i]; - if let Err(e) = rustc_lexer::unescape::unescape_byte(text) { + if let Err(e) = unescape_byte(text) { err = error_to_diagnostic_message(e, Mode::Byte); } } @@ -402,14 +402,14 @@ fn unescape_string_error_message(text: &str, mode: Mode) -> &'static str { let mut error_message = ""; match mode { Mode::CStr => { - rustc_lexer::unescape::unescape_mixed(text, mode, &mut |_, res| { + unescape_mixed(text, mode, &mut |_, res| { if let Err(e) = res { error_message = error_to_diagnostic_message(e, mode); } }); } Mode::ByteStr | Mode::Str => { - rustc_lexer::unescape::unescape_unicode(text, mode, &mut |_, res| { + unescape_unicode(text, mode, &mut |_, res| { if let Err(e) = res { error_message = error_to_diagnostic_message(e, mode); } diff --git a/src/tools/rust-analyzer/crates/syntax/Cargo.toml b/src/tools/rust-analyzer/crates/syntax/Cargo.toml index 3fe6e01dc3c92..4fb628685a1b5 100644 --- a/src/tools/rust-analyzer/crates/syntax/Cargo.toml +++ b/src/tools/rust-analyzer/crates/syntax/Cargo.toml @@ -17,6 +17,7 @@ either.workspace = true itertools.workspace = true rowan = "=0.15.15" rustc-hash.workspace = true +rustc-literal-escaper = "0.0.1" indexmap.workspace = true smol_str.workspace = true triomphe.workspace = true diff --git a/src/tools/rust-analyzer/crates/syntax/src/ast/token_ext.rs b/src/tools/rust-analyzer/crates/syntax/src/ast/token_ext.rs index df851ab5b2525..08bffb9e3aad3 100644 --- a/src/tools/rust-analyzer/crates/syntax/src/ast/token_ext.rs +++ b/src/tools/rust-analyzer/crates/syntax/src/ast/token_ext.rs @@ -2,7 +2,7 @@ use std::{borrow::Cow, num::ParseIntError}; -use rustc_lexer::unescape::{ +use rustc_literal_escaper::{ unescape_byte, unescape_char, unescape_mixed, unescape_unicode, EscapeError, MixedUnit, Mode, }; use stdx::always; diff --git a/src/tools/rust-analyzer/crates/syntax/src/lib.rs b/src/tools/rust-analyzer/crates/syntax/src/lib.rs index c9e9f468dca74..21f1ea5f913a7 100644 --- a/src/tools/rust-analyzer/crates/syntax/src/lib.rs +++ b/src/tools/rust-analyzer/crates/syntax/src/lib.rs @@ -19,13 +19,6 @@ //! [RFC]: //! [Swift]: -#![cfg_attr(feature = "in-rust-tree", feature(rustc_private))] - -#[cfg(not(feature = "in-rust-tree"))] -extern crate ra_ap_rustc_lexer as rustc_lexer; -#[cfg(feature = "in-rust-tree")] -extern crate rustc_lexer; - mod parsing; mod ptr; mod syntax_error; @@ -64,7 +57,7 @@ pub use rowan::{ api::Preorder, Direction, GreenNode, NodeOrToken, SyntaxText, TextRange, TextSize, TokenAtOffset, WalkEvent, }; -pub use rustc_lexer::unescape; +pub use rustc_literal_escaper as unescape; pub use smol_str::{format_smolstr, SmolStr, SmolStrBuilder, ToSmolStr}; /// `Parse` is the result of the parsing: a syntax tree and a collection of diff --git a/src/tools/rust-analyzer/crates/syntax/src/validation.rs b/src/tools/rust-analyzer/crates/syntax/src/validation.rs index 85eefac734b20..71c5f9a946db0 100644 --- a/src/tools/rust-analyzer/crates/syntax/src/validation.rs +++ b/src/tools/rust-analyzer/crates/syntax/src/validation.rs @@ -5,7 +5,7 @@ mod block; use rowan::Direction; -use rustc_lexer::unescape::{self, unescape_mixed, unescape_unicode, Mode}; +use rustc_literal_escaper::{unescape_mixed, unescape_unicode, EscapeError, Mode}; use crate::{ algo, @@ -44,8 +44,8 @@ pub(crate) fn validate(root: &SyntaxNode, errors: &mut Vec) { } } -fn rustc_unescape_error_to_string(err: unescape::EscapeError) -> (&'static str, bool) { - use unescape::EscapeError as EE; +fn rustc_unescape_error_to_string(err: EscapeError) -> (&'static str, bool) { + use rustc_literal_escaper::EscapeError as EE; #[rustfmt::skip] let err_message = match err { @@ -127,7 +127,7 @@ fn validate_literal(literal: ast::Literal, acc: &mut Vec) { let text = token.text(); // FIXME: lift this lambda refactor to `fn` (https://github.com/rust-lang/rust-analyzer/pull/2834#discussion_r366199205) - let mut push_err = |prefix_len, off, err: unescape::EscapeError| { + let mut push_err = |prefix_len, off, err: EscapeError| { let off = token.text_range().start() + TextSize::try_from(off + prefix_len).unwrap(); let (message, is_err) = rustc_unescape_error_to_string(err); // FIXME: Emit lexer warnings From 13400c2c10f04fe49f1765ceb2de837c86b6f915 Mon Sep 17 00:00:00 2001 From: Guillaume Gomez Date: Fri, 4 Apr 2025 15:04:17 +0200 Subject: [PATCH 05/15] Add `_value` methods to proc_macro lib --- library/proc_macro/Cargo.toml | 1 + library/proc_macro/src/lib.rs | 114 ++++++++++++++++++++++++++++++++++ 2 files changed, 115 insertions(+) diff --git a/library/proc_macro/Cargo.toml b/library/proc_macro/Cargo.toml index 72cb4e4166f8e..f65c5e1a330ce 100644 --- a/library/proc_macro/Cargo.toml +++ b/library/proc_macro/Cargo.toml @@ -9,3 +9,4 @@ std = { path = "../std" } # `core` when resolving doc links. Without this line a different `core` will be # loaded from sysroot causing duplicate lang items and other similar errors. core = { path = "../core" } +rustc-literal-escaper = "0.0.1" diff --git a/library/proc_macro/src/lib.rs b/library/proc_macro/src/lib.rs index d9141eab5919f..35a8af03ba432 100644 --- a/library/proc_macro/src/lib.rs +++ b/library/proc_macro/src/lib.rs @@ -51,11 +51,24 @@ use std::{error, fmt}; #[unstable(feature = "proc_macro_diagnostic", issue = "54140")] pub use diagnostic::{Diagnostic, Level, MultiSpan}; +#[unstable(feature = "proc_macro_value", issue = "136652")] +pub use rustc_literal_escaper::EscapeError; +use rustc_literal_escaper::{MixedUnit, Mode, byte_from_char, unescape_mixed, unescape_unicode}; #[unstable(feature = "proc_macro_totokens", issue = "130977")] pub use to_tokens::ToTokens; use crate::escape::{EscapeOptions, escape_bytes}; +/// Errors returned when trying to retrieve a literal unescaped value. +#[unstable(feature = "proc_macro_value", issue = "136652")] +#[derive(Debug, PartialEq, Eq)] +pub enum ConversionErrorKind { + /// The literal failed to be escaped, take a look at [`EscapeError`] for more information. + FailedToUnescape(EscapeError), + /// Trying to convert a literal with the wrong type. + InvalidLiteralKind, +} + /// Determines whether proc_macro has been made accessible to the currently /// running program. /// @@ -1451,6 +1464,107 @@ impl Literal { } }) } + + /// Returns the unescaped string value if the current literal is a string or a string literal. + #[unstable(feature = "proc_macro_value", issue = "136652")] + pub fn str_value(&self) -> Result { + self.0.symbol.with(|symbol| match self.0.kind { + bridge::LitKind::Str => { + if symbol.contains('\\') { + let mut buf = String::with_capacity(symbol.len()); + let mut error = None; + // Force-inlining here is aggressive but the closure is + // called on every char in the string, so it can be hot in + // programs with many long strings containing escapes. + unescape_unicode( + symbol, + Mode::Str, + &mut #[inline(always)] + |_, c| match c { + Ok(c) => buf.push(c), + Err(err) => { + if err.is_fatal() { + error = Some(ConversionErrorKind::FailedToUnescape(err)); + } + } + }, + ); + if let Some(error) = error { Err(error) } else { Ok(buf) } + } else { + Ok(symbol.to_string()) + } + } + bridge::LitKind::StrRaw(_) => Ok(symbol.to_string()), + _ => Err(ConversionErrorKind::InvalidLiteralKind), + }) + } + + /// Returns the unescaped string value if the current literal is a c-string or a c-string + /// literal. + #[unstable(feature = "proc_macro_value", issue = "136652")] + pub fn cstr_value(&self) -> Result, ConversionErrorKind> { + self.0.symbol.with(|symbol| match self.0.kind { + bridge::LitKind::CStr => { + let mut error = None; + let mut buf = Vec::with_capacity(symbol.len()); + + unescape_mixed(symbol, Mode::CStr, &mut |_span, c| match c { + Ok(MixedUnit::Char(c)) => { + buf.extend_from_slice(c.encode_utf8(&mut [0; 4]).as_bytes()) + } + Ok(MixedUnit::HighByte(b)) => buf.push(b), + Err(err) => { + if err.is_fatal() { + error = Some(ConversionErrorKind::FailedToUnescape(err)); + } + } + }); + if let Some(error) = error { + Err(error) + } else { + buf.push(0); + Ok(buf) + } + } + bridge::LitKind::CStrRaw(_) => { + // Raw strings have no escapes so we can convert the symbol + // directly to a `Lrc` after appending the terminating NUL + // char. + let mut buf = symbol.to_owned().into_bytes(); + buf.push(0); + Ok(buf) + } + _ => Err(ConversionErrorKind::InvalidLiteralKind), + }) + } + + /// Returns the unescaped string value if the current literal is a byte string or a byte string + /// literal. + #[unstable(feature = "proc_macro_value", issue = "136652")] + pub fn byte_str_value(&self) -> Result, ConversionErrorKind> { + self.0.symbol.with(|symbol| match self.0.kind { + bridge::LitKind::ByteStr => { + let mut buf = Vec::with_capacity(symbol.len()); + let mut error = None; + + unescape_unicode(symbol, Mode::ByteStr, &mut |_, c| match c { + Ok(c) => buf.push(byte_from_char(c)), + Err(err) => { + if err.is_fatal() { + error = Some(ConversionErrorKind::FailedToUnescape(err)); + } + } + }); + if let Some(error) = error { Err(error) } else { Ok(buf) } + } + bridge::LitKind::ByteStrRaw(_) => { + // Raw strings have no escapes so we can convert the symbol + // directly to a `Lrc`. + Ok(symbol.to_owned().into_bytes()) + } + _ => Err(ConversionErrorKind::InvalidLiteralKind), + }) + } } /// Parse a single literal from its stringified representation. From e6498670868810b7d232223487e8f2019a6ebdbb Mon Sep 17 00:00:00 2001 From: Kornel Date: Sun, 16 Mar 2025 16:17:33 +0000 Subject: [PATCH 06/15] Optimize slice Iter::nth --- library/core/src/slice/iter.rs | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/library/core/src/slice/iter.rs b/library/core/src/slice/iter.rs index d48248749c265..b1a19e3427d64 100644 --- a/library/core/src/slice/iter.rs +++ b/library/core/src/slice/iter.rs @@ -1536,17 +1536,15 @@ impl<'a, T> Iterator for Chunks<'a, T> { #[inline] fn nth(&mut self, n: usize) -> Option { let (start, overflow) = n.overflowing_mul(self.chunk_size); - if start >= self.v.len() || overflow { - self.v = &[]; - None - } else { - let end = match start.checked_add(self.chunk_size) { - Some(sum) => cmp::min(self.v.len(), sum), - None => self.v.len(), - }; - let nth = &self.v[start..end]; - self.v = &self.v[end..]; + // min(len) makes a wrong start harmless, but enables optimizing this to brachless code + let chunk_start = &self.v[start.min(self.v.len())..]; + let (nth, remainder) = chunk_start.split_at(self.chunk_size.min(chunk_start.len())); + if !overflow && start < self.v.len() { + self.v = remainder; Some(nth) + } else { + self.v = &self.v[..0]; // cheaper than &[] + None } } From 89477aff04a4d8f837a50deb62416cf33ecf766c Mon Sep 17 00:00:00 2001 From: Kornel Date: Sun, 16 Mar 2025 16:48:41 +0000 Subject: [PATCH 07/15] Optimize slice Windows::nth Generates branchless code --- library/core/src/slice/iter.rs | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/library/core/src/slice/iter.rs b/library/core/src/slice/iter.rs index b1a19e3427d64..e9c27ddbbbaee 100644 --- a/library/core/src/slice/iter.rs +++ b/library/core/src/slice/iter.rs @@ -1380,14 +1380,16 @@ impl<'a, T> Iterator for Windows<'a, T> { #[inline] fn nth(&mut self, n: usize) -> Option { - let (end, overflow) = self.size.get().overflowing_add(n); - if end > self.v.len() || overflow { - self.v = &[]; - None - } else { - let nth = &self.v[n..end]; - self.v = &self.v[n + 1..]; + let size = self.size.get(); + if let Some(rest) = self.v.get(n..) + && let Some(nth) = rest.get(..size) + { + self.v = &rest[1..]; Some(nth) + } else { + // setting length to 0 is cheaper than overwriting the pointer when assigning &[] + self.v = &self.v[..0]; + None } } From 9f2f1aa08354019113f39672228fc62583e6a1ed Mon Sep 17 00:00:00 2001 From: Kornel Date: Sun, 16 Mar 2025 16:17:16 +0000 Subject: [PATCH 08/15] Optimize setting the slice to empty in slice Iter This avoids generating extra instructions that needlessly modify the slice's pointer --- library/core/src/slice/iter.rs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/library/core/src/slice/iter.rs b/library/core/src/slice/iter.rs index e9c27ddbbbaee..f507ee563ac07 100644 --- a/library/core/src/slice/iter.rs +++ b/library/core/src/slice/iter.rs @@ -1388,7 +1388,7 @@ impl<'a, T> Iterator for Windows<'a, T> { Some(nth) } else { // setting length to 0 is cheaper than overwriting the pointer when assigning &[] - self.v = &self.v[..0]; + self.v = &self.v[..0]; // cheaper than &[] None } } @@ -1429,7 +1429,7 @@ impl<'a, T> DoubleEndedIterator for Windows<'a, T> { fn nth_back(&mut self, n: usize) -> Option { let (end, overflow) = self.v.len().overflowing_sub(n); if end < self.size.get() || overflow { - self.v = &[]; + self.v = &self.v[..0]; // cheaper than &[] None } else { let ret = &self.v[end - self.size.get()..end]; @@ -1609,7 +1609,7 @@ impl<'a, T> DoubleEndedIterator for Chunks<'a, T> { fn nth_back(&mut self, n: usize) -> Option { let len = self.len(); if n >= len { - self.v = &[]; + self.v = &self.v[..0]; // cheaper than &[] None } else { let start = (len - 1 - n) * self.chunk_size; @@ -1933,7 +1933,7 @@ impl<'a, T> Iterator for ChunksExact<'a, T> { fn nth(&mut self, n: usize) -> Option { let (start, overflow) = n.overflowing_mul(self.chunk_size); if start >= self.v.len() || overflow { - self.v = &[]; + self.v = &self.v[..0]; // cheaper than &[] None } else { let (_, snd) = self.v.split_at(start); @@ -1971,7 +1971,7 @@ impl<'a, T> DoubleEndedIterator for ChunksExact<'a, T> { fn nth_back(&mut self, n: usize) -> Option { let len = self.len(); if n >= len { - self.v = &[]; + self.v = &self.v[..0]; // cheaper than &[] None } else { let start = (len - 1 - n) * self.chunk_size; @@ -2638,7 +2638,7 @@ impl<'a, T> Iterator for RChunks<'a, T> { fn nth(&mut self, n: usize) -> Option { let (end, overflow) = n.overflowing_mul(self.chunk_size); if end >= self.v.len() || overflow { - self.v = &[]; + self.v = &self.v[..0]; // cheaper than &[] None } else { // Can't underflow because of the check above @@ -2695,7 +2695,7 @@ impl<'a, T> DoubleEndedIterator for RChunks<'a, T> { fn nth_back(&mut self, n: usize) -> Option { let len = self.len(); if n >= len { - self.v = &[]; + self.v = &self.v[..0]; // cheaper than &[] None } else { // can't underflow because `n < len` @@ -3023,7 +3023,7 @@ impl<'a, T> Iterator for RChunksExact<'a, T> { fn nth(&mut self, n: usize) -> Option { let (end, overflow) = n.overflowing_mul(self.chunk_size); if end >= self.v.len() || overflow { - self.v = &[]; + self.v = &self.v[..0]; // cheaper than &[] None } else { let (fst, _) = self.v.split_at(self.v.len() - end); @@ -3062,7 +3062,7 @@ impl<'a, T> DoubleEndedIterator for RChunksExact<'a, T> { fn nth_back(&mut self, n: usize) -> Option { let len = self.len(); if n >= len { - self.v = &[]; + self.v = &self.v[..0]; // cheaper than &[] None } else { // now that we know that `n` corresponds to a chunk, From 2b533e6b49e6c918389f7a38e6b6e7e030f30fed Mon Sep 17 00:00:00 2001 From: Guillaume Gomez Date: Fri, 4 Apr 2025 15:05:44 +0200 Subject: [PATCH 09/15] bootstrap: Only add `rustc_randomized_layouts` if the crate has it --- library/Cargo.lock | 5 +++++ library/proc_macro/Cargo.toml | 2 +- library/proc_macro/src/lib.rs | 1 + src/bootstrap/src/lib.rs | 2 +- 4 files changed, 8 insertions(+), 2 deletions(-) diff --git a/library/Cargo.lock b/library/Cargo.lock index 23d9d926eba3b..c45a84b2e340c 100644 --- a/library/Cargo.lock +++ b/library/Cargo.lock @@ -235,6 +235,7 @@ name = "proc_macro" version = "0.0.0" dependencies = [ "core", + "rustc-literal-escaper", "std", ] @@ -310,6 +311,10 @@ dependencies = [ "rustc-std-workspace-core", ] +[[package]] +name = "rustc-literal-escaper" +version = "0.0.1" + [[package]] name = "rustc-std-workspace-alloc" version = "1.99.0" diff --git a/library/proc_macro/Cargo.toml b/library/proc_macro/Cargo.toml index f65c5e1a330ce..b8bc2a3af4cd4 100644 --- a/library/proc_macro/Cargo.toml +++ b/library/proc_macro/Cargo.toml @@ -9,4 +9,4 @@ std = { path = "../std" } # `core` when resolving doc links. Without this line a different `core` will be # loaded from sysroot causing duplicate lang items and other similar errors. core = { path = "../core" } -rustc-literal-escaper = "0.0.1" +rustc-literal-escaper = { version = "0.0.2", features = ["rustc-dep-of-std"] } diff --git a/library/proc_macro/src/lib.rs b/library/proc_macro/src/lib.rs index 35a8af03ba432..f1cf0c5a2db7c 100644 --- a/library/proc_macro/src/lib.rs +++ b/library/proc_macro/src/lib.rs @@ -27,6 +27,7 @@ #![feature(panic_can_unwind)] #![feature(restricted_std)] #![feature(rustc_attrs)] +#![feature(stmt_expr_attributes)] #![feature(extend_one)] #![recursion_limit = "256"] #![allow(internal_features)] diff --git a/src/bootstrap/src/lib.rs b/src/bootstrap/src/lib.rs index 843d474f92de8..7c60e0421421c 100644 --- a/src/bootstrap/src/lib.rs +++ b/src/bootstrap/src/lib.rs @@ -748,7 +748,7 @@ impl Build { features.push("llvm"); } // keep in sync with `bootstrap/compile.rs:rustc_cargo_env` - if self.config.rust_randomize_layout { + if self.config.rust_randomize_layout && check("rustc_randomized_layouts") { features.push("rustc_randomized_layouts"); } From 2e3a16187189ae8e7bdb2ac98a237e11540f993a Mon Sep 17 00:00:00 2001 From: Guillaume Gomez Date: Fri, 4 Apr 2025 22:06:10 +0200 Subject: [PATCH 10/15] Update `rustc-literal-escaper` version to `0.0.2` --- Cargo.lock | 4 ++-- compiler/rustc_ast/Cargo.toml | 2 +- compiler/rustc_parse/Cargo.toml | 2 +- compiler/rustc_parse_format/Cargo.toml | 2 +- library/Cargo.lock | 7 ++++++- src/tools/rust-analyzer/Cargo.lock | 4 ++-- src/tools/rust-analyzer/crates/parser/Cargo.toml | 2 +- src/tools/rust-analyzer/crates/syntax/Cargo.toml | 2 +- 8 files changed, 15 insertions(+), 10 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0bdddcc7a64de..699dd375dd25b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3152,9 +3152,9 @@ checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" [[package]] name = "rustc-literal-escaper" -version = "0.0.1" +version = "0.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cfdd0fcb1409d38cb2d940400497e2384a4a04b8685ee92a0a7a8986ccd72115" +checksum = "0041b6238913c41fe704213a4a9329e2f685a156d1781998128b4149c230ad04" [[package]] name = "rustc-main" diff --git a/compiler/rustc_ast/Cargo.toml b/compiler/rustc_ast/Cargo.toml index 73b5732720480..b2d3b90fc4494 100644 --- a/compiler/rustc_ast/Cargo.toml +++ b/compiler/rustc_ast/Cargo.toml @@ -7,10 +7,10 @@ edition = "2024" # tidy-alphabetical-start bitflags = "2.4.1" memchr = "2.7.4" +rustc-literal-escaper = "0.0.2" rustc_ast_ir = { path = "../rustc_ast_ir" } rustc_data_structures = { path = "../rustc_data_structures" } rustc_index = { path = "../rustc_index" } -rustc-literal-escaper = "0.0.1" rustc_macros = { path = "../rustc_macros" } rustc_serialize = { path = "../rustc_serialize" } rustc_span = { path = "../rustc_span" } diff --git a/compiler/rustc_parse/Cargo.toml b/compiler/rustc_parse/Cargo.toml index dec1e09d8dd83..6504081f0b9ce 100644 --- a/compiler/rustc_parse/Cargo.toml +++ b/compiler/rustc_parse/Cargo.toml @@ -6,6 +6,7 @@ edition = "2024" [dependencies] # tidy-alphabetical-start bitflags = "2.4.1" +rustc-literal-escaper = "0.0.2" rustc_ast = { path = "../rustc_ast" } rustc_ast_pretty = { path = "../rustc_ast_pretty" } rustc_data_structures = { path = "../rustc_data_structures" } @@ -15,7 +16,6 @@ rustc_fluent_macro = { path = "../rustc_fluent_macro" } rustc_index = { path = "../rustc_index" } rustc_lexer = { path = "../rustc_lexer" } rustc_macros = { path = "../rustc_macros" } -rustc-literal-escaper = "0.0.1" rustc_session = { path = "../rustc_session" } rustc_span = { path = "../rustc_span" } thin-vec = "0.2.12" diff --git a/compiler/rustc_parse_format/Cargo.toml b/compiler/rustc_parse_format/Cargo.toml index b0b70cefb596c..52f23c00d4bc0 100644 --- a/compiler/rustc_parse_format/Cargo.toml +++ b/compiler/rustc_parse_format/Cargo.toml @@ -5,8 +5,8 @@ edition = "2024" [dependencies] # tidy-alphabetical-start +rustc-literal-escaper = "0.0.2" rustc_lexer = { path = "../rustc_lexer" } -rustc-literal-escaper = "0.0.1" # tidy-alphabetical-end [target.'cfg(target_pointer_width = "64")'.dev-dependencies] diff --git a/library/Cargo.lock b/library/Cargo.lock index c45a84b2e340c..589d8e5fd55f7 100644 --- a/library/Cargo.lock +++ b/library/Cargo.lock @@ -313,7 +313,12 @@ dependencies = [ [[package]] name = "rustc-literal-escaper" -version = "0.0.1" +version = "0.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0041b6238913c41fe704213a4a9329e2f685a156d1781998128b4149c230ad04" +dependencies = [ + "rustc-std-workspace-std", +] [[package]] name = "rustc-std-workspace-alloc" diff --git a/src/tools/rust-analyzer/Cargo.lock b/src/tools/rust-analyzer/Cargo.lock index 2aa776b508782..be63911526654 100644 --- a/src/tools/rust-analyzer/Cargo.lock +++ b/src/tools/rust-analyzer/Cargo.lock @@ -1746,9 +1746,9 @@ checksum = "583034fd73374156e66797ed8e5b0d5690409c9226b22d87cb7f19821c05d152" [[package]] name = "rustc-literal-escaper" -version = "0.0.1" +version = "0.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cfdd0fcb1409d38cb2d940400497e2384a4a04b8685ee92a0a7a8986ccd72115" +checksum = "0041b6238913c41fe704213a4a9329e2f685a156d1781998128b4149c230ad04" [[package]] name = "rustc-stable-hash" diff --git a/src/tools/rust-analyzer/crates/parser/Cargo.toml b/src/tools/rust-analyzer/crates/parser/Cargo.toml index 292b7de255a86..40c1e9fa7bdda 100644 --- a/src/tools/rust-analyzer/crates/parser/Cargo.toml +++ b/src/tools/rust-analyzer/crates/parser/Cargo.toml @@ -14,7 +14,7 @@ rust-version.workspace = true [dependencies] drop_bomb = "0.1.5" ra-ap-rustc_lexer.workspace = true -rustc-literal-escaper = "0.0.1" +rustc-literal-escaper = "0.0.2" tracing = { workspace = true, optional = true } edition.workspace = true diff --git a/src/tools/rust-analyzer/crates/syntax/Cargo.toml b/src/tools/rust-analyzer/crates/syntax/Cargo.toml index 4fb628685a1b5..23e1917922b7a 100644 --- a/src/tools/rust-analyzer/crates/syntax/Cargo.toml +++ b/src/tools/rust-analyzer/crates/syntax/Cargo.toml @@ -17,7 +17,7 @@ either.workspace = true itertools.workspace = true rowan = "=0.15.15" rustc-hash.workspace = true -rustc-literal-escaper = "0.0.1" +rustc-literal-escaper = "0.0.2" indexmap.workspace = true smol_str.workspace = true triomphe.workspace = true From 017e99b53b528609b68a45e20ac6f6fb4b61cded Mon Sep 17 00:00:00 2001 From: Guillaume Gomez Date: Fri, 4 Apr 2025 22:25:10 +0200 Subject: [PATCH 11/15] Add `rustc-literal-escaper` to allowed crates lists --- src/tools/tidy/src/deps.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/tools/tidy/src/deps.rs b/src/tools/tidy/src/deps.rs index 8f761d349cca5..682ab4875a126 100644 --- a/src/tools/tidy/src/deps.rs +++ b/src/tools/tidy/src/deps.rs @@ -361,6 +361,7 @@ const PERMITTED_RUSTC_DEPENDENCIES: &[&str] = &[ "regex-syntax", "rustc-demangle", "rustc-hash", + "rustc-literal-escaper", "rustc-rayon", "rustc-rayon-core", "rustc-stable-hash", @@ -486,6 +487,7 @@ const PERMITTED_STDLIB_DEPENDENCIES: &[&str] = &[ "rand_core", "rand_xorshift", "rustc-demangle", + "rustc-literal-escaper", "shlex", "syn", "unicode-ident", From 6b5ccfc87f59ab017032e430d4d358b4989735da Mon Sep 17 00:00:00 2001 From: Takayuki Maeda Date: Sat, 5 Apr 2025 06:09:14 +0900 Subject: [PATCH 12/15] check if merged attributes list is empty --- compiler/rustc_ast_lowering/src/expr.rs | 16 +++++++----- tests/ui/attributes/invalid-reprs.rs | 6 +++++ tests/ui/attributes/invalid-reprs.stderr | 33 ++++++++++++++++++++++++ 3 files changed, 48 insertions(+), 7 deletions(-) create mode 100644 tests/ui/attributes/invalid-reprs.rs create mode 100644 tests/ui/attributes/invalid-reprs.stderr diff --git a/compiler/rustc_ast_lowering/src/expr.rs b/compiler/rustc_ast_lowering/src/expr.rs index 52291fdfb3029..fc2aae5b27f79 100644 --- a/compiler/rustc_ast_lowering/src/expr.rs +++ b/compiler/rustc_ast_lowering/src/expr.rs @@ -74,14 +74,16 @@ impl<'hir> LoweringContext<'_, 'hir> { // Merge attributes into the inner expression. if !e.attrs.is_empty() { let old_attrs = self.attrs.get(&ex.hir_id.local_id).copied().unwrap_or(&[]); - self.attrs.insert( - ex.hir_id.local_id, - &*self.arena.alloc_from_iter( - self.lower_attrs_vec(&e.attrs, e.span) - .into_iter() - .chain(old_attrs.iter().cloned()), - ), + let attrs = &*self.arena.alloc_from_iter( + self.lower_attrs_vec(&e.attrs, e.span) + .into_iter() + .chain(old_attrs.iter().cloned()), ); + if attrs.is_empty() { + return ex; + } + + self.attrs.insert(ex.hir_id.local_id, attrs); } return ex; } diff --git a/tests/ui/attributes/invalid-reprs.rs b/tests/ui/attributes/invalid-reprs.rs new file mode 100644 index 0000000000000..95ed14b549163 --- /dev/null +++ b/tests/ui/attributes/invalid-reprs.rs @@ -0,0 +1,6 @@ +fn main() { + let y = #[repr(uwu(4))] + //~^ ERROR attributes on expressions are experimental + //~| ERROR unrecognized representation hint + (&id(5)); //~ ERROR: cannot find function `id` in this scope +} diff --git a/tests/ui/attributes/invalid-reprs.stderr b/tests/ui/attributes/invalid-reprs.stderr new file mode 100644 index 0000000000000..415b969b2440a --- /dev/null +++ b/tests/ui/attributes/invalid-reprs.stderr @@ -0,0 +1,33 @@ +error[E0658]: attributes on expressions are experimental + --> $DIR/invalid-reprs.rs:2:13 + | +LL | let y = #[repr(uwu(4))] + | ^^^^^^^^^^^^^^^ + | + = note: see issue #15701 for more information + = help: add `#![feature(stmt_expr_attributes)]` to the crate attributes to enable + = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date + +error[E0425]: cannot find function `id` in this scope + --> $DIR/invalid-reprs.rs:5:7 + | +LL | (&id(5)); + | ^^ not found in this scope + | +help: consider importing this function + | +LL + use std::process::id; + | + +error[E0552]: unrecognized representation hint + --> $DIR/invalid-reprs.rs:2:20 + | +LL | let y = #[repr(uwu(4))] + | ^^^^^^ + | + = help: valid reprs are `Rust` (default), `C`, `align`, `packed`, `transparent`, `simd`, `i8`, `u8`, `i16`, `u16`, `i32`, `u32`, `i64`, `u64`, `i128`, `u128`, `isize`, `usize` + +error: aborting due to 3 previous errors + +Some errors have detailed explanations: E0425, E0552, E0658. +For more information about an error, try `rustc --explain E0425`. From 66a273083c5cb833ad9341cd9f0690caf89e5bb1 Mon Sep 17 00:00:00 2001 From: Adam Kobzan Date: Sat, 5 Apr 2025 08:40:21 -0700 Subject: [PATCH 13/15] Fix Typo --- compiler/rustc_middle/src/ty/generic_args.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/compiler/rustc_middle/src/ty/generic_args.rs b/compiler/rustc_middle/src/ty/generic_args.rs index e87859a55eddd..439a2f3e16169 100644 --- a/compiler/rustc_middle/src/ty/generic_args.rs +++ b/compiler/rustc_middle/src/ty/generic_args.rs @@ -396,12 +396,12 @@ impl<'tcx> GenericArgs<'tcx> { InlineConstArgs { args: self } } - /// Creates an `GenericArgs` that maps each generic parameter to itself. + /// Creates a `GenericArgs` that maps each generic parameter to itself. pub fn identity_for_item(tcx: TyCtxt<'tcx>, def_id: impl Into) -> GenericArgsRef<'tcx> { Self::for_item(tcx, def_id.into(), |param, _| tcx.mk_param_from_def(param)) } - /// Creates an `GenericArgs` for generic parameter definitions, + /// Creates a `GenericArgs` for generic parameter definitions, /// by calling closures to obtain each kind. /// The closures get to observe the `GenericArgs` as they're /// being built, which can be used to correctly From e31d1d51e4fca3642bd2fcd474ccffcc6b36bb00 Mon Sep 17 00:00:00 2001 From: Adam Kobzan Date: Sat, 5 Apr 2025 09:09:28 -0700 Subject: [PATCH 14/15] format --- compiler/rustc_middle/src/ty/generic_args.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/compiler/rustc_middle/src/ty/generic_args.rs b/compiler/rustc_middle/src/ty/generic_args.rs index 439a2f3e16169..9c1ff134f0fd4 100644 --- a/compiler/rustc_middle/src/ty/generic_args.rs +++ b/compiler/rustc_middle/src/ty/generic_args.rs @@ -396,14 +396,14 @@ impl<'tcx> GenericArgs<'tcx> { InlineConstArgs { args: self } } - /// Creates a `GenericArgs` that maps each generic parameter to itself. + /// Creates a [`GenericArgs`] that maps each generic parameter to itself. pub fn identity_for_item(tcx: TyCtxt<'tcx>, def_id: impl Into) -> GenericArgsRef<'tcx> { Self::for_item(tcx, def_id.into(), |param, _| tcx.mk_param_from_def(param)) } - /// Creates a `GenericArgs` for generic parameter definitions, + /// Creates a [`GenericArgs`] for generic parameter definitions, /// by calling closures to obtain each kind. - /// The closures get to observe the `GenericArgs` as they're + /// The closures get to observe the [`GenericArgs`] as they're /// being built, which can be used to correctly /// replace defaults of generic parameters. pub fn for_item(tcx: TyCtxt<'tcx>, def_id: DefId, mut mk_kind: F) -> GenericArgsRef<'tcx> From 579753294041fe1a6085acb2a1b924d73c0c8ec8 Mon Sep 17 00:00:00 2001 From: Guillaume Gomez Date: Sat, 5 Apr 2025 20:06:52 +0200 Subject: [PATCH 15/15] Clean up `rustc-literal-escaper` usage in rust-analyzer --- src/tools/rust-analyzer/Cargo.lock | 1 - src/tools/rust-analyzer/Cargo.toml | 1 + src/tools/rust-analyzer/crates/parser/Cargo.toml | 2 +- src/tools/rust-analyzer/crates/syntax/Cargo.toml | 4 +--- 4 files changed, 3 insertions(+), 5 deletions(-) diff --git a/src/tools/rust-analyzer/Cargo.lock b/src/tools/rust-analyzer/Cargo.lock index be63911526654..745a8097c8a50 100644 --- a/src/tools/rust-analyzer/Cargo.lock +++ b/src/tools/rust-analyzer/Cargo.lock @@ -1985,7 +1985,6 @@ dependencies = [ "indexmap", "itertools", "parser", - "ra-ap-rustc_lexer", "rayon", "rowan", "rustc-hash 2.0.0", diff --git a/src/tools/rust-analyzer/Cargo.toml b/src/tools/rust-analyzer/Cargo.toml index ce2d66000e396..e22191397655d 100644 --- a/src/tools/rust-analyzer/Cargo.toml +++ b/src/tools/rust-analyzer/Cargo.toml @@ -136,6 +136,7 @@ pulldown-cmark-to-cmark = "10.0.4" pulldown-cmark = { version = "0.9.0", default-features = false } rayon = "1.8.0" rustc-hash = "2.0.0" +rustc-literal-escaper = "0.0.2" semver = "1.0.14" serde = { version = "1.0.192" } serde_derive = { version = "1.0.192" } diff --git a/src/tools/rust-analyzer/crates/parser/Cargo.toml b/src/tools/rust-analyzer/crates/parser/Cargo.toml index 40c1e9fa7bdda..114a66add63bd 100644 --- a/src/tools/rust-analyzer/crates/parser/Cargo.toml +++ b/src/tools/rust-analyzer/crates/parser/Cargo.toml @@ -14,7 +14,7 @@ rust-version.workspace = true [dependencies] drop_bomb = "0.1.5" ra-ap-rustc_lexer.workspace = true -rustc-literal-escaper = "0.0.2" +rustc-literal-escaper.workspace = true tracing = { workspace = true, optional = true } edition.workspace = true diff --git a/src/tools/rust-analyzer/crates/syntax/Cargo.toml b/src/tools/rust-analyzer/crates/syntax/Cargo.toml index 23e1917922b7a..6b356398204e1 100644 --- a/src/tools/rust-analyzer/crates/syntax/Cargo.toml +++ b/src/tools/rust-analyzer/crates/syntax/Cargo.toml @@ -17,14 +17,12 @@ either.workspace = true itertools.workspace = true rowan = "=0.15.15" rustc-hash.workspace = true -rustc-literal-escaper = "0.0.2" +rustc-literal-escaper.workspace = true indexmap.workspace = true smol_str.workspace = true triomphe.workspace = true tracing.workspace = true -ra-ap-rustc_lexer.workspace = true - parser.workspace = true stdx.workspace = true