Skip to content

Commit 5f1f617

Browse files
authored
Rollup merge of #120329 - nnethercote:3349-precursors, r=fee1-dead
RFC 3349 precursors: some cleanups I found while working on RFC 3349 that are worth landing separately. r? `@fee1-dead`
2 parents b09f232 + 6be2e56 commit 5f1f617

File tree

11 files changed

+161
-201
lines changed

11 files changed

+161
-201
lines changed

compiler/rustc_ast/src/util/literal.rs

+27-69
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,7 @@
33
use crate::ast::{self, LitKind, MetaItemLit, StrStyle};
44
use crate::token::{self, Token};
55
use rustc_lexer::unescape::{
6-
byte_from_char, unescape_byte, unescape_c_string, unescape_char, unescape_literal, CStrUnit,
7-
Mode,
6+
byte_from_char, unescape_byte, unescape_char, unescape_mixed, unescape_unicode, MixedUnit, Mode,
87
};
98
use rustc_span::symbol::{kw, sym, Symbol};
109
use rustc_span::Span;
@@ -48,6 +47,9 @@ impl LitKind {
4847
return Err(LitError::InvalidSuffix);
4948
}
5049

50+
// For byte/char/string literals, chars and escapes have already been
51+
// checked in the lexer (in `cook_lexer_literal`). So we can assume all
52+
// chars and escapes are valid here.
5153
Ok(match kind {
5254
token::Bool => {
5355
assert!(symbol.is_bool_lit());
@@ -56,12 +58,12 @@ impl LitKind {
5658
token::Byte => {
5759
return unescape_byte(symbol.as_str())
5860
.map(LitKind::Byte)
59-
.map_err(|_| LitError::LexerError);
61+
.map_err(|_| panic!("failed to unescape byte literal"));
6062
}
6163
token::Char => {
6264
return unescape_char(symbol.as_str())
6365
.map(LitKind::Char)
64-
.map_err(|_| LitError::LexerError);
66+
.map_err(|_| panic!("failed to unescape char literal"));
6567
}
6668

6769
// There are some valid suffixes for integer and float literals,
@@ -77,113 +79,69 @@ impl LitKind {
7779
let s = symbol.as_str();
7880
// Vanilla strings are so common we optimize for the common case where no chars
7981
// requiring special behaviour are present.
80-
let symbol = if s.contains(['\\', '\r']) {
82+
let symbol = if s.contains('\\') {
8183
let mut buf = String::with_capacity(s.len());
82-
let mut error = Ok(());
8384
// Force-inlining here is aggressive but the closure is
84-
// called on every char in the string, so it can be
85-
// hot in programs with many long strings.
86-
unescape_literal(
85+
// called on every char in the string, so it can be hot in
86+
// programs with many long strings containing escapes.
87+
unescape_unicode(
8788
s,
8889
Mode::Str,
8990
&mut #[inline(always)]
90-
|_, unescaped_char| match unescaped_char {
91+
|_, c| match c {
9192
Ok(c) => buf.push(c),
9293
Err(err) => {
93-
if err.is_fatal() {
94-
error = Err(LitError::LexerError);
95-
}
94+
assert!(!err.is_fatal(), "failed to unescape string literal")
9695
}
9796
},
9897
);
99-
error?;
10098
Symbol::intern(&buf)
10199
} else {
102100
symbol
103101
};
104102
LitKind::Str(symbol, ast::StrStyle::Cooked)
105103
}
106104
token::StrRaw(n) => {
107-
// Raw strings have no escapes, so we only need to check for invalid chars, and we
108-
// can reuse the symbol on success.
109-
let mut error = Ok(());
110-
unescape_literal(symbol.as_str(), Mode::RawStr, &mut |_, unescaped_char| {
111-
match unescaped_char {
112-
Ok(_) => {}
113-
Err(err) => {
114-
if err.is_fatal() {
115-
error = Err(LitError::LexerError);
116-
}
117-
}
118-
}
119-
});
120-
error?;
105+
// Raw strings have no escapes so no work is needed here.
121106
LitKind::Str(symbol, ast::StrStyle::Raw(n))
122107
}
123108
token::ByteStr => {
124109
let s = symbol.as_str();
125110
let mut buf = Vec::with_capacity(s.len());
126-
let mut error = Ok(());
127-
unescape_literal(s, Mode::ByteStr, &mut |_, c| match c {
111+
unescape_unicode(s, Mode::ByteStr, &mut |_, c| match c {
128112
Ok(c) => buf.push(byte_from_char(c)),
129113
Err(err) => {
130-
if err.is_fatal() {
131-
error = Err(LitError::LexerError);
132-
}
114+
assert!(!err.is_fatal(), "failed to unescape string literal")
133115
}
134116
});
135-
error?;
136117
LitKind::ByteStr(buf.into(), StrStyle::Cooked)
137118
}
138119
token::ByteStrRaw(n) => {
139-
// Raw strings have no escapes, so we only need to check for invalid chars, and we
140-
// can convert the symbol directly to a `Lrc<u8>` on success.
141-
let s = symbol.as_str();
142-
let mut error = Ok(());
143-
unescape_literal(s, Mode::RawByteStr, &mut |_, c| match c {
144-
Ok(_) => {}
145-
Err(err) => {
146-
if err.is_fatal() {
147-
error = Err(LitError::LexerError);
148-
}
149-
}
150-
});
151-
LitKind::ByteStr(s.to_owned().into_bytes().into(), StrStyle::Raw(n))
120+
// Raw strings have no escapes so we can convert the symbol
121+
// directly to a `Lrc<[u8]>`.
122+
let buf = symbol.as_str().to_owned().into_bytes();
123+
LitKind::ByteStr(buf.into(), StrStyle::Raw(n))
152124
}
153125
token::CStr => {
154126
let s = symbol.as_str();
155127
let mut buf = Vec::with_capacity(s.len());
156-
let mut error = Ok(());
157-
unescape_c_string(s, Mode::CStr, &mut |_span, c| match c {
158-
Ok(CStrUnit::Byte(b)) => buf.push(b),
159-
Ok(CStrUnit::Char(c)) => {
128+
unescape_mixed(s, Mode::CStr, &mut |_span, c| match c {
129+
Ok(MixedUnit::Char(c)) => {
160130
buf.extend_from_slice(c.encode_utf8(&mut [0; 4]).as_bytes())
161131
}
132+
Ok(MixedUnit::HighByte(b)) => buf.push(b),
162133
Err(err) => {
163-
if err.is_fatal() {
164-
error = Err(LitError::LexerError);
165-
}
134+
assert!(!err.is_fatal(), "failed to unescape C string literal")
166135
}
167136
});
168-
error?;
169137
buf.push(0);
170138
LitKind::CStr(buf.into(), StrStyle::Cooked)
171139
}
172140
token::CStrRaw(n) => {
173-
// Raw strings have no escapes, so we only need to check for invalid chars, and we
174-
// can convert the symbol directly to a `Lrc<u8>` on success.
175-
let s = symbol.as_str();
176-
let mut error = Ok(());
177-
unescape_c_string(s, Mode::RawCStr, &mut |_, c| match c {
178-
Ok(_) => {}
179-
Err(err) => {
180-
if err.is_fatal() {
181-
error = Err(LitError::LexerError);
182-
}
183-
}
184-
});
185-
error?;
186-
let mut buf = s.to_owned().into_bytes();
141+
// Raw strings have no escapes so we can convert the symbol
142+
// directly to a `Lrc<[u8]>` after appending the terminating NUL
143+
// char.
144+
let mut buf = symbol.as_str().to_owned().into_bytes();
187145
buf.push(0);
188146
LitKind::CStr(buf.into(), StrStyle::Raw(n))
189147
}

0 commit comments

Comments
 (0)