Skip to content

Commit b5e35b1

Browse files
committed
remove special code path for unknown tokens
1 parent e1d7e4a commit b5e35b1

File tree

5 files changed

+37
-61
lines changed

5 files changed

+37
-61
lines changed

src/libsyntax/parse/lexer/mod.rs

+13-60
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@ use crate::parse::token::{self, Token, TokenKind};
33
use crate::symbol::{sym, Symbol};
44
use crate::parse::unescape_error_reporting::{emit_unescape_error, push_escaped_char};
55

6-
use errors::{FatalError, Diagnostic, DiagnosticBuilder};
6+
use errors::{FatalError, DiagnosticBuilder};
77
use syntax_pos::{BytePos, Pos, Span, NO_EXPANSION};
88
use rustc_lexer::Base;
99
use rustc_lexer::unescape;
@@ -39,7 +39,6 @@ pub struct StringReader<'a> {
3939
pos: BytePos,
4040
/// Stop reading src at this index.
4141
end_src_index: usize,
42-
fatal_errs: Vec<DiagnosticBuilder<'a>>,
4342
/// Source text to tokenize.
4443
src: Lrc<String>,
4544
override_span: Option<Span>,
@@ -62,7 +61,6 @@ impl<'a> StringReader<'a> {
6261
pos: source_file.start_pos,
6362
end_src_index: src.len(),
6463
src,
65-
fatal_errs: Vec::new(),
6664
override_span,
6765
}
6866
}
@@ -89,29 +87,17 @@ impl<'a> StringReader<'a> {
8987
self.override_span.unwrap_or_else(|| Span::new(lo, hi, NO_EXPANSION))
9088
}
9189

92-
fn unwrap_or_abort(&mut self, res: Result<Token, ()>) -> Token {
93-
match res {
94-
Ok(tok) => tok,
95-
Err(_) => {
96-
self.emit_fatal_errors();
97-
FatalError.raise();
98-
}
99-
}
100-
}
101-
10290
/// Returns the next token, including trivia like whitespace or comments.
10391
///
10492
/// `Err(())` means that some errors were encountered, which can be
10593
/// retrieved using `buffer_fatal_errors`.
106-
pub fn try_next_token(&mut self) -> Result<Token, ()> {
107-
assert!(self.fatal_errs.is_empty());
108-
94+
pub fn next_token(&mut self) -> Token {
10995
let start_src_index = self.src_index(self.pos);
11096
let text: &str = &self.src[start_src_index..self.end_src_index];
11197

11298
if text.is_empty() {
11399
let span = self.mk_sp(self.pos, self.pos);
114-
return Ok(Token::new(token::Eof, span));
100+
return Token::new(token::Eof, span);
115101
}
116102

117103
{
@@ -125,7 +111,7 @@ impl<'a> StringReader<'a> {
125111
let kind = token::Shebang(sym);
126112

127113
let span = self.mk_sp(start, self.pos);
128-
return Ok(Token::new(kind, span));
114+
return Token::new(kind, span);
129115
}
130116
}
131117
}
@@ -139,39 +125,10 @@ impl<'a> StringReader<'a> {
139125

140126
// This could use `?`, but that makes code significantly (10-20%) slower.
141127
// https://github.com/rust-lang/rust/issues/37939
142-
let kind = match self.cook_lexer_token(token.kind, start) {
143-
Ok(it) => it,
144-
Err(err) => return Err(self.fatal_errs.push(err)),
145-
};
128+
let kind = self.cook_lexer_token(token.kind, start);
146129

147130
let span = self.mk_sp(start, self.pos);
148-
Ok(Token::new(kind, span))
149-
}
150-
151-
/// Returns the next token, including trivia like whitespace or comments.
152-
///
153-
/// Aborts in case of an error.
154-
pub fn next_token(&mut self) -> Token {
155-
let res = self.try_next_token();
156-
self.unwrap_or_abort(res)
157-
}
158-
159-
fn emit_fatal_errors(&mut self) {
160-
for err in &mut self.fatal_errs {
161-
err.emit();
162-
}
163-
164-
self.fatal_errs.clear();
165-
}
166-
167-
pub fn buffer_fatal_errors(&mut self) -> Vec<Diagnostic> {
168-
let mut buffer = Vec::new();
169-
170-
for err in self.fatal_errs.drain(..) {
171-
err.buffer(&mut buffer);
172-
}
173-
174-
buffer
131+
Token::new(kind, span)
175132
}
176133

177134
/// Report a fatal lexical error with a given span.
@@ -218,8 +175,8 @@ impl<'a> StringReader<'a> {
218175
&self,
219176
token: rustc_lexer::TokenKind,
220177
start: BytePos,
221-
) -> Result<TokenKind, DiagnosticBuilder<'a>> {
222-
let kind = match token {
178+
) -> TokenKind {
179+
match token {
223180
rustc_lexer::TokenKind::LineComment => {
224181
let string = self.str_from(start);
225182
// comments with only more "/"s are not doc comments
@@ -396,16 +353,12 @@ impl<'a> StringReader<'a> {
396353
// this should be inside `rustc_lexer`. However, we should first remove compound
397354
// tokens like `<<` from `rustc_lexer`, and then add fancier error recovery to it,
398355
// as there will be less overall work to do this way.
399-
return match unicode_chars::check_for_substitution(self, start, c, &mut err) {
400-
Some(token) => {
401-
err.emit();
402-
Ok(token)
403-
}
404-
None => Err(err),
405-
}
356+
let token = unicode_chars::check_for_substitution(self, start, c, &mut err)
357+
.unwrap_or(token::Whitespace);
358+
err.emit();
359+
token
406360
}
407-
};
408-
Ok(kind)
361+
}
409362
}
410363

411364
fn cook_lexer_literal(

src/test/ui/parser/lex-bad-token.rs

+2
Original file line number | Diff line number | Diff line change
@@ -1 +1,3 @@
11
//~ ERROR: unknown start of token
2+
3+
fn main() {}
Original file line number | Diff line number | Diff line change
@@ -1 +1,3 @@
11
\ //~ ERROR: unknown start of token: \
2+
3+
fn main() {}

src/test/ui/parser/unicode-quote-chars.rs

+3
Original file line number | Diff line number | Diff line change
@@ -4,4 +4,7 @@ fn main() {
44
println!(“hello world”);
55
//~^ ERROR unknown start of token: \u{201c}
66
//~^^ HELP Unicode characters '“' (Left Double Quotation Mark) and '”' (Right Double Quotation Mark) look like '"' (Quotation Mark), but are not
7+
//~^^^ ERROR unknown start of token: \u{201d}
8+
//~^^^^ HELP Unicode character '”' (Right Double Quotation Mark) looks like '"' (Quotation Mark), but it is not
9+
//~^^^^^ ERROR expected token: `,`
710
}

src/test/ui/parser/unicode-quote-chars.stderr

+17-1
Original file line number | Diff line number | Diff line change
@@ -8,5 +8,21 @@ help: Unicode characters '“' (Left Double Quotation Mark) and '”' (Right Dou
88
LL | println!("hello world");
99
| ^^^^^^^^^^^^^
1010

11-
error: aborting due to previous error
11+
error: unknown start of token: \u{201d}
12+
--> $DIR/unicode-quote-chars.rs:4:26
13+
|
14+
LL | println!(“hello world”);
15+
| ^
16+
help: Unicode character '”' (Right Double Quotation Mark) looks like '"' (Quotation Mark), but it is not
17+
|
18+
LL | println!(“hello world");
19+
| ^
20+
21+
error: expected token: `,`
22+
--> $DIR/unicode-quote-chars.rs:4:21
23+
|
24+
LL | println!(“hello world”);
25+
| ^^^^^ expected `,`
26+
27+
error: aborting due to 3 previous errors
1228

0 commit comments

Comments (0)