1
1
use rustc_ast:: ast:: AttrStyle ;
2
2
use rustc_ast:: token:: { self , CommentKind , Token , TokenKind } ;
3
+ use rustc_ast:: tokenstream:: IsJoint ;
3
4
use rustc_data_structures:: sync:: Lrc ;
4
5
use rustc_errors:: { error_code, Applicability , DiagnosticBuilder , FatalError } ;
5
6
use rustc_lexer:: Base ;
@@ -65,42 +66,46 @@ impl<'a> StringReader<'a> {
65
66
self . override_span . unwrap_or_else ( || Span :: with_root_ctxt ( lo, hi) )
66
67
}
67
68
68
- /// Returns the next token, including trivia like whitespace or comments.
69
- fn next_token ( & mut self ) -> Token {
69
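+ /// Returns the next non-trivia token, paired with `IsJoint::NonJoint` if any input (e.g. a shebang, whitespace, or a non-doc comment) was skipped before it, and `IsJoint::Joint` otherwise.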
70
+ fn next_token ( & mut self ) -> ( IsJoint , Token ) {
71
+ let mut is_joint = IsJoint :: Joint ;
72
+
73
+ // Skip `#!` at the start of the file
70
74
let start_src_index = self . src_index ( self . pos ) ;
71
75
let text: & str = & self . src [ start_src_index..self . end_src_index ] ;
72
-
73
- if text. is_empty ( ) {
74
- let span = self . mk_sp ( self . pos , self . pos ) ;
75
- return Token :: new ( token:: Eof , span) ;
76
+ let is_beginning_of_file = self . pos == self . start_pos ;
77
+ if is_beginning_of_file {
78
+ if let Some ( shebang_len) = rustc_lexer:: strip_shebang ( text) {
79
+ self . pos = self . pos + BytePos :: from_usize ( shebang_len) ;
80
+ is_joint = IsJoint :: NonJoint ;
81
+ }
76
82
}
77
83
78
- {
79
- let is_beginning_of_file = self . pos == self . start_pos ;
80
- if is_beginning_of_file {
81
- if let Some ( shebang_len) = rustc_lexer:: strip_shebang ( text) {
82
- let start = self . pos ;
83
- self . pos = self . pos + BytePos :: from_usize ( shebang_len) ;
84
+ // Skip trivia (whitespace and non-doc comments) until a real token or the end of input is reached
85
+ loop {
86
+ let start_src_index = self . src_index ( self . pos ) ;
87
+ let text: & str = & self . src [ start_src_index..self . end_src_index ] ;
84
88
85
- let sym = self . symbol_from ( start + BytePos :: from_usize ( "#!" . len ( ) ) ) ;
86
- let kind = token:: Shebang ( sym) ;
87
-
88
- let span = self . mk_sp ( start, self . pos ) ;
89
- return Token :: new ( kind, span) ;
90
- }
89
+ if text. is_empty ( ) {
90
+ let span = self . mk_sp ( self . pos , self . pos ) ;
91
+ return ( is_joint, Token :: new ( token:: Eof , span) ) ;
91
92
}
92
- }
93
93
94
- let token = rustc_lexer:: first_token ( text) ;
94
+ let token = rustc_lexer:: first_token ( text) ;
95
95
96
- let start = self . pos ;
97
- self . pos = self . pos + BytePos :: from_usize ( token. len ) ;
96
+ let start = self . pos ;
97
+ self . pos = self . pos + BytePos :: from_usize ( token. len ) ;
98
98
99
- debug ! ( "try_next_token : {:?}({:?})" , token. kind, self . str_from( start) ) ;
99
+ debug ! ( "next_token : {:?}({:?})" , token. kind, self . str_from( start) ) ;
100
100
101
- let kind = self . cook_lexer_token ( token. kind , start) ;
102
- let span = self . mk_sp ( start, self . pos ) ;
103
- Token :: new ( kind, span)
101
+ match self . cook_lexer_token ( token. kind , start) {
102
+ Some ( kind) => {
103
+ let span = self . mk_sp ( start, self . pos ) ;
104
+ return ( is_joint, Token :: new ( kind, span) ) ;
105
+ }
106
+ None => is_joint = IsJoint :: NonJoint ,
107
+ }
108
+ }
104
109
}
105
110
106
111
/// Report a fatal lexical error with a given span.
@@ -140,19 +145,16 @@ impl<'a> StringReader<'a> {
140
145
/// Turns simple `rustc_lexer::TokenKind` enum into a rich
141
146
/// `rustc_ast::token::TokenKind`. This turns strings into interned
142
147
/// symbols and runs additional validation. Returns `None` for input that yields no token (e.g. whitespace and non-doc comments).
143
- fn cook_lexer_token ( & self , token : rustc_lexer:: TokenKind , start : BytePos ) -> TokenKind {
144
- match token {
148
+ fn cook_lexer_token ( & self , token : rustc_lexer:: TokenKind , start : BytePos ) -> Option < TokenKind > {
149
+ Some ( match token {
145
150
rustc_lexer:: TokenKind :: LineComment { doc_style } => {
146
- match doc_style {
147
- Some ( doc_style) => {
148
- // Opening delimiter of the length 3 is not included into the symbol.
149
- let content_start = start + BytePos ( 3 ) ;
150
- let content = self . str_from ( content_start) ;
151
+ // Skip non-doc comments
152
+ let doc_style = doc_style?;
151
153
152
- self . cook_doc_comment ( content_start , content , CommentKind :: Line , doc_style )
153
- }
154
- None => token :: Comment ,
155
- }
154
+ // Opening delimiter of the length 3 is not included into the symbol.
155
+ let content_start = start + BytePos ( 3 ) ;
156
+ let content = self . str_from ( content_start ) ;
157
+ self . cook_doc_comment ( content_start , content , CommentKind :: Line , doc_style )
156
158
}
157
159
rustc_lexer:: TokenKind :: BlockComment { doc_style, terminated } => {
158
160
if !terminated {
@@ -171,20 +173,18 @@ impl<'a> StringReader<'a> {
171
173
. emit ( ) ;
172
174
FatalError . raise ( ) ;
173
175
}
174
- match doc_style {
175
- Some ( doc_style) => {
176
- // Opening delimiter of the length 3 and closing delimiter of the length 2
177
- // are not included into the symbol.
178
- let content_start = start + BytePos ( 3 ) ;
179
- let content_end = self . pos - BytePos ( if terminated { 2 } else { 0 } ) ;
180
- let content = self . str_from_to ( content_start, content_end) ;
181
-
182
- self . cook_doc_comment ( content_start, content, CommentKind :: Block , doc_style)
183
- }
184
- None => token:: Comment ,
185
- }
176
+
177
+ // Skip non-doc comments
178
+ let doc_style = doc_style?;
179
+
180
+ // Opening delimiter of the length 3 and closing delimiter of the length 2
181
+ // are not included into the symbol.
182
+ let content_start = start + BytePos ( 3 ) ;
183
+ let content_end = self . pos - BytePos ( if terminated { 2 } else { 0 } ) ;
184
+ let content = self . str_from_to ( content_start, content_end) ;
185
+ self . cook_doc_comment ( content_start, content, CommentKind :: Block , doc_style)
186
186
}
187
- rustc_lexer:: TokenKind :: Whitespace => token :: Whitespace ,
187
+ rustc_lexer:: TokenKind :: Whitespace => return None ,
188
188
rustc_lexer:: TokenKind :: Ident | rustc_lexer:: TokenKind :: RawIdent => {
189
189
let is_raw_ident = token == rustc_lexer:: TokenKind :: RawIdent ;
190
190
let mut ident_start = start;
@@ -282,12 +282,11 @@ impl<'a> StringReader<'a> {
282
282
// this should be inside `rustc_lexer`. However, we should first remove compound
283
283
// tokens like `<<` from `rustc_lexer`, and then add fancier error recovery to it,
284
284
// as there will be less overall work to do this way.
285
- let token = unicode_chars:: check_for_substitution ( self , start, c, & mut err)
286
- . unwrap_or_else ( || token:: Unknown ( self . symbol_from ( start) ) ) ;
285
+ let token = unicode_chars:: check_for_substitution ( self , start, c, & mut err) ;
287
286
err. emit ( ) ;
288
- token
287
+ token?
289
288
}
290
- }
289
+ } )
291
290
}
292
291
293
292
fn cook_doc_comment (
@@ -450,12 +449,6 @@ impl<'a> StringReader<'a> {
450
449
self . str_from_to ( start, self . pos )
451
450
}
452
451
453
- /// Creates a Symbol from a given offset to the current offset.
454
- fn symbol_from ( & self , start : BytePos ) -> Symbol {
455
- debug ! ( "taking an ident from {:?} to {:?}" , start, self . pos) ;
456
- Symbol :: intern ( self . str_from ( start) )
457
- }
458
-
459
452
/// Creates a Symbol from the text between the given start and end offsets.
460
453
fn symbol_from_to ( & self , start : BytePos , end : BytePos ) -> Symbol {
461
454
debug ! ( "taking an ident from {:?} to {:?}" , start, end) ;
0 commit comments