@@ -25,7 +25,7 @@ pub(crate) struct YamlLexer<'src> {
2525 /// Cache of tokens to be emitted to the parser
2626 tokens : LinkedList < LexToken > ,
2727
28- /// Cached of tokens that should only be after the current scope has been properly closed.
28+ /// Cache of tokens that should only be emitted after the current scope has been properly closed.
2929 cached_scope_closing_tokens : Option < LinkedList < LexToken > > ,
3030}
3131
@@ -53,30 +53,27 @@ impl<'src> YamlLexer<'src> {
5353 /// ```
5454 fn consume_tokens ( & mut self ) {
5555 let Some ( current) = self . current_byte ( ) else {
56- while let Some ( scope) = self . scopes . pop ( ) {
57- self . tokens . push_back ( LexToken :: pseudo (
58- scope. close_token_kind ( ) ,
59- self . current_coordinate ,
60- ) ) ;
61- }
56+ let mut tokens = self . close_all_scopes ( ) ;
57+ self . tokens . append ( & mut tokens) ;
6258 self . tokens
6359 . push_back ( LexToken :: pseudo ( EOF , self . current_coordinate ) ) ;
6460 return ;
6561 } ;
6662
6763 let start = self . text_position ( ) ;
6864
69- let mut tokens = match ( current, self . peek_byte ( ) ) {
70- ( c, _) if is_space ( c) => self . consume_whitespace_token ( ) . into ( ) ,
71- ( b'#' , _) => self . consume_comment ( ) . into ( ) ,
72- ( c, _) if is_break ( c) => self . evaluate_block_scope ( ) ,
73- ( current, peek) if maybe_at_mapping_start ( current, peek) => {
65+ let mut tokens = match current {
66+ c if is_break ( c) => self . evaluate_block_scope ( ) ,
67+ c if is_space ( c) => self . consume_whitespace_token ( ) . into ( ) ,
68+ b'#' => self . consume_comment ( ) . into ( ) ,
69+ b'.' if self . is_at_doc_end ( ) => self . consume_doc_end ( ) ,
70+ current if maybe_at_mapping_start ( current, self . peek_byte ( ) ) => {
7471 self . consume_potential_mapping_start ( current)
7572 }
7673 // ':', '?', '-' can be a valid plain token start
77- ( b'?' | b':' , _ ) => self . consume_mapping_key ( current) ,
78- ( b'-' , _ ) => self . consume_sequence_entry ( ) ,
79- ( b'|' | b'>' , _ ) => self . consume_block_scalar ( current) ,
74+ b'?' | b':' => self . consume_mapping_key ( current) ,
75+ b'-' => self . consume_sequence_entry ( ) ,
76+ b'|' | b'>' => self . consume_block_scalar ( current) ,
8077 _ => self . consume_unexpected_token ( ) . into ( ) ,
8178 } ;
8279 self . tokens . append ( & mut tokens) ;
@@ -216,25 +213,9 @@ impl<'src> YamlLexer<'src> {
216213 }
217214 }
218215
219- // The spec only allows trailing trivia followed a block header
220- let mut trivia = self . consume_trivia ( true ) ;
216+ let mut trivia = self . consume_trailing_trivia ( ) ;
221217 tokens. append ( & mut trivia) ;
222218
223- if self . current_byte ( ) . is_none_or ( is_break) {
224- return tokens;
225- }
226-
227- // Consume the rest of the invalid characters so that the block content can cleanly start
228- // at a newline.
229- let start = self . current_coordinate ;
230- while let Some ( c) = self . current_byte ( ) {
231- if is_break ( c) {
232- break ;
233- }
234- self . advance_char_unchecked ( ) ;
235- }
236-
237- tokens. push_back ( LexToken :: new ( ERROR_TOKEN , start, self . current_coordinate ) ) ;
238219 tokens
239220 }
240221
@@ -296,13 +277,16 @@ impl<'src> YamlLexer<'src> {
296277 debug_assert ! ( self . current_byte( ) . is_some_and( is_break) ) ;
297278 let start = self . current_coordinate ;
298279 let mut trivia = self . consume_trivia ( false ) ;
299- let mut scope_end_tokens = self . close_scope ( start) ;
280+ let mut scope_end_tokens = self . close_breached_scopes ( start) ;
300281 scope_end_tokens. append ( & mut trivia) ;
301282 scope_end_tokens
302283 }
303284
304285 /// Close all violated scopes, and emit closing tokens right after the last non trivia token
305- fn close_scope ( & mut self , scope_end_coordinate : TextCoordinate ) -> LinkedList < LexToken > {
286+ fn close_breached_scopes (
287+ & mut self ,
288+ scope_end_coordinate : TextCoordinate ,
289+ ) -> LinkedList < LexToken > {
306290 let mut scope_end_tokens = LinkedList :: new ( ) ;
307291 while let Some ( scope) = self . scopes . pop ( ) {
308292 if scope. contains (
@@ -321,6 +305,17 @@ impl<'src> YamlLexer<'src> {
321305 scope_end_tokens
322306 }
323307
308+ fn close_all_scopes ( & mut self ) -> LinkedList < LexToken > {
309+ let tokens = LinkedList :: new ( ) ;
310+ while let Some ( scope) = self . scopes . pop ( ) {
311+ self . tokens . push_back ( LexToken :: pseudo (
312+ scope. close_token_kind ( ) ,
313+ self . current_coordinate ,
314+ ) ) ;
315+ }
316+ tokens
317+ }
318+
324319 /// Consume a YAML flow value that can be used inside an implicit mapping key
325320 /// https://yaml.org/spec/1.2.2/#rule-ns-s-block-map-implicit-key
326321 fn consume_potential_mapping_key ( & mut self , current : u8 ) -> LinkedList < LexToken > {
@@ -533,6 +528,30 @@ impl<'src> YamlLexer<'src> {
533528 LexToken :: new ( SINGLE_QUOTED_LITERAL , start, token_end)
534529 }
535530
531+ fn is_at_doc_end ( & self ) -> bool {
532+ let is_dot = |c : u8 | c == b'.' ;
533+ // A DOC_END token can be evaluated as a plain token if it's not placed at the start of
534+ // line
535+ self . current_coordinate . column == 0
536+ && self . current_byte ( ) . is_some_and ( is_dot)
537+ && self . peek_byte ( ) . is_some_and ( is_dot)
538+ && self . byte_at ( 2 ) . is_some_and ( is_dot)
539+ }
540+
541+ fn consume_doc_end ( & mut self ) -> LinkedList < LexToken > {
542+ self . assert_byte ( b'.' ) ;
543+ debug_assert_eq ! ( self . byte_at( 1 ) , Some ( b'.' ) ) ;
544+ debug_assert_eq ! ( self . byte_at( 2 ) , Some ( b'.' ) ) ;
545+ let start = self . current_coordinate ;
546+ let mut tokens = self . close_all_scopes ( ) ;
547+ self . advance ( 3 ) ;
548+ tokens. push_back ( LexToken :: new ( DOC_END , start, self . current_coordinate ) ) ;
549+ let mut trivia = self . consume_trailing_trivia ( ) ;
550+ tokens. append ( & mut trivia) ;
551+
552+ tokens
553+ }
554+
536555 /// Bumps the current byte and creates a lexed token of the passed in kind.
537556 #[ inline]
538557 fn consume_byte_as_token ( & mut self , tok : YamlSyntaxKind ) -> LexToken {
@@ -548,7 +567,7 @@ impl<'src> YamlLexer<'src> {
548567 let start = self . current_coordinate ;
549568 let mut trivia = self . consume_trivia ( false ) ;
550569 if self . breach_parent_scope ( ) {
551- let mut scope_end_tokens = self . close_scope ( start) ;
570+ let mut scope_end_tokens = self . close_breached_scopes ( start) ;
552571 scope_end_tokens. append ( & mut trivia) ;
553572 self . cached_scope_closing_tokens = Some ( scope_end_tokens) ;
554573 None
@@ -590,7 +609,7 @@ impl<'src> YamlLexer<'src> {
590609 }
591610 }
592611 if self . breach_parent_scope ( ) {
593- let mut scope_end_tokens = self . close_scope ( start) ;
612+ let mut scope_end_tokens = self . close_breached_scopes ( start) ;
594613 scope_end_tokens. append ( & mut trivia) ;
595614 self . cached_scope_closing_tokens = Some ( scope_end_tokens) ;
596615 false
@@ -633,6 +652,29 @@ impl<'src> YamlLexer<'src> {
633652 LexToken :: new ( ERROR_TOKEN , start, self . current_coordinate )
634653 }
635654
655+ /// Some constructs, like block header or document end (`...`), don't allow any trailing tokens
656+ /// except for trivia.
657+ /// This function is responsible for consuming the trailing trivia and any unexpected tokens
658+ fn consume_trailing_trivia ( & mut self ) -> LinkedList < LexToken > {
659+ self . assert_current_char_boundary ( ) ;
660+
661+ let mut tokens = self . consume_trivia ( true ) ;
662+
663+ if self . current_byte ( ) . is_none_or ( is_break) {
664+ return tokens;
665+ }
666+
667+ let start = self . current_coordinate ;
668+ while let Some ( c) = self . current_byte ( ) {
669+ if is_break ( c) {
670+ break ;
671+ }
672+ self . advance_char_unchecked ( ) ;
673+ }
674+ tokens. push_back ( LexToken :: new ( ERROR_TOKEN , start, self . current_coordinate ) ) ;
675+ tokens
676+ }
677+
636678 fn consume_unexpected_character ( & mut self ) {
637679 self . assert_current_char_boundary ( ) ;
638680
0 commit comments