diff --git a/compiler/rustc_ast/src/tokenstream.rs b/compiler/rustc_ast/src/tokenstream.rs index fabd43a1618a4..f0a6a5e072586 100644 --- a/compiler/rustc_ast/src/tokenstream.rs +++ b/compiler/rustc_ast/src/tokenstream.rs @@ -41,7 +41,8 @@ use std::{fmt, iter}; /// Nothing special happens to misnamed or misplaced `SubstNt`s. #[derive(Debug, Clone, PartialEq, Encodable, Decodable, HashStable_Generic)] pub enum TokenTree { - /// A single token. + /// A single token. Should never be `OpenDelim` or `CloseDelim`, because + /// delimiters are implicitly represented by `Delimited`. Token(Token, Spacing), /// A delimited sequence of token trees. Delimited(DelimSpan, Delimiter, TokenStream), @@ -388,12 +389,12 @@ impl TokenStream { self.0.len() } - pub fn trees(&self) -> CursorRef<'_> { - CursorRef::new(self) + pub fn trees(&self) -> RefTokenTreeCursor<'_> { + RefTokenTreeCursor::new(self) } - pub fn into_trees(self) -> Cursor { - Cursor::new(self) + pub fn into_trees(self) -> TokenTreeCursor { + TokenTreeCursor::new(self) } /// Compares two `TokenStream`s, checking equality without regarding span information. @@ -551,16 +552,17 @@ impl TokenStream { } } -/// By-reference iterator over a [`TokenStream`]. +/// By-reference iterator over a [`TokenStream`], that produces `&TokenTree` +/// items. #[derive(Clone)] -pub struct CursorRef<'t> { +pub struct RefTokenTreeCursor<'t> { stream: &'t TokenStream, index: usize, } -impl<'t> CursorRef<'t> { +impl<'t> RefTokenTreeCursor<'t> { fn new(stream: &'t TokenStream) -> Self { - CursorRef { stream, index: 0 } + RefTokenTreeCursor { stream, index: 0 } } pub fn look_ahead(&self, n: usize) -> Option<&TokenTree> { @@ -568,7 +570,7 @@ impl<'t> CursorRef<'t> { } } -impl<'t> Iterator for CursorRef<'t> { +impl<'t> Iterator for RefTokenTreeCursor<'t> { type Item = &'t TokenTree; fn next(&mut self) -> Option<&'t TokenTree> { @@ -579,15 +581,16 @@ impl<'t> Iterator for CursorRef<'t> { } } -/// Owning by-value iterator over a [`TokenStream`]. +/// Owning by-value iterator over a [`TokenStream`], that produces `TokenTree` +/// items. // FIXME: Many uses of this can be replaced with by-reference iterator to avoid clones. #[derive(Clone)] -pub struct Cursor { +pub struct TokenTreeCursor { pub stream: TokenStream, index: usize, } -impl Iterator for Cursor { +impl Iterator for TokenTreeCursor { type Item = TokenTree; fn next(&mut self) -> Option { @@ -598,9 +601,9 @@ impl Iterator for Cursor { } } -impl Cursor { +impl TokenTreeCursor { fn new(stream: TokenStream) -> Self { - Cursor { stream, index: 0 } + TokenTreeCursor { stream, index: 0 } } #[inline] @@ -614,6 +617,15 @@ impl Cursor { pub fn look_ahead(&self, n: usize) -> Option<&TokenTree> { self.stream.0.get(self.index + n) } + + // Replace the previously obtained token tree with `tts`, and rewind to + // just before them. + pub fn replace_prev_and_rewind(&mut self, tts: Vec) { + assert!(self.index > 0); + self.index -= 1; + let stream = Lrc::make_mut(&mut self.stream.0); + stream.splice(self.index..self.index + 1, tts); + } } #[derive(Debug, Copy, Clone, PartialEq, Encodable, Decodable, HashStable_Generic)] diff --git a/compiler/rustc_expand/src/mbe/metavar_expr.rs b/compiler/rustc_expand/src/mbe/metavar_expr.rs index 99fe474541e9d..de34df0114a74 100644 --- a/compiler/rustc_expand/src/mbe/metavar_expr.rs +++ b/compiler/rustc_expand/src/mbe/metavar_expr.rs @@ -1,5 +1,5 @@ use rustc_ast::token::{self, Delimiter}; -use rustc_ast::tokenstream::{CursorRef, TokenStream, TokenTree}; +use rustc_ast::tokenstream::{RefTokenTreeCursor, TokenStream, TokenTree}; use rustc_ast::{LitIntType, LitKind}; use rustc_ast_pretty::pprust; use rustc_errors::{Applicability, PResult}; @@ -72,7 +72,7 @@ impl MetaVarExpr { // Checks if there are any remaining tokens. For example, `${ignore(ident ... a b c ...)}` fn check_trailing_token<'sess>( - iter: &mut CursorRef<'_>, + iter: &mut RefTokenTreeCursor<'_>, sess: &'sess ParseSess, ) -> PResult<'sess, ()> { if let Some(tt) = iter.next() { @@ -88,7 +88,7 @@ fn check_trailing_token<'sess>( /// Parse a meta-variable `count` expression: `count(ident[, depth])` fn parse_count<'sess>( - iter: &mut CursorRef<'_>, + iter: &mut RefTokenTreeCursor<'_>, sess: &'sess ParseSess, span: Span, ) -> PResult<'sess, MetaVarExpr> { @@ -99,7 +99,7 @@ fn parse_count<'sess>( /// Parses the depth used by index(depth) and length(depth). fn parse_depth<'sess>( - iter: &mut CursorRef<'_>, + iter: &mut RefTokenTreeCursor<'_>, sess: &'sess ParseSess, span: Span, ) -> PResult<'sess, usize> { @@ -126,7 +126,7 @@ fn parse_depth<'sess>( /// Parses an generic ident fn parse_ident<'sess>( - iter: &mut CursorRef<'_>, + iter: &mut RefTokenTreeCursor<'_>, sess: &'sess ParseSess, span: Span, ) -> PResult<'sess, Ident> { @@ -152,7 +152,7 @@ fn parse_ident<'sess>( /// Tries to move the iterator forward returning `true` if there is a comma. If not, then the /// iterator is not modified and the result is `false`. -fn try_eat_comma(iter: &mut CursorRef<'_>) -> bool { +fn try_eat_comma(iter: &mut RefTokenTreeCursor<'_>) -> bool { if let Some(TokenTree::Token(token::Token { kind: token::Comma, .. }, _)) = iter.look_ahead(0) { let _ = iter.next(); return true; diff --git a/compiler/rustc_parse/src/parser/attr_wrapper.rs b/compiler/rustc_parse/src/parser/attr_wrapper.rs index b97f22417cb7b..dbd3b76786f42 100644 --- a/compiler/rustc_parse/src/parser/attr_wrapper.rs +++ b/compiler/rustc_parse/src/parser/attr_wrapper.rs @@ -469,6 +469,6 @@ mod size_asserts { use rustc_data_structures::static_assert_size; // tidy-alphabetical-start static_assert_size!(AttrWrapper, 16); - static_assert_size!(LazyAttrTokenStreamImpl, 144); + static_assert_size!(LazyAttrTokenStreamImpl, 120); // tidy-alphabetical-end } diff --git a/compiler/rustc_parse/src/parser/expr.rs b/compiler/rustc_parse/src/parser/expr.rs index b7a023868fced..68849f817aa5a 100644 --- a/compiler/rustc_parse/src/parser/expr.rs +++ b/compiler/rustc_parse/src/parser/expr.rs @@ -2141,7 +2141,7 @@ impl<'a> Parser<'a> { } if self.token.kind == TokenKind::Semi - && matches!(self.token_cursor.frame.delim_sp, Some((Delimiter::Parenthesis, _))) + && matches!(self.token_cursor.stack.last(), Some((_, Delimiter::Parenthesis, _))) && self.may_recover() { // It is likely that the closure body is a block but where the diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs index ffb23b50a160d..2ea55f838a37e 100644 --- a/compiler/rustc_parse/src/parser/mod.rs +++ b/compiler/rustc_parse/src/parser/mod.rs @@ -19,9 +19,8 @@ pub use path::PathStyle; use rustc_ast::ptr::P; use rustc_ast::token::{self, Delimiter, Nonterminal, Token, TokenKind}; -use rustc_ast::tokenstream::AttributesData; -use rustc_ast::tokenstream::{self, DelimSpan, Spacing}; -use rustc_ast::tokenstream::{TokenStream, TokenTree}; +use rustc_ast::tokenstream::{AttributesData, DelimSpan, Spacing}; +use rustc_ast::tokenstream::{TokenStream, TokenTree, TokenTreeCursor}; use rustc_ast::util::case::Case; use rustc_ast::AttrId; use rustc_ast::DUMMY_NODE_ID; @@ -168,7 +167,7 @@ pub struct Parser<'a> { // This type is used a lot, e.g. it's cloned when matching many declarative macro rules with nonterminals. Make sure // it doesn't unintentionally get bigger. #[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))] -rustc_data_structures::static_assert_size!(Parser<'_>, 336); +rustc_data_structures::static_assert_size!(Parser<'_>, 312); /// Stores span information about a closure. #[derive(Clone)] @@ -221,18 +220,27 @@ impl<'a> Drop for Parser<'a> { } } +/// Iterator over a `TokenStream` that produces `Token`s. It's a bit odd that +/// we (a) lex tokens into a nice tree structure (`TokenStream`), and then (b) +/// use this type to emit them as a linear sequence. But a linear sequence is +/// what the parser expects, for the most part. #[derive(Clone)] struct TokenCursor { - // The current (innermost) frame. `frame` and `stack` could be combined, - // but it's faster to have them separately to access `frame` directly - // rather than via something like `stack.last().unwrap()` or - // `stack[stack.len() - 1]`. - frame: TokenCursorFrame, - // Additional frames that enclose `frame`. - stack: Vec, + // Cursor for the current (innermost) token stream. The delimiters for this + // token stream are found in `self.stack.last()`; when that is `None` then + // we are in the outermost token stream which never has delimiters. + tree_cursor: TokenTreeCursor, + + // Token streams surrounding the current one. The delimiters for stack[n]'s + // tokens are in `stack[n-1]`. `stack[0]` (when present) has no delimiters + // because it's the outermost token stream which never has delimiters. + stack: Vec<(TokenTreeCursor, Delimiter, DelimSpan)>, + desugar_doc_comments: bool, + // Counts the number of calls to `{,inlined_}next`. num_next_calls: usize, + // During parsing, we may sometimes need to 'unglue' a // glued token into two component tokens // (e.g. '>>' into '>' and '>), so that the parser @@ -257,18 +265,6 @@ struct TokenCursor { break_last_token: bool, } -#[derive(Clone)] -struct TokenCursorFrame { - delim_sp: Option<(Delimiter, DelimSpan)>, - tree_cursor: tokenstream::Cursor, -} - -impl TokenCursorFrame { - fn new(delim_sp: Option<(Delimiter, DelimSpan)>, tts: TokenStream) -> Self { - TokenCursorFrame { delim_sp, tree_cursor: tts.into_trees() } - } -} - impl TokenCursor { fn next(&mut self, desugar_doc_comments: bool) -> (Token, Spacing) { self.inlined_next(desugar_doc_comments) @@ -281,38 +277,47 @@ impl TokenCursor { // FIXME: we currently don't return `Delimiter` open/close delims. To fix #67062 we will // need to, whereupon the `delim != Delimiter::Invisible` conditions below can be // removed. - if let Some(tree) = self.frame.tree_cursor.next_ref() { + if let Some(tree) = self.tree_cursor.next_ref() { match tree { &TokenTree::Token(ref token, spacing) => match (desugar_doc_comments, token) { (true, &Token { kind: token::DocComment(_, attr_style, data), span }) => { - return self.desugar(attr_style, data, span); + let desugared = self.desugar(attr_style, data, span); + self.tree_cursor.replace_prev_and_rewind(desugared); + // Continue to get the first token of the desugared doc comment. + } + _ => { + debug_assert!(!matches!( + token.kind, + token::OpenDelim(_) | token::CloseDelim(_) + )); + return (token.clone(), spacing); } - _ => return (token.clone(), spacing), }, &TokenTree::Delimited(sp, delim, ref tts) => { - // Set `open_delim` to true here because we deal with it immediately. - let frame = TokenCursorFrame::new(Some((delim, sp)), tts.clone()); - self.stack.push(mem::replace(&mut self.frame, frame)); + let trees = tts.clone().into_trees(); + self.stack.push((mem::replace(&mut self.tree_cursor, trees), delim, sp)); if delim != Delimiter::Invisible { return (Token::new(token::OpenDelim(delim), sp.open), Spacing::Alone); } // No open delimiter to return; continue on to the next iteration. } }; - } else if let Some(frame) = self.stack.pop() { - if let Some((delim, span)) = self.frame.delim_sp && delim != Delimiter::Invisible { - self.frame = frame; + } else if let Some((tree_cursor, delim, span)) = self.stack.pop() { + // We have exhausted this token stream. Move back to its parent token stream. + self.tree_cursor = tree_cursor; + if delim != Delimiter::Invisible { return (Token::new(token::CloseDelim(delim), span.close), Spacing::Alone); } - self.frame = frame; // No close delimiter to return; continue on to the next iteration. } else { + // We have exhausted the outermost token stream. return (Token::new(token::Eof, DUMMY_SP), Spacing::Alone); } } } - fn desugar(&mut self, attr_style: AttrStyle, data: Symbol, span: Span) -> (Token, Spacing) { + // Desugar a doc comment into something like `#[doc = r"foo"]`. + fn desugar(&mut self, attr_style: AttrStyle, data: Symbol, span: Span) -> Vec { // Searches for the occurrences of `"#*` and returns the minimum number of `#`s // required to wrap the text. E.g. // - `abc d` is wrapped as `r"abc d"` (num_of_hashes = 0) @@ -346,27 +351,15 @@ impl TokenCursor { .collect::(), ); - self.stack.push(mem::replace( - &mut self.frame, - TokenCursorFrame::new( - None, - if attr_style == AttrStyle::Inner { - [ - TokenTree::token_alone(token::Pound, span), - TokenTree::token_alone(token::Not, span), - body, - ] - .into_iter() - .collect::() - } else { - [TokenTree::token_alone(token::Pound, span), body] - .into_iter() - .collect::() - }, - ), - )); - - self.next(/* desugar_doc_comments */ false) + if attr_style == AttrStyle::Inner { + vec![ + TokenTree::token_alone(token::Pound, span), + TokenTree::token_alone(token::Not, span), + body, + ] + } else { + vec![TokenTree::token_alone(token::Pound, span), body] + } } } @@ -475,7 +468,7 @@ impl<'a> Parser<'a> { restrictions: Restrictions::empty(), expected_tokens: Vec::new(), token_cursor: TokenCursor { - frame: TokenCursorFrame::new(None, tokens), + tree_cursor: tokens.into_trees(), stack: Vec::new(), num_next_calls: 0, desugar_doc_comments, @@ -1142,14 +1135,16 @@ impl<'a> Parser<'a> { return looker(&self.token); } - let frame = &self.token_cursor.frame; - if let Some((delim, span)) = frame.delim_sp && delim != Delimiter::Invisible { + let tree_cursor = &self.token_cursor.tree_cursor; + if let Some(&(_, delim, span)) = self.token_cursor.stack.last() + && delim != Delimiter::Invisible + { let all_normal = (0..dist).all(|i| { - let token = frame.tree_cursor.look_ahead(i); + let token = tree_cursor.look_ahead(i); !matches!(token, Some(TokenTree::Delimited(_, Delimiter::Invisible, _))) }); if all_normal { - return match frame.tree_cursor.look_ahead(dist - 1) { + return match tree_cursor.look_ahead(dist - 1) { Some(tree) => match tree { TokenTree::Token(token, _) => looker(token), TokenTree::Delimited(dspan, delim, _) => { @@ -1310,10 +1305,10 @@ impl<'a> Parser<'a> { pub(crate) fn parse_token_tree(&mut self) -> TokenTree { match self.token.kind { token::OpenDelim(..) => { - // Grab the tokens from this frame. - let frame = &self.token_cursor.frame; - let stream = frame.tree_cursor.stream.clone(); - let (delim, span) = frame.delim_sp.unwrap(); + // Grab the tokens within the delimiters. + let tree_cursor = &self.token_cursor.tree_cursor; + let stream = tree_cursor.stream.clone(); + let (_, delim, span) = *self.token_cursor.stack.last().unwrap(); // Advance the token cursor through the entire delimited // sequence. After getting the `OpenDelim` we are *within* the diff --git a/src/tools/rustfmt/src/macros.rs b/src/tools/rustfmt/src/macros.rs index d58f7547fefb3..7978d8cba9541 100644 --- a/src/tools/rustfmt/src/macros.rs +++ b/src/tools/rustfmt/src/macros.rs @@ -13,7 +13,7 @@ use std::collections::HashMap; use std::panic::{catch_unwind, AssertUnwindSafe}; use rustc_ast::token::{BinOpToken, Delimiter, Token, TokenKind}; -use rustc_ast::tokenstream::{Cursor, TokenStream, TokenTree}; +use rustc_ast::tokenstream::{TokenStream, TokenTree, TokenTreeCursor}; use rustc_ast::{ast, ptr}; use rustc_ast_pretty::pprust; use rustc_span::{ @@ -736,7 +736,7 @@ impl MacroArgParser { self.buf.clear(); } - fn add_meta_variable(&mut self, iter: &mut Cursor) -> Option<()> { + fn add_meta_variable(&mut self, iter: &mut TokenTreeCursor) -> Option<()> { match iter.next() { Some(TokenTree::Token( Token { @@ -768,7 +768,7 @@ impl MacroArgParser { &mut self, inner: Vec, delim: Delimiter, - iter: &mut Cursor, + iter: &mut TokenTreeCursor, ) -> Option<()> { let mut buffer = String::new(); let mut first = true; @@ -1121,7 +1121,7 @@ pub(crate) fn macro_style(mac: &ast::MacCall, context: &RewriteContext<'_>) -> D // Currently we do not attempt to parse any further than that. #[derive(new)] struct MacroParser { - toks: Cursor, + toks: TokenTreeCursor, } impl MacroParser {