Skip to content

Improve TokenCursor. #107544

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Feb 3, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 27 additions & 15 deletions compiler/rustc_ast/src/tokenstream.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,8 @@ use std::{fmt, iter};
/// Nothing special happens to misnamed or misplaced `SubstNt`s.
#[derive(Debug, Clone, PartialEq, Encodable, Decodable, HashStable_Generic)]
pub enum TokenTree {
/// A single token.
/// A single token. Should never be `OpenDelim` or `CloseDelim`, because
/// delimiters are implicitly represented by `Delimited`.
Token(Token, Spacing),
/// A delimited sequence of token trees.
Delimited(DelimSpan, Delimiter, TokenStream),
Expand Down Expand Up @@ -388,12 +389,12 @@ impl TokenStream {
self.0.len()
}

pub fn trees(&self) -> CursorRef<'_> {
CursorRef::new(self)
pub fn trees(&self) -> RefTokenTreeCursor<'_> {
RefTokenTreeCursor::new(self)
}

pub fn into_trees(self) -> Cursor {
Cursor::new(self)
pub fn into_trees(self) -> TokenTreeCursor {
TokenTreeCursor::new(self)
}

/// Compares two `TokenStream`s, checking equality without regarding span information.
Expand Down Expand Up @@ -551,24 +552,25 @@ impl TokenStream {
}
}

/// By-reference iterator over a [`TokenStream`].
/// By-reference iterator over a [`TokenStream`], that produces `&TokenTree`
/// items.
#[derive(Clone)]
pub struct CursorRef<'t> {
pub struct RefTokenTreeCursor<'t> {
stream: &'t TokenStream,
index: usize,
}

impl<'t> CursorRef<'t> {
impl<'t> RefTokenTreeCursor<'t> {
fn new(stream: &'t TokenStream) -> Self {
CursorRef { stream, index: 0 }
RefTokenTreeCursor { stream, index: 0 }
}

/// Peeks at the token tree `n` positions past the cursor's current index
/// without moving the cursor. Returns `None` when that position lies
/// beyond the end of the underlying stream.
pub fn look_ahead(&self, n: usize) -> Option<&TokenTree> {
    let target = self.index + n;
    self.stream.0.get(target)
}
}

impl<'t> Iterator for CursorRef<'t> {
impl<'t> Iterator for RefTokenTreeCursor<'t> {
type Item = &'t TokenTree;

fn next(&mut self) -> Option<&'t TokenTree> {
Expand All @@ -579,15 +581,16 @@ impl<'t> Iterator for CursorRef<'t> {
}
}

/// Owning by-value iterator over a [`TokenStream`].
/// Owning by-value iterator over a [`TokenStream`], that produces `TokenTree`
/// items.
// FIXME: Many uses of this can be replaced with by-reference iterator to avoid clones.
#[derive(Clone)]
pub struct Cursor {
pub struct TokenTreeCursor {
pub stream: TokenStream,
index: usize,
}

impl Iterator for Cursor {
impl Iterator for TokenTreeCursor {
type Item = TokenTree;

fn next(&mut self) -> Option<TokenTree> {
Expand All @@ -598,9 +601,9 @@ impl Iterator for Cursor {
}
}

impl Cursor {
impl TokenTreeCursor {
fn new(stream: TokenStream) -> Self {
Cursor { stream, index: 0 }
TokenTreeCursor { stream, index: 0 }
}

#[inline]
Expand All @@ -614,6 +617,15 @@ impl Cursor {
/// Peeks at the token tree `n` positions past the cursor's current index
/// without moving the cursor. Returns `None` when that position lies
/// beyond the end of the owned stream.
pub fn look_ahead(&self, n: usize) -> Option<&TokenTree> {
    let target = self.index + n;
    self.stream.0.get(target)
}

/// Swaps the token tree just before the cursor's current index for the
/// trees in `tts`, then moves the cursor back so that it sits at the start
/// of the inserted trees.
///
/// # Panics
///
/// Panics if the cursor index is zero, i.e. there is no previous token
/// tree to replace.
pub fn replace_prev_and_rewind(&mut self, tts: Vec<TokenTree>) {
    assert!(self.index > 0);
    let prev = self.index - 1;
    // Exactly one element (the one at `prev`) is removed; `tts` takes its place.
    Lrc::make_mut(&mut self.stream.0).splice(prev..self.index, tts);
    self.index = prev;
}
}

#[derive(Debug, Copy, Clone, PartialEq, Encodable, Decodable, HashStable_Generic)]
Expand Down
12 changes: 6 additions & 6 deletions compiler/rustc_expand/src/mbe/metavar_expr.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use rustc_ast::token::{self, Delimiter};
use rustc_ast::tokenstream::{CursorRef, TokenStream, TokenTree};
use rustc_ast::tokenstream::{RefTokenTreeCursor, TokenStream, TokenTree};
use rustc_ast::{LitIntType, LitKind};
use rustc_ast_pretty::pprust;
use rustc_errors::{Applicability, PResult};
Expand Down Expand Up @@ -72,7 +72,7 @@ impl MetaVarExpr {

// Checks if there are any remaining tokens. For example, `${ignore(ident ... a b c ...)}`
fn check_trailing_token<'sess>(
iter: &mut CursorRef<'_>,
iter: &mut RefTokenTreeCursor<'_>,
sess: &'sess ParseSess,
) -> PResult<'sess, ()> {
if let Some(tt) = iter.next() {
Expand All @@ -88,7 +88,7 @@ fn check_trailing_token<'sess>(

/// Parse a meta-variable `count` expression: `count(ident[, depth])`
fn parse_count<'sess>(
iter: &mut CursorRef<'_>,
iter: &mut RefTokenTreeCursor<'_>,
sess: &'sess ParseSess,
span: Span,
) -> PResult<'sess, MetaVarExpr> {
Expand All @@ -99,7 +99,7 @@ fn parse_count<'sess>(

/// Parses the depth used by index(depth) and length(depth).
fn parse_depth<'sess>(
iter: &mut CursorRef<'_>,
iter: &mut RefTokenTreeCursor<'_>,
sess: &'sess ParseSess,
span: Span,
) -> PResult<'sess, usize> {
Expand All @@ -126,7 +126,7 @@ fn parse_depth<'sess>(

/// Parses a generic ident
fn parse_ident<'sess>(
iter: &mut CursorRef<'_>,
iter: &mut RefTokenTreeCursor<'_>,
sess: &'sess ParseSess,
span: Span,
) -> PResult<'sess, Ident> {
Expand All @@ -152,7 +152,7 @@ fn parse_ident<'sess>(

/// Tries to move the iterator forward returning `true` if there is a comma. If not, then the
/// iterator is not modified and the result is `false`.
fn try_eat_comma(iter: &mut CursorRef<'_>) -> bool {
fn try_eat_comma(iter: &mut RefTokenTreeCursor<'_>) -> bool {
if let Some(TokenTree::Token(token::Token { kind: token::Comma, .. }, _)) = iter.look_ahead(0) {
let _ = iter.next();
return true;
Expand Down
2 changes: 1 addition & 1 deletion compiler/rustc_parse/src/parser/attr_wrapper.rs
Original file line number Diff line number Diff line change
Expand Up @@ -469,6 +469,6 @@ mod size_asserts {
use rustc_data_structures::static_assert_size;
// tidy-alphabetical-start
static_assert_size!(AttrWrapper, 16);
static_assert_size!(LazyAttrTokenStreamImpl, 144);
static_assert_size!(LazyAttrTokenStreamImpl, 120);
// tidy-alphabetical-end
}
2 changes: 1 addition & 1 deletion compiler/rustc_parse/src/parser/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2141,7 +2141,7 @@ impl<'a> Parser<'a> {
}

if self.token.kind == TokenKind::Semi
&& matches!(self.token_cursor.frame.delim_sp, Some((Delimiter::Parenthesis, _)))
&& matches!(self.token_cursor.stack.last(), Some((_, Delimiter::Parenthesis, _)))
&& self.may_recover()
{
// It is likely that the closure body is a block but where the
Expand Down
123 changes: 59 additions & 64 deletions compiler/rustc_parse/src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,8 @@ pub use path::PathStyle;

use rustc_ast::ptr::P;
use rustc_ast::token::{self, Delimiter, Nonterminal, Token, TokenKind};
use rustc_ast::tokenstream::AttributesData;
use rustc_ast::tokenstream::{self, DelimSpan, Spacing};
use rustc_ast::tokenstream::{TokenStream, TokenTree};
use rustc_ast::tokenstream::{AttributesData, DelimSpan, Spacing};
use rustc_ast::tokenstream::{TokenStream, TokenTree, TokenTreeCursor};
use rustc_ast::util::case::Case;
use rustc_ast::AttrId;
use rustc_ast::DUMMY_NODE_ID;
Expand Down Expand Up @@ -168,7 +167,7 @@ pub struct Parser<'a> {
// This type is used a lot, e.g. it's cloned when matching many declarative macro rules with nonterminals. Make sure
// it doesn't unintentionally get bigger.
#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
rustc_data_structures::static_assert_size!(Parser<'_>, 336);
rustc_data_structures::static_assert_size!(Parser<'_>, 312);

/// Stores span information about a closure.
#[derive(Clone)]
Expand Down Expand Up @@ -221,18 +220,27 @@ impl<'a> Drop for Parser<'a> {
}
}

/// Iterator over a `TokenStream` that produces `Token`s. It's a bit odd that
/// we (a) lex tokens into a nice tree structure (`TokenStream`), and then (b)
/// use this type to emit them as a linear sequence. But a linear sequence is
/// what the parser expects, for the most part.
#[derive(Clone)]
struct TokenCursor {
// The current (innermost) frame. `frame` and `stack` could be combined,
// but it's faster to have them separately to access `frame` directly
// rather than via something like `stack.last().unwrap()` or
// `stack[stack.len() - 1]`.
frame: TokenCursorFrame,
// Additional frames that enclose `frame`.
stack: Vec<TokenCursorFrame>,
// Cursor for the current (innermost) token stream. The delimiters for this
// token stream are found in `self.stack.last()`; when that is `None` then
// we are in the outermost token stream which never has delimiters.
tree_cursor: TokenTreeCursor,

// Token streams surrounding the current one. The delimiters for stack[n]'s
// tokens are in `stack[n-1]`. `stack[0]` (when present) has no delimiters
// because it's the outermost token stream which never has delimiters.
stack: Vec<(TokenTreeCursor, Delimiter, DelimSpan)>,

desugar_doc_comments: bool,

// Counts the number of calls to `{,inlined_}next`.
num_next_calls: usize,

// During parsing, we may sometimes need to 'unglue' a
// glued token into two component tokens
// (e.g. '>>' into '>' and '>'), so that the parser
Expand All @@ -257,18 +265,6 @@ struct TokenCursor {
break_last_token: bool,
}

#[derive(Clone)]
struct TokenCursorFrame {
delim_sp: Option<(Delimiter, DelimSpan)>,
tree_cursor: tokenstream::Cursor,
}

impl TokenCursorFrame {
fn new(delim_sp: Option<(Delimiter, DelimSpan)>, tts: TokenStream) -> Self {
TokenCursorFrame { delim_sp, tree_cursor: tts.into_trees() }
}
}

impl TokenCursor {
fn next(&mut self, desugar_doc_comments: bool) -> (Token, Spacing) {
self.inlined_next(desugar_doc_comments)
Expand All @@ -281,38 +277,47 @@ impl TokenCursor {
// FIXME: we currently don't return `Delimiter` open/close delims. To fix #67062 we will
// need to, whereupon the `delim != Delimiter::Invisible` conditions below can be
// removed.
if let Some(tree) = self.frame.tree_cursor.next_ref() {
if let Some(tree) = self.tree_cursor.next_ref() {
match tree {
&TokenTree::Token(ref token, spacing) => match (desugar_doc_comments, token) {
(true, &Token { kind: token::DocComment(_, attr_style, data), span }) => {
return self.desugar(attr_style, data, span);
let desugared = self.desugar(attr_style, data, span);
self.tree_cursor.replace_prev_and_rewind(desugared);
// Continue to get the first token of the desugared doc comment.
}
_ => {
debug_assert!(!matches!(
token.kind,
token::OpenDelim(_) | token::CloseDelim(_)
));
return (token.clone(), spacing);
}
_ => return (token.clone(), spacing),
},
&TokenTree::Delimited(sp, delim, ref tts) => {
// Set `open_delim` to true here because we deal with it immediately.
let frame = TokenCursorFrame::new(Some((delim, sp)), tts.clone());
self.stack.push(mem::replace(&mut self.frame, frame));
let trees = tts.clone().into_trees();
self.stack.push((mem::replace(&mut self.tree_cursor, trees), delim, sp));
if delim != Delimiter::Invisible {
return (Token::new(token::OpenDelim(delim), sp.open), Spacing::Alone);
}
// No open delimiter to return; continue on to the next iteration.
}
};
} else if let Some(frame) = self.stack.pop() {
if let Some((delim, span)) = self.frame.delim_sp && delim != Delimiter::Invisible {
self.frame = frame;
} else if let Some((tree_cursor, delim, span)) = self.stack.pop() {
// We have exhausted this token stream. Move back to its parent token stream.
self.tree_cursor = tree_cursor;
if delim != Delimiter::Invisible {
return (Token::new(token::CloseDelim(delim), span.close), Spacing::Alone);
}
self.frame = frame;
// No close delimiter to return; continue on to the next iteration.
} else {
// We have exhausted the outermost token stream.
return (Token::new(token::Eof, DUMMY_SP), Spacing::Alone);
}
}
}

fn desugar(&mut self, attr_style: AttrStyle, data: Symbol, span: Span) -> (Token, Spacing) {
// Desugar a doc comment into something like `#[doc = r"foo"]`.
fn desugar(&mut self, attr_style: AttrStyle, data: Symbol, span: Span) -> Vec<TokenTree> {
// Searches for the occurrences of `"#*` and returns the minimum number of `#`s
// required to wrap the text. E.g.
// - `abc d` is wrapped as `r"abc d"` (num_of_hashes = 0)
Expand Down Expand Up @@ -346,27 +351,15 @@ impl TokenCursor {
.collect::<TokenStream>(),
);

self.stack.push(mem::replace(
&mut self.frame,
TokenCursorFrame::new(
None,
if attr_style == AttrStyle::Inner {
[
TokenTree::token_alone(token::Pound, span),
TokenTree::token_alone(token::Not, span),
body,
]
.into_iter()
.collect::<TokenStream>()
} else {
[TokenTree::token_alone(token::Pound, span), body]
.into_iter()
.collect::<TokenStream>()
},
),
));

self.next(/* desugar_doc_comments */ false)
if attr_style == AttrStyle::Inner {
vec![
TokenTree::token_alone(token::Pound, span),
TokenTree::token_alone(token::Not, span),
body,
]
} else {
vec![TokenTree::token_alone(token::Pound, span), body]
}
}
}

Expand Down Expand Up @@ -475,7 +468,7 @@ impl<'a> Parser<'a> {
restrictions: Restrictions::empty(),
expected_tokens: Vec::new(),
token_cursor: TokenCursor {
frame: TokenCursorFrame::new(None, tokens),
tree_cursor: tokens.into_trees(),
stack: Vec::new(),
num_next_calls: 0,
desugar_doc_comments,
Expand Down Expand Up @@ -1142,14 +1135,16 @@ impl<'a> Parser<'a> {
return looker(&self.token);
}

let frame = &self.token_cursor.frame;
if let Some((delim, span)) = frame.delim_sp && delim != Delimiter::Invisible {
let tree_cursor = &self.token_cursor.tree_cursor;
if let Some(&(_, delim, span)) = self.token_cursor.stack.last()
&& delim != Delimiter::Invisible
{
let all_normal = (0..dist).all(|i| {
let token = frame.tree_cursor.look_ahead(i);
let token = tree_cursor.look_ahead(i);
!matches!(token, Some(TokenTree::Delimited(_, Delimiter::Invisible, _)))
});
if all_normal {
return match frame.tree_cursor.look_ahead(dist - 1) {
return match tree_cursor.look_ahead(dist - 1) {
Some(tree) => match tree {
TokenTree::Token(token, _) => looker(token),
TokenTree::Delimited(dspan, delim, _) => {
Expand Down Expand Up @@ -1310,10 +1305,10 @@ impl<'a> Parser<'a> {
pub(crate) fn parse_token_tree(&mut self) -> TokenTree {
match self.token.kind {
token::OpenDelim(..) => {
// Grab the tokens from this frame.
let frame = &self.token_cursor.frame;
let stream = frame.tree_cursor.stream.clone();
let (delim, span) = frame.delim_sp.unwrap();
// Grab the tokens within the delimiters.
let tree_cursor = &self.token_cursor.tree_cursor;
let stream = tree_cursor.stream.clone();
let (_, delim, span) = *self.token_cursor.stack.last().unwrap();

// Advance the token cursor through the entire delimited
// sequence. After getting the `OpenDelim` we are *within* the
Expand Down
Loading