Skip to content

parse_tt: a few more tweaks #95794

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Apr 12, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 11 additions & 27 deletions compiler/rustc_expand/src/mbe/macro_parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -81,22 +81,12 @@ use rustc_session::parse::ParseSess;
use rustc_span::symbol::MacroRulesNormalizedIdent;
use rustc_span::Span;

use smallvec::{smallvec, SmallVec};

use rustc_data_structures::fx::FxHashMap;
use rustc_data_structures::sync::Lrc;
use rustc_span::symbol::Ident;
use std::borrow::Cow;
use std::collections::hash_map::Entry::{Occupied, Vacant};

// One element is enough to cover 95-99% of vectors for most benchmarks. Also, vectors longer than
// one frequently have many elements, not just two or three.
type NamedMatchVec = SmallVec<[NamedMatch; 1]>;

// This type is used a lot. Make sure it doesn't unintentionally get bigger.
#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
rustc_data_structures::static_assert_size!(NamedMatchVec, 48);

/// A unit within a matcher that a `MatcherPos` can refer to. Similar to (and derived from)
/// `mbe::TokenTree`, but designed specifically for fast and easy traversal during matching.
/// Notable differences to `mbe::TokenTree`:
Expand Down Expand Up @@ -221,7 +211,11 @@ struct MatcherPos {
/// with one element per metavar decl in the matcher. Each element records token trees matched
/// against the relevant metavar by the black box parser. An element will be a `MatchedSeq` if
/// the corresponding metavar decl is within a sequence.
matches: Lrc<NamedMatchVec>,
///
/// It is critical to performance that this is an `Lrc`, because it gets cloned frequently when
/// processing sequences. Mostly for sequence-ending possibilities that must be tried but end
/// up failing.
matches: Lrc<Vec<NamedMatch>>,
}

// This type is used a lot. Make sure it doesn't unintentionally get bigger.
Expand All @@ -246,18 +240,12 @@ impl MatcherPos {
let mut curr = &mut matches[metavar_idx];
for _ in 0..seq_depth - 1 {
match curr {
MatchedSeq(seq) => {
let seq = Lrc::make_mut(seq);
curr = seq.last_mut().unwrap();
}
MatchedSeq(seq) => curr = seq.last_mut().unwrap(),
_ => unreachable!(),
}
}
match curr {
MatchedSeq(seq) => {
let seq = Lrc::make_mut(seq);
seq.push(m);
}
MatchedSeq(seq) => seq.push(m),
_ => unreachable!(),
}
}
Expand Down Expand Up @@ -350,7 +338,7 @@ pub(super) fn count_metavar_decls(matcher: &[TokenTree]) -> usize {
/// ```
#[derive(Debug, Clone)]
crate enum NamedMatch {
MatchedSeq(Lrc<NamedMatchVec>),
MatchedSeq(Vec<NamedMatch>),

// A metavar match of type `tt`.
MatchedTokenTree(rustc_ast::tokenstream::TokenTree),
Expand Down Expand Up @@ -388,7 +376,7 @@ pub struct TtParser {

/// Pre-allocate an empty match array, so it can be cloned cheaply for macros with many rules
/// that have no metavars.
empty_matches: Lrc<NamedMatchVec>,
empty_matches: Lrc<Vec<NamedMatch>>,
}

impl TtParser {
Expand All @@ -398,7 +386,7 @@ impl TtParser {
cur_mps: vec![],
next_mps: vec![],
bb_mps: vec![],
empty_matches: Lrc::new(smallvec![]),
empty_matches: Lrc::new(vec![]),
}
}

Expand Down Expand Up @@ -452,11 +440,7 @@ impl TtParser {
} => {
// Install an empty vec for each metavar within the sequence.
for metavar_idx in next_metavar..next_metavar + num_metavar_decls {
mp.push_match(
metavar_idx,
seq_depth,
MatchedSeq(self.empty_matches.clone()),
);
mp.push_match(metavar_idx, seq_depth, MatchedSeq(vec![]));
}

if op == KleeneOp::ZeroOrMore || op == KleeneOp::ZeroOrOne {
Expand Down
3 changes: 2 additions & 1 deletion compiler/rustc_expand/src/mbe/macro_rules.rs
Original file line number Diff line number Diff line change
Expand Up @@ -439,7 +439,8 @@ pub fn compile_declarative_macro(
let argument_gram = mbe::macro_parser::compute_locs(&sess.parse_sess, &argument_gram);

let parser = Parser::new(&sess.parse_sess, body, true, rustc_parse::MACRO_ARGUMENTS);
let mut tt_parser = TtParser::new(def.ident);
let mut tt_parser =
TtParser::new(Ident::with_dummy_span(if macro_rules { kw::MacroRules } else { kw::Macro }));
Copy link
Contributor

@petrochenkov petrochenkov Apr 8, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is reasonable while we are still matching the macro definition against `$( $lhs:tt => $rhs:tt );+`, reusing the `macro_rules` machinery.

However, we really should stop pretending that `macro_rules! foo { ... }` or `macro foo (...) { ... }` are macro invocations.
This involves removing all the comments saying that, introducing a custom parser for the `$( $lhs:tt => $rhs:tt );+` syntax (and its macro 2.0 equivalent), and storing the output of that parser (the lowered macro representation) in crate metadata, so that macro definitions are not reparsed as raw token streams by every dependent crate.

let argument_map = match tt_parser.parse_tt(&mut Cow::Borrowed(&parser), &argument_gram) {
Success(m) => m,
Failure(token, msg) => {
Expand Down