diff --git a/compiler/rustc_ast/src/attr/mod.rs b/compiler/rustc_ast/src/attr/mod.rs index 088ae9ba44102..d2c7b1c0753da 100644 --- a/compiler/rustc_ast/src/attr/mod.rs +++ b/compiler/rustc_ast/src/attr/mod.rs @@ -202,21 +202,18 @@ impl Attribute { } } - // Named `get_tokens` to distinguish it from the `::tokens` method. - pub fn get_tokens(&self) -> TokenStream { - match &self.kind { - AttrKind::Normal(normal) => TokenStream::new( - normal - .tokens - .as_ref() - .unwrap_or_else(|| panic!("attribute is missing tokens: {self:?}")) - .to_attr_token_stream() - .to_token_trees(), - ), - &AttrKind::DocComment(comment_kind, data) => TokenStream::token_alone( + pub fn token_trees(&self) -> Vec { + match self.kind { + AttrKind::Normal(ref normal) => normal + .tokens + .as_ref() + .unwrap_or_else(|| panic!("attribute is missing tokens: {self:?}")) + .to_attr_token_stream() + .to_token_trees(), + AttrKind::DocComment(comment_kind, data) => vec![TokenTree::token_alone( token::DocComment(comment_kind, self.style, data), self.span, - ), + )], } } } diff --git a/compiler/rustc_ast/src/tokenstream.rs b/compiler/rustc_ast/src/tokenstream.rs index ee068f19332a5..a92ef575777c7 100644 --- a/compiler/rustc_ast/src/tokenstream.rs +++ b/compiler/rustc_ast/src/tokenstream.rs @@ -16,7 +16,7 @@ use crate::ast::{AttrStyle, StmtKind}; use crate::ast_traits::{HasAttrs, HasTokens}; use crate::token::{self, Delimiter, Nonterminal, Token, TokenKind}; -use crate::AttrVec; +use crate::{AttrVec, Attribute}; use rustc_data_structures::stable_hasher::{HashStable, StableHasher}; use rustc_data_structures::sync::{self, Lrc}; @@ -179,11 +179,10 @@ impl AttrTokenStream { AttrTokenStream(Lrc::new(tokens)) } - /// Converts this `AttrTokenStream` to a plain `Vec`. - /// During conversion, `AttrTokenTree::AttrsTarget` get 'flattened' - /// back to a `TokenStream` of the form `outer_attr attr_target`. - /// If there are inner attributes, they are inserted into the proper - /// place in the attribute target tokens. + /// Converts this `AttrTokenStream` to a plain `Vec`. During + /// conversion, any `AttrTokenTree::AttrsTarget` gets "flattened" back to a + /// `TokenStream`, as described in the comment on + /// `attrs_and_tokens_to_token_trees`. pub fn to_token_trees(&self) -> Vec { let mut res = Vec::with_capacity(self.0.len()); for tree in self.0.iter() { @@ -200,51 +199,7 @@ impl AttrTokenStream { )) } AttrTokenTree::AttrsTarget(target) => { - let idx = target - .attrs - .partition_point(|attr| matches!(attr.style, crate::AttrStyle::Outer)); - let (outer_attrs, inner_attrs) = target.attrs.split_at(idx); - - let mut target_tokens = target.tokens.to_attr_token_stream().to_token_trees(); - if !inner_attrs.is_empty() { - let mut found = false; - // Check the last two trees (to account for a trailing semi) - for tree in target_tokens.iter_mut().rev().take(2) { - if let TokenTree::Delimited(span, spacing, delim, delim_tokens) = tree { - // Inner attributes are only supported on extern blocks, functions, - // impls, and modules. All of these have their inner attributes - // placed at the beginning of the rightmost outermost braced group: - // e.g. fn foo() { #![my_attr] } - // - // Therefore, we can insert them back into the right location - // without needing to do any extra position tracking. - // - // Note: Outline modules are an exception - they can - // have attributes like `#![my_attr]` at the start of a file. - // Support for custom attributes in this position is not - // properly implemented - we always synthesize fake tokens, - // so we never reach this code. - - let mut stream = TokenStream::default(); - for inner_attr in inner_attrs { - stream.push_stream(inner_attr.get_tokens()); - } - stream.push_stream(delim_tokens.clone()); - *tree = TokenTree::Delimited(*span, *spacing, *delim, stream); - found = true; - break; - } - } - - assert!( - found, - "Failed to find trailing delimited group in: {target_tokens:?}" - ); - } - for attr in outer_attrs { - res.extend(attr.get_tokens().0.iter().cloned()); - } - res.extend(target_tokens); + attrs_and_tokens_to_token_trees(&target.attrs, &target.tokens, &mut res); } } } @@ -252,15 +207,76 @@ impl AttrTokenStream { } } +// Converts multiple attributes and the tokens for a target AST node into token trees, and appends +// them to `res`. +// +// Example: if the AST node is "fn f() { blah(); }", then: +// - Simple if no attributes are present, e.g. "fn f() { blah(); }" +// - Simple if only outer attribute are present, e.g. "#[outer1] #[outer2] fn f() { blah(); }" +// - Trickier if inner attributes are present, because they must be moved within the AST node's +// tokens, e.g. "#[outer] fn f() { #![inner] blah() }" +fn attrs_and_tokens_to_token_trees( + attrs: &[Attribute], + target_tokens: &LazyAttrTokenStream, + res: &mut Vec, +) { + let idx = attrs.partition_point(|attr| matches!(attr.style, crate::AttrStyle::Outer)); + let (outer_attrs, inner_attrs) = attrs.split_at(idx); + + // Add outer attribute tokens. + for attr in outer_attrs { + res.extend(attr.token_trees()); + } + + // Add target AST node tokens. + res.extend(target_tokens.to_attr_token_stream().to_token_trees()); + + // Insert inner attribute tokens. + if !inner_attrs.is_empty() { + let mut found = false; + // Check the last two trees (to account for a trailing semi) + for tree in res.iter_mut().rev().take(2) { + if let TokenTree::Delimited(span, spacing, delim, delim_tokens) = tree { + // Inner attributes are only supported on extern blocks, functions, + // impls, and modules. All of these have their inner attributes + // placed at the beginning of the rightmost outermost braced group: + // e.g. fn foo() { #![my_attr] } + // + // Therefore, we can insert them back into the right location + // without needing to do any extra position tracking. + // + // Note: Outline modules are an exception - they can + // have attributes like `#![my_attr]` at the start of a file. + // Support for custom attributes in this position is not + // properly implemented - we always synthesize fake tokens, + // so we never reach this code. + let mut tts = vec![]; + for inner_attr in inner_attrs { + tts.extend(inner_attr.token_trees()); + } + tts.extend(delim_tokens.0.iter().cloned()); + let stream = TokenStream::new(tts); + *tree = TokenTree::Delimited(*span, *spacing, *delim, stream); + found = true; + break; + } + } + assert!(found, "Failed to find trailing delimited group in: {res:?}"); + } +} + /// Stores the tokens for an attribute target, along /// with its attributes. /// /// This is constructed during parsing when we need to capture -/// tokens. +/// tokens, for `cfg` and `cfg_attr` attributes. /// /// For example, `#[cfg(FALSE)] struct Foo {}` would /// have an `attrs` field containing the `#[cfg(FALSE)]` attr, /// and a `tokens` field storing the (unparsed) tokens `struct Foo {}` +/// +/// The `cfg`/`cfg_attr` processing occurs in +/// `StripUnconfigured::configure_tokens`. #[derive(Clone, Debug, Encodable, Decodable)] pub struct AttrsTarget { /// Attributes, both outer and inner. @@ -437,18 +453,10 @@ impl TokenStream { } pub fn from_ast(node: &(impl HasAttrs + HasTokens + fmt::Debug)) -> TokenStream { - let Some(tokens) = node.tokens() else { - panic!("missing tokens for node: {:?}", node); - }; - let attrs = node.attrs(); - let attr_stream = if attrs.is_empty() { - tokens.to_attr_token_stream() - } else { - let target = - AttrsTarget { attrs: attrs.iter().cloned().collect(), tokens: tokens.clone() }; - AttrTokenStream::new(vec![AttrTokenTree::AttrsTarget(target)]) - }; - TokenStream::new(attr_stream.to_token_trees()) + let tokens = node.tokens().unwrap_or_else(|| panic!("missing tokens for node: {:?}", node)); + let mut tts = vec![]; + attrs_and_tokens_to_token_trees(node.attrs(), tokens, &mut tts); + TokenStream::new(tts) } pub fn from_nonterminal_ast(nt: &Nonterminal) -> TokenStream { diff --git a/compiler/rustc_expand/src/config.rs b/compiler/rustc_expand/src/config.rs index 40e16b4511575..9da4aa84db525 100644 --- a/compiler/rustc_expand/src/config.rs +++ b/compiler/rustc_expand/src/config.rs @@ -187,6 +187,7 @@ impl<'a> StripUnconfigured<'a> { .iter() .filter_map(|tree| match tree.clone() { AttrTokenTree::AttrsTarget(mut target) => { + // Expand any `cfg_attr` attributes. target.attrs.flat_map_in_place(|attr| self.process_cfg_attr(&attr)); if self.in_cfg(&target.attrs) { @@ -195,6 +196,8 @@ impl<'a> StripUnconfigured<'a> { ); Some(AttrTokenTree::AttrsTarget(target)) } else { + // Remove the target if there's a `cfg` attribute and + // the condition isn't satisfied. None } } @@ -253,9 +256,9 @@ impl<'a> StripUnconfigured<'a> { /// Gives a compiler warning when the `cfg_attr` contains no attributes and /// is in the original source file. Gives a compiler error if the syntax of /// the attribute is incorrect. - pub(crate) fn expand_cfg_attr(&self, attr: &Attribute, recursive: bool) -> Vec { + pub(crate) fn expand_cfg_attr(&self, cfg_attr: &Attribute, recursive: bool) -> Vec { let Some((cfg_predicate, expanded_attrs)) = - rustc_parse::parse_cfg_attr(attr, &self.sess.psess) + rustc_parse::parse_cfg_attr(cfg_attr, &self.sess.psess) else { return vec![]; }; @@ -264,7 +267,7 @@ impl<'a> StripUnconfigured<'a> { if expanded_attrs.is_empty() { self.sess.psess.buffer_lint( rustc_lint_defs::builtin::UNUSED_ATTRIBUTES, - attr.span, + cfg_attr.span, ast::CRATE_NODE_ID, BuiltinLintDiag::CfgAttrNoAttributes, ); @@ -280,20 +283,21 @@ impl<'a> StripUnconfigured<'a> { // `#[cfg_attr(false, cfg_attr(true, some_attr))]`. expanded_attrs .into_iter() - .flat_map(|item| self.process_cfg_attr(&self.expand_cfg_attr_item(attr, item))) + .flat_map(|item| self.process_cfg_attr(&self.expand_cfg_attr_item(cfg_attr, item))) .collect() } else { - expanded_attrs.into_iter().map(|item| self.expand_cfg_attr_item(attr, item)).collect() + expanded_attrs + .into_iter() + .map(|item| self.expand_cfg_attr_item(cfg_attr, item)) + .collect() } } fn expand_cfg_attr_item( &self, - attr: &Attribute, + cfg_attr: &Attribute, (item, item_span): (ast::AttrItem, Span), ) -> Attribute { - let orig_tokens = attr.get_tokens(); - // We are taking an attribute of the form `#[cfg_attr(pred, attr)]` // and producing an attribute of the form `#[attr]`. We // have captured tokens for `attr` itself, but we need to @@ -302,11 +306,11 @@ impl<'a> StripUnconfigured<'a> { // Use the `#` in `#[cfg_attr(pred, attr)]` as the `#` token // for `attr` when we expand it to `#[attr]` - let mut orig_trees = orig_tokens.trees(); + let mut orig_trees = cfg_attr.token_trees().into_iter(); let TokenTree::Token(pound_token @ Token { kind: TokenKind::Pound, .. }, _) = orig_trees.next().unwrap().clone() else { - panic!("Bad tokens for attribute {attr:?}"); + panic!("Bad tokens for attribute {cfg_attr:?}"); }; // We don't really have a good span to use for the synthesized `[]` @@ -320,12 +324,12 @@ impl<'a> StripUnconfigured<'a> { .unwrap_or_else(|| panic!("Missing tokens for {item:?}")) .to_attr_token_stream(), ); - let trees = if attr.style == AttrStyle::Inner { + let trees = if cfg_attr.style == AttrStyle::Inner { // For inner attributes, we do the same thing for the `!` in `#![some_attr]` let TokenTree::Token(bang_token @ Token { kind: TokenKind::Not, .. }, _) = orig_trees.next().unwrap().clone() else { - panic!("Bad tokens for attribute {attr:?}"); + panic!("Bad tokens for attribute {cfg_attr:?}"); }; vec![ AttrTokenTree::Token(pound_token, Spacing::Joint), @@ -340,7 +344,7 @@ impl<'a> StripUnconfigured<'a> { &self.sess.psess.attr_id_generator, item, tokens, - attr.style, + cfg_attr.style, item_span, ); if attr.has_name(sym::crate_type) { diff --git a/compiler/rustc_parse/src/lib.rs b/compiler/rustc_parse/src/lib.rs index 5522127be83ef..4454747ea0212 100644 --- a/compiler/rustc_parse/src/lib.rs +++ b/compiler/rustc_parse/src/lib.rs @@ -157,14 +157,14 @@ pub fn fake_token_stream_for_crate(psess: &ParseSess, krate: &ast::Crate) -> Tok } pub fn parse_cfg_attr( - attr: &Attribute, + cfg_attr: &Attribute, psess: &ParseSess, ) -> Option<(MetaItem, Vec<(AttrItem, Span)>)> { const CFG_ATTR_GRAMMAR_HELP: &str = "#[cfg_attr(condition, attribute, other_attribute, ...)]"; const CFG_ATTR_NOTE_REF: &str = "for more information, visit \ "; - match attr.get_normal_item().args { + match cfg_attr.get_normal_item().args { ast::AttrArgs::Delimited(ast::DelimArgs { dspan, delim, ref tokens }) if !tokens.is_empty() => { @@ -180,7 +180,7 @@ pub fn parse_cfg_attr( } _ => { psess.dcx().emit_err(errors::MalformedCfgAttr { - span: attr.span, + span: cfg_attr.span, sugg: CFG_ATTR_GRAMMAR_HELP, }); } diff --git a/compiler/rustc_parse/src/parser/attr_wrapper.rs b/compiler/rustc_parse/src/parser/attr_wrapper.rs index 38f18022e3c58..e90568f177c26 100644 --- a/compiler/rustc_parse/src/parser/attr_wrapper.rs +++ b/compiler/rustc_parse/src/parser/attr_wrapper.rs @@ -103,11 +103,8 @@ impl ToAttrTokenStream for LazyAttrTokenStreamImpl { // produce an empty `TokenStream` if no calls were made, and omit the // final token otherwise. let mut cursor_snapshot = self.cursor_snapshot.clone(); - let tokens = iter::once((FlatToken::Token(self.start_token.0.clone()), self.start_token.1)) - .chain(iter::repeat_with(|| { - let token = cursor_snapshot.next(); - (FlatToken::Token(token.0), token.1) - })) + let tokens = iter::once(FlatToken::Token(self.start_token.clone())) + .chain(iter::repeat_with(|| FlatToken::Token(cursor_snapshot.next()))) .take(self.num_calls as usize); if self.replace_ranges.is_empty() { @@ -156,11 +153,8 @@ impl ToAttrTokenStream for LazyAttrTokenStreamImpl { (range.start as usize)..(range.end as usize), target .into_iter() - .map(|target| (FlatToken::AttrsTarget(target), Spacing::Alone)) - .chain( - iter::repeat((FlatToken::Empty, Spacing::Alone)) - .take(range.len() - target_len), - ), + .map(|target| FlatToken::AttrsTarget(target)) + .chain(iter::repeat(FlatToken::Empty).take(range.len() - target_len)), ); } make_attr_token_stream(tokens.into_iter(), self.break_last_token) @@ -301,21 +295,22 @@ impl<'a> Parser<'a> { let num_calls = end_pos - start_pos; - // If we have no attributes, then we will never need to - // use any replace ranges. - let replace_ranges: Box<[ReplaceRange]> = if ret.attrs().is_empty() && !self.capture_cfg { - Box::new([]) - } else { - // Grab any replace ranges that occur *inside* the current AST node. - // We will perform the actual replacement when we convert the `LazyAttrTokenStream` - // to an `AttrTokenStream`. - self.capture_state.replace_ranges[replace_ranges_start..replace_ranges_end] - .iter() - .cloned() - .chain(inner_attr_replace_ranges.iter().cloned()) - .map(|(range, data)| ((range.start - start_pos)..(range.end - start_pos), data)) - .collect() - }; + // This is hot enough for `deep-vector` that checking the conditions for an empty iterator + // is measurably faster than actually executing the iterator. + let replace_ranges: Box<[ReplaceRange]> = + if replace_ranges_start == replace_ranges_end && inner_attr_replace_ranges.is_empty() { + Box::new([]) + } else { + // Grab any replace ranges that occur *inside* the current AST node. + // We will perform the actual replacement when we convert the `LazyAttrTokenStream` + // to an `AttrTokenStream`. + self.capture_state.replace_ranges[replace_ranges_start..replace_ranges_end] + .iter() + .cloned() + .chain(inner_attr_replace_ranges.iter().cloned()) + .map(|(range, data)| ((range.start - start_pos)..(range.end - start_pos), data)) + .collect() + }; let tokens = LazyAttrTokenStream::new(LazyAttrTokenStreamImpl { start_token, @@ -325,12 +320,9 @@ impl<'a> Parser<'a> { replace_ranges, }); - // If we support tokens at all - if let Some(target_tokens) = ret.tokens_mut() { - if target_tokens.is_none() { - // Store our newly captured tokens into the AST node. - *target_tokens = Some(tokens.clone()); - } + // If we support tokens and don't already have them, store the newly captured tokens. + if let Some(target_tokens @ None) = ret.tokens_mut() { + *target_tokens = Some(tokens.clone()); } let final_attrs = ret.attrs(); @@ -370,7 +362,7 @@ impl<'a> Parser<'a> { /// `AttrTokenStream`, creating an `AttrTokenTree::Delimited` for each matching pair of open and /// close delims. fn make_attr_token_stream( - mut iter: impl Iterator, + iter: impl Iterator, break_last_token: bool, ) -> AttrTokenStream { #[derive(Debug)] @@ -379,19 +371,19 @@ fn make_attr_token_stream( open_delim_sp: Option<(Delimiter, Span, Spacing)>, inner: Vec, } - let mut stack = vec![FrameData { open_delim_sp: None, inner: vec![] }]; - let mut token_and_spacing = iter.next(); - while let Some((token, spacing)) = token_and_spacing { - match token { - FlatToken::Token(Token { kind: TokenKind::OpenDelim(delim), span }) => { - stack - .push(FrameData { open_delim_sp: Some((delim, span, spacing)), inner: vec![] }); + // The stack always has at least one element. Storing it separately makes for shorter code. + let mut stack_top = FrameData { open_delim_sp: None, inner: vec![] }; + let mut stack_rest = vec![]; + for flat_token in iter { + match flat_token { + FlatToken::Token((Token { kind: TokenKind::OpenDelim(delim), span }, spacing)) => { + stack_rest.push(mem::replace( + &mut stack_top, + FrameData { open_delim_sp: Some((delim, span, spacing)), inner: vec![] }, + )); } - FlatToken::Token(Token { kind: TokenKind::CloseDelim(delim), span }) => { - let frame_data = stack - .pop() - .unwrap_or_else(|| panic!("Token stack was empty for token: {token:?}")); - + FlatToken::Token((Token { kind: TokenKind::CloseDelim(delim), span }, spacing)) => { + let frame_data = mem::replace(&mut stack_top, stack_rest.pop().unwrap()); let (open_delim, open_sp, open_spacing) = frame_data.open_delim_sp.unwrap(); assert_eq!( open_delim, delim, @@ -401,29 +393,20 @@ fn make_attr_token_stream( let dspacing = DelimSpacing::new(open_spacing, spacing); let stream = AttrTokenStream::new(frame_data.inner); let delimited = AttrTokenTree::Delimited(dspan, dspacing, delim, stream); - stack - .last_mut() - .unwrap_or_else(|| panic!("Bottom token frame is missing for token: {token:?}")) - .inner - .push(delimited); + stack_top.inner.push(delimited); + } + FlatToken::Token((token, spacing)) => { + stack_top.inner.push(AttrTokenTree::Token(token, spacing)) + } + FlatToken::AttrsTarget(target) => { + stack_top.inner.push(AttrTokenTree::AttrsTarget(target)) } - FlatToken::Token(token) => stack - .last_mut() - .expect("Bottom token frame is missing!") - .inner - .push(AttrTokenTree::Token(token, spacing)), - FlatToken::AttrsTarget(target) => stack - .last_mut() - .expect("Bottom token frame is missing!") - .inner - .push(AttrTokenTree::AttrsTarget(target)), FlatToken::Empty => {} } - token_and_spacing = iter.next(); } - let mut final_buf = stack.pop().expect("Missing final buf!"); + if break_last_token { - let last_token = final_buf.inner.pop().unwrap(); + let last_token = stack_top.inner.pop().unwrap(); if let AttrTokenTree::Token(last_token, spacing) = last_token { let unglued_first = last_token.kind.break_two_token_op().unwrap().0; @@ -431,14 +414,14 @@ fn make_attr_token_stream( let mut first_span = last_token.span.shrink_to_lo(); first_span = first_span.with_hi(first_span.lo() + rustc_span::BytePos(1)); - final_buf + stack_top .inner .push(AttrTokenTree::Token(Token::new(unglued_first, first_span), spacing)); } else { panic!("Unexpected last token {last_token:?}") } } - AttrTokenStream::new(final_buf.inner) + AttrTokenStream::new(stack_top.inner) } // Some types are used a lot. Make sure they don't unintentionally get bigger. diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs index 45ca267fe5d1e..1482c665a16b7 100644 --- a/compiler/rustc_parse/src/parser/mod.rs +++ b/compiler/rustc_parse/src/parser/mod.rs @@ -1607,7 +1607,7 @@ pub(crate) fn make_unclosed_delims_error( enum FlatToken { /// A token - this holds both delimiter (e.g. '{' and '}') /// and non-delimiter tokens - Token(Token), + Token((Token, Spacing)), /// Holds the `AttrsTarget` for an AST node. The `AttrsTarget` is inserted /// directly into the constructed `AttrTokenStream` as an /// `AttrTokenTree::AttrsTarget`.