
Commit e944a27

Auto merge of #16547 - Veykril:proc-macro-literals, r=Veykril
fix: Validate literals in proc-macro-srv FreeFunctions::literal_from_str
cc #16446. I originally only meant to get rid of some string allocations, but then I noticed we can just implement this with the bare lexer.
2 parents 925705e + ed57008 commit e944a27
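
As a minimal standalone sketch of the approach (not the committed code; it assumes the ra-ap-rustc_lexer crate as a dependency, and validate_literal is a hypothetical helper name): lex the input with the bare rustc lexer, accept it only if it is exactly one literal token, optionally preceded by a minus sign for numeric literals, and use the token's suffix_start to split the literal text from its suffix.

// Sketch only (assumes the ra-ap-rustc_lexer crate; `validate_literal` is a
// hypothetical helper name, not part of the commit).
use ra_ap_rustc_lexer::{tokenize, LiteralKind, Token, TokenKind};

// Accept `s` only if it is exactly one literal token, optionally preceded by
// a minus sign for numeric literals; return its kind and (text, suffix) split.
fn validate_literal(s: &str) -> Result<(LiteralKind, &str, &str), ()> {
    let mut tokens = tokenize(s);
    let first = tokens.next().unwrap_or(Token { kind: TokenKind::Eof, len: 0 });

    // A leading `-` is only accepted in front of integer and float literals.
    let (lit_start, lit) = if first.kind == TokenKind::Minus {
        let lit = tokens.next().ok_or(())?;
        if !matches!(
            lit.kind,
            TokenKind::Literal { kind: LiteralKind::Int { .. } | LiteralKind::Float { .. }, .. }
        ) {
            return Err(());
        }
        (first.len as usize, lit)
    } else {
        (0, first)
    };

    // Any trailing token means the input is not a single literal.
    if tokens.next().is_some() {
        return Err(());
    }

    let TokenKind::Literal { kind, suffix_start } = lit.kind else { return Err(()) };
    // `suffix_start` is the offset within the literal token where an optional
    // suffix such as `u16` or `f32` begins.
    let (text, suffix) = s.split_at(lit_start + suffix_start as usize);
    Ok((kind, text, suffix))
}

fn main() {
    assert!(matches!(validate_literal("1u16"), Ok((LiteralKind::Int { .. }, "1", "u16"))));
    assert!(matches!(validate_literal(r#""hello bridge""#), Ok((LiteralKind::Str { .. }, _, ""))));
    assert!(validate_literal("1 + 1").is_err()); // more than one token
    assert!(validate_literal(r#"-"str""#).is_err()); // `-` only allowed for numbers
}

The diffs below apply this same idea directly inside FreeFunctions::literal_from_str for both span servers.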

9 files changed: +110 -108 lines changed


Cargo.lock

Lines changed: 1 addition & 1 deletion

crates/proc-macro-srv/Cargo.toml

Lines changed: 1 addition & 1 deletion
@@ -29,7 +29,7 @@ paths.workspace = true
 base-db.workspace = true
 span.workspace = true
 proc-macro-api.workspace = true
-syntax.workspace = true
+ra-ap-rustc_lexer.workspace = true
 
 [dev-dependencies]
 expect-test = "1.4.0"

crates/proc-macro-srv/src/lib.rs

Lines changed: 5 additions & 0 deletions
@@ -20,6 +20,11 @@ extern crate proc_macro;
 #[cfg(feature = "in-rust-tree")]
 extern crate rustc_driver as _;
 
+#[cfg(not(feature = "in-rust-tree"))]
+extern crate ra_ap_rustc_lexer as rustc_lexer;
+#[cfg(feature = "in-rust-tree")]
+extern crate rustc_lexer;
+
 mod dylib;
 mod proc_macros;
 mod server;

crates/proc-macro-srv/src/server.rs

Lines changed: 0 additions & 27 deletions
@@ -17,7 +17,6 @@ pub mod rust_analyzer_span;
 mod symbol;
 pub mod token_id;
 pub use symbol::*;
-use syntax::ast::{self, IsString};
 use tt::Spacing;
 
 fn delim_to_internal<S>(d: proc_macro::Delimiter, span: bridge::DelimSpan<S>) -> tt::Delimiter<S> {
@@ -55,32 +54,6 @@ fn spacing_to_external(spacing: Spacing) -> proc_macro::Spacing {
     }
 }
 
-fn literal_to_external(literal_kind: ast::LiteralKind) -> Option<proc_macro::bridge::LitKind> {
-    match literal_kind {
-        ast::LiteralKind::String(data) => Some(if data.is_raw() {
-            bridge::LitKind::StrRaw(data.raw_delimiter_count()?)
-        } else {
-            bridge::LitKind::Str
-        }),
-
-        ast::LiteralKind::ByteString(data) => Some(if data.is_raw() {
-            bridge::LitKind::ByteStrRaw(data.raw_delimiter_count()?)
-        } else {
-            bridge::LitKind::ByteStr
-        }),
-        ast::LiteralKind::CString(data) => Some(if data.is_raw() {
-            bridge::LitKind::CStrRaw(data.raw_delimiter_count()?)
-        } else {
-            bridge::LitKind::CStr
-        }),
-        ast::LiteralKind::IntNumber(_) => Some(bridge::LitKind::Integer),
-        ast::LiteralKind::FloatNumber(_) => Some(bridge::LitKind::Float),
-        ast::LiteralKind::Char(_) => Some(bridge::LitKind::Char),
-        ast::LiteralKind::Byte(_) => Some(bridge::LitKind::Byte),
-        ast::LiteralKind::Bool(_) => None,
-    }
-}
-
 struct LiteralFormatter<S>(bridge::Literal<S, Symbol>);
 
 impl<S> LiteralFormatter<S> {

crates/proc-macro-srv/src/server/rust_analyzer_span.rs

Lines changed: 45 additions & 21 deletions
@@ -13,11 +13,10 @@ use std::{
 use ::tt::{TextRange, TextSize};
 use proc_macro::bridge::{self, server};
 use span::{Span, FIXUP_ERASED_FILE_AST_ID_MARKER};
-use syntax::ast::{self, IsString};
 
 use crate::server::{
-    delim_to_external, delim_to_internal, literal_to_external, token_stream::TokenStreamBuilder,
-    LiteralFormatter, Symbol, SymbolInternerRef, SYMBOL_INTERNER,
+    delim_to_external, delim_to_internal, token_stream::TokenStreamBuilder, LiteralFormatter,
+    Symbol, SymbolInternerRef, SYMBOL_INTERNER,
 };
 mod tt {
     pub use ::tt::*;
@@ -71,32 +70,57 @@ impl server::FreeFunctions for RaSpanServer {
         &mut self,
         s: &str,
     ) -> Result<bridge::Literal<Self::Span, Self::Symbol>, ()> {
-        let literal = ast::Literal::parse(s).ok_or(())?;
-        let literal = literal.tree();
+        use proc_macro::bridge::LitKind;
+        use rustc_lexer::{LiteralKind, Token, TokenKind};
+
+        let mut tokens = rustc_lexer::tokenize(s);
+        let minus_or_lit = tokens.next().unwrap_or(Token { kind: TokenKind::Eof, len: 0 });
+
+        let lit = if minus_or_lit.kind == TokenKind::Minus {
+            let lit = tokens.next().ok_or(())?;
+            if !matches!(
+                lit.kind,
+                TokenKind::Literal {
+                    kind: LiteralKind::Int { .. } | LiteralKind::Float { .. },
+                    ..
+                }
+            ) {
+                return Err(());
+            }
+            lit
+        } else {
+            minus_or_lit
+        };
 
-        let kind = literal_to_external(literal.kind()).ok_or(())?;
+        if tokens.next().is_some() {
+            return Err(());
+        }
 
-        // FIXME: handle more than just int and float suffixes
-        let suffix = match literal.kind() {
-            ast::LiteralKind::FloatNumber(num) => num.suffix().map(ToString::to_string),
-            ast::LiteralKind::IntNumber(num) => num.suffix().map(ToString::to_string),
-            _ => None,
+        let TokenKind::Literal { kind, suffix_start } = lit.kind else { return Err(()) };
+        let kind = match kind {
+            LiteralKind::Int { .. } => LitKind::Integer,
+            LiteralKind::Float { .. } => LitKind::Float,
+            LiteralKind::Char { .. } => LitKind::Char,
+            LiteralKind::Byte { .. } => LitKind::Byte,
+            LiteralKind::Str { .. } => LitKind::Str,
+            LiteralKind::ByteStr { .. } => LitKind::ByteStr,
+            LiteralKind::CStr { .. } => LitKind::CStr,
+            LiteralKind::RawStr { n_hashes } => LitKind::StrRaw(n_hashes.unwrap_or_default()),
+            LiteralKind::RawByteStr { n_hashes } => {
+                LitKind::ByteStrRaw(n_hashes.unwrap_or_default())
+            }
+            LiteralKind::RawCStr { n_hashes } => LitKind::CStrRaw(n_hashes.unwrap_or_default()),
         };
 
-        let text = match literal.kind() {
-            ast::LiteralKind::String(data) => data.text_without_quotes().to_string(),
-            ast::LiteralKind::ByteString(data) => data.text_without_quotes().to_string(),
-            ast::LiteralKind::CString(data) => data.text_without_quotes().to_string(),
-            _ => s.to_string(),
+        let (lit, suffix) = s.split_at(suffix_start as usize);
+        let suffix = match suffix {
+            "" | "_" => None,
+            suffix => Some(Symbol::intern(self.interner, suffix)),
         };
-        let text = if let Some(ref suffix) = suffix { text.strip_suffix(suffix) } else { None }
-            .unwrap_or(&text);
-
-        let suffix = suffix.map(|suffix| Symbol::intern(self.interner, &suffix));
 
         Ok(bridge::Literal {
             kind,
-            symbol: Symbol::intern(self.interner, text),
+            symbol: Symbol::intern(self.interner, lit),
             suffix,
             span: self.call_site,
        })
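
One observable effect of the rewrite above: the interned symbol now holds the literal's source text verbatim, quotes included, whereas the old code stored the unquoted contents obtained from text_without_quotes. That is why the test expectations further down gain an extra pair of quotes around string literals. A hypothetical illustration, assuming the ra-ap-rustc_lexer crate (not part of the commit):

// Hypothetical illustration (assumes the ra-ap-rustc_lexer crate), not part of the commit.
use ra_ap_rustc_lexer::{tokenize, TokenKind};

fn main() {
    let s = r#""hello bridge""#;
    let token = tokenize(s).next().unwrap();
    let TokenKind::Literal { suffix_start, .. } = token.kind else { panic!("not a literal") };
    // The symbol interned by `literal_from_str` is now the full source text,
    // quotes included; the old code interned the contents without quotes.
    let (text, suffix) = s.split_at(suffix_start as usize);
    assert_eq!(text, r#""hello bridge""#);
    assert_eq!(suffix, "");
}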

crates/proc-macro-srv/src/server/token_id.rs

Lines changed: 45 additions & 21 deletions
@@ -6,11 +6,10 @@ use std::{
 };
 
 use proc_macro::bridge::{self, server};
-use syntax::ast::{self, IsString};
 
 use crate::server::{
-    delim_to_external, delim_to_internal, literal_to_external, token_stream::TokenStreamBuilder,
-    LiteralFormatter, Symbol, SymbolInternerRef, SYMBOL_INTERNER,
+    delim_to_external, delim_to_internal, token_stream::TokenStreamBuilder, LiteralFormatter,
+    Symbol, SymbolInternerRef, SYMBOL_INTERNER,
 };
 mod tt {
     pub use proc_macro_api::msg::TokenId;
@@ -63,32 +62,57 @@ impl server::FreeFunctions for TokenIdServer {
         &mut self,
         s: &str,
     ) -> Result<bridge::Literal<Self::Span, Self::Symbol>, ()> {
-        let literal = ast::Literal::parse(s).ok_or(())?;
-        let literal = literal.tree();
+        use proc_macro::bridge::LitKind;
+        use rustc_lexer::{LiteralKind, Token, TokenKind};
+
+        let mut tokens = rustc_lexer::tokenize(s);
+        let minus_or_lit = tokens.next().unwrap_or(Token { kind: TokenKind::Eof, len: 0 });
+
+        let lit = if minus_or_lit.kind == TokenKind::Minus {
+            let lit = tokens.next().ok_or(())?;
+            if !matches!(
+                lit.kind,
+                TokenKind::Literal {
+                    kind: LiteralKind::Int { .. } | LiteralKind::Float { .. },
+                    ..
+                }
+            ) {
+                return Err(());
+            }
+            lit
+        } else {
+            minus_or_lit
+        };
 
-        let kind = literal_to_external(literal.kind()).ok_or(())?;
+        if tokens.next().is_some() {
+            return Err(());
+        }
 
-        // FIXME: handle more than just int and float suffixes
-        let suffix = match literal.kind() {
-            ast::LiteralKind::FloatNumber(num) => num.suffix().map(ToString::to_string),
-            ast::LiteralKind::IntNumber(num) => num.suffix().map(ToString::to_string),
-            _ => None,
+        let TokenKind::Literal { kind, suffix_start } = lit.kind else { return Err(()) };
+        let kind = match kind {
+            LiteralKind::Int { .. } => LitKind::Integer,
+            LiteralKind::Float { .. } => LitKind::Float,
+            LiteralKind::Char { .. } => LitKind::Char,
+            LiteralKind::Byte { .. } => LitKind::Byte,
+            LiteralKind::Str { .. } => LitKind::Str,
+            LiteralKind::ByteStr { .. } => LitKind::ByteStr,
+            LiteralKind::CStr { .. } => LitKind::CStr,
+            LiteralKind::RawStr { n_hashes } => LitKind::StrRaw(n_hashes.unwrap_or_default()),
+            LiteralKind::RawByteStr { n_hashes } => {
+                LitKind::ByteStrRaw(n_hashes.unwrap_or_default())
+            }
+            LiteralKind::RawCStr { n_hashes } => LitKind::CStrRaw(n_hashes.unwrap_or_default()),
        };
 
-        let text = match literal.kind() {
-            ast::LiteralKind::String(data) => data.text_without_quotes().to_string(),
-            ast::LiteralKind::ByteString(data) => data.text_without_quotes().to_string(),
-            ast::LiteralKind::CString(data) => data.text_without_quotes().to_string(),
-            _ => s.to_string(),
+        let (lit, suffix) = s.split_at(suffix_start as usize);
+        let suffix = match suffix {
+            "" | "_" => None,
+            suffix => Some(Symbol::intern(self.interner, suffix)),
        };
-        let text = if let Some(ref suffix) = suffix { text.strip_suffix(suffix) } else { None }
-            .unwrap_or(&text);
-
-        let suffix = suffix.map(|suffix| Symbol::intern(self.interner, &suffix));
 
         Ok(bridge::Literal {
             kind,
-            symbol: Symbol::intern(self.interner, text),
+            symbol: Symbol::intern(self.interner, lit),
             suffix,
             span: self.call_site,
        })

crates/proc-macro-srv/src/tests/mod.rs

Lines changed: 13 additions & 5 deletions
@@ -169,8 +169,8 @@ fn test_fn_like_mk_idents() {
 fn test_fn_like_macro_clone_literals() {
     assert_expand(
         "fn_like_clone_tokens",
-        r#"1u16, 2_u32, -4i64, 3.14f32, "hello bridge""#,
-        expect![[r#"
+        r###"1u16, 2_u32, -4i64, 3.14f32, "hello bridge", "suffixed"suffix, r##"raw"##"###,
+        expect![[r###"
 SUBTREE $$ 1 1
 LITERAL 1u16 1
 PUNCH , [alone] 1
@@ -181,8 +181,12 @@ fn test_fn_like_macro_clone_literals() {
 PUNCH , [alone] 1
 LITERAL 3.14f32 1
 PUNCH , [alone] 1
-LITERAL "hello bridge" 1"#]],
-        expect![[r#"
+LITERAL ""hello bridge"" 1
+PUNCH , [alone] 1
+LITERAL ""suffixed""suffix 1
+PUNCH , [alone] 1
+LITERAL r##"r##"raw"##"## 1"###]],
+        expect![[r###"
 SUBTREE $$ SpanData { range: 0..100, anchor: SpanAnchor(FileId(42), 2), ctx: SyntaxContextId(0) } SpanData { range: 0..100, anchor: SpanAnchor(FileId(42), 2), ctx: SyntaxContextId(0) }
 LITERAL 1u16 SpanData { range: 0..4, anchor: SpanAnchor(FileId(42), 2), ctx: SyntaxContextId(0) }
 PUNCH , [alone] SpanData { range: 4..5, anchor: SpanAnchor(FileId(42), 2), ctx: SyntaxContextId(0) }
@@ -193,7 +197,11 @@ fn test_fn_like_macro_clone_literals() {
 PUNCH , [alone] SpanData { range: 18..19, anchor: SpanAnchor(FileId(42), 2), ctx: SyntaxContextId(0) }
 LITERAL 3.14f32 SpanData { range: 20..27, anchor: SpanAnchor(FileId(42), 2), ctx: SyntaxContextId(0) }
 PUNCH , [alone] SpanData { range: 27..28, anchor: SpanAnchor(FileId(42), 2), ctx: SyntaxContextId(0) }
-LITERAL "hello bridge" SpanData { range: 29..43, anchor: SpanAnchor(FileId(42), 2), ctx: SyntaxContextId(0) }"#]],
+LITERAL ""hello bridge"" SpanData { range: 29..43, anchor: SpanAnchor(FileId(42), 2), ctx: SyntaxContextId(0) }
+PUNCH , [alone] SpanData { range: 43..44, anchor: SpanAnchor(FileId(42), 2), ctx: SyntaxContextId(0) }
+LITERAL ""suffixed""suffix SpanData { range: 45..61, anchor: SpanAnchor(FileId(42), 2), ctx: SyntaxContextId(0) }
+PUNCH , [alone] SpanData { range: 61..62, anchor: SpanAnchor(FileId(42), 2), ctx: SyntaxContextId(0) }
+LITERAL r##"r##"raw"##"## SpanData { range: 63..73, anchor: SpanAnchor(FileId(42), 2), ctx: SyntaxContextId(0) }"###]],
     );
 }
 

crates/syntax/src/ast/token_ext.rs

Lines changed: 0 additions & 10 deletions
@@ -204,16 +204,6 @@ pub trait IsString: AstToken {
         assert!(TextRange::up_to(contents_range.len()).contains_range(range));
         Some(range + contents_range.start())
     }
-    fn raw_delimiter_count(&self) -> Option<u8> {
-        let text = self.text();
-        let quote_range = self.text_range_between_quotes()?;
-        let range_start = self.syntax().text_range().start();
-        text[TextRange::up_to((quote_range - range_start).start())]
-            .matches('#')
-            .count()
-            .try_into()
-            .ok()
-    }
 }
 
 impl IsString for ast::String {
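
The raw_delimiter_count helper removed above is no longer needed: the bare lexer already reports the number of # delimiters for raw strings, which the server changes above map straight into LitKind::StrRaw and related kinds. A hypothetical illustration, assuming the ra-ap-rustc_lexer crate (not part of the commit):

// Hypothetical illustration (assumes the ra-ap-rustc_lexer crate), not part of the commit.
use ra_ap_rustc_lexer::{tokenize, LiteralKind, TokenKind};

fn main() {
    let s = r####"r##"raw"##"####; // the raw string literal r##"raw"## as input text
    let token = tokenize(s).next().unwrap();
    let TokenKind::Literal { kind, .. } = token.kind else { panic!("not a literal") };
    // The lexer counts the `#` delimiters itself, so `LitKind::StrRaw(2)` can be
    // produced directly from `n_hashes` without the removed helper.
    assert!(matches!(kind, LiteralKind::RawStr { n_hashes: Some(2) }));
}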

crates/syntax/src/lib.rs

Lines changed: 0 additions & 22 deletions
@@ -182,28 +182,6 @@ impl SourceFile {
     }
 }
 
-impl ast::Literal {
-    pub fn parse(text: &str) -> Option<Parse<ast::Literal>> {
-        let lexed = parser::LexedStr::new(text);
-        let parser_input = lexed.to_input();
-        let parser_output = parser::TopEntryPoint::Expr.parse(&parser_input);
-        let (green, mut errors, _) = parsing::build_tree(lexed, parser_output);
-        let root = SyntaxNode::new_root(green.clone());
-
-        errors.extend(validation::validate(&root));
-
-        if root.kind() == SyntaxKind::LITERAL {
-            Some(Parse {
-                green,
-                errors: if errors.is_empty() { None } else { Some(errors.into()) },
-                _ty: PhantomData,
-            })
-        } else {
-            None
-        }
-    }
-}
-
 impl ast::TokenTree {
     pub fn reparse_as_comma_separated_expr(self) -> Parse<ast::MacroEagerInput> {
         let tokens = self.syntax().descendants_with_tokens().filter_map(NodeOrToken::into_token);
