Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions peg-macros/ast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,21 @@ impl Grammar {
/// A top-level item appearing in the body of a grammar.
#[derive(Debug)]
pub enum Item {
/// A `use` declaration; its tokens are copied as-is into the generated items.
Use(TokenStream),
/// An `inject` declaration, defining a variable made available to action blocks.
InjectVar(InjectVar),
/// A grammar rule.
Rule(Rule),
}

/// An `inject name(input, lpos, rpos) -> Type { body }` declaration.
///
/// Each declaration is compiled to a helper function named `__inject_<name>`,
/// whose result is bound to a local variable called `name` before an action
/// block runs.
#[derive(Debug)]
pub struct InjectVar {
/// Tokens of the doc-comment attributes preceding the declaration, if any;
/// they are re-emitted on the generated helper function.
pub doc: Option<TokenStream>,
/// Name of the injected variable as written after the `inject` keyword.
pub name: Ident,
/// Identifier the body uses for the parser input.
pub input_param: Ident,
/// Identifier the body uses for the `usize` start position.
pub lpos_param: Ident,
/// Identifier the body uses for the `usize` end position.
pub rpos_param: Ident,
/// Tokens of the declared return type, i.e. the type of the injected variable.
pub ty: TokenStream,
/// Brace-delimited body of the generated helper function.
pub body: Group,
}

#[derive(Debug)]
pub enum Cache {
Simple,
Expand Down
289 changes: 251 additions & 38 deletions peg-macros/grammar.rs

Large diffs are not rendered by default.

13 changes: 9 additions & 4 deletions peg-macros/grammar.rustpeg
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,15 @@ pub rule peg_grammar() -> Grammar
rule grammar_args() -> Vec<(Ident, TokenStream)>
= "(" args:((i:IDENT() ":" t:$(rust_type()) { (i, t) })**",") ","? ")" { args }

// A single top-level grammar item. Alternatives are tried in order: a `use`
// declaration, then a rule, then an `inject` declaration.
rule item() -> Item
= u:rust_use() { Item::Use(u) }
/ r:peg_rule() { Item::Rule(r) }
/ f:inject_var() { Item::InjectVar(f) }

// An injected-variable declaration:
//
//     inject name(input, lpos, rpos) -> Type { body }
//
// Exactly three parameter names are required (no trailing comma is accepted),
// and the body must be a brace-delimited group.
rule inject_var() -> InjectVar
= doc:rust_doc_comment() "inject" name:IDENT() "(" input_param:IDENT() "," lpos_param:IDENT() "," rpos_param:IDENT() ")" "->" ty:$(rust_type()) body:BRACE_GROUP()
{ InjectVar { doc, name, input_param, lpos_param, rpos_param, ty, body } }

rule peg_rule() -> Rule
= doc:rust_doc_comment() cache:cacheflag() no_eof:no_eof_flag() visibility:rust_visibility()
span:sp() "rule"
Expand All @@ -41,10 +50,6 @@ rule peg_rule() -> Rule
rule rule_params() -> Vec<RuleParam>
= "(" params:(x:(name:IDENT() ":" ty:rule_param_ty() { RuleParam { name, ty} }) ++ "," ","? {x})? ")" { params.unwrap_or_default() }

rule item() -> Item
= u:rust_use() { Item::Use(u) }
/ r:peg_rule() { Item::Rule(r) }

rule rust_doc_comment() -> Option<TokenStream> = $(("#" "[" "doc" "=" LITERAL() "]")*)?
rule rust_attribute() = "#" "[" rust_path() (DELIM_GROUP() / "=" LITERAL()) "]"

Expand Down
118 changes: 98 additions & 20 deletions peg-macros/translate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,21 +52,26 @@ struct Context<'a> {
parse_state_ty: TokenStream,
extra_args_call: TokenStream,
extra_args_def: TokenStream,
injected_vars: TokenStream,
}

pub(crate) fn compile_grammar(grammar: &Grammar) -> TokenStream {
let analysis = analysis::check(grammar);

let grammar_lifetime_params = ty_params_slice(&grammar.lifetime_params);
let extra_args_def = extra_args_def(grammar);
let extra_args_call = extra_args_call(grammar);
let injected_vars = invoke_injected_vars(grammar, &extra_args_call);

let context = &Context {
rules: &analysis.rules,
rules_from_args: HashSet::new(),
grammar_lifetime_params,
input_ty: quote!(&'input Input<#(#grammar_lifetime_params),*>),
parse_state_ty: quote!(&mut ParseState<'input #(, #grammar_lifetime_params)*>),
extra_args_call: extra_args_call(grammar),
extra_args_def: extra_args_def(grammar),
extra_args_call,
extra_args_def,
injected_vars,
};

let mut seen_rule_names = HashSet::new();
Expand All @@ -75,6 +80,7 @@ pub(crate) fn compile_grammar(grammar: &Grammar) -> TokenStream {
for item in &grammar.items {
match item {
Item::Use(tt) => items.push(tt.clone()),
Item::InjectVar(var) => items.push(compile_inject_func(context, var)),
Item::Rule(rule) => {
if !seen_rule_names.insert(rule.name.to_string()) {
items.push(report_error(
Expand Down Expand Up @@ -208,6 +214,61 @@ fn rule_params_list(context: &Context, rule: &Rule) -> Vec<TokenStream> {
}).collect()
}

/// Emit the helper function backing a single `inject` declaration.
///
/// The generated function is named `__inject_<name>`. Its parameters are the
/// input and the two `usize` positions, under the identifiers chosen in the
/// declaration, followed by the grammar's extra arguments. Its return type and
/// body are taken from the declaration unchanged.
fn compile_inject_func(context: &Context, var: &InjectVar) -> TokenStream {
    // All generated tokens are located at the declared name, resolved at the mixed site.
    let span = var.name.span().resolved_at(Span::mixed_site());
    let fn_name = format_ident!("__inject_{}", &var.name, span = span);

    // Pieces supplied by the `inject` declaration itself.
    let doc = &var.doc;
    let input_param = &var.input_param;
    let lpos_param = &var.lpos_param;
    let rpos_param = &var.rpos_param;
    let ret_ty = &var.ty;
    let body = &var.body;

    // Pieces shared with the rest of the code generated for this grammar.
    let input_ty = &context.input_ty;
    let lifetimes = &context.grammar_lifetime_params;
    let extra_args_def = &context.extra_args_def;

    quote_spanned! { span =>
        #doc
        fn #fn_name<'input #(, #lifetimes)*>(
            #input_param: #input_ty,
            #lpos_param: usize,
            #rpos_param: usize
            #extra_args_def
        ) -> #ret_ty #body
    }
}

/// Build the `let` statements that bind every injected variable.
///
/// For each `inject` declaration in the grammar, in declaration order, this emits
/// `let <name> = __inject_<name>(__input, __lpos, __pos <extra args>);`.
/// The result is empty when the grammar declares no injected variables.
fn invoke_injected_vars(grammar: &Grammar, extra_args_call: &TokenStream) -> TokenStream {
    let mut bindings = Vec::new();

    for item in &grammar.items {
        if let Item::InjectVar(var) = item {
            let span = var.name.span().resolved_at(Span::mixed_site());
            let var_name = &var.name;
            let helper = format_ident!("__inject_{}", var.name);

            bindings.push(quote_spanned! { span =>
                // An action block is free to ignore any injected variable.
                #[allow(unused)]
                let #var_name = #helper(__input, __lpos, __pos #extra_args_call);
            });
        }
    }

    quote!(#(#bindings)*)
}

/// Compile a rule to a function for use internal to the grammar.
/// Returns `RuleResult<T>`.
fn compile_rule(context: &Context, rule: &Rule) -> TokenStream {
Expand Down Expand Up @@ -777,28 +838,44 @@ fn compile_expr(context: &Context, e: &SpannedExpr, result_used: bool) -> TokenS
}}
}

Expr::Action(ref exprs, ref code) => labeled_seq(context, exprs, {
if let Some(code) = code {
let code_span = code.span().resolved_at(Span::mixed_site());

// Peek and see if the first token in the block is '?'. If so, it's a conditional block
if let Some(body) = group_check_prefix(code, '?') {
quote_spanned! {code_span =>
match (||{ #body })() {
Ok(res) => ::peg::RuleResult::Matched(__pos, res),
Err(expected) => {
__err_state.mark_failure(__pos, expected);
::peg::RuleResult::Failed
},
Expr::Action(ref exprs, ref code) => {
let seq = labeled_seq(context, exprs, {
if let Some(code) = code {
let injected_vars = &context.injected_vars;
let code_span = code.span().resolved_at(Span::mixed_site());

// Peek and see if the first token in the block is '?'. If so, it's a conditional block
if let Some(body) = group_check_prefix(code, '?') {
quote_spanned! {code_span =>
match (||{ #injected_vars #body })() {
Ok(res) => ::peg::RuleResult::Matched(__pos, res),
Err(expected) => {
__err_state.mark_failure(__pos, expected);
::peg::RuleResult::Failed
},
}
}
} else {
let body = code.stream();
quote_spanned! {code_span => ::peg::RuleResult::Matched(__pos, (|| {
#injected_vars
#body
} )()) }
}
} else {
quote_spanned! {code_span => ::peg::RuleResult::Matched(__pos, (||#code)()) }
quote_spanned! { span => ::peg::RuleResult::Matched(__pos, ()) }
}
});

if context.injected_vars.is_empty() {
seq
} else {
quote_spanned! { span => ::peg::RuleResult::Matched(__pos, ()) }
quote_spanned! { span => {
let __lpos = __pos;
#seq
}}
}
}),
}
Expr::MatchStr(ref expr) => {
let inner = compile_expr(context, expr, false);
quote_spanned! { span => {
Expand Down Expand Up @@ -826,6 +903,7 @@ fn compile_expr(context: &Context, e: &SpannedExpr, result_used: bool) -> TokenS
}

Expr::Precedence { ref levels } => {
let injected_vars = &context.injected_vars;
let mut pre_rules = Vec::new();
let mut level_code = Vec::new();
let mut span_capture: Option<(TokenStream, TokenStream, TokenStream, &Group)> = None;
Expand All @@ -848,8 +926,8 @@ fn compile_expr(context: &Context, e: &SpannedExpr, result_used: bool) -> TokenS
let right_arg = &op.elements[op.elements.len() - 1];
let r_arg = name_or_ignore(right_arg.name.as_ref());

let action = &op.action;
let action = quote_spanned!(op.action.span()=>(||#action)());
let action = &op.action.stream();
let action = quote_spanned!(op.action.span()=>(||{ #injected_vars #action })());

let action = if let Some((lpos_name, val_name, rpos_name, wrap_action)) =
&span_capture
Expand Down
18 changes: 18 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -334,6 +334,24 @@
//! The `precedence!{}` syntax is another way to handle nested operators and avoid
//! repeatedly matching an expression rule.
//!
//! ## Injected variables
//!
//! When building an AST, you commonly want to attach location information to each node.
//!
//! `inject name(input, lpos, rpos) -> Type { expr }` defines an internal function
//! that is called before entering every action code block, including the action
//! blocks of `precedence!{}` operators. It is passed the full input and the `usize`
//! start and end positions of the text matched by the sequence leading up to the
//! action block; any grammar arguments are also in scope inside `{ expr }`. The
//! returned value is available in the action block as a variable named `name`.
//!
//! For example,
//!
//! ```rust,ignore
//! inject span(_input, lpos, rpos) -> Range<usize> { lpos..rpos }
//! ```
//!
//! defines a variable `span` that is a `Range` of the matched text for each action block.
//!
//! ## Tracing
//!
//! If you pass the `peg/trace` feature to Cargo when building your project, a
Expand Down
2 changes: 1 addition & 1 deletion tests/compile-fail/syntax_error.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
extern crate peg;

peg::parser!(grammar foo() for str {
fn asdf() {} //~ ERROR expected one of "#", "crate", "pub", "rule", "use", "}"
fn asdf() {} //~ ERROR expected one of "#", "inject", "crate", "pub", "rule", "use", "}"
});

fn main() {}
4 changes: 2 additions & 2 deletions tests/compile-fail/syntax_error.stderr
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
error: expected one of "#", "pub", "rule", "use", "}"
error: expected one of "#", "inject", "pub", "rule", "use", "}"
--> tests/compile-fail/syntax_error.rs:4:5
|
4 | fn asdf() {} //~ ERROR expected one of "#", "crate", "pub", "rule", "use", "}"
4 | fn asdf() {} //~ ERROR expected one of "#", "inject", "crate", "pub", "rule", "use", "}"
| ^^
108 changes: 108 additions & 0 deletions tests/pass/inject_span.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
use std::ops::Range;

/// AST node produced by the `lang` test grammar: a kind plus the range of
/// input positions it was parsed from.
#[derive(Debug, PartialEq)]
struct Node {
// Filled from the injected `span` variable in each action block.
span: Range<usize>,
kind: NodeKind,
}

/// The kinds of expression node the `lang` test grammar can build.
#[derive(Debug, PartialEq)]
enum NodeKind {
// Decimal integer literal.
Number(i64),
// Binary `+`, `-`, `*` and `/`.
Add(Box<Node>, Box<Node>),
Sub(Box<Node>, Box<Node>),
Mul(Box<Node>, Box<Node>),
Div(Box<Node>, Box<Node>),
// Postfix `!`.
Factorial(Box<Node>),
// Prefix `-`.
Neg(Box<Node>),
// Parenthesised sub-expression.
Group(Box<Node>),
// Variable name made of lowercase ASCII letters.
Var(String),
}

// Arithmetic expression grammar in which every action block builds a `Node`
// whose `span` comes from the injected `span` variable rather than from
// explicit position captures.
peg::parser!( grammar lang() for str {
// Bound as `span` before every action block below.
inject span(_input, lpos, rpos) -> Range<usize> { lpos..rpos }

// Exercises injection into a conditional (`{? ... }`) action block.
rule number() -> Node
= n:$(['0'..='9']+) {? match n.parse() {
Ok(n) => Ok(Node { span, kind: NodeKind::Number(n) }),
Err(_) => Err("number too large"),
}}

// Exercises injection into a plain action block.
rule var() -> Node
= v:$(['a'..='z']+) { Node { span, kind: NodeKind::Var(v.to_string()) } }

// Exercises injection into `precedence!` operator actions.
pub rule expr() -> Node = precedence!{
x:(@) "+" y:@ { Node { span, kind: NodeKind::Add(Box::new(x), Box::new(y)) } }
x:(@) "-" y:@ { Node { span, kind: NodeKind::Sub(Box::new(x), Box::new(y)) } }
"-" v:@ { Node { span, kind: NodeKind::Neg(Box::new(v)) } }
--
x:(@) "*" y:@ { Node { span, kind: NodeKind::Mul(Box::new(x), Box::new(y)) } }
x:(@) "/" y:@ { Node { span, kind: NodeKind::Div(Box::new(x), Box::new(y)) } }
--
v:@ "!" { Node { span, kind: NodeKind::Factorial(Box::new(v)) } }
--
"(" v:expr() ")" { Node { span, kind: NodeKind::Group(Box::new(v)) } }
v:var() { v }
n:number() { n }
}
});

// Parses one expression that uses every operator form and checks the span
// recorded on each node of the resulting tree.
//
// Offsets into the input, for reading the expected spans:
//
//     3  +  3  *  (  -  3  3  +  v  !  )
//     0  1  2  3  4  5  6  7  8  9  10 11
#[test]
fn main() {
assert_eq!(
lang::expr("3+3*(-33+v!)"),
Ok(Node {
span: 0..12,
kind: NodeKind::Add(
Box::new(Node {
span: 0..1,
kind: NodeKind::Number(3),
}),
Box::new(Node {
span: 2..12,
kind: NodeKind::Mul(
Box::new(Node {
span: 2..3,
kind: NodeKind::Number(3),
}),
Box::new(Node {
// The group's span includes both parentheses.
span: 4..12,
kind: NodeKind::Group(Box::new(Node {
span: 5..11,
kind: NodeKind::Add(
Box::new(Node {
span: 5..8,
kind: NodeKind::Neg(Box::new(Node {
span: 6..8,
kind: NodeKind::Number(33),
})),
}),
Box::new(Node {
span: 9..11,
kind: NodeKind::Factorial(Box::new(Node {
span: 9..10,
kind: NodeKind::Var("v".to_string()),
})),
}),
),
})),
}),
)
}),
),
})
);
}

// Grammar with an extra argument and two injected variables: checks that an
// `inject` body can use the grammar argument (`offset`) and can return a value
// borrowing from the input (`&'input str`).
peg::parser!( grammar inject2(offset: usize) for str {
inject span(_input, lpos, rpos) -> Range<usize> { (offset + lpos)..(offset + rpos) }
inject text(input, lpos, rpos) -> &'input str { &input[lpos..rpos] }

pub rule test() -> (Range<usize>, String)
= "abc" { (span, text.to_string()) }
});

// With `offset = 10`, matching "abc" at 0..3 must give the shifted span 10..13
// and the matched text itself.
#[test]
fn inject2() {
assert_eq!(inject2::test("abc", 10), Ok((10..13, "abc".into())));
}
1 change: 1 addition & 0 deletions tests/pass/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ mod custom_expr;
mod errors;
mod generic_fn_traits;
mod grammar_with_args_and_cache;
mod inject_span;
mod keyval;
mod lifetimes;
mod memoization;
Expand Down
Loading