From 2f54dcb783d9b77f7cd12c0b784b29271ed4b650 Mon Sep 17 00:00:00 2001 From: Nickolay Ponomarev Date: Sat, 6 Jun 2020 07:38:57 +0300 Subject: [PATCH 01/13] Change CASE representation in AST --- src/ast/mod.rs | 33 +++++++++++++++++++----------- src/parser.rs | 11 +++++----- tests/sqlparser_common.rs | 42 ++++++++++++++++++++++----------------- 3 files changed, 50 insertions(+), 36 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 2dbf42b29..55e570bcf 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -207,15 +207,10 @@ pub enum Expr { Value(Value), /// Scalar function call e.g. `LEFT(foo, 5)` Function(Function), - /// `CASE [] WHEN THEN ... [ELSE ] END` - /// - /// Note we only recognize a complete single expression as ``, - /// not `< 0` nor `1, 2, 3` as allowed in a `` per - /// + /// `CASE [] [ELSE ] END` Case { operand: Option>, - conditions: Vec, - results: Vec, + when_clauses: Vec, else_result: Option>, }, /// An exists expression `EXISTS(SELECT ...)`, used in expressions like @@ -282,17 +277,14 @@ impl fmt::Display for Expr { Expr::Function(fun) => write!(f, "{}", fun), Expr::Case { operand, - conditions, - results, + when_clauses, else_result, } => { f.write_str("CASE")?; if let Some(operand) = operand { write!(f, " {}", operand)?; } - for (c, r) in conditions.iter().zip(results) { - write!(f, " WHEN {} THEN {}", c, r)?; - } + write!(f, " {}", display_separated(when_clauses, " "))?; if let Some(else_result) = else_result { write!(f, " ELSE {}", else_result)?; @@ -306,6 +298,23 @@ impl fmt::Display for Expr { } } +/// An individual `WHEN THEN ` clause from a `CASE`. 
+/// +/// Note: we only recognize a complete single expression as ``, +/// not `< 0` nor `1, 2, 3` as allowed in a `` per +/// +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct WhenClause { + pub condition: Expr, + pub result: Expr, +} + +impl fmt::Display for WhenClause { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "WHEN {} THEN {}", self.condition, self.result) + } +} + /// A window specification (i.e. `OVER (PARTITION BY .. ORDER BY .. etc.)`) #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct WindowSpec { diff --git a/src/parser.rs b/src/parser.rs index 9a22f4d19..25500fc16 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -360,12 +360,12 @@ impl Parser { operand = Some(Box::new(self.parse_expr()?)); self.expect_keyword("WHEN")?; } - let mut conditions = vec![]; - let mut results = vec![]; + let mut when_clauses = vec![]; loop { - conditions.push(self.parse_expr()?); + let condition = self.parse_expr()?; self.expect_keyword("THEN")?; - results.push(self.parse_expr()?); + let result = self.parse_expr()?; + when_clauses.push(WhenClause { condition, result }); if !self.parse_keyword("WHEN") { break; } @@ -378,8 +378,7 @@ impl Parser { self.expect_keyword("END")?; Ok(Expr::Case { operand, - conditions, - results, + when_clauses, else_result, }) } diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 1e1c54e10..69efe0a26 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -1641,23 +1641,27 @@ fn parse_searched_case_expr() { assert_eq!( &Case { operand: None, - conditions: vec![ - IsNull(Box::new(Identifier(Ident::new("bar")))), - BinaryOp { - left: Box::new(Identifier(Ident::new("bar"))), - op: Eq, - right: Box::new(Expr::Value(number("0"))) + when_clauses: vec![ + WhenClause { + condition: IsNull(Box::new(Identifier(Ident::new("bar")))), + result: Expr::Value(Value::SingleQuotedString("null".to_string())), + }, + WhenClause { + condition: BinaryOp { + left: 
Box::new(Identifier(Ident::new("bar"))), + op: Eq, + right: Box::new(Expr::Value(number("0"))) + }, + result: Expr::Value(Value::SingleQuotedString("=0".to_string())), + }, + WhenClause { + condition: BinaryOp { + left: Box::new(Identifier(Ident::new("bar"))), + op: GtEq, + right: Box::new(Expr::Value(number("0"))) + }, + result: Expr::Value(Value::SingleQuotedString(">=0".to_string())), }, - BinaryOp { - left: Box::new(Identifier(Ident::new("bar"))), - op: GtEq, - right: Box::new(Expr::Value(number("0"))) - } - ], - results: vec![ - Expr::Value(Value::SingleQuotedString("null".to_string())), - Expr::Value(Value::SingleQuotedString("=0".to_string())), - Expr::Value(Value::SingleQuotedString(">=0".to_string())) ], else_result: Some(Box::new(Expr::Value(Value::SingleQuotedString( "<0".to_string() @@ -1676,8 +1680,10 @@ fn parse_simple_case_expr() { assert_eq!( &Case { operand: Some(Box::new(Identifier(Ident::new("foo")))), - conditions: vec![Expr::Value(number("1"))], - results: vec![Expr::Value(Value::SingleQuotedString("Y".to_string())),], + when_clauses: vec![WhenClause { + condition: Expr::Value(number("1")), + result: Expr::Value(Value::SingleQuotedString("Y".to_string())), + }], else_result: Some(Box::new(Expr::Value(Value::SingleQuotedString( "N".to_string() )))) From 46b2b61e857ea9cfc93a0af2bf7438f22c582886 Mon Sep 17 00:00:00 2001 From: Nickolay Ponomarev Date: Sun, 3 May 2020 07:28:07 +0300 Subject: [PATCH 02/13] Improve code comments --- src/parser.rs | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index 25500fc16..11a57c008 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -157,7 +157,9 @@ impl Parser { self.parse_subexpr(0) } - /// Parse tokens until the precedence changes + /// Parse an expression, that either follows an operator with the + /// specified `precedence` or starts at the beginning, in which case + /// the `precedence` is 0 (representing the lowest binding 
power). pub fn parse_subexpr(&mut self, precedence: u8) -> Result { debug!("parsing expr"); let mut expr = self.parse_prefix()?; @@ -169,6 +171,18 @@ impl Parser { break; } + // Here next_precedence > precedence... i.e. the following operator + // has higher binding power than the operator to the left of `expr` + // In the following illustration, we're at the second (and the + // last) iteration of this loop. + // + // expr + // _______ + // a + b * c * d + e + // ^ ^ + // | |< current token (returned by `peek_token()`; + // `precedence` has `next_precedence`) + // expr = self.parse_infix(expr, next_precedence)?; } Ok(expr) @@ -684,7 +698,8 @@ impl Parser { const BETWEEN_PREC: u8 = 20; const PLUS_MINUS_PREC: u8 = 30; - /// Get the precedence of the next token + /// Get the precedence of the next unprocessed token (or multiple + /// tokens, in cases like `NOT IN`) pub fn get_next_precedence(&self) -> Result { if let Some(token) = self.peek_token() { debug!("get_next_precedence() {:?}", token); @@ -1261,7 +1276,7 @@ impl Parser { }) } - /// Parse a copy statement + /// Parse a PostgreSQL `COPY` statement pub fn parse_copy(&mut self) -> Result { let table_name = self.parse_object_name()?; let columns = self.parse_parenthesized_column_list(Optional)?; @@ -1275,14 +1290,8 @@ impl Parser { }) } - /// Parse a tab separated values in - /// COPY payload + /// Parse a tab separated values in PostgreSQL `COPY` payload fn parse_tsv(&mut self) -> Result>, ParserError> { - let values = self.parse_tab_value()?; - Ok(values) - } - - fn parse_tab_value(&mut self) -> Result>, ParserError> { let mut values = vec![]; let mut content = String::from(""); while let Some(t) = self.next_token_no_skip() { From 06fe8dbd2fd7a46e350bfae14b4e9b6bb9510c36 Mon Sep 17 00:00:00 2001 From: Nickolay Ponomarev Date: Fri, 1 May 2020 04:04:45 +0300 Subject: [PATCH 03/13] Split parse_statements from parse_sql `Parser::parse_sql()` is intended to be only a helper, the user should be able to use it or not, 
without having to re-implement parts of the parsing logic. --- src/parser.rs | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index 11a57c008..62b8ec7e8 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -86,22 +86,26 @@ impl Parser { let mut tokenizer = Tokenizer::new(dialect, &sql); let tokens = tokenizer.tokenize()?; let mut parser = Parser::new(tokens); + parser.parse_statements() + } + + /// Parse zero or more SQL statements delimited with semicolon. + pub fn parse_statements(&mut self) -> Result, ParserError> { let mut stmts = Vec::new(); let mut expecting_statement_delimiter = false; - debug!("Parsing sql '{}'...", sql); loop { // ignore empty statements (between successive statement delimiters) - while parser.consume_token(&Token::SemiColon) { + while self.consume_token(&Token::SemiColon) { expecting_statement_delimiter = false; } - if parser.peek_token().is_none() { + if self.peek_token().is_none() { break; } else if expecting_statement_delimiter { - return parser.expected("end of statement", parser.peek_token()); + return self.expected("end of statement", self.peek_token()); } - let statement = parser.parse_statement()?; + let statement = self.parse_statement()?; stmts.push(statement); expecting_statement_delimiter = true; } From 77a8adf2f1fa5d48f291f95196060a971ab2b7a3 Mon Sep 17 00:00:00 2001 From: Nickolay Ponomarev Date: Sun, 3 May 2020 13:18:36 +0300 Subject: [PATCH 04/13] Introduce parser.start/reset to support back-tracking Adjusting parser's `self.index` always felt like a hack, and it becomes more important as I need to store more state in my "lossless syntax tree" prototype. --- src/parser.rs | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index 62b8ec7e8..970d39d39 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -34,6 +34,12 @@ macro_rules! 
parser_err { }; } +/// The parser state +pub struct Marker { + /// position in the token stream (`parser.index`) + index: usize, +} + #[derive(PartialEq)] pub enum IsOptional { Optional, @@ -742,6 +748,14 @@ impl Parser { } } + pub fn start(&mut self) -> Marker { + Marker { index: self.index } + } + + pub fn reset(&mut self, m: Marker) { + self.index = m.index; + } + /// Return the first non-whitespace token that has not yet been processed /// (or None if reached end-of-file) pub fn peek_token(&self) -> Option { @@ -827,12 +841,10 @@ impl Parser { /// Look for an expected sequence of keywords and consume them if they exist #[must_use] pub fn parse_keywords(&mut self, keywords: Vec<&'static str>) -> bool { - let index = self.index; + let checkpoint = self.start(); for keyword in keywords { if !self.parse_keyword(&keyword) { - //println!("parse_keywords aborting .. did not find {}", keyword); - // reset index and return immediately - self.index = index; + self.reset(checkpoint); return false; } } @@ -1920,7 +1932,7 @@ impl Parser { } if self.consume_token(&Token::LParen) { - let index = self.index; + let checkpoint = self.start(); // A left paren introduces either a derived table (i.e., a subquery) // or a nested join. It's nearly impossible to determine ahead of // time which it is... so we just try to parse both. @@ -1948,7 +1960,7 @@ impl Parser { // the '(' we've recently consumed does not start a derived table // (cases 1, 2, or 4). Ignore the error and back up to where we // were before - right after the opening '('. - self.index = index; + self.reset(checkpoint); // Inside the parentheses we expect to find a table factor // followed by some joins or another level of nesting. 
From f1721135f22d251ceafb2727255aba7273b00b71 Mon Sep 17 00:00:00 2001 From: Nickolay Ponomarev Date: Sat, 6 Jun 2020 08:24:46 +0300 Subject: [PATCH 05/13] Update the README for the CST branch --- README.md | 185 +++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 184 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 24674d911..170a31dee 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,187 @@ -# Extensible SQL Lexer and Parser for Rust +> _The following pertains to the `cst` branch; the [upstream README is below](#upstream-readme)._ +> +> ⚠️ This branch is regularly rebased. Please let me know before working off it to coordinate. + +**Preserving full source code information ([#161](https://github.com/andygrove/sqlparser-rs/issues/161)) would enable SQL rewriting/refactoring tools based on sqlparser-rs.** For example: +1. **Error reporting**, both in the parser and in later stages of query processing, would benefit from knowing the source code location of SQL constructs ([#179](https://github.com/andygrove/sqlparser-rs/issues/179)) +2. **SQL pretty-printing** requires comments to be preserved in AST (see [#175](https://github.com/andygrove/sqlparser-rs/issues/175), mentioning [forma](https://github.com/maxcountryman/forma)) +3. **Refactoring via AST transformations** would also benefit from having full control over serialization, a possible solution for dialect-specific "writers" ([#18](https://github.com/andygrove/sqlparser-rs/issues/18)) +4. Analyzing partially invalid code may be useful in the context of an IDE or other tooling. + +**I think that adopting [rust-analyzer's design][ra-syntax], that includes a lossless syntax tree, is the right direction for sqlparser-rs.** In addition to solving the use-cases described above, it helps in other ways: + +5. We can omit syntax that does not affect semantics of the query (e.g. 
[`ROW` vs `ROWS`](https://github.com/andygrove/sqlparser-rs/blob/418b9631ce9c24cf9bb26cf7dd9e42edd29de985/src/ast/query.rs#L416)) from the typed AST by default, reducing the implementation effort. +6. Having a homogenous syntax tree also alleviates the need for a "visitor" ([#114](https://github.com/andygrove/sqlparser-rs/pull/114)), also reducing the burden on implementors of new syntax + +In 2020 many new people contributed to `sqlparser-rs`, some bringing up the use-cases above. I found myself mentioning this design multiple times, so I felt I should "show the code" instead of just talking about it. + +Current typed AST vs rowan +========================== + +To recap, the input SQL is currently parsed directly into _typed AST_ - with each node of the tree represented by a Rust `struct`/`enum` of a specific type, referring to other structs of specific type, such as: + + struct Select { + pub projection: Vec, + ... + } + +We try to retain most of "important" syntax in this representation (including, for example, [`Ident::quote_style`](https://github.com/andygrove/sqlparser-rs/blob/d32df527e68dd76d857f47ea051a3ec22138469b/src/ast/mod.rs#L77) and [`OffsetRows`](https://github.com/andygrove/sqlparser-rs/blob/418b9631ce9c24cf9bb26cf7dd9e42edd29de985/src/ast/query.rs#L416)), but there doesn't seem to be a practical way to extend it to also store whitespace, comments, and source code spans. + +The lossless syntax tree +------------------------ + +In the alternative design, the parser produces a tree (which I'll call "CST", [not 100% correct though it is](https://dev.to/cad97/lossless-syntax-trees-280c)), in which every node has the same Rust type (`SyntaxNode`), and a numeric `SyntaxKind` determines what kind of node it is.
Under the hood, the leaf and the non-leaf nodes are different: + +* Each leaf node stores a slice of the source text; +* Each intermediate node represents a string obtained by concatenating the text of its children; +* The root node, consequently, represents exactly the original source code. + +_(The actual [rust-analyzer's design][ra-syntax] is more involved, but the details are not relevant to this discussion.)_ + +As an example, an SQL query "`select DISTINCT /* ? */ 1234`" could be represented as a tree like the following one: + + SELECT@0..29 + Keyword@0..6 "select" + Whitespace@6..7 " " + DISTINCT_OR_ALL + Keyword@7..15 "DISTINCT" + SELECT_ITEM_UNNAMED@16..29 + Whitespace@16..17 " " + Comment@17..24 "/* ? */" + Whitespace@24..25 " " + Number@25..29 "1234" + +_(Using the `SyntaxKind@start_pos..end_pos "relevant source code"` notation)_ + +Note how all the formatting and comments are retained. + +Such tree data structure is available for re-use as a separate crate ([`rowan`](https://github.com/rust-analyzer/rowan)), and **as the proof-of-concept I extended the parser to populate a rowan-based tree _along with the typed AST_, for a few of the supported SQL constructs.** + +Open question: The future design of the typed AST +------------------------------------------------- + +Though included in the PoC, **constructing both an AST and a CST in parallel should be considered a transitional solution only**, as it will not let us reap the full benefits of the proposed design (esp. points 1, 4, and 5). Additionally, the current one-AST-fits-all approach makes every new feature in the parser a (semver) breaking change, and makes the types as loose as necessary to fit the common denominator (e.g. if something is optional in one dialect, it has to be optional in the AST). + +What can we do instead? + +### Rust-analyzer's AST + +In rust-analyzer the AST layer does not store any additional data.
Instead a "newtype" (a struct with exactly one field - the underlying CST node) is defined for each AST node type: + + struct Select { syntax: SyntaxNode }; // the newtype + impl Select { + fn syntax(&self) -> &SyntaxNode { &self.syntax } + fn cast(syntax: SyntaxNode) -> Option { + match syntax.kind { + SyntaxKind::SELECT => Some(Select { syntax }), + _ => None, + } + } + ... + +Such newtypes define APIs to let the user navigate the AST through accessors specific to this node type: + + // ...`impl Select` continued + pub fn distinct_or_all(&self) -> Option { + AstChildren::new(&self.syntax).next() + } + pub fn projection(&self) -> AstChildren { + AstChildren::new(&self.syntax) + } + +These accessors go through the node's direct children, looking for nodes of a specific `SyntaxKind` (by trying to `cast()` them to the requested output type). + +This approach is a good fit for IDEs, as it can work on partial / invalid source code due to its lazy nature. Whether it is acceptable in other contexts is an open question ([though it was **not** rejected w.r.t rust-analyzer and rustc sharing a libsyntax2.0](https://github.com/rust-lang/rfcs/pull/2256)). + +### Code generation and other options for the AST + +Though the specific form of the AST is yet to be determined, it seems necessary to use some form of automation to build an AST based on a CST, so that we don't have 3 places (the parser, the AST, and the CST->AST converter) to keep synchronised. + +Rust-analyzer implements its own simple code generator, which would [generate](https://github.com/rust-analyzer/rust-analyzer/blob/a0be39296d2925972cacd9fbf8b5fb258fad6947/xtask/src/codegen/gen_syntax.rs#L47) methods like the above based on a definition [like](https://github.com/rust-analyzer/rust-analyzer/blob/a0be39296d2925972cacd9fbf8b5fb258fad6947/xtask/src/ast_src.rs#L293) this: + + const AST_SRC: AstSrc = AstSrc { + nodes: &ast_nodes! { + struct Select { + DistinctOrAll, + projection: [SelectItem], + ...
+ +_(Here the `ast_nodes!` macro converts something that looks like a simplified `struct` declaration to a literal value describing the struct's name and fields.)_ + +A similar approach could be tried to eagerly build an AST akin to our current one [[*](#ref-1)]. A quick survey of our AST reveals some incompatibilities between the rust-analyzer-style codegen and our use-case: + +* In rust-analyzer all AST enums use fieldless variants (`enum Foo { Bar(Bar), Baz(Baz) }`), making codegen easier. sqlparser uses variants with fields, though there was a request to move to fieldless ([#40](https://github.com/andygrove/sqlparser-rs/issues/40)). + + In my view, our motivation here was conciseness and inertia (little reason to rewrite, and much effort needed to update - both the library, including the tests, and the consumers). I think this can change. + +* RA's codegen assumes that the _type_ of a node usually determines its relation to its parent: different fields in a code-generated struct have to be of different types, as all children of a given type are available from a single "field". Odd cases like `BinExpr`'s `lhs` and `rhs` (both having the type `Expr`) are [implemented manually](https://github.com/rust-analyzer/rust-analyzer/blob/a0be39296d2925972cacd9fbf8b5fb258fad6947/crates/ra_syntax/src/ast/expr_extensions.rs#L195). + + It's clear this does not work as well for an AST like ours. In rare cases there's a clear problem with our AST (e.g. `WhenClause` for `CASE` expressions is introduced on this branch), but consider: + + pub struct Select { + //... + pub projection: Vec, + pub where: Option, + pub group_by: Vec, + pub having: Option, + + The CST for this should probably have separate branches for `WHERE`, `GROUP BY` and `HAVING` at least. Should we introduce additional types like `Where` or make codegen handle this somehow? + +* Large portions of the `struct`s in our AST are allocated at once. We use `Box` only where necessary to break the cycles. 
RA's codegen doesn't have a way to specify these points. + +Of course we're not limited to stealing ideas from rust-analyzer, so alternatives can be considered. + +* Should we code-gen based on a real AST definition instead of a quasi-Rust code inside a macro like `ast_nodes`? +* Can `serde` be of help? + +I think the design of the CST should be informed by the needs of the AST, so **this is the key question for me.** I've extracted the types and the fields of the current AST into a table (see `ast-stats.js` and `ast-fields.tsv` in `util/`) to help come up with a solution. + + +Other tasks +----------- + +Other than coming up with AST/CST design, there are a number of things to do: + +* Upstream the "Support parser backtracking in the GreenNodeBuilder" commit to avoid importing a copy of `GreenNodeBuilder` into sqlparser-rs +* Setting up the testing infrastructure for the CST (rust-analyzer, again, has some good ideas here) + + +- Fix `Token`/`SyntaxKind` duplication, changing the former to + store a slice of the original source code, e.g. + `(SyntaxKind, SmolStr)` + + This should also fix the currently disabled test-cases where `Token`'s + `to_string()` does not return the original string: + + * `parse_escaped_single_quote_string_predicate` + * `parse_literal_string` (the case of `HexLiteralString`) + +- Fix the hack in `parse_keyword()` to remap the token type (RA has + `bump_remap()` for this) + +- Fix the `Parser::pending` hack (needs rethinking parser's API) + + Probably related is the issue of handling whitespace and comments: + the way this prototype handles it looks wrong. + +Remarks +------- + +1. [*] During such eager construction of an AST we could also bail on CST nodes that have no place in the typed AST. This seems a part of the possible solution to the dialect problem: this way the parser can recognize a dialect-specific construct, while each consumer can pick which bits they want to support by defining their own typed AST.
+ + +[ra-syntax]: https://github.com/rust-analyzer/rust-analyzer/blob/master/docs/dev/syntax.md + + + + + + + + + +# Upstream README [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) [![Version](https://img.shields.io/crates/v/sqlparser.svg)](https://crates.io/crates/sqlparser) From 8f2019a3321c8d432e5cce0def81e62f2b8dfb68 Mon Sep 17 00:00:00 2001 From: Nickolay Ponomarev Date: Sat, 6 Jun 2020 10:52:26 +0300 Subject: [PATCH 06/13] Add a script to extract the list of AST fields into a table Part of the `cst` branch, see its README for details. --- util/ast-fields.tsv | 226 ++++ util/ast-stats.js | 103 ++ util/test-data.js | 3068 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 3397 insertions(+) create mode 100644 util/ast-fields.tsv create mode 100644 util/ast-stats.js create mode 100644 util/test-data.js diff --git a/util/ast-fields.tsv b/util/ast-fields.tsv new file mode 100644 index 000000000..a77239caa --- /dev/null +++ b/util/ast-fields.tsv @@ -0,0 +1,226 @@ +struct_name enum_name variant_name field_name field_type +ColumnDef name Ident +ColumnDef data_type DataType +ColumnDef collation Option +ColumnDef options Vec +ColumnOptionDef name Option +ColumnOptionDef option ColumnOption +Ident value String +Ident quote_style Option +ObjectName unnamed Vec +WhenClause condition Expr +WhenClause result Expr +WindowSpec partition_by Vec +WindowSpec order_by Vec +WindowSpec window_frame Option +WindowFrame units WindowFrameUnits +WindowFrame start_bound WindowFrameBound +WindowFrame end_bound Option +Assignment id Ident +Assignment value Expr +Function name ObjectName +Function args Vec +Function over Option +Function distinct bool +ListAgg distinct bool +ListAgg expr Box +ListAgg separator Option> +ListAgg on_overflow Option +ListAgg within_group Vec +SqlOption name Ident +SqlOption value Value +Query ctes Vec +Query body SetExpr +Query order_by Vec +Query limit Option +Query offset Option 
+Query fetch Option +Select distinct bool +Select top Option +Select projection Vec +Select from Vec +Select selection Option +Select group_by Vec +Select having Option +Cte alias TableAlias +Cte query Query +TableWithJoins relation TableFactor +TableWithJoins joins Vec +TableAlias name Ident +TableAlias columns Vec +Join relation TableFactor +Join join_operator JoinOperator +OrderByExpr expr Expr +OrderByExpr asc Option +OrderByExpr nulls_first Option +Offset value Expr +Offset rows OffsetRows +Fetch with_ties bool +Fetch percent bool +Fetch quantity Option +Top with_ties bool +Top percent bool +Top quantity Option +Values unnamed Vec> +DataType::Char DataType Char unnamed Option +DataType::Varchar DataType Varchar unnamed Option +DataType::Clob DataType Clob unnamed u64 +DataType::Binary DataType Binary unnamed u64 +DataType::Varbinary DataType Varbinary unnamed u64 +DataType::Blob DataType Blob unnamed u64 +DataType::Decimal DataType Decimal unnamed Option +DataType::Decimal DataType Decimal unnamed Option +DataType::Float DataType Float unnamed Option +DataType::Custom DataType Custom unnamed ObjectName +DataType::Array DataType Array unnamed Box +AlterTableOperation::AddConstraint AlterTableOperation AddConstraint unnamed TableConstraint +AlterTableOperation::DropConstraint AlterTableOperation DropConstraint name Ident +TableConstraint::Unique TableConstraint Unique name Option +TableConstraint::Unique TableConstraint Unique columns Vec +TableConstraint::Unique TableConstraint Unique is_primary bool +TableConstraint::ForeignKey TableConstraint ForeignKey name Option +TableConstraint::ForeignKey TableConstraint ForeignKey columns Vec +TableConstraint::ForeignKey TableConstraint ForeignKey foreign_table ObjectName +TableConstraint::ForeignKey TableConstraint ForeignKey referred_columns Vec +TableConstraint::Check TableConstraint Check name Option +TableConstraint::Check TableConstraint Check expr Box +ColumnOption::Default ColumnOption Default unnamed Expr 
+ColumnOption::Unique ColumnOption Unique is_primary bool +ColumnOption::ForeignKey ColumnOption ForeignKey foreign_table ObjectName +ColumnOption::ForeignKey ColumnOption ForeignKey referred_columns Vec +ColumnOption::ForeignKey ColumnOption ForeignKey on_delete Option +ColumnOption::ForeignKey ColumnOption ForeignKey on_update Option +ColumnOption::Check ColumnOption Check unnamed Expr +Expr::Identifier Expr Identifier unnamed Ident +Expr::QualifiedWildcard Expr QualifiedWildcard unnamed Vec +Expr::CompoundIdentifier Expr CompoundIdentifier unnamed Vec +Expr::IsNull Expr IsNull unnamed Box +Expr::IsNotNull Expr IsNotNull unnamed Box +Expr::InList Expr InList expr Box +Expr::InList Expr InList list Vec +Expr::InList Expr InList negated bool +Expr::InSubquery Expr InSubquery expr Box +Expr::InSubquery Expr InSubquery subquery Box +Expr::InSubquery Expr InSubquery negated bool +Expr::Between Expr Between expr Box +Expr::Between Expr Between negated bool +Expr::Between Expr Between low Box +Expr::Between Expr Between high Box +Expr::BinaryOp Expr BinaryOp left Box +Expr::BinaryOp Expr BinaryOp op BinaryOperator +Expr::BinaryOp Expr BinaryOp right Box +Expr::UnaryOp Expr UnaryOp op UnaryOperator +Expr::UnaryOp Expr UnaryOp expr Box +Expr::Cast Expr Cast expr Box +Expr::Cast Expr Cast data_type DataType +Expr::Extract Expr Extract field DateTimeField +Expr::Extract Expr Extract expr Box +Expr::Collate Expr Collate expr Box +Expr::Collate Expr Collate collation ObjectName +Expr::Nested Expr Nested unnamed Box +Expr::Value Expr Value unnamed Value +Expr::Function Expr Function unnamed Function +Expr::Case Expr Case operand Option> +Expr::Case Expr Case when_clauses Vec +Expr::Case Expr Case else_result Option> +Expr::Exists Expr Exists unnamed Box +Expr::Subquery Expr Subquery unnamed Box +Expr::ListAgg Expr ListAgg unnamed ListAgg +WindowFrameBound::Preceding WindowFrameBound Preceding unnamed Option +WindowFrameBound::Following WindowFrameBound Following unnamed Option 
+Statement::Query Statement Query unnamed Box +Statement::Insert Statement Insert table_name ObjectName +Statement::Insert Statement Insert columns Vec +Statement::Insert Statement Insert source Box +Statement::Copy Statement Copy table_name ObjectName +Statement::Copy Statement Copy columns Vec +Statement::Copy Statement Copy values Vec> +Statement::Update Statement Update table_name ObjectName +Statement::Update Statement Update assignments Vec +Statement::Update Statement Update selection Option +Statement::Delete Statement Delete table_name ObjectName +Statement::Delete Statement Delete selection Option +Statement::CreateView Statement CreateView name ObjectName +Statement::CreateView Statement CreateView columns Vec +Statement::CreateView Statement CreateView query Box +Statement::CreateView Statement CreateView materialized bool +Statement::CreateView Statement CreateView with_options Vec +Statement::CreateTable Statement CreateTable name ObjectName +Statement::CreateTable Statement CreateTable columns Vec +Statement::CreateTable Statement CreateTable constraints Vec +Statement::CreateTable Statement CreateTable with_options Vec +Statement::CreateTable Statement CreateTable if_not_exists bool +Statement::CreateTable Statement CreateTable external bool +Statement::CreateTable Statement CreateTable file_format Option +Statement::CreateTable Statement CreateTable location Option +Statement::CreateIndex Statement CreateIndex name ObjectName +Statement::CreateIndex Statement CreateIndex table_name ObjectName +Statement::CreateIndex Statement CreateIndex columns Vec +Statement::CreateIndex Statement CreateIndex unique bool +Statement::CreateIndex Statement CreateIndex if_not_exists bool +Statement::AlterTable Statement AlterTable name ObjectName +Statement::AlterTable Statement AlterTable operation AlterTableOperation +Statement::Drop Statement Drop object_type ObjectType +Statement::Drop Statement Drop if_exists bool +Statement::Drop Statement Drop names Vec 
+Statement::Drop Statement Drop cascade bool +Statement::SetVariable Statement SetVariable local bool +Statement::SetVariable Statement SetVariable variable Ident +Statement::SetVariable Statement SetVariable value SetVariableValue +Statement::ShowVariable Statement ShowVariable variable Ident +Statement::ShowColumns Statement ShowColumns extended bool +Statement::ShowColumns Statement ShowColumns full bool +Statement::ShowColumns Statement ShowColumns table_name ObjectName +Statement::ShowColumns Statement ShowColumns filter Option +Statement::StartTransaction Statement StartTransaction modes Vec +Statement::SetTransaction Statement SetTransaction modes Vec +Statement::Commit Statement Commit chain bool +Statement::Rollback Statement Rollback chain bool +Statement::CreateSchema Statement CreateSchema schema_name ObjectName +ListAggOnOverflow::Truncate ListAggOnOverflow Truncate filler Option> +ListAggOnOverflow::Truncate ListAggOnOverflow Truncate with_count bool +TransactionMode::AccessMode TransactionMode AccessMode unnamed TransactionAccessMode +TransactionMode::IsolationLevel TransactionMode IsolationLevel unnamed TransactionIsolationLevel +ShowStatementFilter::Like ShowStatementFilter Like unnamed String +ShowStatementFilter::Where ShowStatementFilter Where unnamed Expr +SetVariableValue::Ident SetVariableValue Ident unnamed Ident +SetVariableValue::Literal SetVariableValue Literal unnamed Value +SetExpr::Select SetExpr Select unnamed Box), @@ -91,7 +91,7 @@ impl fmt::Display for SetExpr { } } -#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] pub enum SetOperator { Union, Except, @@ -111,7 +111,7 @@ impl fmt::Display for SetOperator { /// A restricted variant of `SELECT` (without CTEs/`ORDER BY`), which may /// appear either as the only body item of an `SQLQuery`, or as an operand /// to a set operation like `UNION`. 
-#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] pub struct Select { pub distinct: bool, /// MSSQL syntax: `TOP () [ PERCENT ] [ WITH TIES ]` @@ -155,7 +155,7 @@ impl fmt::Display for Select { /// The names in the column list before `AS`, when specified, replace the names /// of the columns returned by the query. The parser does not validate that the /// number of columns in the query matches the number of columns in the query. -#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] pub struct Cte { pub alias: TableAlias, pub query: Query, @@ -168,7 +168,7 @@ impl fmt::Display for Cte { } /// One item of the comma-separated list following `SELECT` -#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] pub enum SelectItem { /// Any expression, not followed by `[ AS ] alias` UnnamedExpr(Expr), @@ -191,7 +191,7 @@ impl fmt::Display for SelectItem { } } -#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] pub struct TableWithJoins { pub relation: TableFactor, pub joins: Vec, @@ -208,7 +208,7 @@ impl fmt::Display for TableWithJoins { } /// A table name or a parenthesized subquery with an optional alias -#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] pub enum TableFactor { Table { name: ObjectName, @@ -273,7 +273,7 @@ impl fmt::Display for TableFactor { } } -#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] pub struct TableAlias { pub name: Ident, pub columns: Vec, @@ -289,7 +289,7 @@ impl fmt::Display for TableAlias { } } -#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] pub struct Join { pub relation: TableFactor, pub 
join_operator: JoinOperator, @@ -354,7 +354,7 @@ impl fmt::Display for Join { } } -#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] pub enum JoinOperator { Inner(JoinConstraint), LeftOuter(JoinConstraint), @@ -367,7 +367,7 @@ pub enum JoinOperator { OuterApply, } -#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] pub enum JoinConstraint { On(Expr), Using(Vec), @@ -375,7 +375,7 @@ pub enum JoinConstraint { } /// An `ORDER BY` expression -#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] pub struct OrderByExpr { pub expr: Expr, /// Optional `ASC` or `DESC` @@ -401,7 +401,7 @@ impl fmt::Display for OrderByExpr { } } -#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] pub struct Offset { pub value: Expr, pub rows: OffsetRows, @@ -414,7 +414,7 @@ impl fmt::Display for Offset { } /// Stores the keyword after `OFFSET ` -#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] pub enum OffsetRows { /// Omitting ROW/ROWS is non-standard MySQL quirk. None, @@ -432,7 +432,7 @@ impl fmt::Display for OffsetRows { } } -#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] pub struct Fetch { pub with_ties: bool, pub percent: bool, @@ -451,7 +451,7 @@ impl fmt::Display for Fetch { } } -#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] pub struct Top { /// SQL semantic equivalent of LIMIT but with same structure as FETCH. 
pub with_ties: bool, @@ -471,7 +471,7 @@ impl fmt::Display for Top { } } -#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] pub struct Values(pub Vec>); impl fmt::Display for Values { diff --git a/src/ast/value.rs b/src/ast/value.rs index fe2870f95..64af5e429 100644 --- a/src/ast/value.rs +++ b/src/ast/value.rs @@ -12,10 +12,11 @@ #[cfg(feature = "bigdecimal")] use bigdecimal::BigDecimal; +use serde::{Deserialize, Serialize}; use std::fmt; /// Primitive SQL values such as number and string -#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] pub enum Value { /// Numeric literal #[cfg(not(feature = "bigdecimal"))] @@ -117,7 +118,7 @@ impl fmt::Display for Value { } } -#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] pub enum DateTimeField { Year, Month,