diff --git a/CHANGELOG.md b/CHANGELOG.md index 93cfe06b4..616f8774e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,7 +13,9 @@ Check https://github.com/andygrove/sqlparser-rs/commits/master for undocumented - Change `Ident` (previously a simple `String`) to store the parsed (unquoted) `value` of the identifier and the `quote_style` separately (#143) - thanks @apparebit! - Support Snowflake's `FROM (table_name)` (#155) - thanks @eyalleshem! - Add line and column number to TokenizerError (#194) - thanks @Dandandan! +- Use Token::EOF instead of Option (#195) - Make the units keyword following `INTERVAL '...'` optional (#184) - thanks @maxcountryman! +- Generalize `DATE`/`TIME`/`TIMESTAMP` literals representation in the AST (`TypedString { data_type, value }`) and allow `DATE` and other keywords to be used as identifiers when not followed by a string (#187) - thanks @maxcountryman! ### Added - Support MSSQL `TOP () [ PERCENT ] [ WITH TIES ]` (#150) - thanks @alexkyllo! @@ -26,6 +28,7 @@ Check https://github.com/andygrove/sqlparser-rs/commits/master for undocumented - Support `LISTAGG()` (#174) - thanks @maxcountryman! - Support the string concatentation operator `||` (#178) - thanks @Dandandan! - Support bitwise AND (`&`), OR (`|`), XOR (`^`) (#181) - thanks @Dandandan! +- Add serde support to AST structs and enums (#196) - thanks @panarch! ### Fixed - Report an error for unterminated string literals (#165) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index da8088842..fe5cba0de 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -210,6 +210,10 @@ pub enum Expr { Nested(Box), /// A literal value, such as string, number, date or NULL Value(Value), + /// A constant of form ` 'value'`. + /// This can represent ANSI SQL `DATE`, `TIME`, and `TIMESTAMP` literals (such as `DATE '2020-01-01'`), + /// as well as constants of other types (a non-standard PostgreSQL extension). + TypedString { data_type: DataType, value: String }, /// Scalar function call e.g. 
// Early-return helper for "try this first" parsing steps.
//
// Evaluates the optional expression `$e` exactly once. When it yields
// `Some(value)`, the *enclosing function* returns `Ok(value)` immediately;
// when it yields `None`, nothing happens and execution continues with the
// statement following the macro invocation.
macro_rules! return_ok_if_some {
    ($e:expr) => {{
        match $e {
            Some(found) => return Ok(found),
            None => {}
        }
    }};
}
=> parser_err!("dummy"), + data_type => Ok(Expr::TypedString { + data_type, + value: parser.parse_literal_string()?, + }), + } + })); + let expr = match self.next_token() { Token::Word(w) => match w.keyword { Keyword::TRUE | Keyword::FALSE | Keyword::NULL => { @@ -180,7 +223,6 @@ impl Parser { } Keyword::CASE => self.parse_case_expr(), Keyword::CAST => self.parse_cast_expr(), - Keyword::DATE => Ok(Expr::Value(Value::Date(self.parse_literal_string()?))), Keyword::EXISTS => self.parse_exists_expr(), Keyword::EXTRACT => self.parse_extract_expr(), Keyword::INTERVAL => self.parse_literal_interval(), @@ -189,10 +231,6 @@ impl Parser { op: UnaryOperator::Not, expr: Box::new(self.parse_subexpr(Self::UNARY_NOT_PREC)?), }), - Keyword::TIME => Ok(Expr::Value(Value::Time(self.parse_literal_string()?))), - Keyword::TIMESTAMP => { - Ok(Expr::Value(Value::Timestamp(self.parse_literal_string()?))) - } // Here `w` is a word, check if it's a part of a multi-part // identifier, a function call, or a simple identifier: _ => match self.peek_token() { @@ -907,6 +945,22 @@ impl Parser { Ok(values) } + /// Run a parser method `f`, reverting back to the current position + /// if unsuccessful. + #[must_use] + fn maybe_parse(&mut self, mut f: F) -> Option + where + F: FnMut(&mut Parser) -> Result, + { + let index = self.index; + if let Ok(t) = f(self) { + Some(t) + } else { + self.index = index; + None + } + } + /// Parse either `ALL` or `DISTINCT`. Returns `true` if `DISTINCT` is parsed and results in a /// `ParserError` if both `ALL` and `DISTINCT` are fround. pub fn parse_all_or_distinct(&mut self) -> Result { @@ -1898,7 +1952,6 @@ impl Parser { } if self.consume_token(&Token::LParen) { - let index = self.index; // A left paren introduces either a derived table (i.e., a subquery) // or a nested join. It's nearly impossible to determine ahead of // time which it is... so we just try to parse both. 
@@ -1915,30 +1968,26 @@ impl Parser { // | (2) starts a nested join // (1) an additional set of parens around a nested join // - match self.parse_derived_table_factor(NotLateral) { - // The recently consumed '(' started a derived table, and we've - // parsed the subquery, followed by the closing ')', and the - // alias of the derived table. In the example above this is - // case (3), and the next token would be `NATURAL`. - Ok(table_factor) => Ok(table_factor), - Err(_) => { - // A parsing error from `parse_derived_table_factor` indicates that - // the '(' we've recently consumed does not start a derived table - // (cases 1, 2, or 4). Ignore the error and back up to where we - // were before - right after the opening '('. - self.index = index; - - // Inside the parentheses we expect to find a table factor - // followed by some joins or another level of nesting. - let table_and_joins = self.parse_table_and_joins()?; - self.expect_token(&Token::RParen)?; - // The SQL spec prohibits derived and bare tables from appearing - // alone in parentheses. We don't enforce this as some databases - // (e.g. Snowflake) allow such syntax. - Ok(TableFactor::NestedJoin(Box::new(table_and_joins))) - } - } + // If the recently consumed '(' starts a derived table, the call to + // `parse_derived_table_factor` below will return success after parsing the + // subquery, followed by the closing ')', and the alias of the derived table. + // In the example above this is case (3). + return_ok_if_some!( + self.maybe_parse(|parser| parser.parse_derived_table_factor(NotLateral)) + ); + // A parsing error from `parse_derived_table_factor` indicates that the '(' we've + // recently consumed does not start a derived table (cases 1, 2, or 4). + // `maybe_parse` will ignore such an error and rewind to be after the opening '('. + + // Inside the parentheses we expect to find a table factor + // followed by some joins or another level of nesting. 
#[test]
fn parse_select_with_date_column_name() {
    // A bare `date` that is not followed by a string literal is expected to
    // come back as an ordinary, unquoted identifier.
    let select = verified_only_select("SELECT date");
    let expected = Expr::Identifier(Ident {
        value: "date".into(),
        quote_style: None,
    });
    assert_eq!(&expected, expr_from_projection(only(&select.projection)));
}

#[test]
fn parse_literal_date() {
    // `date '<string>'` is expected to produce a typed string of type `Date`.
    let select = verified_only_select("SELECT date '1999-01-01'");
    let expected = Expr::TypedString {
        data_type: DataType::Date,
        value: "1999-01-01".into(),
    };
    assert_eq!(&expected, expr_from_projection(only(&select.projection)));
}

#[test]
fn parse_literal_time() {
    // `time '<string>'` is expected to produce a typed string of type `Time`.
    let select = verified_only_select("SELECT time '01:23:34'");
    let expected = Expr::TypedString {
        data_type: DataType::Time,
        value: "01:23:34".into(),
    };
    assert_eq!(&expected, expr_from_projection(only(&select.projection)));
}

#[test]
fn parse_literal_timestamp() {
    // `timestamp '<string>'` is expected to produce a typed string of type `Timestamp`.
    let select = verified_only_select("SELECT timestamp '1999-01-01 01:23:34'");
    let expected = Expr::TypedString {
        data_type: DataType::Timestamp,
        value: "1999-01-01 01:23:34".into(),
    };
    assert_eq!(&expected, expr_from_projection(only(&select.projection)));
}