Skip to content

support general typed string literals #187

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jun 11, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@ Check https://github.com/andygrove/sqlparser-rs/commits/master for undocumented
- Change `Ident` (previously a simple `String`) to store the parsed (unquoted) `value` of the identifier and the `quote_style` separately (#143) - thanks @apparebit!
- Support Snowflake's `FROM (table_name)` (#155) - thanks @eyalleshem!
- Add line and column number to TokenizerError (#194) - thanks @Dandandan!
- Use Token::EOF instead of Option<Token> (#195)
- Make the units keyword following `INTERVAL '...'` optional (#184) - thanks @maxcountryman!
- Generalize `DATE`/`TIME`/`TIMESTAMP` literals representation in the AST (`TypedString { data_type, value }`) and allow `DATE` and other keywords to be used as identifiers when not followed by a string (#187) - thanks @maxcountryman!

### Added
- Support MSSQL `TOP (<N>) [ PERCENT ] [ WITH TIES ]` (#150) - thanks @alexkyllo!
Expand All @@ -26,6 +28,7 @@ Check https://github.com/andygrove/sqlparser-rs/commits/master for undocumented
- Support `LISTAGG()` (#174) - thanks @maxcountryman!
- Support the string concatenation operator `||` (#178) - thanks @Dandandan!
- Support bitwise AND (`&`), OR (`|`), XOR (`^`) (#181) - thanks @Dandandan!
- Add serde support to AST structs and enums (#196) - thanks @panarch!

### Fixed
- Report an error for unterminated string literals (#165)
Expand Down
8 changes: 8 additions & 0 deletions src/ast/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,10 @@ pub enum Expr {
Nested(Box<Expr>),
/// A literal value, such as string, number, date or NULL
Value(Value),
/// A constant of form `<data_type> 'value'`.
/// This can represent ANSI SQL `DATE`, `TIME`, and `TIMESTAMP` literals (such as `DATE '2020-01-01'`),
/// as well as constants of other types (a non-standard PostgreSQL extension).
TypedString { data_type: DataType, value: String },
/// Scalar function call e.g. `LEFT(foo, 5)`
Function(Function),
/// `CASE [<operand>] WHEN <condition> THEN <result> ... [ELSE <result>] END`
Expand Down Expand Up @@ -284,6 +288,10 @@ impl fmt::Display for Expr {
Expr::Collate { expr, collation } => write!(f, "{} COLLATE {}", expr, collation),
Expr::Nested(ast) => write!(f, "({})", ast),
Expr::Value(v) => write!(f, "{}", v),
Expr::TypedString { data_type, value } => {
write!(f, "{}", data_type)?;
write!(f, " '{}'", &value::escape_single_quote_string(value))
}
Expr::Function(fun) => write!(f, "{}", fun),
Expr::Case {
operand,
Expand Down
9 changes: 0 additions & 9 deletions src/ast/value.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,6 @@ pub enum Value {
HexStringLiteral(String),
/// Boolean value true or false
Boolean(bool),
/// `DATE '...'` literals
Date(String),
/// `TIME '...'` literals
Time(String),
/// `TIMESTAMP '...'` literals
Timestamp(String),
/// INTERVAL literals, roughly in the following format:
/// `INTERVAL '<value>' [ <leading_field> [ (<leading_precision>) ] ]
/// [ TO <last_field> [ (<fractional_seconds_precision>) ] ]`,
Expand Down Expand Up @@ -70,9 +64,6 @@ impl fmt::Display for Value {
Value::NationalStringLiteral(v) => write!(f, "N'{}'", v),
Value::HexStringLiteral(v) => write!(f, "X'{}'", v),
Value::Boolean(v) => write!(f, "{}", v),
Value::Date(v) => write!(f, "DATE '{}'", escape_single_quote_string(v)),
Value::Time(v) => write!(f, "TIME '{}'", escape_single_quote_string(v)),
Value::Timestamp(v) => write!(f, "TIMESTAMP '{}'", escape_single_quote_string(v)),
Value::Interval {
value,
leading_field: Some(DateTimeField::Second),
Expand Down
107 changes: 78 additions & 29 deletions src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,15 @@ macro_rules! parser_err {
};
}

// Returns early from the enclosing function with `Ok(v)` if the expression evaluates to `Some(v)`
macro_rules! return_ok_if_some {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let me know what you think about this name: it seemed a little closer to what's happening.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought maybe_parse could return a Result (return_if_ok!(self.maybe_parse(|parser| {), but this is probably fine too.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh sure, happy to do it that way if you prefer!

($e:expr) => {{
if let Some(v) = $e {
return Ok(v);
}
}};
}

#[derive(PartialEq)]
pub enum IsOptional {
Optional,
Expand Down Expand Up @@ -172,6 +181,40 @@ impl Parser {

/// Parse an expression prefix
pub fn parse_prefix(&mut self) -> Result<Expr, ParserError> {
// PostgreSQL allows any string literal to be preceded by a type name, indicating that the
// string literal represents a literal of that type. Some examples:
//
// DATE '2020-05-20'
// TIMESTAMP WITH TIME ZONE '2020-05-20 7:43:54'
// BOOL 'true'
//
// The first two are standard SQL, while the last is a PostgreSQL extension. Complicating
// matters is the fact that INTERVAL string literals may optionally be followed by special
// keywords, e.g.:
//
// INTERVAL '7' DAY
//
// Note also that naively `SELECT date` looks like a syntax error because the `date` type
// name is not followed by a string literal, but in fact in PostgreSQL it is a valid
// expression that should parse as the column name "date".
return_ok_if_some!(self.maybe_parse(|parser| {
match parser.parse_data_type()? {
DataType::Interval => parser.parse_literal_interval(),
// PostgreSQL allows almost any identifier to be used as custom data type name,
// and we support that in `parse_data_type()`. But unlike Postgres we don't
// have a list of globally reserved keywords (since they vary across dialects),
// so given `NOT 'a' LIKE 'b'`, we'd accept `NOT` as a possible custom data type
// name, resulting in `NOT 'a'` being recognized as a `TypedString` instead of
// a unary negation `NOT ('a' LIKE 'b')`. To solve this, we don't accept the
// `type 'string'` syntax for the custom data types at all.
Comment on lines +203 to +209
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This reads so much better. Thank you!

DataType::Custom(..) => parser_err!("dummy"),
data_type => Ok(Expr::TypedString {
data_type,
value: parser.parse_literal_string()?,
}),
}
}));

let expr = match self.next_token() {
Token::Word(w) => match w.keyword {
Keyword::TRUE | Keyword::FALSE | Keyword::NULL => {
Expand All @@ -180,7 +223,6 @@ impl Parser {
}
Keyword::CASE => self.parse_case_expr(),
Keyword::CAST => self.parse_cast_expr(),
Keyword::DATE => Ok(Expr::Value(Value::Date(self.parse_literal_string()?))),
Keyword::EXISTS => self.parse_exists_expr(),
Keyword::EXTRACT => self.parse_extract_expr(),
Keyword::INTERVAL => self.parse_literal_interval(),
Expand All @@ -189,10 +231,6 @@ impl Parser {
op: UnaryOperator::Not,
expr: Box::new(self.parse_subexpr(Self::UNARY_NOT_PREC)?),
}),
Keyword::TIME => Ok(Expr::Value(Value::Time(self.parse_literal_string()?))),
Keyword::TIMESTAMP => {
Ok(Expr::Value(Value::Timestamp(self.parse_literal_string()?)))
}
// Here `w` is a word, check if it's a part of a multi-part
// identifier, a function call, or a simple identifier:
_ => match self.peek_token() {
Expand Down Expand Up @@ -907,6 +945,22 @@ impl Parser {
Ok(values)
}

/// Speculatively run the parsing closure `f` against this parser.
///
/// The parser's `index` is recorded before `f` is invoked. When `f`
/// returns `Ok`, its value is handed back wrapped in `Some` and the parser
/// stays wherever `f` left it. When `f` returns `Err`, the error is
/// discarded, `index` is restored to the recorded value, and `None` is
/// returned.
#[must_use]
fn maybe_parse<T, F>(&mut self, mut f: F) -> Option<T>
where
    F: FnMut(&mut Parser) -> Result<T, ParserError>,
{
    // Remember where we started so a failed attempt can be undone.
    let checkpoint = self.index;
    match f(self) {
        Ok(parsed) => Some(parsed),
        Err(_) => {
            // Rewind: the attempt did not pan out.
            self.index = checkpoint;
            None
        }
    }
}

/// Parse either `ALL` or `DISTINCT`. Returns `true` if `DISTINCT` is parsed and results in a
/// `ParserError` if both `ALL` and `DISTINCT` are found.
pub fn parse_all_or_distinct(&mut self) -> Result<bool, ParserError> {
Expand Down Expand Up @@ -1898,7 +1952,6 @@ impl Parser {
}

if self.consume_token(&Token::LParen) {
let index = self.index;
// A left paren introduces either a derived table (i.e., a subquery)
// or a nested join. It's nearly impossible to determine ahead of
// time which it is... so we just try to parse both.
Expand All @@ -1915,30 +1968,26 @@ impl Parser {
// | (2) starts a nested join
// (1) an additional set of parens around a nested join
//
match self.parse_derived_table_factor(NotLateral) {
// The recently consumed '(' started a derived table, and we've
// parsed the subquery, followed by the closing ')', and the
// alias of the derived table. In the example above this is
// case (3), and the next token would be `NATURAL`.
Ok(table_factor) => Ok(table_factor),
Err(_) => {
// A parsing error from `parse_derived_table_factor` indicates that
// the '(' we've recently consumed does not start a derived table
// (cases 1, 2, or 4). Ignore the error and back up to where we
// were before - right after the opening '('.
self.index = index;

// Inside the parentheses we expect to find a table factor
// followed by some joins or another level of nesting.
let table_and_joins = self.parse_table_and_joins()?;
self.expect_token(&Token::RParen)?;
// The SQL spec prohibits derived and bare tables from appearing
// alone in parentheses. We don't enforce this as some databases
// (e.g. Snowflake) allow such syntax.

Ok(TableFactor::NestedJoin(Box::new(table_and_joins)))
}
}
// If the recently consumed '(' starts a derived table, the call to
// `parse_derived_table_factor` below will return success after parsing the
// subquery, followed by the closing ')', and the alias of the derived table.
// In the example above this is case (3).
return_ok_if_some!(
self.maybe_parse(|parser| parser.parse_derived_table_factor(NotLateral))
);
// A parsing error from `parse_derived_table_factor` indicates that the '(' we've
// recently consumed does not start a derived table (cases 1, 2, or 4).
// `maybe_parse` will ignore such an error and rewind to be after the opening '('.

// Inside the parentheses we expect to find a table factor
// followed by some joins or another level of nesting.
let table_and_joins = self.parse_table_and_joins()?;
self.expect_token(&Token::RParen)?;
// The SQL spec prohibits derived and bare tables from appearing
// alone in parentheses. We don't enforce this as some databases
// (e.g. Snowflake) allow such syntax.
Ok(TableFactor::NestedJoin(Box::new(table_and_joins)))
} else {
let name = self.parse_object_name()?;
// Postgres, MSSQL: table-valued functions:
Expand Down
34 changes: 28 additions & 6 deletions tests/sqlparser_common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -413,6 +413,19 @@ fn parse_null_in_select() {
);
}

#[test]
fn parse_select_with_date_column_name() {
    // A bare `date` that is not followed by a string literal must come back
    // as a plain, unquoted identifier rather than a typed string literal.
    let select = verified_only_select("SELECT date");
    let expected = Expr::Identifier(Ident {
        value: "date".into(),
        quote_style: None,
    });
    assert_eq!(&expected, expr_from_projection(only(&select.projection)));
}

#[test]
fn parse_escaped_single_quote_string_predicate() {
use self::BinaryOperator::*;
Expand Down Expand Up @@ -1426,30 +1439,39 @@ fn parse_literal_string() {

#[test]
fn parse_literal_date() {
let sql = "SELECT DATE '1999-01-01'";
let sql = "SELECT date '1999-01-01'";
let select = verified_only_select(sql);
assert_eq!(
&Expr::Value(Value::Date("1999-01-01".into())),
&Expr::TypedString {
data_type: DataType::Date,
value: "1999-01-01".into()
},
expr_from_projection(only(&select.projection)),
);
}

#[test]
fn parse_literal_time() {
let sql = "SELECT TIME '01:23:34'";
let sql = "SELECT time '01:23:34'";
let select = verified_only_select(sql);
assert_eq!(
&Expr::Value(Value::Time("01:23:34".into())),
&Expr::TypedString {
data_type: DataType::Time,
value: "01:23:34".into()
},
expr_from_projection(only(&select.projection)),
);
}

#[test]
fn parse_literal_timestamp() {
let sql = "SELECT TIMESTAMP '1999-01-01 01:23:34'";
let sql = "SELECT timestamp '1999-01-01 01:23:34'";
let select = verified_only_select(sql);
assert_eq!(
&Expr::Value(Value::Timestamp("1999-01-01 01:23:34".into())),
&Expr::TypedString {
data_type: DataType::Timestamp,
value: "1999-01-01 01:23:34".into()
},
expr_from_projection(only(&select.projection)),
);
}
Expand Down