Skip to content

Commit 6cdd4a1

Browse files
maxcountryman, benesch, and nickolay
authored
Support general "typed string" literals (#187)
Fixes #168 by enabling `DATE` and other keywords to be used as identifiers when not followed by a string literal. A "typed string" is our term for a generalized version of `DATE '...'`/`TIME '...'`/`TIMESTAMP '...'` literals, represented as `TypedString { data_type, value }` in the AST. Unlike DATE/TIME/TIMESTAMP literals, this is a non-standard extension supported by PostgreSQL at least. This is a port of MaterializeInc/materialize#3146. Co-authored-by: Nikhil Benesch <[email protected]> Co-authored-by: Nickolay Ponomarev <[email protected]>
1 parent 34548e8 commit 6cdd4a1

File tree

5 files changed

+117
-44
lines changed

5 files changed

+117
-44
lines changed

CHANGELOG.md

+3
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,9 @@ Check https://github.com/andygrove/sqlparser-rs/commits/master for undocumented
1313
- Change `Ident` (previously a simple `String`) to store the parsed (unquoted) `value` of the identifier and the `quote_style` separately (#143) - thanks @apparebit!
1414
- Support Snowflake's `FROM (table_name)` (#155) - thanks @eyalleshem!
1515
- Add line and column number to TokenizerError (#194) - thanks @Dandandan!
16+
- Use Token::EOF instead of Option<Token> (#195)
1617
- Make the units keyword following `INTERVAL '...'` optional (#184) - thanks @maxcountryman!
18+
- Generalize `DATE`/`TIME`/`TIMESTAMP` literals representation in the AST (`TypedString { data_type, value }`) and allow `DATE` and other keywords to be used as identifiers when not followed by a string (#187) - thanks @maxcountryman!
1719

1820
### Added
1921
- Support MSSQL `TOP (<N>) [ PERCENT ] [ WITH TIES ]` (#150) - thanks @alexkyllo!
@@ -26,6 +28,7 @@ Check https://github.com/andygrove/sqlparser-rs/commits/master for undocumented
2628
- Support `LISTAGG()` (#174) - thanks @maxcountryman!
2729
- Support the string concatenation operator `||` (#178) - thanks @Dandandan!
2830
- Support bitwise AND (`&`), OR (`|`), XOR (`^`) (#181) - thanks @Dandandan!
31+
- Add serde support to AST structs and enums (#196) - thanks @panarch!
2932

3033
### Fixed
3134
- Report an error for unterminated string literals (#165)

src/ast/mod.rs

+8
Original file line number | Diff line number | Diff line change
@@ -210,6 +210,10 @@ pub enum Expr {
210210
Nested(Box<Expr>),
211211
/// A literal value, such as string, number, date or NULL
212212
Value(Value),
213+
/// A constant of form `<data_type> 'value'`.
214+
/// This can represent ANSI SQL `DATE`, `TIME`, and `TIMESTAMP` literals (such as `DATE '2020-01-01'`),
215+
/// as well as constants of other types (a non-standard PostgreSQL extension).
216+
TypedString { data_type: DataType, value: String },
213217
/// Scalar function call e.g. `LEFT(foo, 5)`
214218
Function(Function),
215219
/// `CASE [<operand>] WHEN <condition> THEN <result> ... [ELSE <result>] END`
@@ -284,6 +288,10 @@ impl fmt::Display for Expr {
284288
Expr::Collate { expr, collation } => write!(f, "{} COLLATE {}", expr, collation),
285289
Expr::Nested(ast) => write!(f, "({})", ast),
286290
Expr::Value(v) => write!(f, "{}", v),
291+
Expr::TypedString { data_type, value } => {
292+
write!(f, "{}", data_type)?;
293+
write!(f, " '{}'", &value::escape_single_quote_string(value))
294+
}
287295
Expr::Function(fun) => write!(f, "{}", fun),
288296
Expr::Case {
289297
operand,

src/ast/value.rs

-9
Original file line number | Diff line number | Diff line change
@@ -33,12 +33,6 @@ pub enum Value {
3333
HexStringLiteral(String),
3434
/// Boolean value true or false
3535
Boolean(bool),
36-
/// `DATE '...'` literals
37-
Date(String),
38-
/// `TIME '...'` literals
39-
Time(String),
40-
/// `TIMESTAMP '...'` literals
41-
Timestamp(String),
4236
/// INTERVAL literals, roughly in the following format:
4337
/// `INTERVAL '<value>' [ <leading_field> [ (<leading_precision>) ] ]
4438
/// [ TO <last_field> [ (<fractional_seconds_precision>) ] ]`,
@@ -70,9 +64,6 @@ impl fmt::Display for Value {
7064
Value::NationalStringLiteral(v) => write!(f, "N'{}'", v),
7165
Value::HexStringLiteral(v) => write!(f, "X'{}'", v),
7266
Value::Boolean(v) => write!(f, "{}", v),
73-
Value::Date(v) => write!(f, "DATE '{}'", escape_single_quote_string(v)),
74-
Value::Time(v) => write!(f, "TIME '{}'", escape_single_quote_string(v)),
75-
Value::Timestamp(v) => write!(f, "TIMESTAMP '{}'", escape_single_quote_string(v)),
7667
Value::Interval {
7768
value,
7869
leading_field: Some(DateTimeField::Second),

src/parser.rs

+78-29
Original file line number | Diff line number | Diff line change
@@ -35,6 +35,15 @@ macro_rules! parser_err {
3535
};
3636
}
3737

38+
// Returns `Ok(v)` from the enclosing function if the optional expression is `Some(v)`; otherwise falls through
39+
macro_rules! return_ok_if_some {
40+
($e:expr) => {{
41+
if let Some(v) = $e {
42+
return Ok(v);
43+
}
44+
}};
45+
}
46+
3847
#[derive(PartialEq)]
3948
pub enum IsOptional {
4049
Optional,
@@ -172,6 +181,40 @@ impl Parser {
172181

173182
/// Parse an expression prefix
174183
pub fn parse_prefix(&mut self) -> Result<Expr, ParserError> {
184+
// PostgreSQL allows any string literal to be preceded by a type name, indicating that the
185+
// string literal represents a literal of that type. Some examples:
186+
//
187+
// DATE '2020-05-20'
188+
// TIMESTAMP WITH TIME ZONE '2020-05-20 7:43:54'
189+
// BOOL 'true'
190+
//
191+
// The first two are standard SQL, while the last is a PostgreSQL extension. Complicating
192+
// matters is the fact that INTERVAL string literals may optionally be followed by special
193+
// keywords, e.g.:
194+
//
195+
// INTERVAL '7' DAY
196+
//
197+
// Note also that naively `SELECT date` looks like a syntax error because the `date` type
198+
// name is not followed by a string literal, but in fact in PostgreSQL it is a valid
199+
// expression that should parse as the column name "date".
200+
return_ok_if_some!(self.maybe_parse(|parser| {
201+
match parser.parse_data_type()? {
202+
DataType::Interval => parser.parse_literal_interval(),
203+
// PostgreSQL allows almost any identifier to be used as a custom data type name,
204+
// and we support that in `parse_data_type()`. But unlike Postgres we don't
205+
// have a list of globally reserved keywords (since they vary across dialects),
206+
// so given `NOT 'a' LIKE 'b'`, we'd accept `NOT` as a possible custom data type
207+
// name, resulting in `NOT 'a'` being recognized as a `TypedString` instead of
208+
// a unary negation `NOT ('a' LIKE 'b')`. To solve this, we don't accept the
209+
// `type 'string'` syntax for the custom data types at all.
210+
DataType::Custom(..) => parser_err!("dummy"),
211+
data_type => Ok(Expr::TypedString {
212+
data_type,
213+
value: parser.parse_literal_string()?,
214+
}),
215+
}
216+
}));
217+
175218
let expr = match self.next_token() {
176219
Token::Word(w) => match w.keyword {
177220
Keyword::TRUE | Keyword::FALSE | Keyword::NULL => {
@@ -180,7 +223,6 @@ impl Parser {
180223
}
181224
Keyword::CASE => self.parse_case_expr(),
182225
Keyword::CAST => self.parse_cast_expr(),
183-
Keyword::DATE => Ok(Expr::Value(Value::Date(self.parse_literal_string()?))),
184226
Keyword::EXISTS => self.parse_exists_expr(),
185227
Keyword::EXTRACT => self.parse_extract_expr(),
186228
Keyword::INTERVAL => self.parse_literal_interval(),
@@ -189,10 +231,6 @@ impl Parser {
189231
op: UnaryOperator::Not,
190232
expr: Box::new(self.parse_subexpr(Self::UNARY_NOT_PREC)?),
191233
}),
192-
Keyword::TIME => Ok(Expr::Value(Value::Time(self.parse_literal_string()?))),
193-
Keyword::TIMESTAMP => {
194-
Ok(Expr::Value(Value::Timestamp(self.parse_literal_string()?)))
195-
}
196234
// Here `w` is a word, check if it's a part of a multi-part
197235
// identifier, a function call, or a simple identifier:
198236
_ => match self.peek_token() {
@@ -907,6 +945,22 @@ impl Parser {
907945
Ok(values)
908946
}
909947

948+
/// Run a parser method `f`, reverting back to the current position
949+
/// if unsuccessful.
950+
#[must_use]
951+
fn maybe_parse<T, F>(&mut self, mut f: F) -> Option<T>
952+
where
953+
F: FnMut(&mut Parser) -> Result<T, ParserError>,
954+
{
955+
let index = self.index;
956+
if let Ok(t) = f(self) {
957+
Some(t)
958+
} else {
959+
self.index = index;
960+
None
961+
}
962+
}
963+
910964
/// Parse either `ALL` or `DISTINCT`. Returns `true` if `DISTINCT` is parsed and results in a
911965
/// `ParserError` if both `ALL` and `DISTINCT` are found.
912966
pub fn parse_all_or_distinct(&mut self) -> Result<bool, ParserError> {
@@ -1898,7 +1952,6 @@ impl Parser {
18981952
}
18991953

19001954
if self.consume_token(&Token::LParen) {
1901-
let index = self.index;
19021955
// A left paren introduces either a derived table (i.e., a subquery)
19031956
// or a nested join. It's nearly impossible to determine ahead of
19041957
// time which it is... so we just try to parse both.
@@ -1915,30 +1968,26 @@ impl Parser {
19151968
// | (2) starts a nested join
19161969
// (1) an additional set of parens around a nested join
19171970
//
1918-
match self.parse_derived_table_factor(NotLateral) {
1919-
// The recently consumed '(' started a derived table, and we've
1920-
// parsed the subquery, followed by the closing ')', and the
1921-
// alias of the derived table. In the example above this is
1922-
// case (3), and the next token would be `NATURAL`.
1923-
Ok(table_factor) => Ok(table_factor),
1924-
Err(_) => {
1925-
// A parsing error from `parse_derived_table_factor` indicates that
1926-
// the '(' we've recently consumed does not start a derived table
1927-
// (cases 1, 2, or 4). Ignore the error and back up to where we
1928-
// were before - right after the opening '('.
1929-
self.index = index;
1930-
1931-
// Inside the parentheses we expect to find a table factor
1932-
// followed by some joins or another level of nesting.
1933-
let table_and_joins = self.parse_table_and_joins()?;
1934-
self.expect_token(&Token::RParen)?;
1935-
// The SQL spec prohibits derived and bare tables from appearing
1936-
// alone in parentheses. We don't enforce this as some databases
1937-
// (e.g. Snowflake) allow such syntax.
19381971

1939-
Ok(TableFactor::NestedJoin(Box::new(table_and_joins)))
1940-
}
1941-
}
1972+
// If the recently consumed '(' starts a derived table, the call to
1973+
// `parse_derived_table_factor` below will return success after parsing the
1974+
// subquery, followed by the closing ')', and the alias of the derived table.
1975+
// In the example above this is case (3).
1976+
return_ok_if_some!(
1977+
self.maybe_parse(|parser| parser.parse_derived_table_factor(NotLateral))
1978+
);
1979+
// A parsing error from `parse_derived_table_factor` indicates that the '(' we've
1980+
// recently consumed does not start a derived table (cases 1, 2, or 4).
1981+
// `maybe_parse` will ignore such an error and rewind to be after the opening '('.
1982+
1983+
// Inside the parentheses we expect to find a table factor
1984+
// followed by some joins or another level of nesting.
1985+
let table_and_joins = self.parse_table_and_joins()?;
1986+
self.expect_token(&Token::RParen)?;
1987+
// The SQL spec prohibits derived and bare tables from appearing
1988+
// alone in parentheses. We don't enforce this as some databases
1989+
// (e.g. Snowflake) allow such syntax.
1990+
Ok(TableFactor::NestedJoin(Box::new(table_and_joins)))
19421991
} else {
19431992
let name = self.parse_object_name()?;
19441993
// Postgres, MSSQL: table-valued functions:

tests/sqlparser_common.rs

+28-6
Original file line number | Diff line number | Diff line change
@@ -413,6 +413,19 @@ fn parse_null_in_select() {
413413
);
414414
}
415415

416+
#[test]
417+
fn parse_select_with_date_column_name() {
418+
let sql = "SELECT date";
419+
let select = verified_only_select(sql);
420+
assert_eq!(
421+
&Expr::Identifier(Ident {
422+
value: "date".into(),
423+
quote_style: None
424+
}),
425+
expr_from_projection(only(&select.projection)),
426+
);
427+
}
428+
416429
#[test]
417430
fn parse_escaped_single_quote_string_predicate() {
418431
use self::BinaryOperator::*;
@@ -1426,30 +1439,39 @@ fn parse_literal_string() {
14261439

14271440
#[test]
14281441
fn parse_literal_date() {
1429-
let sql = "SELECT DATE '1999-01-01'";
1442+
let sql = "SELECT date '1999-01-01'";
14301443
let select = verified_only_select(sql);
14311444
assert_eq!(
1432-
&Expr::Value(Value::Date("1999-01-01".into())),
1445+
&Expr::TypedString {
1446+
data_type: DataType::Date,
1447+
value: "1999-01-01".into()
1448+
},
14331449
expr_from_projection(only(&select.projection)),
14341450
);
14351451
}
14361452

14371453
#[test]
14381454
fn parse_literal_time() {
1439-
let sql = "SELECT TIME '01:23:34'";
1455+
let sql = "SELECT time '01:23:34'";
14401456
let select = verified_only_select(sql);
14411457
assert_eq!(
1442-
&Expr::Value(Value::Time("01:23:34".into())),
1458+
&Expr::TypedString {
1459+
data_type: DataType::Time,
1460+
value: "01:23:34".into()
1461+
},
14431462
expr_from_projection(only(&select.projection)),
14441463
);
14451464
}
14461465

14471466
#[test]
14481467
fn parse_literal_timestamp() {
1449-
let sql = "SELECT TIMESTAMP '1999-01-01 01:23:34'";
1468+
let sql = "SELECT timestamp '1999-01-01 01:23:34'";
14501469
let select = verified_only_select(sql);
14511470
assert_eq!(
1452-
&Expr::Value(Value::Timestamp("1999-01-01 01:23:34".into())),
1471+
&Expr::TypedString {
1472+
data_type: DataType::Timestamp,
1473+
value: "1999-01-01 01:23:34".into()
1474+
},
14531475
expr_from_projection(only(&select.projection)),
14541476
);
14551477
}

0 commit comments

Comments
 (0)