Skip to content

Use dialects in the parser to support Snowflake aliasing syntax #244

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions src/ast/query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -235,11 +235,11 @@ pub enum TableFactor {
subquery: Box<Query>,
alias: Option<TableAlias>,
},
/// Represents a parenthesized table factor. The SQL spec only allows a
/// join expression (`(foo <JOIN> bar [ <JOIN> baz ... ])`) to be nested,
/// possibly several times, but the parser also accepts the non-standard
/// nesting of bare tables (`table_with_joins.joins.is_empty()`), so the
/// name `NestedJoin` is a bit of a misnomer.
/// The inner `TableWithJoins` can have no joins only if its
/// `relation` is itself a `TableFactor::NestedJoin`.
/// Some dialects allow nesting lone `Table`/`Derived` in parens,
/// e.g. `FROM (mytable)`, but we don't expose the presence of these
/// extraneous parens in the AST.
NestedJoin(Box<TableWithJoins>),
}

Expand Down
7 changes: 6 additions & 1 deletion src/dialect/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,15 @@ pub mod keywords;
mod mssql;
mod mysql;
mod postgresql;

mod snowflake;
use std::fmt::Debug;

pub use self::ansi::AnsiDialect;
pub use self::generic::GenericDialect;
pub use self::mssql::MsSqlDialect;
pub use self::mysql::MySqlDialect;
pub use self::postgresql::PostgreSqlDialect;
pub use self::snowflake::SnowflakeDialect;

pub trait Dialect: Debug {
/// Determine if a character starts a quoted identifier. The default
Expand All @@ -38,4 +39,8 @@ pub trait Dialect: Debug {
fn is_identifier_start(&self, ch: char) -> bool;
/// Determine if a character is a valid unquoted identifier character
fn is_identifier_part(&self, ch: char) -> bool;

/// Determine whether the dialect accepts a lone table or derived table
/// wrapped in an extra pair of parentheses, e.g. `FROM (mytable)`.
/// The default implementation returns `false`; dialects that allow this
/// syntax override it to return `true`.
///
/// NOTE: the method name is spelled with three `l`s; overriding
/// implementations must use the same spelling.
fn alllow_single_table_in_parenthesis(&self) -> bool {
false
}
Comment on lines +43 to +45
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please note #241 (comment)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry :| - I don't understand what you mean by that.
(Should I note #241 in a code comment? Or is something in the code not aligned with what you commented in #241 (comment)?)
Can you please elaborate?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In that comment I tried to argue for using if self.dialect <is snowflake> checks rather than self.dialect.alllow_single_table_in_parenthesis(), at least until we better understand when to use which approach.

}
26 changes: 26 additions & 0 deletions src/dialect/snowflake.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
use crate::dialect::Dialect;

/// A [`Dialect`] for Snowflake SQL.
#[derive(Debug, Default)]
pub struct SnowflakeDialect;
Comment on lines +3 to +4
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd like the new dialect (including a separate test file), the dialect-specific parsing infrastructure, and the table factor parsing fix to be landed separately.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You mean by different PR's or commits ?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I meant PRs.


impl Dialect for SnowflakeDialect {
    /// Returns `true` if `ch` may begin an unquoted identifier: an ASCII
    /// letter, `_`, `#` or `@`.
    // Revisit: currently copied from the Generic dialect.
    fn is_identifier_start(&self, ch: char) -> bool {
        match ch {
            '_' | '#' | '@' => true,
            other => other.is_ascii_alphabetic(),
        }
    }

    /// Returns `true` if `ch` may appear inside an unquoted identifier: an
    /// ASCII letter or digit, `@`, `$`, `#` or `_`.
    // Revisit: currently copied from the Generic dialect.
    fn is_identifier_part(&self, ch: char) -> bool {
        match ch {
            '@' | '$' | '#' | '_' => true,
            other => other.is_ascii_alphanumeric(),
        }
    }

    /// This dialect opts in to parenthesized lone tables, overriding the
    /// trait's default of `false`.
    fn alllow_single_table_in_parenthesis(&self) -> bool {
        true
    }
}
76 changes: 62 additions & 14 deletions src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -83,23 +83,28 @@ impl fmt::Display for ParserError {
impl Error for ParserError {}

/// SQL Parser
pub struct Parser {
pub struct Parser<'a> {
tokens: Vec<Token>,
/// The index of the first unprocessed token in `self.tokens`
index: usize,
dialect: &'a dyn Dialect,
}

impl Parser {
impl<'a> Parser<'a> {
/// Parse the specified tokens
pub fn new(tokens: Vec<Token>) -> Self {
Parser { tokens, index: 0 }
pub fn new(tokens: Vec<Token>, dialect: &'a dyn Dialect) -> Self {
Parser {
tokens,
index: 0,
dialect,
}
}

/// Parse a SQL statement and produce an Abstract Syntax Tree (AST)
pub fn parse_sql(dialect: &dyn Dialect, sql: &str) -> Result<Vec<Statement>, ParserError> {
let mut tokenizer = Tokenizer::new(dialect, &sql);
let tokens = tokenizer.tokenize()?;
let mut parser = Parser::new(tokens);
let mut parser = Parser::new(tokens, dialect);
let mut stmts = Vec::new();
let mut expecting_statement_delimiter = false;
debug!("Parsing sql '{}'...", sql);
Expand Down Expand Up @@ -950,7 +955,7 @@ impl Parser {
/// Parse a comma-separated list of 1+ items accepted by `F`
pub fn parse_comma_separated<T, F>(&mut self, mut f: F) -> Result<Vec<T>, ParserError>
where
F: FnMut(&mut Parser) -> Result<T, ParserError>,
F: FnMut(&mut Parser<'a>) -> Result<T, ParserError>,
{
let mut values = vec![];
loop {
Expand Down Expand Up @@ -2056,6 +2061,7 @@ impl Parser {
};
joins.push(join);
}

Ok(TableWithJoins { relation, joins })
}

Expand Down Expand Up @@ -2098,14 +2104,56 @@ impl Parser {
// recently consumed does not start a derived table (cases 1, 2, or 4).
// `maybe_parse` will ignore such an error and rewind to be after the opening '('.

// Inside the parentheses we expect to find a table factor
// followed by some joins or another level of nesting.
let table_and_joins = self.parse_table_and_joins()?;
self.expect_token(&Token::RParen)?;
// The SQL spec prohibits derived and bare tables from appearing
// alone in parentheses. We don't enforce this as some databases
// (e.g. Snowflake) allow such syntax.
Ok(TableFactor::NestedJoin(Box::new(table_and_joins)))
// Inside the parentheses we expect to find an (A) table factor
// followed by some joins or (B) another level of nesting.
let mut table_and_joins = self.parse_table_and_joins()?;

if !table_and_joins.joins.is_empty() {
self.expect_token(&Token::RParen)?;
Ok(TableFactor::NestedJoin(Box::new(table_and_joins))) // (A)
} else if let TableFactor::NestedJoin(_) = &table_and_joins.relation {
// (B): `table_and_joins` (what we found inside the parentheses)
// is a nested join `(foo JOIN bar)`, not followed by other joins.
self.expect_token(&Token::RParen)?;
Ok(TableFactor::NestedJoin(Box::new(table_and_joins)))
} else if self.dialect.alllow_single_table_in_parenthesis() {
// Dialect-specific behavior: Snowflake diverges from the
// standard and from most other implementations by allowing
// extra parentheses not only around a join (B), but around
// lone table names (e.g. `FROM (mytable [AS alias])`) and
// around derived tables (e.g. `FROM ((SELECT ...) [AS alias])`)
// as well.
self.expect_token(&Token::RParen)?;

if let Some(outer_alias) =
self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?
{
// Snowflake also allows specifying an alias *after* parens
// e.g. `FROM (mytable) AS alias`
match &mut table_and_joins.relation {
TableFactor::Derived { alias, .. } | TableFactor::Table { alias, .. } => {
// but not `FROM (mytable AS alias1) AS alias2`.
if let Some(inner_alias) = alias {
return Err(ParserError::ParserError(format!(
"duplicate alias {}",
inner_alias
)));
}
// Act as if the alias was specified normally next
// to the table name: `(mytable) AS alias` ->
// `(mytable AS alias)`
alias.replace(outer_alias);
}
TableFactor::NestedJoin(_) => unreachable!(),
};
}
// Do not store the extra set of parens in the AST
Ok(table_and_joins.relation)
} else {
// The SQL spec prohibits derived tables and bare tables from
// appearing alone in parentheses (e.g. `FROM (mytable)`)
self.expected("joined table", self.peek_token())
}
} else {
let name = self.parse_object_name()?;
// Postgres, MSSQL: table-valued functions:
Expand Down
6 changes: 4 additions & 2 deletions src/test_utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ impl TestedDialects {
self.one_of_identical_results(|dialect| {
let mut tokenizer = Tokenizer::new(dialect, sql);
let tokens = tokenizer.tokenize().unwrap();
f(&mut Parser::new(tokens))
f(&mut Parser::new(tokens, dialect))
})
}

Expand Down Expand Up @@ -104,7 +104,9 @@ impl TestedDialects {
/// Ensures that `sql` parses as an expression, and is not modified
/// after a serialization round-trip.
pub fn verified_expr(&self, sql: &str) -> Expr {
let ast = self.run_parser_method(sql, Parser::parse_expr).unwrap();
let ast = self
.run_parser_method(sql, |parser| parser.parse_expr())
.unwrap();
assert_eq!(sql, &ast.to_string(), "round-tripping without changes");
ast
}
Expand Down
46 changes: 10 additions & 36 deletions tests/sqlparser_common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ use matches::assert_matches;

use sqlparser::ast::*;
use sqlparser::dialect::keywords::ALL_KEYWORDS;
use sqlparser::parser::{Parser, ParserError};
use sqlparser::parser::ParserError;
use sqlparser::test_utils::{all_dialects, expr_from_projection, number, only};

#[test]
Expand Down Expand Up @@ -147,13 +147,14 @@ fn parse_update() {

#[test]
fn parse_invalid_table_name() {
let ast = all_dialects().run_parser_method("db.public..customer", Parser::parse_object_name);
let ast = all_dialects()
.run_parser_method("db.public..customer", |parser| parser.parse_object_name());
assert!(ast.is_err());
}

#[test]
fn parse_no_table_name() {
let ast = all_dialects().run_parser_method("", Parser::parse_object_name);
let ast = all_dialects().run_parser_method("", |parser| parser.parse_object_name());
assert!(ast.is_err());
}

Expand Down Expand Up @@ -2273,19 +2274,12 @@ fn parse_join_nesting() {
vec![join(nest!(nest!(nest!(table("b"), table("c")))))]
);

// Parenthesized table names are non-standard, but supported in Snowflake SQL
let sql = "SELECT * FROM (a NATURAL JOIN (b))";
let select = verified_only_select(sql);
let from = only(select.from);

assert_eq!(from.relation, nest!(table("a"), nest!(table("b"))));

// Double parentheses around table names are non-standard, but supported in Snowflake SQL
let sql = "SELECT * FROM (a NATURAL JOIN ((b)))";
let select = verified_only_select(sql);
let from = only(select.from);

assert_eq!(from.relation, nest!(table("a"), nest!(nest!(table("b")))));
// Nesting a subquery in parentheses is non-standard, but supported in Snowflake SQL
let res = parse_sql_statements("SELECT * FROM ((SELECT 1) AS t)");
assert_eq!(
ParserError::ParserError("Expected joined table, found: )".to_string()),
res.unwrap_err()
);
}

#[test]
Expand Down Expand Up @@ -2427,26 +2421,6 @@ fn parse_derived_tables() {
}],
}))
);

// Nesting a subquery in parentheses is non-standard, but supported in Snowflake SQL
let sql = "SELECT * FROM ((SELECT 1) AS t)";
let select = verified_only_select(sql);
let from = only(select.from);

assert_eq!(
from.relation,
TableFactor::NestedJoin(Box::new(TableWithJoins {
relation: TableFactor::Derived {
lateral: false,
subquery: Box::new(verified_query("SELECT 1")),
alias: Some(TableAlias {
name: "t".into(),
columns: vec![],
})
},
joins: Vec::new(),
}))
);
}

#[test]
Expand Down
Loading