diff --git a/examples/cli.rs b/examples/cli.rs index 5a3a3034b..9ac079949 100644 --- a/examples/cli.rs +++ b/examples/cli.rs @@ -40,6 +40,7 @@ $ cargo run --feature json_example --example cli FILENAME.sql [--dialectname] "--postgres" => Box::new(PostgreSqlDialect {}), "--ms" => Box::new(MsSqlDialect {}), "--snowflake" => Box::new(SnowflakeDialect {}), + "--hive" => Box::new(HiveDialect {}), "--generic" | "" => Box::new(GenericDialect {}), s => panic!("Unexpected parameter: {}", s), }; diff --git a/src/ast/data_type.rs b/src/ast/data_type.rs index 53122ab5d..388703e76 100644 --- a/src/ast/data_type.rs +++ b/src/ast/data_type.rs @@ -61,6 +61,8 @@ pub enum DataType { Regclass, /// Text Text, + /// String + String, /// Bytea Bytea, /// Custom type such as enums @@ -101,6 +103,7 @@ impl fmt::Display for DataType { DataType::Interval => write!(f, "INTERVAL"), DataType::Regclass => write!(f, "REGCLASS"), DataType::Text => write!(f, "TEXT"), + DataType::String => write!(f, "STRING"), DataType::Bytea => write!(f, "BYTEA"), DataType::Array(ty) => write!(f, "{}[]", ty), DataType::Custom(ty) => write!(f, "{}", ty), diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs index 272bf7c25..67dc2e322 100644 --- a/src/ast/ddl.rs +++ b/src/ast/ddl.rs @@ -35,22 +35,54 @@ pub enum AlterTableOperation { if_exists: bool, cascade: bool, }, + /// `RENAME TO PARTITION (partition=val)` + RenamePartitions { + old_partitions: Vec, + new_partitions: Vec, + }, + /// Add Partitions + AddPartitions { + if_not_exists: bool, + new_partitions: Vec, + }, + DropPartitions { + partitions: Vec, + if_exists: bool, + }, /// `RENAME [ COLUMN ] TO ` RenameColumn { old_column_name: Ident, new_column_name: Ident, }, /// `RENAME TO ` - RenameTable { table_name: Ident }, + RenameTable { table_name: ObjectName }, } impl fmt::Display for AlterTableOperation { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { + AlterTableOperation::AddPartitions { + if_not_exists, + new_partitions, + } => write!( + f, + "ADD{ine} PARTITION ({})", + display_comma_separated(new_partitions), + ine = if *if_not_exists { " IF NOT EXISTS" } else { "" } + ), AlterTableOperation::AddConstraint(c) => write!(f, "ADD {}", c), AlterTableOperation::AddColumn { column_def } => { write!(f, "ADD COLUMN {}", column_def.to_string()) } + AlterTableOperation::DropPartitions { + partitions, + if_exists, + } => write!( + f, + "DROP{ie} PARTITION ({})", + display_comma_separated(partitions), + ie = if *if_exists { " IF EXISTS" } else { "" } + ), AlterTableOperation::DropConstraint { name } => write!(f, "DROP CONSTRAINT {}", name), AlterTableOperation::DropColumn { column_name, @@ -63,6 +95,15 @@ impl fmt::Display for AlterTableOperation { column_name, if *cascade { " CASCADE" } else { "" } ), + AlterTableOperation::RenamePartitions { + old_partitions, + new_partitions, + } => write!( + f, + "PARTITION ({}) RENAME TO PARTITION ({})", + display_comma_separated(old_partitions), + display_comma_separated(new_partitions) + ), AlterTableOperation::RenameColumn { old_column_name, new_column_name, diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 4232ad022..1999451d5 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -29,8 +29,9 @@ pub use self::ddl::{ }; pub use self::operator::{BinaryOperator, UnaryOperator}; pub use self::query::{ - Cte, Fetch, Join, JoinConstraint, JoinOperator, Offset, OffsetRows, OrderByExpr, Query, Select, - SelectItem, SetExpr, SetOperator, TableAlias, TableFactor, TableWithJoins, Top, Values, With, + Cte, Fetch, Join, JoinConstraint, JoinOperator, LateralView, Offset, OffsetRows, OrderByExpr, + Query, Select, SelectItem, SetExpr, SetOperator, TableAlias, TableFactor, TableWithJoins, Top, + Values, With, }; pub use self::value::{DateTimeField, Value}; @@ -191,7 +192,10 @@ pub enum Expr { right: Box, }, /// Unary operation e.g. `NOT foo` - UnaryOp { op: UnaryOperator, expr: Box }, + UnaryOp { + op: UnaryOperator, + expr: Box, + }, /// CAST an expression to a different data type e.g. `CAST(foo AS VARCHAR(123))` Cast { expr: Box, @@ -213,7 +217,14 @@ pub enum Expr { /// A constant of form ` 'value'`. /// This can represent ANSI SQL `DATE`, `TIME`, and `TIMESTAMP` literals (such as `DATE '2020-01-01'`), /// as well as constants of other types (a non-standard PostgreSQL extension). - TypedString { data_type: DataType, value: String }, + TypedString { + data_type: DataType, + value: String, + }, + MapAccess { + column: Box, + key: String, + }, /// Scalar function call e.g. `LEFT(foo, 5)` Function(Function), /// `CASE [] WHEN THEN ... [ELSE ] END` @@ -241,6 +252,7 @@ impl fmt::Display for Expr { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { Expr::Identifier(s) => write!(f, "{}", s), + Expr::MapAccess { column, key } => write!(f, "{}[\"{}\"]", column, key), Expr::Wildcard => f.write_str("*"), Expr::QualifiedWildcard(q) => write!(f, "{}.*", display_separated(q, ".")), Expr::CompoundIdentifier(s) => write!(f, "{}", display_separated(s, ".")), @@ -426,11 +438,50 @@ impl fmt::Display for WindowFrameBound { } } +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub enum AddDropSync { + ADD, + DROP, + SYNC, +} + +impl fmt::Display for AddDropSync { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + AddDropSync::SYNC => f.write_str("SYNC PARTITIONS"), + AddDropSync::DROP => f.write_str("DROP PARTITIONS"), + AddDropSync::ADD => f.write_str("ADD PARTITIONS"), + } + } +} + /// A top-level statement (SELECT, INSERT, CREATE, etc.) #[allow(clippy::large_enum_variant)] #[derive(Debug, Clone, PartialEq, Eq, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] pub enum Statement { + /// Analyze (Hive) + Analyze { + table_name: ObjectName, + partitions: Option>, + for_columns: bool, + columns: Vec, + cache_metadata: bool, + noscan: bool, + compute_statistics: bool, + }, + /// Truncate (Hive) + Truncate { + table_name: ObjectName, + partitions: Option>, + }, + /// Msck (Hive) + Msck { + table_name: ObjectName, + repair: bool, + partition_action: Option, + }, /// SELECT Query(Box), /// INSERT @@ -439,8 +490,24 @@ pub enum Statement { table_name: ObjectName, /// COLUMNS columns: Vec, + /// Overwrite (Hive) + overwrite: bool, /// A SQL query that specifies what to insert source: Box, + /// partitioned insert (Hive) + partitioned: Option>, + /// Columns defined after PARTITION + after_columns: Vec, + /// whether the insert has the table keyword (Hive) + table: bool, + }, + // TODO: Support ROW FORMAT + Directory { + overwrite: bool, + local: bool, + path: String, + file_format: Option, + source: Box, }, Copy { /// TABLE @@ -479,6 +546,7 @@ pub enum Statement { /// CREATE TABLE CreateTable { or_replace: bool, + temporary: bool, external: bool, if_not_exists: bool, /// Table name @@ -486,11 +554,15 @@ pub enum Statement { /// Optional schema columns: Vec, constraints: Vec, + hive_distribution: HiveDistributionStyle, + hive_formats: Option, + table_properties: Vec, with_options: Vec, file_format: Option, location: Option, query: Option>, without_rowid: bool, + like: Option, }, /// SQLite's `CREATE VIRTUAL TABLE .. USING ()` CreateVirtualTable { @@ -525,6 +597,9 @@ pub enum Statement { /// Whether `CASCADE` was specified. This will be `false` when /// `RESTRICT` or no drop behavior at all was specified. cascade: bool, + /// Hive allows you specify whether the table's stored data will be + /// deleted along with the dropped table + purge: bool, }, /// SET /// @@ -533,8 +608,9 @@ pub enum Statement { /// supported yet. SetVariable { local: bool, + hivevar: bool, variable: Ident, - value: SetVariableValue, + value: Vec, }, /// SHOW /// @@ -562,6 +638,13 @@ pub enum Statement { schema_name: ObjectName, if_not_exists: bool, }, + /// CREATE DATABASE + CreateDatabase { + db_name: ObjectName, + if_not_exists: bool, + location: Option, + managed_location: Option, + }, /// `ASSERT [AS ]` Assert { condition: Expr, @@ -592,11 +675,6 @@ pub enum Statement { /// A SQL query that specifies what to explain statement: Box, }, - /// ANALYZE - Analyze { - /// Name of table - table_name: ObjectName, - }, } impl fmt::Display for Statement { @@ -622,17 +700,114 @@ impl fmt::Display for Statement { write!(f, "{}", statement) } - Statement::Analyze { table_name } => write!(f, "ANALYZE TABLE {}", table_name), Statement::Query(s) => write!(f, "{}", s), + Statement::Directory { + overwrite, + local, + path, + file_format, + source, + } => { + write!( + f, + "INSERT{overwrite}{local} DIRECTORY '{path}'", + overwrite = if *overwrite { " OVERWRITE" } else { "" }, + local = if *local { " LOCAL" } else { "" }, + path = path + )?; + if let Some(ref ff) = file_format { + write!(f, " STORED AS {}", ff)? + } + write!(f, " {}", source) + } + Statement::Msck { + table_name, + repair, + partition_action, + } => { + write!( + f, + "MSCK {repair}TABLE {table}", + repair = if *repair { "REPAIR " } else { "" }, + table = table_name + )?; + if let Some(pa) = partition_action { + write!(f, " {}", pa)?; + } + Ok(()) + } + Statement::Truncate { + table_name, + partitions, + } => { + write!(f, "TRUNCATE TABLE {}", table_name)?; + if let Some(ref parts) = partitions { + if !parts.is_empty() { + write!(f, " PARTITION ({})", display_comma_separated(parts))?; + } + } + Ok(()) + } + Statement::Analyze { + table_name, + partitions, + for_columns, + columns, + cache_metadata, + noscan, + compute_statistics, + } => { + write!(f, "ANALYZE TABLE {}", table_name)?; + if let Some(ref parts) = partitions { + if !parts.is_empty() { + write!(f, " PARTITION ({})", display_comma_separated(parts))?; + } + } + + if *compute_statistics { + write!(f, " COMPUTE STATISTICS")?; + } + if *noscan { + write!(f, " NOSCAN")?; + } + if *cache_metadata { + write!(f, " CACHE METADATA")?; + } + if *for_columns { + write!(f, " FOR COLUMNS")?; + if !columns.is_empty() { + write!(f, " {}", display_comma_separated(columns))?; + } + } + Ok(()) + } Statement::Insert { table_name, + overwrite, + partitioned, columns, + after_columns, source, + table, } => { - write!(f, "INSERT INTO {} ", table_name)?; + write!( + f, + "INSERT {act}{tbl} {table_name} ", + table_name = table_name, + act = if *overwrite { "OVERWRITE" } else { "INTO" }, + tbl = if *table { " TABLE" } else { "" } + )?; if !columns.is_empty() { write!(f, "({}) ", display_comma_separated(columns))?; } + if let Some(ref parts) = partitioned { + if !parts.is_empty() { + write!(f, "PARTITION ({}) ", display_comma_separated(parts))?; + } + } + if !after_columns.is_empty() { + write!(f, "({}) ", display_comma_separated(after_columns))?; + } write!(f, "{}", source) } Statement::Copy { @@ -684,6 +859,25 @@ impl fmt::Display for Statement { } Ok(()) } + Statement::CreateDatabase { + db_name, + if_not_exists, + location, + managed_location, + } => { + write!(f, "CREATE")?; + if *if_not_exists { + write!(f, " IF NOT EXISTS")?; + } + write!(f, " {}", db_name)?; + if let Some(l) = location { + write!(f, " LOCATION '{}'", l)?; + } + if let Some(ml) = managed_location { + write!(f, " MANAGEDLOCATION '{}'", ml)?; + } + Ok(()) + } Statement::CreateView { name, or_replace, @@ -711,14 +905,19 @@ impl fmt::Display for Statement { name, columns, constraints, + table_properties, with_options, or_replace, if_not_exists, + hive_distribution, + hive_formats, external, + temporary, file_format, location, query, without_rowid, + like, } => { // We want to allow the following options // Empty column list, allowed by PostgreSQL: @@ -729,10 +928,11 @@ impl fmt::Display for Statement { // `CREATE TABLE t (a INT) AS SELECT a from t2` write!( f, - "CREATE {or_replace}{external}TABLE {if_not_exists}{name}", + "CREATE {or_replace}{external}{temporary}TABLE {if_not_exists}{name}", or_replace = if *or_replace { "OR REPLACE " } else { "" }, external = if *external { "EXTERNAL " } else { "" }, if_not_exists = if *if_not_exists { "IF NOT EXISTS " } else { "" }, + temporary = if *temporary { "TEMPORARY " } else { "" }, name = name, )?; if !columns.is_empty() || !constraints.is_empty() { @@ -741,7 +941,7 @@ impl fmt::Display for Statement { write!(f, ", ")?; } write!(f, "{})", display_comma_separated(constraints))?; - } else if query.is_none() { + } else if query.is_none() && like.is_none() { // PostgreSQL allows `CREATE TABLE t ();`, but requires empty parens write!(f, " ()")?; } @@ -749,6 +949,79 @@ impl fmt::Display for Statement { if *without_rowid { write!(f, " WITHOUT ROWID")?; } + + // Only for Hive + if let Some(l) = like { + write!(f, " LIKE {}", l)?; + } + match hive_distribution { + HiveDistributionStyle::PARTITIONED { columns } => { + write!(f, " PARTITIONED BY ({})", display_comma_separated(&columns))?; + } + HiveDistributionStyle::CLUSTERED { + columns, + sorted_by, + num_buckets, + } => { + write!(f, " CLUSTERED BY ({})", display_comma_separated(&columns))?; + if !sorted_by.is_empty() { + write!(f, " SORTED BY ({})", display_comma_separated(&sorted_by))?; + } + if *num_buckets > 0 { + write!(f, " INTO {} BUCKETS", num_buckets)?; + } + } + HiveDistributionStyle::SKEWED { + columns, + on, + stored_as_directories, + } => { + write!( + f, + " SKEWED BY ({})) ON ({})", + display_comma_separated(&columns), + display_comma_separated(&on) + )?; + if *stored_as_directories { + write!(f, " STORED AS DIRECTORIES")?; + } + } + _ => (), + } + + if let Some(HiveFormat { + row_format, + storage, + location, + }) = hive_formats + { + match row_format { + Some(HiveRowFormat::SERDE { class }) => { + write!(f, " ROW FORMAT SERDE '{}'", class)? + } + Some(HiveRowFormat::DELIMITED) => write!(f, " ROW FORMAT DELIMITED")?, + None => (), + } + match storage { + Some(HiveIOFormat::IOF { + input_format, + output_format, + }) => write!( + f, + " STORED AS INPUTFORMAT {} OUTPUTFORMAT {}", + input_format, output_format + )?, + Some(HiveIOFormat::FileFormat { format }) if !*external => { + write!(f, " STORED AS {}", format)? + } + _ => (), + } + if !*external { + if let Some(loc) = location { + write!(f, " LOCATION '{}'", loc)?; + } + } + } if *external { write!( f, @@ -757,6 +1030,13 @@ impl fmt::Display for Statement { location.as_ref().unwrap() )?; } + if !table_properties.is_empty() { + write!( + f, + " TBLPROPERTIES ({})", + display_comma_separated(table_properties) + )?; + } if !with_options.is_empty() { write!(f, " WITH ({})", display_comma_separated(with_options))?; } @@ -806,25 +1086,34 @@ impl fmt::Display for Statement { if_exists, names, cascade, + purge, } => write!( f, - "DROP {}{} {}{}", + "DROP {}{} {}{}{}", object_type, if *if_exists { " IF EXISTS" } else { "" }, display_comma_separated(names), if *cascade { " CASCADE" } else { "" }, + if *purge { " PURGE" } else { "" } ), Statement::SetVariable { local, variable, + hivevar, value, - } => write!( - f, - "SET{local} {variable} = {value}", - local = if *local { " LOCAL" } else { "" }, - variable = variable, - value = value - ), + } => { + f.write_str("SET ")?; + if *local { + f.write_str("LOCAL ")?; + } + write!( + f, + "{hivevar}{name} = {value}", + hivevar = if *hivevar { "HIVEVAR:" } else { "" }, + name = variable, + value = display_comma_separated(value) + ) + } Statement::ShowVariable { variable } => write!(f, "SHOW {}", variable), Statement::ShowColumns { extended, @@ -1086,6 +1375,62 @@ impl fmt::Display for ObjectType { } } +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub enum HiveDistributionStyle { + PARTITIONED { + columns: Vec, + }, + CLUSTERED { + columns: Vec, + sorted_by: Vec, + num_buckets: i32, + }, + SKEWED { + columns: Vec, + on: Vec, + stored_as_directories: bool, + }, + NONE, +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub enum HiveRowFormat { + SERDE { class: String }, + DELIMITED, +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub enum HiveIOFormat { + IOF { + input_format: Expr, + output_format: Expr, + }, + FileFormat { + format: FileFormat, + }, +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub struct HiveFormat { + pub row_format: Option, + pub storage: Option, + pub location: Option, +} + +impl Default for HiveFormat { + fn default() -> Self { + HiveFormat { + row_format: None, + location: None, + storage: None, + } + } +} + #[derive(Debug, Clone, PartialEq, Eq, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] pub struct SqlOption { diff --git a/src/ast/operator.rs b/src/ast/operator.rs index 57e70982f..732c81232 100644 --- a/src/ast/operator.rs +++ b/src/ast/operator.rs @@ -65,6 +65,7 @@ pub enum BinaryOperator { Lt, GtEq, LtEq, + Spaceship, Eq, NotEq, And, @@ -92,6 +93,7 @@ impl fmt::Display for BinaryOperator { BinaryOperator::Lt => "<", BinaryOperator::GtEq => ">=", BinaryOperator::LtEq => "<=", + BinaryOperator::Spaceship => "<=>", BinaryOperator::Eq => "=", BinaryOperator::NotEq => "<>", BinaryOperator::And => "AND", diff --git a/src/ast/query.rs b/src/ast/query.rs index 1b8ccf7e4..8f9ab499d 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -57,6 +57,7 @@ impl fmt::Display for Query { /// A node in a tree, representing a "query body" expression, roughly: /// `SELECT ... [ {UNION|EXCEPT|INTERSECT} SELECT ...]` +#[allow(clippy::large_enum_variant)] #[derive(Debug, Clone, PartialEq, Eq, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] pub enum SetExpr { @@ -73,6 +74,7 @@ pub enum SetExpr { right: Box, }, Values(Values), + Insert(Statement), // TODO: ANSI SQL supports `TABLE` here. } @@ -82,6 +84,7 @@ impl fmt::Display for SetExpr { SetExpr::Select(s) => write!(f, "{}", s), SetExpr::Query(q) => write!(f, "({})", q), SetExpr::Values(v) => write!(f, "{}", v), + SetExpr::Insert(v) => write!(f, "{}", v), SetExpr::SetOperation { left, right, @@ -126,10 +129,18 @@ pub struct Select { pub projection: Vec, /// FROM pub from: Vec, + /// LATERAL VIEWs + pub lateral_views: Vec, /// WHERE pub selection: Option, /// GROUP BY pub group_by: Vec, + /// CLUSTER BY (Hive) + pub cluster_by: Vec, + /// DISTRIBUTE BY (Hive) + pub distribute_by: Vec, + /// SORT BY (Hive) + pub sort_by: Vec, /// HAVING pub having: Option, } @@ -144,12 +155,34 @@ impl fmt::Display for Select { if !self.from.is_empty() { write!(f, " FROM {}", display_comma_separated(&self.from))?; } + if !self.lateral_views.is_empty() { + for lv in &self.lateral_views { + write!(f, "{}", lv)?; + } + } if let Some(ref selection) = self.selection { write!(f, " WHERE {}", selection)?; } if !self.group_by.is_empty() { write!(f, " GROUP BY {}", display_comma_separated(&self.group_by))?; } + if !self.cluster_by.is_empty() { + write!( + f, + " CLUSTER BY {}", + display_comma_separated(&self.cluster_by) + )?; + } + if !self.distribute_by.is_empty() { + write!( + f, + " DISTRIBUTE BY {}", + display_comma_separated(&self.distribute_by) + )?; + } + if !self.sort_by.is_empty() { + write!(f, " SORT BY {}", display_comma_separated(&self.sort_by))?; + } if let Some(ref having) = self.having { write!(f, " HAVING {}", having)?; } @@ -157,6 +190,40 @@ impl fmt::Display for Select { } } +/// A hive LATERAL VIEW with potential column aliases +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub struct LateralView { + /// LATERAL VIEW + pub lateral_view: Expr, + /// LATERAL VIEW table name + pub lateral_view_name: ObjectName, + /// LATERAL VIEW optional column aliases + pub lateral_col_alias: Vec, + /// LATERAL VIEW OUTER + pub outer: bool, +} + +impl fmt::Display for LateralView { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + " LATERAL VIEW{outer} {} {}", + self.lateral_view, + self.lateral_view_name, + outer = if self.outer { " OUTER" } else { "" } + )?; + if !self.lateral_col_alias.is_empty() { + write!( + f, + " AS {}", + display_comma_separated(&self.lateral_col_alias) + )?; + } + Ok(()) + } +} + #[derive(Debug, Clone, PartialEq, Eq, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] pub struct With { @@ -184,11 +251,16 @@ impl fmt::Display for With { pub struct Cte { pub alias: TableAlias, pub query: Query, + pub from: Option, } impl fmt::Display for Cte { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{} AS ({})", self.alias, self.query) + write!(f, "{} AS ({})", self.alias, self.query)?; + if let Some(ref fr) = self.from { + write!(f, " FROM {}", fr)?; + } + Ok(()) } } @@ -417,6 +489,7 @@ pub enum JoinConstraint { On(Expr), Using(Vec), Natural, + None, } /// An `ORDER BY` expression diff --git a/src/ast/value.rs b/src/ast/value.rs index 9e82c175d..2afdfaeae 100644 --- a/src/ast/value.rs +++ b/src/ast/value.rs @@ -22,15 +22,17 @@ use std::fmt; pub enum Value { /// Numeric literal #[cfg(not(feature = "bigdecimal"))] - Number(String), + Number(String, bool), #[cfg(feature = "bigdecimal")] - Number(BigDecimal), + Number(BigDecimal, bool), /// 'string value' SingleQuotedString(String), /// N'string value' NationalStringLiteral(String), /// X'hex value' HexStringLiteral(String), + + DoubleQuotedString(String), /// Boolean value true or false Boolean(bool), /// INTERVAL literals, roughly in the following format: @@ -59,7 +61,8 @@ pub enum Value { impl fmt::Display for Value { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { - Value::Number(v) => write!(f, "{}", v), + Value::Number(v, l) => write!(f, "{}{long}", v, long = if *l { "L" } else { "" }), + Value::DoubleQuotedString(v) => write!(f, "\"{}\"", v), Value::SingleQuotedString(v) => write!(f, "'{}'", escape_single_quote_string(v)), Value::NationalStringLiteral(v) => write!(f, "N'{}'", v), Value::HexStringLiteral(v) => write!(f, "X'{}'", v), diff --git a/src/dialect/hive.rs b/src/dialect/hive.rs new file mode 100644 index 000000000..9b42857ec --- /dev/null +++ b/src/dialect/hive.rs @@ -0,0 +1,39 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use crate::dialect::Dialect; + +#[derive(Debug)] +pub struct HiveDialect {} + +impl Dialect for HiveDialect { + fn is_delimited_identifier_start(&self, ch: char) -> bool { + (ch == '"') || (ch == '`') + } + + fn is_identifier_start(&self, ch: char) -> bool { + ('a'..='z').contains(&ch) + || ('A'..='Z').contains(&ch) + || ('0'..='9').contains(&ch) + || ch == '$' + } + + fn is_identifier_part(&self, ch: char) -> bool { + ('a'..='z').contains(&ch) + || ('A'..='Z').contains(&ch) + || ('0'..='9').contains(&ch) + || ch == '_' + || ch == '$' + || ch == '{' + || ch == '}' + } +} diff --git a/src/dialect/keywords.rs b/src/dialect/keywords.rs index 6e7065043..306cd19d6 100644 --- a/src/dialect/keywords.rs +++ b/src/dialect/keywords.rs @@ -103,6 +103,7 @@ define_keywords!( BOTH, BY, BYTEA, + CACHE, CALL, CALLED, CARDINALITY, @@ -120,6 +121,7 @@ define_keywords!( CHECK, CLOB, CLOSE, + CLUSTER, COALESCE, COLLATE, COLLECT, @@ -127,6 +129,7 @@ define_keywords!( COLUMNS, COMMIT, COMMITTED, + COMPUTE, CONDITION, CONNECT, CONSTRAINT, @@ -157,6 +160,7 @@ define_keywords!( CURRENT_USER, CURSOR, CYCLE, + DATABASE, DATE, DAY, DEALLOCATE, @@ -165,13 +169,16 @@ define_keywords!( DECLARE, DEFAULT, DELETE, + DELIMITED, DENSE_RANK, DEREF, DESC, DESCRIBE, DETERMINISTIC, + DIRECTORY, DISCONNECT, DISTINCT, + DISTRIBUTE, DOUBLE, DROP, DYNAMIC, @@ -206,6 +213,7 @@ define_keywords!( FOLLOWING, FOR, FOREIGN, + FORMAT, FRAME_ROW, FREE, FROM, @@ -220,6 +228,7 @@ define_keywords!( GROUPS, HAVING, HEADER, + HIVEVAR, HOLD, HOUR, IDENTITY, @@ -229,6 +238,7 @@ define_keywords!( INDICATOR, INNER, INOUT, + INPUTFORMAT, INSENSITIVE, INSERT, INT, @@ -262,11 +272,13 @@ define_keywords!( LOCALTIMESTAMP, LOCATION, LOWER, + MANAGEDLOCATION, MATCH, MATERIALIZED, MAX, MEMBER, MERGE, + METADATA, METHOD, MIN, MINUTE, @@ -274,6 +286,7 @@ define_keywords!( MODIFIES, MODULE, MONTH, + MSCK, MULTISET, NATIONAL, NATURAL, @@ -284,6 +297,7 @@ define_keywords!( NO, NONE, NORMALIZE, + NOSCAN, NOT, NTH_VALUE, NTILE, @@ -305,13 +319,17 @@ define_keywords!( ORDER, OUT, OUTER, + OUTPUTFORMAT, OVER, OVERFLOW, OVERLAPS, OVERLAY, + OVERWRITE, PARAMETER, PARQUET, PARTITION, + PARTITIONED, + PARTITIONS, PERCENT, PERCENTILE_CONT, PERCENTILE_DISC, @@ -327,6 +345,7 @@ define_keywords!( PREPARE, PRIMARY, PROCEDURE, + PURGE, RANGE, RANK, RCFILE, @@ -349,6 +368,7 @@ define_keywords!( REGR_SYY, RELEASE, RENAME, + REPAIR, REPEATABLE, REPLACE, RESTRICT, @@ -372,6 +392,7 @@ define_keywords!( SELECT, SENSITIVE, SEQUENCEFILE, + SERDE, SERIALIZABLE, SESSION, SESSION_USER, @@ -380,6 +401,7 @@ define_keywords!( SIMILAR, SMALLINT, SOME, + SORT, SPECIFIC, SPECIFICTYPE, SQL, @@ -389,21 +411,27 @@ define_keywords!( SQRT, START, STATIC, + STATISTICS, STDDEV_POP, STDDEV_SAMP, STDIN, STORED, + STRING, SUBMULTISET, SUBSTRING, SUBSTRING_REGEX, SUCCEEDS, SUM, SYMMETRIC, + SYNC, SYSTEM, SYSTEM_TIME, SYSTEM_USER, TABLE, TABLESAMPLE, + TBLPROPERTIES, + TEMP, + TEMPORARY, TEXT, TEXTFILE, THEN, @@ -473,9 +501,12 @@ pub const RESERVED_FOR_TABLE_ALIAS: &[Keyword] = &[ Keyword::SELECT, Keyword::WHERE, Keyword::GROUP, + Keyword::SORT, Keyword::HAVING, Keyword::ORDER, Keyword::TOP, + Keyword::LATERAL, + Keyword::VIEW, Keyword::LIMIT, Keyword::OFFSET, Keyword::FETCH, @@ -492,6 +523,8 @@ pub const RESERVED_FOR_TABLE_ALIAS: &[Keyword] = &[ Keyword::RIGHT, Keyword::NATURAL, Keyword::USING, + Keyword::CLUSTER, + Keyword::DISTRIBUTE, // for MSSQL-specific OUTER APPLY (seems reserved in most dialects) Keyword::OUTER, ]; @@ -506,15 +539,20 @@ pub const RESERVED_FOR_COLUMN_ALIAS: &[Keyword] = &[ Keyword::SELECT, Keyword::WHERE, Keyword::GROUP, + Keyword::SORT, Keyword::HAVING, Keyword::ORDER, Keyword::TOP, + Keyword::LATERAL, + Keyword::VIEW, Keyword::LIMIT, Keyword::OFFSET, Keyword::FETCH, Keyword::UNION, Keyword::EXCEPT, Keyword::INTERSECT, + Keyword::CLUSTER, + Keyword::DISTRIBUTE, // Reserved only as a column alias in the `SELECT` clause Keyword::FROM, ]; diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index e656ab269..c7041ad93 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -12,6 +12,7 @@ mod ansi; mod generic; +mod hive; pub mod keywords; mod mssql; mod mysql; @@ -24,6 +25,7 @@ use std::fmt::Debug; pub use self::ansi::AnsiDialect; pub use self::generic::GenericDialect; +pub use self::hive::HiveDialect; pub use self::mssql::MsSqlDialect; pub use self::mysql::MySqlDialect; pub use self::postgresql::PostgreSqlDialect; diff --git a/src/parser.rs b/src/parser.rs index 94afeb6e9..7a0b23101 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -48,12 +48,14 @@ pub enum IsOptional { Optional, Mandatory, } + use IsOptional::*; pub enum IsLateral { Lateral, NotLateral, } + use crate::ast::Statement::CreateVirtualTable; use IsLateral::*; @@ -137,6 +139,8 @@ impl<'a> Parser<'a> { self.prev_token(); Ok(Statement::Query(Box::new(self.parse_query()?))) } + Keyword::TRUNCATE => Ok(self.parse_truncate()?), + Keyword::MSCK => Ok(self.parse_msck()?), Keyword::CREATE => Ok(self.parse_create()?), Keyword::DROP => Ok(self.parse_drop()?), Keyword::DELETE => Ok(self.parse_delete()?), @@ -169,6 +173,104 @@ impl<'a> Parser<'a> { } } + pub fn parse_msck(&mut self) -> Result { + let repair = self.parse_keyword(Keyword::REPAIR); + self.expect_keyword(Keyword::TABLE)?; + let table_name = self.parse_object_name()?; + let partition_action = self + .maybe_parse(|parser| { + let pa = match parser.parse_one_of_keywords(&[ + Keyword::ADD, + Keyword::DROP, + Keyword::SYNC, + ]) { + Some(Keyword::ADD) => Some(AddDropSync::ADD), + Some(Keyword::DROP) => Some(AddDropSync::DROP), + Some(Keyword::SYNC) => Some(AddDropSync::SYNC), + _ => None, + }; + parser.expect_keyword(Keyword::PARTITIONS)?; + Ok(pa) + }) + .unwrap_or_default(); + Ok(Statement::Msck { + repair, + table_name, + partition_action, + }) + } + + pub fn parse_truncate(&mut self) -> Result { + self.expect_keyword(Keyword::TABLE)?; + let table_name = self.parse_object_name()?; + let mut partitions = None; + if self.parse_keyword(Keyword::PARTITION) { + self.expect_token(&Token::LParen)?; + partitions = Some(self.parse_comma_separated(Parser::parse_expr)?); + self.expect_token(&Token::RParen)?; + } + Ok(Statement::Truncate { + table_name, + partitions, + }) + } + + pub fn parse_analyze(&mut self) -> Result { + self.expect_keyword(Keyword::TABLE)?; + let table_name = self.parse_object_name()?; + let mut for_columns = false; + let mut cache_metadata = false; + let mut noscan = false; + let mut partitions = None; + let mut compute_statistics = false; + let mut columns = vec![]; + loop { + match self.parse_one_of_keywords(&[ + Keyword::PARTITION, + Keyword::FOR, + Keyword::CACHE, + Keyword::NOSCAN, + Keyword::COMPUTE, + ]) { + Some(Keyword::PARTITION) => { + self.expect_token(&Token::LParen)?; + partitions = Some(self.parse_comma_separated(Parser::parse_expr)?); + self.expect_token(&Token::RParen)?; + } + Some(Keyword::NOSCAN) => noscan = true, + Some(Keyword::FOR) => { + self.expect_keyword(Keyword::COLUMNS)?; + + columns = self + .maybe_parse(|parser| { + parser.parse_comma_separated(Parser::parse_identifier) + }) + .unwrap_or_default(); + for_columns = true + } + Some(Keyword::CACHE) => { + self.expect_keyword(Keyword::METADATA)?; + cache_metadata = true + } + Some(Keyword::COMPUTE) => { + self.expect_keyword(Keyword::STATISTICS)?; + compute_statistics = true + } + _ => break, + } + } + + Ok(Statement::Analyze { + table_name, + for_columns, + columns, + partitions, + cache_metadata, + noscan, + compute_statistics, + }) + } + /// Parse a new expression pub fn parse_expr(&mut self) -> Result { self.parse_subexpr(0) @@ -182,6 +284,7 @@ impl<'a> Parser<'a> { loop { let next_precedence = self.get_next_precedence()?; debug!("next precedence: {:?}", next_precedence); + if precedence >= next_precedence { break; } @@ -316,13 +419,14 @@ impl<'a> Parser<'a> { expr: Box::new(self.parse_subexpr(Self::PLUS_MINUS_PREC)?), }) } - Token::Number(_) + Token::Number(_, _) | Token::SingleQuotedString(_) | Token::NationalStringLiteral(_) | Token::HexStringLiteral(_) => { self.prev_token(); Ok(Expr::Value(self.parse_value()?)) } + Token::LParen => { let expr = if self.parse_keyword(Keyword::SELECT) || self.parse_keyword(Keyword::WITH) { @@ -334,7 +438,7 @@ impl<'a> Parser<'a> { self.expect_token(&Token::RParen)?; Ok(expr) } - unexpected => self.expected("an expression", unexpected), + unexpected => self.expected("an expression:", unexpected), }?; if self.parse_keyword(Keyword::COLLATE) { @@ -665,6 +769,8 @@ impl<'a> Parser<'a> { pub fn parse_infix(&mut self, expr: Expr, precedence: u8) -> Result { let tok = self.next_token(); let regular_binary_operator = match &tok { + Token::Spaceship => Some(BinaryOperator::Spaceship), + Token::DoubleEq => Some(BinaryOperator::Eq), Token::Eq => Some(BinaryOperator::Eq), Token::Neq => Some(BinaryOperator::NotEq), Token::Gt => Some(BinaryOperator::Gt), @@ -744,12 +850,27 @@ impl<'a> Parser<'a> { op: UnaryOperator::PGPostfixFactorial, expr: Box::new(expr), }) + } else if Token::LBracket == tok { + self.parse_map_access(expr) } else { // Can only happen if `get_next_precedence` got out of sync with this function panic!("No infix parser for token {:?}", tok) } } + pub fn parse_map_access(&mut self, expr: Expr) -> Result { + let key = self.parse_literal_string()?; + let tok = self.consume_token(&Token::RBracket); + debug!("Tok: {}", tok); + match expr { + e @ Expr::Identifier(_) | e @ Expr::CompoundIdentifier(_) => Ok(Expr::MapAccess { + column: Box::new(e), + key, + }), + _ => Ok(expr), + } + } + /// Parses the parens following the `[ NOT ] IN` operator pub fn parse_in(&mut self, expr: Expr, negated: bool) -> Result { self.expect_token(&Token::LParen)?; @@ -820,7 +941,14 @@ impl<'a> Parser<'a> { Token::Word(w) if w.keyword == Keyword::IN => Ok(Self::BETWEEN_PREC), Token::Word(w) if w.keyword == Keyword::BETWEEN => Ok(Self::BETWEEN_PREC), Token::Word(w) if w.keyword == Keyword::LIKE => Ok(Self::BETWEEN_PREC), - Token::Eq | Token::Lt | Token::LtEq | Token::Neq | Token::Gt | Token::GtEq => Ok(20), + Token::Eq + | Token::Lt + | Token::LtEq + | Token::Neq + | Token::Gt + | Token::GtEq + | Token::DoubleEq + | Token::Spaceship => Ok(20), Token::Pipe => Ok(21), Token::Caret | Token::Sharp | Token::ShiftRight | Token::ShiftLeft => Ok(22), Token::Ampersand => Ok(23), @@ -828,6 +956,7 @@ impl<'a> Parser<'a> { Token::Mult | Token::Div | Token::Mod | Token::StringConcat => Ok(40), Token::DoubleColon => Ok(50), Token::ExclamationMark => Ok(50), + Token::LBracket | Token::RBracket => Ok(10), _ => Ok(0), } } @@ -911,7 +1040,7 @@ impl<'a> Parser<'a> { let index = self.index; for &keyword in keywords { if !self.parse_keyword(keyword) { - //println!("parse_keywords aborting .. did not find {}", keyword); + // println!("parse_keywords aborting .. did not find {:?}", keyword); // reset index and return immediately self.index = index; return false; @@ -1034,8 +1163,11 @@ impl<'a> Parser<'a> { /// Parse a SQL CREATE statement pub fn parse_create(&mut self) -> Result { let or_replace = self.parse_keywords(&[Keyword::OR, Keyword::REPLACE]); + let temporary = self + .parse_one_of_keywords(&[Keyword::TEMP, Keyword::TEMPORARY]) + .is_some(); if self.parse_keyword(Keyword::TABLE) { - self.parse_create_table(or_replace) + self.parse_create_table(or_replace, temporary) } else if self.parse_keyword(Keyword::MATERIALIZED) || self.parse_keyword(Keyword::VIEW) { self.prev_token(); self.parse_create_view(or_replace) @@ -1088,31 +1220,67 @@ impl<'a> Parser<'a> { }) } + pub fn parse_create_database(&mut self) -> Result { + let ine = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); + let db_name = self.parse_object_name()?; + let mut location = None; + let mut managed_location = None; + loop { + match self.parse_one_of_keywords(&[Keyword::LOCATION, Keyword::MANAGEDLOCATION]) { + Some(Keyword::LOCATION) => location = Some(self.parse_literal_string()?), + Some(Keyword::MANAGEDLOCATION) => { + managed_location = Some(self.parse_literal_string()?) + } + _ => break, + } + } + Ok(Statement::CreateDatabase { + db_name, + if_not_exists: ine, + location, + managed_location, + }) + } + pub fn parse_create_external_table( &mut self, or_replace: bool, ) -> Result { self.expect_keyword(Keyword::TABLE)?; + let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); let table_name = self.parse_object_name()?; let (columns, constraints) = self.parse_columns()?; - self.expect_keywords(&[Keyword::STORED, Keyword::AS])?; - let file_format = self.parse_file_format()?; - self.expect_keyword(Keyword::LOCATION)?; - let location = self.parse_literal_string()?; + let hive_distribution = self.parse_hive_distribution()?; + let hive_formats = self.parse_hive_formats()?; + let file_format = if let Some(ff) = &hive_formats.storage { + match ff { + HiveIOFormat::FileFormat { format } => Some(format.clone()), + _ => None, + } + } else { + None + }; + let location = hive_formats.location.clone(); + let table_properties = self.parse_options(Keyword::TBLPROPERTIES)?; Ok(Statement::CreateTable { name: table_name, columns, constraints, + hive_distribution, + hive_formats: Some(hive_formats), with_options: vec![], + table_properties, or_replace, - if_not_exists: false, + if_not_exists, external: true, - file_format: Some(file_format), - location: Some(location), + temporary: false, + file_format, + location, query: None, without_rowid: false, + like: None, }) } @@ -1139,7 +1307,7 @@ impl<'a> Parser<'a> { // ANSI SQL and Postgres support RECURSIVE here, but we don't support it either. let name = self.parse_object_name()?; let columns = self.parse_parenthesized_column_list(Optional)?; - let with_options = self.parse_with_options()?; + let with_options = self.parse_options(Keyword::WITH)?; self.expect_keyword(Keyword::AS)?; let query = Box::new(self.parse_query()?); // Optional `WITH [ CASCADED | LOCAL ] CHECK OPTION` is widely supported here. @@ -1171,6 +1339,7 @@ impl<'a> Parser<'a> { let names = self.parse_comma_separated(Parser::parse_object_name)?; let cascade = self.parse_keyword(Keyword::CASCADE); let restrict = self.parse_keyword(Keyword::RESTRICT); + let purge = self.parse_keyword(Keyword::PURGE); if cascade && restrict { return parser_err!("Cannot specify both CASCADE and RESTRICT in DROP"); } @@ -1179,6 +1348,7 @@ impl<'a> Parser<'a> { if_exists, names, cascade, + purge, }) } @@ -1199,18 +1369,85 @@ impl<'a> Parser<'a> { }) } - pub fn parse_create_table(&mut self, or_replace: bool) -> Result { + //TODO: Implement parsing for Skewed and Clustered + pub fn parse_hive_distribution(&mut self) -> Result { + if self.parse_keywords(&[Keyword::PARTITIONED, Keyword::BY]) { + self.expect_token(&Token::LParen)?; + let columns = self.parse_comma_separated(Parser::parse_column_def)?; + self.expect_token(&Token::RParen)?; + Ok(HiveDistributionStyle::PARTITIONED { columns }) + } else { + Ok(HiveDistributionStyle::NONE) + } + } + + pub fn parse_hive_formats(&mut self) -> Result { + let mut hive_format = HiveFormat::default(); + loop { + match self.parse_one_of_keywords(&[Keyword::ROW, Keyword::STORED, Keyword::LOCATION]) { + Some(Keyword::ROW) => { + hive_format.row_format = Some(self.parse_row_format()?); + } + Some(Keyword::STORED) => { + self.expect_keyword(Keyword::AS)?; + if self.parse_keyword(Keyword::INPUTFORMAT) { + let input_format = self.parse_expr()?; + self.expect_keyword(Keyword::OUTPUTFORMAT)?; + let output_format = self.parse_expr()?; + hive_format.storage = Some(HiveIOFormat::IOF { + input_format, + output_format, + }); + } else { + let format = self.parse_file_format()?; + hive_format.storage = Some(HiveIOFormat::FileFormat { format }); + } + } + Some(Keyword::LOCATION) => { + hive_format.location = Some(self.parse_literal_string()?); + } + None => break, + _ => break, + } + } + + Ok(hive_format) + } + + pub fn parse_row_format(&mut self) -> Result { + self.expect_keyword(Keyword::FORMAT)?; + match self.parse_one_of_keywords(&[Keyword::SERDE, Keyword::DELIMITED]) { + Some(Keyword::SERDE) => { + let class = self.parse_literal_string()?; + Ok(HiveRowFormat::SERDE { class }) + } + _ => Ok(HiveRowFormat::DELIMITED), + } + } + + pub fn parse_create_table( + &mut self, + or_replace: bool, + temporary: bool, + ) -> Result { let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); let table_name = self.parse_object_name()?; + let like = if self.parse_keyword(Keyword::LIKE) { + self.parse_object_name().ok() + } else { + None + }; // parse optional column list (schema) let (columns, constraints) = self.parse_columns()?; // SQLite supports `WITHOUT ROWID` at the end of `CREATE TABLE` let without_rowid = self.parse_keywords(&[Keyword::WITHOUT, Keyword::ROWID]); + let hive_distribution = self.parse_hive_distribution()?; + let hive_formats = self.parse_hive_formats()?; // PostgreSQL supports `WITH ( options )`, before `AS` - let with_options = self.parse_with_options()?; - + let with_options = self.parse_options(Keyword::WITH)?; + let table_properties = self.parse_options(Keyword::TBLPROPERTIES)?; // Parse optional `AS ( query )` let query = if self.parse_keyword(Keyword::AS) { Some(Box::new(self.parse_query()?)) @@ -1220,16 +1457,21 @@ impl<'a> Parser<'a> { Ok(Statement::CreateTable { name: table_name, + temporary, columns, constraints, with_options, + table_properties, or_replace, if_not_exists, + hive_distribution, + hive_formats: Some(hive_formats), external: false, file_format: None, location: None, query, without_rowid, + like, }) } @@ -1423,8 +1665,8 @@ impl<'a> Parser<'a> { } } - pub fn parse_with_options(&mut self) -> Result, ParserError> { - if self.parse_keyword(Keyword::WITH) { + pub fn parse_options(&mut self, keyword: Keyword) -> Result, ParserError> { + if self.parse_keyword(keyword) { self.expect_token(&Token::LParen)?; let options = self.parse_comma_separated(Parser::parse_sql_option)?; self.expect_token(&Token::RParen)?; @@ -1449,13 +1691,25 @@ impl<'a> Parser<'a> { if let Some(constraint) = self.parse_optional_table_constraint()? { AlterTableOperation::AddConstraint(constraint) } else { - let _ = self.parse_keyword(Keyword::COLUMN); - let column_def = self.parse_column_def()?; - AlterTableOperation::AddColumn { column_def } + let if_not_exists = + self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); + if self.parse_keyword(Keyword::PARTITION) { + self.expect_token(&Token::LParen)?; + let partitions = self.parse_comma_separated(Parser::parse_expr)?; + self.expect_token(&Token::RParen)?; + AlterTableOperation::AddPartitions { + if_not_exists, + new_partitions: partitions, + } + } else { + let _ = self.parse_keyword(Keyword::COLUMN); + let column_def = self.parse_column_def()?; + AlterTableOperation::AddColumn { column_def } + } } } else if self.parse_keyword(Keyword::RENAME) { if self.parse_keyword(Keyword::TO) { - let table_name = self.parse_identifier()?; + let table_name = self.parse_object_name()?; AlterTableOperation::RenameTable { table_name } } else { let _ = self.parse_keyword(Keyword::COLUMN); @@ -1468,17 +1722,51 @@ impl<'a> Parser<'a> { } } } else if self.parse_keyword(Keyword::DROP) { - let _ = self.parse_keyword(Keyword::COLUMN); - let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); - let column_name = self.parse_identifier()?; - let cascade = self.parse_keyword(Keyword::CASCADE); - AlterTableOperation::DropColumn { - column_name, - if_exists, - cascade, + if self.parse_keywords(&[Keyword::IF, Keyword::EXISTS, Keyword::PARTITION]) { + self.expect_token(&Token::LParen)?; + let partitions = self.parse_comma_separated(Parser::parse_expr)?; + self.expect_token(&Token::RParen)?; + AlterTableOperation::DropPartitions { + partitions, + if_exists: true, + } + } else if self.parse_keyword(Keyword::PARTITION) { + self.expect_token(&Token::LParen)?; + let partitions = self.parse_comma_separated(Parser::parse_expr)?; + self.expect_token(&Token::RParen)?; + AlterTableOperation::DropPartitions { + partitions, + if_exists: false, + } + } else { + let _ = self.parse_keyword(Keyword::COLUMN); + let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); + let column_name = self.parse_identifier()?; + let cascade = self.parse_keyword(Keyword::CASCADE); + AlterTableOperation::DropColumn { + column_name, + if_exists, + cascade, + } + } + } else if self.parse_keyword(Keyword::PARTITION) { + self.expect_token(&Token::LParen)?; + let before = self.parse_comma_separated(Parser::parse_expr)?; + self.expect_token(&Token::RParen)?; + self.expect_keyword(Keyword::RENAME)?; + self.expect_keywords(&[Keyword::TO, Keyword::PARTITION])?; + self.expect_token(&Token::LParen)?; + let renames = self.parse_comma_separated(Parser::parse_expr)?; + self.expect_token(&Token::RParen)?; + AlterTableOperation::RenamePartitions { + old_partitions: before, + new_partitions: renames, } } else { - return self.expected("ADD, RENAME, or DROP after ALTER TABLE", self.peek_token()); + return self.expected( + "ADD, RENAME, PARTITION or DROP after ALTER TABLE", + self.peek_token(), + ); }; Ok(Statement::AlterTable { name: table_name, @@ -1545,13 +1833,18 @@ impl<'a> Parser<'a> { Keyword::TRUE => Ok(Value::Boolean(true)), Keyword::FALSE => Ok(Value::Boolean(false)), Keyword::NULL => Ok(Value::Null), + Keyword::NoKeyword if w.quote_style.is_some() => match w.quote_style { + Some('"') => Ok(Value::DoubleQuotedString(w.value)), + Some('\'') => Ok(Value::SingleQuotedString(w.value)), + _ => self.expected("A value?", Token::Word(w))?, + }, _ => self.expected("a concrete value", Token::Word(w)), }, // The call to n.parse() returns a bigdecimal when the // bigdecimal feature is enabled, and is otherwise a no-op // (i.e., it returns the input string). - Token::Number(ref n) => match n.parse() { - Ok(n) => Ok(Value::Number(n)), + Token::Number(ref n, l) => match n.parse() { + Ok(n) => Ok(Value::Number(n, l)), Err(e) => parser_err!(format!("Could not parse '{}' as number: {}", n, e)), }, Token::SingleQuotedString(ref s) => Ok(Value::SingleQuotedString(s.to_string())), @@ -1563,7 +1856,7 @@ impl<'a> Parser<'a> { pub fn parse_number_value(&mut self) -> Result { match self.parse_value()? { - v @ Value::Number(_) => Ok(v), + v @ Value::Number(_, _) => Ok(v), _ => { self.prev_token(); self.expected("literal number", self.peek_token()) @@ -1574,7 +1867,7 @@ impl<'a> Parser<'a> { /// Parse an unsigned literal integer/long pub fn parse_literal_uint(&mut self) -> Result { match self.next_token() { - Token::Number(s) => s.parse::().map_err(|e| { + Token::Number(s, _) => s.parse::().map_err(|e| { ParserError::ParserError(format!("Could not parse '{}' as u64: {}", s, e)) }), unexpected => self.expected("literal int", unexpected), @@ -1584,6 +1877,7 @@ impl<'a> Parser<'a> { /// Parse a literal string pub fn parse_literal_string(&mut self) -> Result { match self.next_token() { + Token::Word(Word { value, keyword, .. }) if keyword == Keyword::NoKeyword => Ok(value), Token::SingleQuotedString(s) => Ok(s), unexpected => self.expected("literal string", unexpected), } @@ -1632,6 +1926,7 @@ impl<'a> Parser<'a> { // parse_interval_literal for a taste. Keyword::INTERVAL => Ok(DataType::Interval), Keyword::REGCLASS => Ok(DataType::Regclass), + Keyword::STRING => Ok(DataType::String), Keyword::TEXT => { if self.consume_token(&Token::LBracket) { // Note: this is postgresql-specific @@ -1730,6 +2025,7 @@ impl<'a> Parser<'a> { pub fn parse_identifier(&mut self) -> Result { match self.next_token() { Token::Word(w) => Ok(w.to_ident()), + Token::SingleQuotedString(s) => Ok(Ident::with_quote('\'', s)), unexpected => self.expected("identifier", unexpected), } } @@ -1805,15 +2101,6 @@ impl<'a> Parser<'a> { }) } - pub fn parse_analyze(&mut self) -> Result { - // ANALYZE TABLE table_name - self.expect_keyword(Keyword::TABLE)?; - - let table_name = self.parse_object_name()?; - - Ok(Statement::Analyze { table_name }) - } - /// Parse a query expression, i.e. a `SELECT` statement optionally /// preceeded with some `WITH` CTE declarations and optionally followed /// by `ORDER BY`. Unlike some other parse_... methods, this one doesn't @@ -1828,53 +2115,88 @@ impl<'a> Parser<'a> { None }; - let body = self.parse_query_body(0)?; + if !self.parse_keyword(Keyword::INSERT) { + let body = self.parse_query_body(0)?; - let order_by = if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) { - self.parse_comma_separated(Parser::parse_order_by_expr)? - } else { - vec![] - }; + let order_by = if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) { + self.parse_comma_separated(Parser::parse_order_by_expr)? + } else { + vec![] + }; - let limit = if self.parse_keyword(Keyword::LIMIT) { - self.parse_limit()? - } else { - None - }; + let limit = if self.parse_keyword(Keyword::LIMIT) { + self.parse_limit()? + } else { + None + }; - let offset = if self.parse_keyword(Keyword::OFFSET) { - Some(self.parse_offset()?) - } else { - None - }; + let offset = if self.parse_keyword(Keyword::OFFSET) { + Some(self.parse_offset()?) + } else { + None + }; - let fetch = if self.parse_keyword(Keyword::FETCH) { - Some(self.parse_fetch()?) - } else { - None - }; + let fetch = if self.parse_keyword(Keyword::FETCH) { + Some(self.parse_fetch()?) + } else { + None + }; - Ok(Query { - with, - body, - limit, - order_by, - offset, - fetch, - }) + Ok(Query { + with, + body, + limit, + order_by, + offset, + fetch, + }) + } else { + let insert = self.parse_insert()?; + Ok(Query { + with, + body: SetExpr::Insert(insert), + limit: None, + order_by: vec![], + offset: None, + fetch: None, + }) + } } /// Parse a CTE (`alias [( col1, col2, ... )] AS (subquery)`) fn parse_cte(&mut self) -> Result { - let alias = TableAlias { - name: self.parse_identifier()?, - columns: self.parse_parenthesized_column_list(Optional)?, + let name = self.parse_identifier()?; + + let mut cte = if self.parse_keyword(Keyword::AS) { + self.expect_token(&Token::LParen)?; + let query = self.parse_query()?; + self.expect_token(&Token::RParen)?; + let alias = TableAlias { + name, + columns: vec![], + }; + Cte { + alias, + query, + from: None, + } + } else { + let columns = self.parse_parenthesized_column_list(Optional)?; + self.expect_keyword(Keyword::AS)?; + self.expect_token(&Token::LParen)?; + let query = self.parse_query()?; + self.expect_token(&Token::RParen)?; + let alias = TableAlias { name, columns }; + Cte { + alias, + query, + from: None, + } }; - self.expect_keyword(Keyword::AS)?; - self.expect_token(&Token::LParen)?; - let query = self.parse_query()?; - self.expect_token(&Token::RParen)?; - Ok(Cte { alias, query }) + if self.parse_keyword(Keyword::FROM) { + cte.from = Some(self.parse_identifier()?); + } + Ok(cte) } /// Parse a "query body", which is an expression with roughly the @@ -1962,6 +2284,37 @@ impl<'a> Parser<'a> { } else { vec![] }; + let mut lateral_views = vec![]; + loop { + if self.parse_keywords(&[Keyword::LATERAL, Keyword::VIEW]) { + let outer = self.parse_keyword(Keyword::OUTER); + let lateral_view = self.parse_expr()?; + let lateral_view_name = self.parse_object_name()?; + let lateral_col_alias = self + .parse_comma_separated(|parser| { + parser.parse_optional_alias(&[ + Keyword::WHERE, + Keyword::GROUP, + Keyword::CLUSTER, + Keyword::HAVING, + Keyword::LATERAL, + ]) // This couldn't possibly be a bad idea + })? + .into_iter() + .filter(|i| i.is_some()) + .map(|i| i.unwrap()) + .collect(); + + lateral_views.push(LateralView { + lateral_view, + lateral_view_name, + lateral_col_alias, + outer, + }); + } else { + break; + } + } let selection = if self.parse_keyword(Keyword::WHERE) { Some(self.parse_expr()?) @@ -1975,6 +2328,24 @@ impl<'a> Parser<'a> { vec![] }; + let cluster_by = if self.parse_keywords(&[Keyword::CLUSTER, Keyword::BY]) { + self.parse_comma_separated(Parser::parse_expr)? + } else { + vec![] + }; + + let distribute_by = if self.parse_keywords(&[Keyword::DISTRIBUTE, Keyword::BY]) { + self.parse_comma_separated(Parser::parse_expr)? + } else { + vec![] + }; + + let sort_by = if self.parse_keywords(&[Keyword::SORT, Keyword::BY]) { + self.parse_comma_separated(Parser::parse_expr)? + } else { + vec![] + }; + let having = if self.parse_keyword(Keyword::HAVING) { Some(self.parse_expr()?) } else { @@ -1987,26 +2358,42 @@ impl<'a> Parser<'a> { projection, from, selection, + lateral_views, group_by, + cluster_by, + distribute_by, + sort_by, having, }) } pub fn parse_set(&mut self) -> Result { - let modifier = self.parse_one_of_keywords(&[Keyword::SESSION, Keyword::LOCAL]); + let modifier = + self.parse_one_of_keywords(&[Keyword::SESSION, Keyword::LOCAL, Keyword::HIVEVAR]); + if let Some(Keyword::HIVEVAR) = modifier { + self.expect_token(&Token::Colon)?; + } let variable = self.parse_identifier()?; if self.consume_token(&Token::Eq) || self.parse_keyword(Keyword::TO) { - let token = self.peek_token(); - let value = match (self.parse_value(), token) { - (Ok(value), _) => SetVariableValue::Literal(value), - (Err(_), Token::Word(ident)) => SetVariableValue::Ident(ident.to_ident()), - (Err(_), unexpected) => self.expected("variable value", unexpected)?, - }; - Ok(Statement::SetVariable { - local: modifier == Some(Keyword::LOCAL), - variable, - value, - }) + let mut values = vec![]; + loop { + let token = self.peek_token(); + let value = match (self.parse_value(), token) { + (Ok(value), _) => SetVariableValue::Literal(value), + (Err(_), Token::Word(ident)) => SetVariableValue::Ident(ident.to_ident()), + (Err(_), unexpected) => self.expected("variable value", unexpected)?, + }; + values.push(value); + if self.consume_token(&Token::Comma) { + continue; + } + return Ok(Statement::SetVariable { + local: modifier == Some(Keyword::LOCAL), + hivevar: Some(Keyword::HIVEVAR) == modifier, + variable, + value: values, + }); + } } else if variable.value == "TRANSACTION" && modifier.is_none() { Ok(Statement::SetTransaction { modes: self.parse_transaction_modes()?, @@ -2119,7 +2506,7 @@ impl<'a> Parser<'a> { } } Keyword::OUTER => { - return self.expected("LEFT, RIGHT, or FULL", self.peek_token()) + return self.expected("LEFT, RIGHT, or FULL", self.peek_token()); } _ if natural => { return self.expected("a join type after NATURAL", self.peek_token()); @@ -2290,21 +2677,61 @@ impl<'a> Parser<'a> { let columns = self.parse_parenthesized_column_list(Mandatory)?; Ok(JoinConstraint::Using(columns)) } else { - self.expected("ON, or USING after JOIN", self.peek_token()) + Ok(JoinConstraint::None) + //self.expected("ON, or USING after JOIN", self.peek_token()) } } /// Parse an INSERT statement pub fn parse_insert(&mut self) -> Result { - self.expect_keyword(Keyword::INTO)?; - let table_name = self.parse_object_name()?; - let columns = self.parse_parenthesized_column_list(Optional)?; - let source = Box::new(self.parse_query()?); - Ok(Statement::Insert { - table_name, - columns, - source, - }) + let action = self.expect_one_of_keywords(&[Keyword::INTO, Keyword::OVERWRITE])?; + let overwrite = action == Keyword::OVERWRITE; + let local = self.parse_keyword(Keyword::LOCAL); + + if self.parse_keyword(Keyword::DIRECTORY) { + let path = self.parse_literal_string()?; + let file_format = if self.parse_keywords(&[Keyword::STORED, Keyword::AS]) { + Some(self.parse_file_format()?) + } else { + None + }; + let source = Box::new(self.parse_query()?); + Ok(Statement::Directory { + local, + path, + overwrite, + file_format, + source, + }) + } else { + // Hive lets you put table here regardless + let table = self.parse_keyword(Keyword::TABLE); + let table_name = self.parse_object_name()?; + let columns = self.parse_parenthesized_column_list(Optional)?; + + let partitioned = if self.parse_keyword(Keyword::PARTITION) { + self.expect_token(&Token::LParen)?; + let r = Some(self.parse_comma_separated(Parser::parse_expr)?); + self.expect_token(&Token::RParen)?; + r + } else { + None + }; + + // Hive allows you to specify columns after partitions as well if you want. + let after_columns = self.parse_parenthesized_column_list(Optional)?; + + let source = Box::new(self.parse_query()?); + Ok(Statement::Insert { + table_name, + overwrite, + partitioned, + columns, + after_columns, + source, + table, + }) + } } pub fn parse_update(&mut self) -> Result { diff --git a/src/test_utils.rs b/src/test_utils.rs index 2fcacffa9..160d2c110 100644 --- a/src/test_utils.rs +++ b/src/test_utils.rs @@ -132,6 +132,7 @@ pub fn all_dialects() -> TestedDialects { Box::new(MsSqlDialect {}), Box::new(AnsiDialect {}), Box::new(SnowflakeDialect {}), + Box::new(HiveDialect {}), ], } } @@ -153,7 +154,7 @@ pub fn expr_from_projection(item: &SelectItem) -> &Expr { } pub fn number(n: &'static str) -> Value { - Value::Number(n.parse().unwrap()) + Value::Number(n.parse().unwrap(), false) } pub fn table_alias(name: impl Into) -> Option { diff --git a/src/tokenizer.rs b/src/tokenizer.rs index bbad1a4c4..fd33f9589 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -35,7 +35,7 @@ pub enum Token { /// A keyword (like SELECT) or an optionally quoted SQL identifier Word(Word), /// An unsigned numeric literal - Number(String), + Number(String, bool), /// A character that could not be tokenized Char(char), /// Single quoted string: i.e: 'string' @@ -48,6 +48,8 @@ pub enum Token { Comma, /// Whitespace (space, tab, etc) Whitespace(Whitespace), + /// Double equals sign `==` + DoubleEq, /// Equality operator `=` Eq, /// Not Equals operator `<>` (or `!=` in some dialects) @@ -60,6 +62,8 @@ pub enum Token { LtEq, /// Greater Than Or Equals operator `>=` GtEq, + /// Spaceship operator <=> + Spaceship, /// Plus operator `+` Plus, /// Minus operator `-` @@ -127,13 +131,15 @@ impl fmt::Display for Token { match self { Token::EOF => f.write_str("EOF"), Token::Word(ref w) => write!(f, "{}", w), - Token::Number(ref n) => f.write_str(n), + Token::Number(ref n, l) => write!(f, "{}{long}", n, long = if *l { "L" } else { "" }), Token::Char(ref c) => write!(f, "{}", c), Token::SingleQuotedString(ref s) => write!(f, "'{}'", s), Token::NationalStringLiteral(ref s) => write!(f, "N'{}'", s), Token::HexStringLiteral(ref s) => write!(f, "X'{}'", s), Token::Comma => f.write_str(","), Token::Whitespace(ws) => write!(f, "{}", ws), + Token::DoubleEq => f.write_str("=="), + Token::Spaceship => f.write_str("<=>"), Token::Eq => f.write_str("="), Token::Neq => f.write_str("<>"), Token::Lt => f.write_str("<"), @@ -296,7 +302,7 @@ impl<'a> Tokenizer<'a> { Token::Whitespace(Whitespace::Tab) => self.col += 4, Token::Word(w) if w.quote_style == None => self.col += w.value.len() as u64, Token::Word(w) if w.quote_style != None => self.col += w.value.len() as u64 + 2, - Token::Number(s) => self.col += s.len() as u64, + Token::Number(s, _) => self.col += s.len() as u64, Token::SingleQuotedString(s) => self.col += s.len() as u64, _ => self.col += 1, } @@ -358,6 +364,15 @@ impl<'a> Tokenizer<'a> { ch if self.dialect.is_identifier_start(ch) => { chars.next(); // consume the first char let s = self.tokenize_word(ch, chars); + + if s.chars().all(|x| ('0'..='9').contains(&x) || x == '.') { + let mut s = peeking_take_while(&mut s.chars().peekable(), |ch| { + matches!(ch, '0'..='9' | '.') + }); + let s2 = peeking_take_while(chars, |ch| matches!(ch, '0'..='9' | '.')); + s += s2.as_str(); + return Ok(Some(Token::Number(s, false))); + } Ok(Some(Token::make_word(&s, None))) } // string @@ -383,7 +398,13 @@ impl<'a> Tokenizer<'a> { '0'..='9' => { // TODO: https://jakewheat.github.io/sql-overview/sql-2011-foundation-grammar.html#unsigned-numeric-literal let s = peeking_take_while(chars, |ch| matches!(ch, '0'..='9' | '.')); - Ok(Some(Token::Number(s))) + let long = if chars.peek() == Some(&'L') { + chars.next(); + true + } else { + false + }; + Ok(Some(Token::Number(s, long))) } // punctuation '(' => self.consume_and_return(chars, Token::LParen), @@ -461,7 +482,13 @@ impl<'a> Tokenizer<'a> { '<' => { chars.next(); // consume match chars.peek() { - Some('=') => self.consume_and_return(chars, Token::LtEq), + Some('=') => { + chars.next(); + match chars.peek() { + Some('>') => self.consume_and_return(chars, Token::Spaceship), + _ => Ok(Some(Token::LtEq)), + } + } Some('>') => self.consume_and_return(chars, Token::Neq), Some('<') => self.consume_and_return(chars, Token::ShiftLeft), _ => Ok(Some(Token::Lt)), @@ -634,7 +661,7 @@ mod tests { let expected = vec![ Token::make_keyword("SELECT"), Token::Whitespace(Whitespace::Space), - Token::Number(String::from("1")), + Token::Number(String::from("1"), false), ]; compare(expected, tokens); @@ -652,7 +679,7 @@ mod tests { Token::Whitespace(Whitespace::Space), Token::make_word("sqrt", None), Token::LParen, - Token::Number(String::from("1")), + Token::Number(String::from("1"), false), Token::RParen, ]; @@ -724,11 +751,11 @@ mod tests { Token::Whitespace(Whitespace::Space), Token::Eq, Token::Whitespace(Whitespace::Space), - Token::Number(String::from("1")), + Token::Number(String::from("1"), false), Token::Whitespace(Whitespace::Space), Token::make_keyword("LIMIT"), Token::Whitespace(Whitespace::Space), - Token::Number(String::from("5")), + Token::Number(String::from("5"), false), ]; compare(expected, tokens); @@ -758,7 +785,7 @@ mod tests { Token::Whitespace(Whitespace::Space), Token::Eq, Token::Whitespace(Whitespace::Space), - Token::Number(String::from("1")), + Token::Number(String::from("1"), false), ]; compare(expected, tokens); @@ -790,7 +817,7 @@ mod tests { Token::Whitespace(Whitespace::Space), Token::Eq, Token::Whitespace(Whitespace::Space), - Token::Number(String::from("1")), + Token::Number(String::from("1"), false), ]; compare(expected, tokens); @@ -943,12 +970,12 @@ mod tests { let mut tokenizer = Tokenizer::new(&dialect, &sql); let tokens = tokenizer.tokenize().unwrap(); let expected = vec![ - Token::Number("0".to_string()), + Token::Number("0".to_string(), false), Token::Whitespace(Whitespace::SingleLineComment { prefix: "--".to_string(), comment: "this is a comment\n".to_string(), }), - Token::Number("1".to_string()), + Token::Number("1".to_string(), false), ]; compare(expected, tokens); } @@ -975,11 +1002,11 @@ mod tests { let mut tokenizer = Tokenizer::new(&dialect, &sql); let tokens = tokenizer.tokenize().unwrap(); let expected = vec![ - Token::Number("0".to_string()), + Token::Number("0".to_string(), false), Token::Whitespace(Whitespace::MultiLineComment( "multi-line\n* /comment".to_string(), )), - Token::Number("1".to_string()), + Token::Number("1".to_string(), false), ]; compare(expected, tokens); } @@ -1046,7 +1073,7 @@ mod tests { Token::Whitespace(Whitespace::Space), Token::make_keyword("TOP"), Token::Whitespace(Whitespace::Space), - Token::Number(String::from("5")), + Token::Number(String::from("5"), false), Token::Whitespace(Whitespace::Space), Token::make_word("bar", Some('[')), Token::Whitespace(Whitespace::Space), diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index e7d78f950..ab4aa457b 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -92,7 +92,7 @@ fn parse_insert_invalid() { let sql = "INSERT public.customer (id, name, active) VALUES (1, 2, 3)"; let res = parse_sql_statements(sql); assert_eq!( - ParserError::ParserError("Expected INTO, found: public".to_string()), + ParserError::ParserError("Expected one of INTO or OVERWRITE, found: public".to_string()), res.unwrap_err() ); } @@ -454,11 +454,11 @@ fn parse_number() { #[cfg(feature = "bigdecimal")] assert_eq!( expr, - Expr::Value(Value::Number(bigdecimal::BigDecimal::from(1))) + Expr::Value(Value::Number(bigdecimal::BigDecimal::from(1), false)) ); #[cfg(not(feature = "bigdecimal"))] - assert_eq!(expr, Expr::Value(Value::Number("1.0".into()))); + assert_eq!(expr, Expr::Value(Value::Number("1.0".into(), false))); } #[test] @@ -894,7 +894,7 @@ fn parse_select_having() { name: ObjectName(vec![Ident::new("COUNT")]), args: vec![FunctionArg::Unnamed(Expr::Wildcard)], over: None, - distinct: false + distinct: false, })), op: BinaryOperator::Gt, right: Box::new(Expr::Value(number("1"))) @@ -1639,18 +1639,6 @@ fn parse_explain_analyze_with_simple_select() { ); } -#[test] -fn parse_simple_analyze() { - let sql = "ANALYZE TABLE t"; - let stmt = verified_stmt(sql); - assert_eq!( - stmt, - Statement::Analyze { - table_name: ObjectName(vec![Ident::new("t")]) - } - ); -} - #[test] fn parse_named_argument_function() { let sql = "SELECT FUN(a => '1', b => '2') FROM foo"; @@ -2390,7 +2378,7 @@ fn parse_ctes() { fn assert_ctes_in_select(expected: &[&str], sel: &Query) { for (i, exp) in expected.iter().enumerate() { - let Cte { alias, query } = &sel.with.as_ref().unwrap().cte_tables[i]; + let Cte { alias, query, .. } = &sel.with.as_ref().unwrap().cte_tables[i]; assert_eq!(*exp, query.to_string()); assert_eq!( if i == 0 { @@ -2479,6 +2467,7 @@ fn parse_recursive_cte() { }], }, query: cte_query, + from: None, }; assert_eq!(with.cte_tables.first().unwrap(), &expected); } @@ -2799,6 +2788,7 @@ fn parse_drop_table() { if_exists, names, cascade, + purge: _, } => { assert_eq!(false, if_exists); assert_eq!(ObjectType::Table, object_type); @@ -2818,6 +2808,7 @@ fn parse_drop_table() { if_exists, names, cascade, + purge: _, } => { assert_eq!(true, if_exists); assert_eq!(ObjectType::Table, object_type); diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs new file mode 100644 index 000000000..585be989b --- /dev/null +++ b/tests/sqlparser_hive.rs @@ -0,0 +1,212 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#![warn(clippy::all)] + +//! Test SQL syntax specific to Hive. The parser based on the generic dialect +//! is also tested (on the inputs it can handle). + +use sqlparser::dialect::HiveDialect; +use sqlparser::test_utils::*; + +#[test] +fn parse_table_create() { + let sql = r#"CREATE TABLE IF NOT EXISTS db.table (a BIGINT, b STRING, c TIMESTAMP) PARTITIONED BY (d STRING, e TIMESTAMP) STORED AS ORC LOCATION 's3://...' TBLPROPERTIES ("prop" = "2", "asdf" = '1234', 'asdf' = "1234", "asdf" = 2)"#; + let iof = r#"CREATE TABLE IF NOT EXISTS db.table (a BIGINT, b STRING, c TIMESTAMP) PARTITIONED BY (d STRING, e TIMESTAMP) STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat' LOCATION 's3://...'"#; + + hive().verified_stmt(sql); + hive().verified_stmt(iof); +} + +#[test] +fn parse_insert_overwrite() { + let insert_partitions = r#"INSERT OVERWRITE TABLE db.new_table PARTITION (a = '1', b) SELECT a, b, c FROM db.table"#; + hive().verified_stmt(insert_partitions); +} + +#[test] +fn test_truncate() { + let truncate = r#"TRUNCATE TABLE db.table"#; + hive().verified_stmt(truncate); +} + +#[test] +fn parse_analyze() { + let analyze = r#"ANALYZE TABLE db.table_name PARTITION (a = '1234', b) COMPUTE STATISTICS NOSCAN CACHE METADATA"#; + hive().verified_stmt(analyze); +} + +#[test] +fn parse_analyze_for_columns() { + let analyze = + r#"ANALYZE TABLE db.table_name PARTITION (a = '1234', b) COMPUTE STATISTICS FOR COLUMNS"#; + hive().verified_stmt(analyze); +} + +#[test] +fn parse_msck() { + let msck = r#"MSCK REPAIR TABLE db.table_name ADD PARTITIONS"#; + let msck2 = r#"MSCK REPAIR TABLE db.table_name"#; + hive().verified_stmt(msck); + hive().verified_stmt(msck2); +} + +#[test] +fn parse_set() { + let set = "SET HIVEVAR:name = a, b, c_d"; + hive().verified_stmt(set); +} + +#[test] +fn test_spaceship() { + let spaceship = "SELECT * FROM db.table WHERE a <=> b"; + hive().verified_stmt(spaceship); +} + +#[test] +fn parse_with_cte() { + let with = "WITH a AS (SELECT * FROM b) INSERT INTO TABLE db.table_table PARTITION (a) SELECT * FROM b"; + hive().verified_stmt(with); +} + +#[test] +fn drop_table_purge() { + let purge = "DROP TABLE db.table_name PURGE"; + hive().verified_stmt(purge); +} + +#[test] +fn create_table_like() { + let like = "CREATE TABLE db.table_name LIKE db.other_table"; + hive().verified_stmt(like); +} + +// Turning off this test until we can parse identifiers starting with numbers :( +#[test] +fn test_identifier() { + let between = "SELECT a AS 3_barrr_asdf FROM db.table_name"; + hive().verified_stmt(between); +} + +#[test] +fn test_alter_partition() { + let alter = "ALTER TABLE db.table PARTITION (a = 2) RENAME TO PARTITION (a = 1)"; + hive().verified_stmt(alter); +} + +#[test] +fn test_add_partition() { + let add = "ALTER TABLE db.table ADD IF NOT EXISTS PARTITION (a = 'asdf', b = 2)"; + hive().verified_stmt(add); +} + +#[test] +fn test_drop_partition() { + let drop = "ALTER TABLE db.table DROP PARTITION (a = 1)"; + hive().verified_stmt(drop); +} + +#[test] +fn test_drop_if_exists() { + let drop = "ALTER TABLE db.table DROP IF EXISTS PARTITION (a = 'b', c = 'd')"; + hive().verified_stmt(drop); +} + +#[test] +fn test_cluster_by() { + let cluster = "SELECT a FROM db.table CLUSTER BY a, b"; + hive().verified_stmt(cluster); +} + +#[test] +fn test_distribute_by() { + let cluster = "SELECT a FROM db.table DISTRIBUTE BY a, b"; + hive().verified_stmt(cluster); +} + +#[test] +fn no_join_condition() { + let join = "SELECT a, b FROM db.table_name JOIN a"; + hive().verified_stmt(join); +} + +#[test] +fn columns_after_partition() { + let query = "INSERT INTO db.table_name PARTITION (a, b) (c, d) SELECT a, b, c, d FROM db.table"; + hive().verified_stmt(query); +} + +#[test] +fn long_numerics() { + let query = r#"SELECT MIN(MIN(10, 5), 1L) AS a"#; + hive().verified_stmt(query); +} + +#[test] +fn decimal_precision() { + let query = "SELECT CAST(a AS DECIMAL(18,2)) FROM db.table"; + let expected = "SELECT CAST(a AS NUMERIC(18,2)) FROM db.table"; + hive().one_statement_parses_to(query, expected); +} + +#[test] +fn create_temp_table() { + let query = "CREATE TEMPORARY TABLE db.table (a INT NOT NULL)"; + let query2 = "CREATE TEMP TABLE db.table (a INT NOT NULL)"; + + hive().verified_stmt(query); + hive().one_statement_parses_to(query2, query); +} + +#[test] +fn create_local_directory() { + let query = + "INSERT OVERWRITE LOCAL DIRECTORY '/home/blah' STORED AS TEXTFILE SELECT * FROM db.table"; + hive().verified_stmt(query); +} + +#[test] +fn lateral_view() { + let view = "SELECT a FROM db.table LATERAL VIEW explode(a) t AS j, P LATERAL VIEW OUTER explode(a) t AS a, b WHERE a = 1"; + hive().verified_stmt(view); +} + +#[test] +fn sort_by() { + let sort_by = "SELECT * FROM db.table SORT BY a"; + hive().verified_stmt(sort_by); +} + +#[test] +fn rename_table() { + let rename = "ALTER TABLE db.table_name RENAME TO db.table_2"; + hive().verified_stmt(rename); +} + +#[test] +fn map_access() { + let rename = "SELECT a.b[\"asdf\"] FROM db.table WHERE a = 2"; + hive().verified_stmt(rename); +} + +#[test] +fn from_cte() { + let rename = + "WITH cte AS (SELECT * FROM a.b) FROM cte INSERT INTO TABLE a.b PARTITION (a) SELECT *"; + println!("{}", hive().verified_stmt(rename)); +} + +fn hive() -> TestedDialects { + TestedDialects { + dialects: vec![Box::new(HiveDialect {})], + } +} diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 20f186100..2abd8ae9b 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -364,8 +364,9 @@ fn parse_set() { stmt, Statement::SetVariable { local: false, + hivevar: false, variable: "a".into(), - value: SetVariableValue::Ident("b".into()), + value: vec![SetVariableValue::Ident("b".into())], } ); @@ -374,8 +375,11 @@ fn parse_set() { stmt, Statement::SetVariable { local: false, + hivevar: false, variable: "a".into(), - value: SetVariableValue::Literal(Value::SingleQuotedString("b".into())), + value: vec![SetVariableValue::Literal(Value::SingleQuotedString( + "b".into() + ))], } ); @@ -384,8 +388,9 @@ fn parse_set() { stmt, Statement::SetVariable { local: false, + hivevar: false, variable: "a".into(), - value: SetVariableValue::Literal(number("0")), + value: vec![SetVariableValue::Literal(number("0"))], } ); @@ -394,8 +399,9 @@ fn parse_set() { stmt, Statement::SetVariable { local: false, + hivevar: false, variable: "a".into(), - value: SetVariableValue::Ident("DEFAULT".into()), + value: vec![SetVariableValue::Ident("DEFAULT".into())], } ); @@ -404,8 +410,9 @@ fn parse_set() { stmt, Statement::SetVariable { local: true, + hivevar: false, variable: "a".into(), - value: SetVariableValue::Ident("b".into()), + value: vec![SetVariableValue::Ident("b".into())], } );