From 56e5c71c4d10685c71ebe8c223ab127b3603cf4b Mon Sep 17 00:00:00 2001 From: Haoyan Geng Date: Mon, 16 Jun 2025 14:10:17 -0700 Subject: [PATCH] [SPARK-52494] Support colon-sign operator syntax to access Variant fields. --- .../sql/catalyst/parser/SqlBaseParser.g4 | 30 ++ .../expressions/SemiStructuredExtract.scala | 60 +++ .../json/JsonExpressionEvalUtils.scala | 8 +- .../sql/catalyst/parser/AstBuilder.scala | 20 + .../sql/catalyst/rules/RuleIdCollection.scala | 1 + .../sql/catalyst/trees/TreePatterns.scala | 1 + .../internal/BaseSessionStateBuilder.scala | 3 +- .../variant-field-extractions.sql.out | 380 ++++++++++++++++++ .../inputs/variant-field-extractions.sql | 66 +++ .../results/variant-field-extractions.sql.out | 287 +++++++++++++ .../sql/hive/HiveSessionStateBuilder.scala | 3 +- 11 files changed, 853 insertions(+), 6 deletions(-) create mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SemiStructuredExtract.scala create mode 100644 sql/core/src/test/resources/sql-tests/analyzer-results/variant-field-extractions.sql.out create mode 100644 sql/core/src/test/resources/sql-tests/inputs/variant-field-extractions.sql create mode 100644 sql/core/src/test/resources/sql-tests/results/variant-field-extractions.sql.out diff --git a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 index 08f222b2f4124..84e6084f3da46 100644 --- a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 +++ b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 @@ -1208,6 +1208,7 @@ primaryExpression | constant #constantDefault | ASTERISK exceptClause? #star | qualifiedName DOT ASTERISK exceptClause? 
#star + | col=primaryExpression COLON path=semiStructuredExtractionPath #semiStructuredExtract | LEFT_PAREN namedExpression (COMMA namedExpression)+ RIGHT_PAREN #rowConstructor | LEFT_PAREN query RIGHT_PAREN #subqueryExpression | functionName LEFT_PAREN (setQuantifier? argument+=functionArgument @@ -1230,6 +1231,35 @@ primaryExpression FROM position=valueExpression (FOR length=valueExpression)? RIGHT_PAREN #overlay ; +semiStructuredExtractionPath + : jsonPathFirstPart (jsonPathParts)* + ; + +jsonPathIdentifier + : identifier + | BACKQUOTED_IDENTIFIER + ; + +jsonPathBracketedIdentifier + : LEFT_BRACKET stringLit RIGHT_BRACKET + ; + +jsonPathFirstPart + : jsonPathIdentifier + | jsonPathBracketedIdentifier + | DOT + | LEFT_BRACKET INTEGER_VALUE RIGHT_BRACKET + | LEFT_BRACKET ASTERISK RIGHT_BRACKET + ; + +jsonPathParts + : DOT jsonPathIdentifier + | jsonPathBracketedIdentifier + | LEFT_BRACKET INTEGER_VALUE RIGHT_BRACKET + | LEFT_BRACKET ASTERISK RIGHT_BRACKET + | LEFT_BRACKET identifier RIGHT_BRACKET + ; + literalType : DATE | TIME diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SemiStructuredExtract.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SemiStructuredExtract.scala new file mode 100644 index 0000000000000..af5d2ec39d00e --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SemiStructuredExtract.scala @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.expressions + +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.catalyst.expressions.variant.VariantGet +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.catalyst.trees.TreePattern.{SEMI_STRUCTURED_EXTRACT, TreePattern} +import org.apache.spark.sql.types.{DataType, StringType, VariantType} +import org.apache.spark.unsafe.types.UTF8String + +/** + * Represents the extraction of data from a field that contains semi-structured data. The + * semi-structured column can only be a Variant type for now. + * @param child The semi-structured column + * @param field The field to extract + */ +case class SemiStructuredExtract( + child: Expression, field: String) extends UnaryExpression with Unevaluable { + override lazy val resolved = false + override def dataType: DataType = StringType + + final override val nodePatterns: Seq[TreePattern] = Seq(SEMI_STRUCTURED_EXTRACT) + + override protected def withNewChildInternal(newChild: Expression): SemiStructuredExtract = + copy(child = newChild) +} + +/** + * Replaces SemiStructuredExtract expressions by extracting the specified field from the + * semi-structured column (only VariantType is supported for now). 
+ */ +case object ExtractSemiStructuredFields extends Rule[LogicalPlan] { + override def apply(plan: LogicalPlan): LogicalPlan = plan.resolveExpressionsWithPruning( + _.containsPattern(SEMI_STRUCTURED_EXTRACT), ruleId) { + case SemiStructuredExtract(column, field) if column.resolved => + if (column.dataType.isInstanceOf[VariantType]) { + VariantGet(column, Literal(UTF8String.fromString(field)), VariantType, failOnError = true) + } else { + throw new AnalysisException( + errorClass = "COLUMN_IS_NOT_VARIANT_TYPE", messageParameters = Map.empty) + } + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/json/JsonExpressionEvalUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/json/JsonExpressionEvalUtils.scala index b942006e87e9d..f0473f5a414f2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/json/JsonExpressionEvalUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/json/JsonExpressionEvalUtils.scala @@ -33,13 +33,13 @@ import org.apache.spark.sql.types.{ArrayType, DataType, MapType, StringType, Str import org.apache.spark.unsafe.types.{UTF8String, VariantVal} import org.apache.spark.util.Utils -private[this] sealed trait PathInstruction -private[this] object PathInstruction { +sealed trait PathInstruction +object PathInstruction { private[expressions] case object Subscript extends PathInstruction private[expressions] case object Wildcard extends PathInstruction private[expressions] case object Key extends PathInstruction private[expressions] case class Index(index: Long) extends PathInstruction - private[expressions] case class Named(name: String) extends PathInstruction + case class Named(name: String) extends PathInstruction } private[this] sealed trait WriteStyle @@ -49,7 +49,7 @@ private[this] object WriteStyle { private[expressions] case object FlattenStyle extends WriteStyle } -private[this] object JsonPathParser extends 
RegexParsers { +object JsonPathParser extends RegexParsers { import PathInstruction._ def root: Parser[Char] = '$' diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 9b9ff2175457b..54528f706b03f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -36,6 +36,8 @@ import org.apache.spark.sql.catalyst.analysis.FunctionRegistry.FUNC_ALIAS import org.apache.spark.sql.catalyst.catalog.{BucketSpec, CatalogStorageFormat, ClusterBySpec} import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.aggregate.{AnyValue, First, Last} +import org.apache.spark.sql.catalyst.expressions.json.JsonPathParser +import org.apache.spark.sql.catalyst.expressions.json.PathInstruction.Named import org.apache.spark.sql.catalyst.parser.SqlBaseParser._ import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.logical._ @@ -3322,6 +3324,24 @@ class AstBuilder extends DataTypeAstBuilder } } + /** + * Create a [[SemiStructuredExtract]] expression. + */ + override def visitSemiStructuredExtract( + ctx: SemiStructuredExtractContext): Expression = withOrigin(ctx) { + val field = ctx.path.getText + // When `field` starts with a bracket, do not add a `.` as the bracket already implies nesting + // Also the bracket will imply case sensitive field extraction. 
+ val path = if (field.startsWith("[")) "$" + field else s"$$.$field" + val parsedPath = JsonPathParser.parse(path) + if (parsedPath.isEmpty) { + throw new ParseException(errorClass = "PARSE_SYNTAX_ERROR", ctx = ctx) + } + val potentialAlias = parsedPath.get.collect { case Named(name) => name }.lastOption + val node = SemiStructuredExtract(expression(ctx.col), path) + potentialAlias.map { colName => Alias(node, colName)() }.getOrElse(node) + } + /** * Create an [[UnresolvedAttribute]] expression or a [[UnresolvedRegex]] if it is a regex * quoted in `` diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleIdCollection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleIdCollection.scala index 0d376861ddfb9..e7b59af5e7766 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleIdCollection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleIdCollection.scala @@ -113,6 +113,7 @@ object RuleIdCollection { "org.apache.spark.sql.catalyst.expressions.ValidateAndStripPipeExpressions" :: "org.apache.spark.sql.catalyst.analysis.ResolveUnresolvedHaving" :: "org.apache.spark.sql.catalyst.analysis.ResolveTableConstraints" :: + "org.apache.spark.sql.catalyst.expressions.ExtractSemiStructuredFields" :: // Catalyst Optimizer rules "org.apache.spark.sql.catalyst.optimizer.BooleanSimplification" :: "org.apache.spark.sql.catalyst.optimizer.CollapseProject" :: diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreePatterns.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreePatterns.scala index 471e68b6b7082..c35aa7403d767 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreePatterns.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreePatterns.scala @@ -80,6 +80,7 @@ object TreePattern extends Enumeration { val REGEXP_EXTRACT_FAMILY: Value = Value val REGEXP_REPLACE: Value = 
Value val RUNTIME_REPLACEABLE: Value = Value + val SEMI_STRUCTURED_EXTRACT: Value = Value val SCALAR_SUBQUERY: Value = Value val SCALAR_SUBQUERY_REFERENCE: Value = Value val SCALA_UDF: Value = Value diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala index 334616a7709e0..9942918638c55 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala @@ -22,7 +22,7 @@ import org.apache.spark.sql.artifact.ArtifactManager import org.apache.spark.sql.catalyst.analysis.{Analyzer, EvalSubqueriesForTimeTravel, FunctionRegistry, InvokeProcedures, ReplaceCharWithVarchar, ResolveDataSource, ResolveSessionCatalog, ResolveTranspose, TableFunctionRegistry} import org.apache.spark.sql.catalyst.analysis.resolver.ResolverExtension import org.apache.spark.sql.catalyst.catalog.{FunctionExpressionBuilder, SessionCatalog} -import org.apache.spark.sql.catalyst.expressions.Expression +import org.apache.spark.sql.catalyst.expressions.{Expression, ExtractSemiStructuredFields} import org.apache.spark.sql.catalyst.optimizer.Optimizer import org.apache.spark.sql.catalyst.parser.ParserInterface import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan @@ -244,6 +244,7 @@ abstract class BaseSessionStateBuilder( new EvalSubqueriesForTimeTravel +: new ResolveTranspose(session) +: new InvokeProcedures(session) +: + ExtractSemiStructuredFields +: customResolutionRules override val postHocResolutionRules: Seq[Rule[LogicalPlan]] = diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/variant-field-extractions.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/variant-field-extractions.sql.out new file mode 100644 index 0000000000000..c15a356c47ab9 --- /dev/null +++ 
b/sql/core/src/test/resources/sql-tests/analyzer-results/variant-field-extractions.sql.out @@ -0,0 +1,380 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +CREATE TEMP VIEW variant_test_data AS +SELECT + parse_json('{ "price": 30 }') as int_price_variant, + parse_json('{ "price": 12345.678 }') as double_price_variant, + parse_json('{ "name": "John", "age": 30, "city": "New York", "active": true, "scores": [85, 92, 78] }') as multi_field_variant, + parse_json('{ "item": [ { "model" : "basic", "price" : 6.12 }, { "model" : "medium", "price" : 9.24 } ] }') as array_value_variant, + parse_json('[{ "id": 1, "name": "Alice" }, { "id": 2, "name": "Bob" }, { "id": 3, "name": "Charlie" }]') as array_variant, + parse_json('{ "metadata": { "version": "1.0", "tags": ["important", "urgent"], "nested": { "level": 2, "value": "deep" } } }') as nested_variant, + parse_json('{ "field-name": "value1", "field.name": "value2", "field_name": "value3" }') as special_chars_variant +-- !query analysis +CreateViewCommand `variant_test_data`, SELECT + parse_json('{ "price": 30 }') as int_price_variant, + parse_json('{ "price": 12345.678 }') as double_price_variant, + parse_json('{ "name": "John", "age": 30, "city": "New York", "active": true, "scores": [85, 92, 78] }') as multi_field_variant, + parse_json('{ "item": [ { "model" : "basic", "price" : 6.12 }, { "model" : "medium", "price" : 9.24 } ] }') as array_value_variant, + parse_json('[{ "id": 1, "name": "Alice" }, { "id": 2, "name": "Bob" }, { "id": 3, "name": "Charlie" }]') as array_variant, + parse_json('{ "metadata": { "version": "1.0", "tags": ["important", "urgent"], "nested": { "level": 2, "value": "deep" } } }') as nested_variant, + parse_json('{ "field-name": "value1", "field.name": "value2", "field_name": "value3" }') as special_chars_variant, false, false, LocalTempView, UNSUPPORTED, true + +- Project [parse_json({ "price": 30 }, true) AS int_price_variant#x, parse_json({ "price": 12345.678 }, true) AS 
double_price_variant#x, parse_json({ "name": "John", "age": 30, "city": "New York", "active": true, "scores": [85, 92, 78] }, true) AS multi_field_variant#x, parse_json({ "item": [ { "model" : "basic", "price" : 6.12 }, { "model" : "medium", "price" : 9.24 } ] }, true) AS array_value_variant#x, parse_json([{ "id": 1, "name": "Alice" }, { "id": 2, "name": "Bob" }, { "id": 3, "name": "Charlie" }], true) AS array_variant#x, parse_json({ "metadata": { "version": "1.0", "tags": ["important", "urgent"], "nested": { "level": 2, "value": "deep" } } }, true) AS nested_variant#x, parse_json({ "field-name": "value1", "field.name": "value2", "field_name": "value3" }, true) AS special_chars_variant#x] + +- OneRowRelation + + +-- !query +select int_price_variant:price from variant_test_data +-- !query analysis +Project [variant_get(int_price_variant#x, $.price, VariantType, true, Some(America/Los_Angeles)) AS price#x] ++- SubqueryAlias variant_test_data + +- View (`variant_test_data`, [int_price_variant#x, double_price_variant#x, multi_field_variant#x, array_value_variant#x, array_variant#x, nested_variant#x, special_chars_variant#x]) + +- Project [cast(int_price_variant#x as variant) AS int_price_variant#x, cast(double_price_variant#x as variant) AS double_price_variant#x, cast(multi_field_variant#x as variant) AS multi_field_variant#x, cast(array_value_variant#x as variant) AS array_value_variant#x, cast(array_variant#x as variant) AS array_variant#x, cast(nested_variant#x as variant) AS nested_variant#x, cast(special_chars_variant#x as variant) AS special_chars_variant#x] + +- Project [parse_json({ "price": 30 }, true) AS int_price_variant#x, parse_json({ "price": 12345.678 }, true) AS double_price_variant#x, parse_json({ "name": "John", "age": 30, "city": "New York", "active": true, "scores": [85, 92, 78] }, true) AS multi_field_variant#x, parse_json({ "item": [ { "model" : "basic", "price" : 6.12 }, { "model" : "medium", "price" : 9.24 } ] }, true) AS array_value_variant#x, 
parse_json([{ "id": 1, "name": "Alice" }, { "id": 2, "name": "Bob" }, { "id": 3, "name": "Charlie" }], true) AS array_variant#x, parse_json({ "metadata": { "version": "1.0", "tags": ["important", "urgent"], "nested": { "level": 2, "value": "deep" } } }, true) AS nested_variant#x, parse_json({ "field-name": "value1", "field.name": "value2", "field_name": "value3" }, true) AS special_chars_variant#x] + +- OneRowRelation + + +-- !query +select int_price_variant:price::decimal(5, 2) from variant_test_data +-- !query analysis +Project [cast(variant_get(int_price_variant#x, $.price, VariantType, true, Some(America/Los_Angeles)) as decimal(5,2)) AS price#x] ++- SubqueryAlias variant_test_data + +- View (`variant_test_data`, [int_price_variant#x, double_price_variant#x, multi_field_variant#x, array_value_variant#x, array_variant#x, nested_variant#x, special_chars_variant#x]) + +- Project [cast(int_price_variant#x as variant) AS int_price_variant#x, cast(double_price_variant#x as variant) AS double_price_variant#x, cast(multi_field_variant#x as variant) AS multi_field_variant#x, cast(array_value_variant#x as variant) AS array_value_variant#x, cast(array_variant#x as variant) AS array_variant#x, cast(nested_variant#x as variant) AS nested_variant#x, cast(special_chars_variant#x as variant) AS special_chars_variant#x] + +- Project [parse_json({ "price": 30 }, true) AS int_price_variant#x, parse_json({ "price": 12345.678 }, true) AS double_price_variant#x, parse_json({ "name": "John", "age": 30, "city": "New York", "active": true, "scores": [85, 92, 78] }, true) AS multi_field_variant#x, parse_json({ "item": [ { "model" : "basic", "price" : 6.12 }, { "model" : "medium", "price" : 9.24 } ] }, true) AS array_value_variant#x, parse_json([{ "id": 1, "name": "Alice" }, { "id": 2, "name": "Bob" }, { "id": 3, "name": "Charlie" }], true) AS array_variant#x, parse_json({ "metadata": { "version": "1.0", "tags": ["important", "urgent"], "nested": { "level": 2, "value": "deep" } } }, 
true) AS nested_variant#x, parse_json({ "field-name": "value1", "field.name": "value2", "field_name": "value3" }, true) AS special_chars_variant#x] + +- OneRowRelation + + +-- !query +select int_price_variant:price::string from variant_test_data +-- !query analysis +Project [cast(variant_get(int_price_variant#x, $.price, VariantType, true, Some(America/Los_Angeles)) as string) AS price#x] ++- SubqueryAlias variant_test_data + +- View (`variant_test_data`, [int_price_variant#x, double_price_variant#x, multi_field_variant#x, array_value_variant#x, array_variant#x, nested_variant#x, special_chars_variant#x]) + +- Project [cast(int_price_variant#x as variant) AS int_price_variant#x, cast(double_price_variant#x as variant) AS double_price_variant#x, cast(multi_field_variant#x as variant) AS multi_field_variant#x, cast(array_value_variant#x as variant) AS array_value_variant#x, cast(array_variant#x as variant) AS array_variant#x, cast(nested_variant#x as variant) AS nested_variant#x, cast(special_chars_variant#x as variant) AS special_chars_variant#x] + +- Project [parse_json({ "price": 30 }, true) AS int_price_variant#x, parse_json({ "price": 12345.678 }, true) AS double_price_variant#x, parse_json({ "name": "John", "age": 30, "city": "New York", "active": true, "scores": [85, 92, 78] }, true) AS multi_field_variant#x, parse_json({ "item": [ { "model" : "basic", "price" : 6.12 }, { "model" : "medium", "price" : 9.24 } ] }, true) AS array_value_variant#x, parse_json([{ "id": 1, "name": "Alice" }, { "id": 2, "name": "Bob" }, { "id": 3, "name": "Charlie" }], true) AS array_variant#x, parse_json({ "metadata": { "version": "1.0", "tags": ["important", "urgent"], "nested": { "level": 2, "value": "deep" } } }, true) AS nested_variant#x, parse_json({ "field-name": "value1", "field.name": "value2", "field_name": "value3" }, true) AS special_chars_variant#x] + +- OneRowRelation + + +-- !query +select double_price_variant:price::decimal(3, 2) from variant_test_data +-- !query 
analysis +Project [cast(variant_get(double_price_variant#x, $.price, VariantType, true, Some(America/Los_Angeles)) as decimal(3,2)) AS price#x] ++- SubqueryAlias variant_test_data + +- View (`variant_test_data`, [int_price_variant#x, double_price_variant#x, multi_field_variant#x, array_value_variant#x, array_variant#x, nested_variant#x, special_chars_variant#x]) + +- Project [cast(int_price_variant#x as variant) AS int_price_variant#x, cast(double_price_variant#x as variant) AS double_price_variant#x, cast(multi_field_variant#x as variant) AS multi_field_variant#x, cast(array_value_variant#x as variant) AS array_value_variant#x, cast(array_variant#x as variant) AS array_variant#x, cast(nested_variant#x as variant) AS nested_variant#x, cast(special_chars_variant#x as variant) AS special_chars_variant#x] + +- Project [parse_json({ "price": 30 }, true) AS int_price_variant#x, parse_json({ "price": 12345.678 }, true) AS double_price_variant#x, parse_json({ "name": "John", "age": 30, "city": "New York", "active": true, "scores": [85, 92, 78] }, true) AS multi_field_variant#x, parse_json({ "item": [ { "model" : "basic", "price" : 6.12 }, { "model" : "medium", "price" : 9.24 } ] }, true) AS array_value_variant#x, parse_json([{ "id": 1, "name": "Alice" }, { "id": 2, "name": "Bob" }, { "id": 3, "name": "Charlie" }], true) AS array_variant#x, parse_json({ "metadata": { "version": "1.0", "tags": ["important", "urgent"], "nested": { "level": 2, "value": "deep" } } }, true) AS nested_variant#x, parse_json({ "field-name": "value1", "field.name": "value2", "field_name": "value3" }, true) AS special_chars_variant#x] + +- OneRowRelation + + +-- !query +select multi_field_variant:name, multi_field_variant:age, multi_field_variant:city from variant_test_data +-- !query analysis +Project [variant_get(multi_field_variant#x, $.name, VariantType, true, Some(America/Los_Angeles)) AS name#x, variant_get(multi_field_variant#x, $.age, VariantType, true, Some(America/Los_Angeles)) AS age#x, 
variant_get(multi_field_variant#x, $.city, VariantType, true, Some(America/Los_Angeles)) AS city#x] ++- SubqueryAlias variant_test_data + +- View (`variant_test_data`, [int_price_variant#x, double_price_variant#x, multi_field_variant#x, array_value_variant#x, array_variant#x, nested_variant#x, special_chars_variant#x]) + +- Project [cast(int_price_variant#x as variant) AS int_price_variant#x, cast(double_price_variant#x as variant) AS double_price_variant#x, cast(multi_field_variant#x as variant) AS multi_field_variant#x, cast(array_value_variant#x as variant) AS array_value_variant#x, cast(array_variant#x as variant) AS array_variant#x, cast(nested_variant#x as variant) AS nested_variant#x, cast(special_chars_variant#x as variant) AS special_chars_variant#x] + +- Project [parse_json({ "price": 30 }, true) AS int_price_variant#x, parse_json({ "price": 12345.678 }, true) AS double_price_variant#x, parse_json({ "name": "John", "age": 30, "city": "New York", "active": true, "scores": [85, 92, 78] }, true) AS multi_field_variant#x, parse_json({ "item": [ { "model" : "basic", "price" : 6.12 }, { "model" : "medium", "price" : 9.24 } ] }, true) AS array_value_variant#x, parse_json([{ "id": 1, "name": "Alice" }, { "id": 2, "name": "Bob" }, { "id": 3, "name": "Charlie" }], true) AS array_variant#x, parse_json({ "metadata": { "version": "1.0", "tags": ["important", "urgent"], "nested": { "level": 2, "value": "deep" } } }, true) AS nested_variant#x, parse_json({ "field-name": "value1", "field.name": "value2", "field_name": "value3" }, true) AS special_chars_variant#x] + +- OneRowRelation + + +-- !query +select multi_field_variant:name::string, multi_field_variant:age::int, multi_field_variant:active::boolean from variant_test_data +-- !query analysis +Project [cast(variant_get(multi_field_variant#x, $.name, VariantType, true, Some(America/Los_Angeles)) as string) AS name#x, cast(variant_get(multi_field_variant#x, $.age, VariantType, true, Some(America/Los_Angeles)) as int) AS 
age#x, cast(variant_get(multi_field_variant#x, $.active, VariantType, true, Some(America/Los_Angeles)) as boolean) AS active#x] ++- SubqueryAlias variant_test_data + +- View (`variant_test_data`, [int_price_variant#x, double_price_variant#x, multi_field_variant#x, array_value_variant#x, array_variant#x, nested_variant#x, special_chars_variant#x]) + +- Project [cast(int_price_variant#x as variant) AS int_price_variant#x, cast(double_price_variant#x as variant) AS double_price_variant#x, cast(multi_field_variant#x as variant) AS multi_field_variant#x, cast(array_value_variant#x as variant) AS array_value_variant#x, cast(array_variant#x as variant) AS array_variant#x, cast(nested_variant#x as variant) AS nested_variant#x, cast(special_chars_variant#x as variant) AS special_chars_variant#x] + +- Project [parse_json({ "price": 30 }, true) AS int_price_variant#x, parse_json({ "price": 12345.678 }, true) AS double_price_variant#x, parse_json({ "name": "John", "age": 30, "city": "New York", "active": true, "scores": [85, 92, 78] }, true) AS multi_field_variant#x, parse_json({ "item": [ { "model" : "basic", "price" : 6.12 }, { "model" : "medium", "price" : 9.24 } ] }, true) AS array_value_variant#x, parse_json([{ "id": 1, "name": "Alice" }, { "id": 2, "name": "Bob" }, { "id": 3, "name": "Charlie" }], true) AS array_variant#x, parse_json({ "metadata": { "version": "1.0", "tags": ["important", "urgent"], "nested": { "level": 2, "value": "deep" } } }, true) AS nested_variant#x, parse_json({ "field-name": "value1", "field.name": "value2", "field_name": "value3" }, true) AS special_chars_variant#x] + +- OneRowRelation + + +-- !query +select array_value_variant:item[0].model from variant_test_data +-- !query analysis +Project [variant_get(array_value_variant#x, $.item[0].model, VariantType, true, Some(America/Los_Angeles)) AS model#x] ++- SubqueryAlias variant_test_data + +- View (`variant_test_data`, [int_price_variant#x, double_price_variant#x, multi_field_variant#x, 
array_value_variant#x, array_variant#x, nested_variant#x, special_chars_variant#x]) + +- Project [cast(int_price_variant#x as variant) AS int_price_variant#x, cast(double_price_variant#x as variant) AS double_price_variant#x, cast(multi_field_variant#x as variant) AS multi_field_variant#x, cast(array_value_variant#x as variant) AS array_value_variant#x, cast(array_variant#x as variant) AS array_variant#x, cast(nested_variant#x as variant) AS nested_variant#x, cast(special_chars_variant#x as variant) AS special_chars_variant#x] + +- Project [parse_json({ "price": 30 }, true) AS int_price_variant#x, parse_json({ "price": 12345.678 }, true) AS double_price_variant#x, parse_json({ "name": "John", "age": 30, "city": "New York", "active": true, "scores": [85, 92, 78] }, true) AS multi_field_variant#x, parse_json({ "item": [ { "model" : "basic", "price" : 6.12 }, { "model" : "medium", "price" : 9.24 } ] }, true) AS array_value_variant#x, parse_json([{ "id": 1, "name": "Alice" }, { "id": 2, "name": "Bob" }, { "id": 3, "name": "Charlie" }], true) AS array_variant#x, parse_json({ "metadata": { "version": "1.0", "tags": ["important", "urgent"], "nested": { "level": 2, "value": "deep" } } }, true) AS nested_variant#x, parse_json({ "field-name": "value1", "field.name": "value2", "field_name": "value3" }, true) AS special_chars_variant#x] + +- OneRowRelation + + +-- !query +select array_value_variant:item[0].price from variant_test_data +-- !query analysis +Project [variant_get(array_value_variant#x, $.item[0].price, VariantType, true, Some(America/Los_Angeles)) AS price#x] ++- SubqueryAlias variant_test_data + +- View (`variant_test_data`, [int_price_variant#x, double_price_variant#x, multi_field_variant#x, array_value_variant#x, array_variant#x, nested_variant#x, special_chars_variant#x]) + +- Project [cast(int_price_variant#x as variant) AS int_price_variant#x, cast(double_price_variant#x as variant) AS double_price_variant#x, cast(multi_field_variant#x as variant) AS 
multi_field_variant#x, cast(array_value_variant#x as variant) AS array_value_variant#x, cast(array_variant#x as variant) AS array_variant#x, cast(nested_variant#x as variant) AS nested_variant#x, cast(special_chars_variant#x as variant) AS special_chars_variant#x] + +- Project [parse_json({ "price": 30 }, true) AS int_price_variant#x, parse_json({ "price": 12345.678 }, true) AS double_price_variant#x, parse_json({ "name": "John", "age": 30, "city": "New York", "active": true, "scores": [85, 92, 78] }, true) AS multi_field_variant#x, parse_json({ "item": [ { "model" : "basic", "price" : 6.12 }, { "model" : "medium", "price" : 9.24 } ] }, true) AS array_value_variant#x, parse_json([{ "id": 1, "name": "Alice" }, { "id": 2, "name": "Bob" }, { "id": 3, "name": "Charlie" }], true) AS array_variant#x, parse_json({ "metadata": { "version": "1.0", "tags": ["important", "urgent"], "nested": { "level": 2, "value": "deep" } } }, true) AS nested_variant#x, parse_json({ "field-name": "value1", "field.name": "value2", "field_name": "value3" }, true) AS special_chars_variant#x] + +- OneRowRelation + + +-- !query +select array_value_variant:item[1].model from variant_test_data +-- !query analysis +Project [variant_get(array_value_variant#x, $.item[1].model, VariantType, true, Some(America/Los_Angeles)) AS model#x] ++- SubqueryAlias variant_test_data + +- View (`variant_test_data`, [int_price_variant#x, double_price_variant#x, multi_field_variant#x, array_value_variant#x, array_variant#x, nested_variant#x, special_chars_variant#x]) + +- Project [cast(int_price_variant#x as variant) AS int_price_variant#x, cast(double_price_variant#x as variant) AS double_price_variant#x, cast(multi_field_variant#x as variant) AS multi_field_variant#x, cast(array_value_variant#x as variant) AS array_value_variant#x, cast(array_variant#x as variant) AS array_variant#x, cast(nested_variant#x as variant) AS nested_variant#x, cast(special_chars_variant#x as variant) AS special_chars_variant#x] + +- 
Project [parse_json({ "price": 30 }, true) AS int_price_variant#x, parse_json({ "price": 12345.678 }, true) AS double_price_variant#x, parse_json({ "name": "John", "age": 30, "city": "New York", "active": true, "scores": [85, 92, 78] }, true) AS multi_field_variant#x, parse_json({ "item": [ { "model" : "basic", "price" : 6.12 }, { "model" : "medium", "price" : 9.24 } ] }, true) AS array_value_variant#x, parse_json([{ "id": 1, "name": "Alice" }, { "id": 2, "name": "Bob" }, { "id": 3, "name": "Charlie" }], true) AS array_variant#x, parse_json({ "metadata": { "version": "1.0", "tags": ["important", "urgent"], "nested": { "level": 2, "value": "deep" } } }, true) AS nested_variant#x, parse_json({ "field-name": "value1", "field.name": "value2", "field_name": "value3" }, true) AS special_chars_variant#x] + +- OneRowRelation + + +-- !query +select array_value_variant:item[1].price from variant_test_data +-- !query analysis +Project [variant_get(array_value_variant#x, $.item[1].price, VariantType, true, Some(America/Los_Angeles)) AS price#x] ++- SubqueryAlias variant_test_data + +- View (`variant_test_data`, [int_price_variant#x, double_price_variant#x, multi_field_variant#x, array_value_variant#x, array_variant#x, nested_variant#x, special_chars_variant#x]) + +- Project [cast(int_price_variant#x as variant) AS int_price_variant#x, cast(double_price_variant#x as variant) AS double_price_variant#x, cast(multi_field_variant#x as variant) AS multi_field_variant#x, cast(array_value_variant#x as variant) AS array_value_variant#x, cast(array_variant#x as variant) AS array_variant#x, cast(nested_variant#x as variant) AS nested_variant#x, cast(special_chars_variant#x as variant) AS special_chars_variant#x] + +- Project [parse_json({ "price": 30 }, true) AS int_price_variant#x, parse_json({ "price": 12345.678 }, true) AS double_price_variant#x, parse_json({ "name": "John", "age": 30, "city": "New York", "active": true, "scores": [85, 92, 78] }, true) AS multi_field_variant#x, 
parse_json({ "item": [ { "model" : "basic", "price" : 6.12 }, { "model" : "medium", "price" : 9.24 } ] }, true) AS array_value_variant#x, parse_json([{ "id": 1, "name": "Alice" }, { "id": 2, "name": "Bob" }, { "id": 3, "name": "Charlie" }], true) AS array_variant#x, parse_json({ "metadata": { "version": "1.0", "tags": ["important", "urgent"], "nested": { "level": 2, "value": "deep" } } }, true) AS nested_variant#x, parse_json({ "field-name": "value1", "field.name": "value2", "field_name": "value3" }, true) AS special_chars_variant#x] + +- OneRowRelation + + +-- !query +select array_variant:[0].id from variant_test_data +-- !query analysis +Project [variant_get(array_variant#x, $[0].id, VariantType, true, Some(America/Los_Angeles)) AS id#x] ++- SubqueryAlias variant_test_data + +- View (`variant_test_data`, [int_price_variant#x, double_price_variant#x, multi_field_variant#x, array_value_variant#x, array_variant#x, nested_variant#x, special_chars_variant#x]) + +- Project [cast(int_price_variant#x as variant) AS int_price_variant#x, cast(double_price_variant#x as variant) AS double_price_variant#x, cast(multi_field_variant#x as variant) AS multi_field_variant#x, cast(array_value_variant#x as variant) AS array_value_variant#x, cast(array_variant#x as variant) AS array_variant#x, cast(nested_variant#x as variant) AS nested_variant#x, cast(special_chars_variant#x as variant) AS special_chars_variant#x] + +- Project [parse_json({ "price": 30 }, true) AS int_price_variant#x, parse_json({ "price": 12345.678 }, true) AS double_price_variant#x, parse_json({ "name": "John", "age": 30, "city": "New York", "active": true, "scores": [85, 92, 78] }, true) AS multi_field_variant#x, parse_json({ "item": [ { "model" : "basic", "price" : 6.12 }, { "model" : "medium", "price" : 9.24 } ] }, true) AS array_value_variant#x, parse_json([{ "id": 1, "name": "Alice" }, { "id": 2, "name": "Bob" }, { "id": 3, "name": "Charlie" }], true) AS array_variant#x, parse_json({ "metadata": { "version": 
"1.0", "tags": ["important", "urgent"], "nested": { "level": 2, "value": "deep" } } }, true) AS nested_variant#x, parse_json({ "field-name": "value1", "field.name": "value2", "field_name": "value3" }, true) AS special_chars_variant#x] + +- OneRowRelation + + +-- !query +select array_variant:[0].name from variant_test_data +-- !query analysis +Project [variant_get(array_variant#x, $[0].name, VariantType, true, Some(America/Los_Angeles)) AS name#x] ++- SubqueryAlias variant_test_data + +- View (`variant_test_data`, [int_price_variant#x, double_price_variant#x, multi_field_variant#x, array_value_variant#x, array_variant#x, nested_variant#x, special_chars_variant#x]) + +- Project [cast(int_price_variant#x as variant) AS int_price_variant#x, cast(double_price_variant#x as variant) AS double_price_variant#x, cast(multi_field_variant#x as variant) AS multi_field_variant#x, cast(array_value_variant#x as variant) AS array_value_variant#x, cast(array_variant#x as variant) AS array_variant#x, cast(nested_variant#x as variant) AS nested_variant#x, cast(special_chars_variant#x as variant) AS special_chars_variant#x] + +- Project [parse_json({ "price": 30 }, true) AS int_price_variant#x, parse_json({ "price": 12345.678 }, true) AS double_price_variant#x, parse_json({ "name": "John", "age": 30, "city": "New York", "active": true, "scores": [85, 92, 78] }, true) AS multi_field_variant#x, parse_json({ "item": [ { "model" : "basic", "price" : 6.12 }, { "model" : "medium", "price" : 9.24 } ] }, true) AS array_value_variant#x, parse_json([{ "id": 1, "name": "Alice" }, { "id": 2, "name": "Bob" }, { "id": 3, "name": "Charlie" }], true) AS array_variant#x, parse_json({ "metadata": { "version": "1.0", "tags": ["important", "urgent"], "nested": { "level": 2, "value": "deep" } } }, true) AS nested_variant#x, parse_json({ "field-name": "value1", "field.name": "value2", "field_name": "value3" }, true) AS special_chars_variant#x] + +- OneRowRelation + + +-- !query +select array_variant:[1].id 
from variant_test_data +-- !query analysis +Project [variant_get(array_variant#x, $[1].id, VariantType, true, Some(America/Los_Angeles)) AS id#x] ++- SubqueryAlias variant_test_data + +- View (`variant_test_data`, [int_price_variant#x, double_price_variant#x, multi_field_variant#x, array_value_variant#x, array_variant#x, nested_variant#x, special_chars_variant#x]) + +- Project [cast(int_price_variant#x as variant) AS int_price_variant#x, cast(double_price_variant#x as variant) AS double_price_variant#x, cast(multi_field_variant#x as variant) AS multi_field_variant#x, cast(array_value_variant#x as variant) AS array_value_variant#x, cast(array_variant#x as variant) AS array_variant#x, cast(nested_variant#x as variant) AS nested_variant#x, cast(special_chars_variant#x as variant) AS special_chars_variant#x] + +- Project [parse_json({ "price": 30 }, true) AS int_price_variant#x, parse_json({ "price": 12345.678 }, true) AS double_price_variant#x, parse_json({ "name": "John", "age": 30, "city": "New York", "active": true, "scores": [85, 92, 78] }, true) AS multi_field_variant#x, parse_json({ "item": [ { "model" : "basic", "price" : 6.12 }, { "model" : "medium", "price" : 9.24 } ] }, true) AS array_value_variant#x, parse_json([{ "id": 1, "name": "Alice" }, { "id": 2, "name": "Bob" }, { "id": 3, "name": "Charlie" }], true) AS array_variant#x, parse_json({ "metadata": { "version": "1.0", "tags": ["important", "urgent"], "nested": { "level": 2, "value": "deep" } } }, true) AS nested_variant#x, parse_json({ "field-name": "value1", "field.name": "value2", "field_name": "value3" }, true) AS special_chars_variant#x] + +- OneRowRelation + + +-- !query +select array_variant:[1].name from variant_test_data +-- !query analysis +Project [variant_get(array_variant#x, $[1].name, VariantType, true, Some(America/Los_Angeles)) AS name#x] ++- SubqueryAlias variant_test_data + +- View (`variant_test_data`, [int_price_variant#x, double_price_variant#x, multi_field_variant#x, 
array_value_variant#x, array_variant#x, nested_variant#x, special_chars_variant#x]) + +- Project [cast(int_price_variant#x as variant) AS int_price_variant#x, cast(double_price_variant#x as variant) AS double_price_variant#x, cast(multi_field_variant#x as variant) AS multi_field_variant#x, cast(array_value_variant#x as variant) AS array_value_variant#x, cast(array_variant#x as variant) AS array_variant#x, cast(nested_variant#x as variant) AS nested_variant#x, cast(special_chars_variant#x as variant) AS special_chars_variant#x] + +- Project [parse_json({ "price": 30 }, true) AS int_price_variant#x, parse_json({ "price": 12345.678 }, true) AS double_price_variant#x, parse_json({ "name": "John", "age": 30, "city": "New York", "active": true, "scores": [85, 92, 78] }, true) AS multi_field_variant#x, parse_json({ "item": [ { "model" : "basic", "price" : 6.12 }, { "model" : "medium", "price" : 9.24 } ] }, true) AS array_value_variant#x, parse_json([{ "id": 1, "name": "Alice" }, { "id": 2, "name": "Bob" }, { "id": 3, "name": "Charlie" }], true) AS array_variant#x, parse_json({ "metadata": { "version": "1.0", "tags": ["important", "urgent"], "nested": { "level": 2, "value": "deep" } } }, true) AS nested_variant#x, parse_json({ "field-name": "value1", "field.name": "value2", "field_name": "value3" }, true) AS special_chars_variant#x] + +- OneRowRelation + + +-- !query +select nested_variant:metadata.version from variant_test_data +-- !query analysis +Project [variant_get(nested_variant#x, $.metadata.version, VariantType, true, Some(America/Los_Angeles)) AS version#x] ++- SubqueryAlias variant_test_data + +- View (`variant_test_data`, [int_price_variant#x, double_price_variant#x, multi_field_variant#x, array_value_variant#x, array_variant#x, nested_variant#x, special_chars_variant#x]) + +- Project [cast(int_price_variant#x as variant) AS int_price_variant#x, cast(double_price_variant#x as variant) AS double_price_variant#x, cast(multi_field_variant#x as variant) AS 
multi_field_variant#x, cast(array_value_variant#x as variant) AS array_value_variant#x, cast(array_variant#x as variant) AS array_variant#x, cast(nested_variant#x as variant) AS nested_variant#x, cast(special_chars_variant#x as variant) AS special_chars_variant#x] + +- Project [parse_json({ "price": 30 }, true) AS int_price_variant#x, parse_json({ "price": 12345.678 }, true) AS double_price_variant#x, parse_json({ "name": "John", "age": 30, "city": "New York", "active": true, "scores": [85, 92, 78] }, true) AS multi_field_variant#x, parse_json({ "item": [ { "model" : "basic", "price" : 6.12 }, { "model" : "medium", "price" : 9.24 } ] }, true) AS array_value_variant#x, parse_json([{ "id": 1, "name": "Alice" }, { "id": 2, "name": "Bob" }, { "id": 3, "name": "Charlie" }], true) AS array_variant#x, parse_json({ "metadata": { "version": "1.0", "tags": ["important", "urgent"], "nested": { "level": 2, "value": "deep" } } }, true) AS nested_variant#x, parse_json({ "field-name": "value1", "field.name": "value2", "field_name": "value3" }, true) AS special_chars_variant#x] + +- OneRowRelation + + +-- !query +select nested_variant:metadata.tags[0] from variant_test_data +-- !query analysis +Project [variant_get(nested_variant#x, $.metadata.tags[0], VariantType, true, Some(America/Los_Angeles)) AS tags#x] ++- SubqueryAlias variant_test_data + +- View (`variant_test_data`, [int_price_variant#x, double_price_variant#x, multi_field_variant#x, array_value_variant#x, array_variant#x, nested_variant#x, special_chars_variant#x]) + +- Project [cast(int_price_variant#x as variant) AS int_price_variant#x, cast(double_price_variant#x as variant) AS double_price_variant#x, cast(multi_field_variant#x as variant) AS multi_field_variant#x, cast(array_value_variant#x as variant) AS array_value_variant#x, cast(array_variant#x as variant) AS array_variant#x, cast(nested_variant#x as variant) AS nested_variant#x, cast(special_chars_variant#x as variant) AS special_chars_variant#x] + +- Project 
[parse_json({ "price": 30 }, true) AS int_price_variant#x, parse_json({ "price": 12345.678 }, true) AS double_price_variant#x, parse_json({ "name": "John", "age": 30, "city": "New York", "active": true, "scores": [85, 92, 78] }, true) AS multi_field_variant#x, parse_json({ "item": [ { "model" : "basic", "price" : 6.12 }, { "model" : "medium", "price" : 9.24 } ] }, true) AS array_value_variant#x, parse_json([{ "id": 1, "name": "Alice" }, { "id": 2, "name": "Bob" }, { "id": 3, "name": "Charlie" }], true) AS array_variant#x, parse_json({ "metadata": { "version": "1.0", "tags": ["important", "urgent"], "nested": { "level": 2, "value": "deep" } } }, true) AS nested_variant#x, parse_json({ "field-name": "value1", "field.name": "value2", "field_name": "value3" }, true) AS special_chars_variant#x] + +- OneRowRelation + + +-- !query +select nested_variant:metadata.nested.level from variant_test_data +-- !query analysis +Project [variant_get(nested_variant#x, $.metadata.nested.level, VariantType, true, Some(America/Los_Angeles)) AS level#x] ++- SubqueryAlias variant_test_data + +- View (`variant_test_data`, [int_price_variant#x, double_price_variant#x, multi_field_variant#x, array_value_variant#x, array_variant#x, nested_variant#x, special_chars_variant#x]) + +- Project [cast(int_price_variant#x as variant) AS int_price_variant#x, cast(double_price_variant#x as variant) AS double_price_variant#x, cast(multi_field_variant#x as variant) AS multi_field_variant#x, cast(array_value_variant#x as variant) AS array_value_variant#x, cast(array_variant#x as variant) AS array_variant#x, cast(nested_variant#x as variant) AS nested_variant#x, cast(special_chars_variant#x as variant) AS special_chars_variant#x] + +- Project [parse_json({ "price": 30 }, true) AS int_price_variant#x, parse_json({ "price": 12345.678 }, true) AS double_price_variant#x, parse_json({ "name": "John", "age": 30, "city": "New York", "active": true, "scores": [85, 92, 78] }, true) AS multi_field_variant#x, 
parse_json({ "item": [ { "model" : "basic", "price" : 6.12 }, { "model" : "medium", "price" : 9.24 } ] }, true) AS array_value_variant#x, parse_json([{ "id": 1, "name": "Alice" }, { "id": 2, "name": "Bob" }, { "id": 3, "name": "Charlie" }], true) AS array_variant#x, parse_json({ "metadata": { "version": "1.0", "tags": ["important", "urgent"], "nested": { "level": 2, "value": "deep" } } }, true) AS nested_variant#x, parse_json({ "field-name": "value1", "field.name": "value2", "field_name": "value3" }, true) AS special_chars_variant#x] + +- OneRowRelation + + +-- !query +select nested_variant:metadata.nested.value from variant_test_data +-- !query analysis +Project [variant_get(nested_variant#x, $.metadata.nested.value, VariantType, true, Some(America/Los_Angeles)) AS value#x] ++- SubqueryAlias variant_test_data + +- View (`variant_test_data`, [int_price_variant#x, double_price_variant#x, multi_field_variant#x, array_value_variant#x, array_variant#x, nested_variant#x, special_chars_variant#x]) + +- Project [cast(int_price_variant#x as variant) AS int_price_variant#x, cast(double_price_variant#x as variant) AS double_price_variant#x, cast(multi_field_variant#x as variant) AS multi_field_variant#x, cast(array_value_variant#x as variant) AS array_value_variant#x, cast(array_variant#x as variant) AS array_variant#x, cast(nested_variant#x as variant) AS nested_variant#x, cast(special_chars_variant#x as variant) AS special_chars_variant#x] + +- Project [parse_json({ "price": 30 }, true) AS int_price_variant#x, parse_json({ "price": 12345.678 }, true) AS double_price_variant#x, parse_json({ "name": "John", "age": 30, "city": "New York", "active": true, "scores": [85, 92, 78] }, true) AS multi_field_variant#x, parse_json({ "item": [ { "model" : "basic", "price" : 6.12 }, { "model" : "medium", "price" : 9.24 } ] }, true) AS array_value_variant#x, parse_json([{ "id": 1, "name": "Alice" }, { "id": 2, "name": "Bob" }, { "id": 3, "name": "Charlie" }], true) AS array_variant#x, 
parse_json({ "metadata": { "version": "1.0", "tags": ["important", "urgent"], "nested": { "level": 2, "value": "deep" } } }, true) AS nested_variant#x, parse_json({ "field-name": "value1", "field.name": "value2", "field_name": "value3" }, true) AS special_chars_variant#x] + +- OneRowRelation + + +-- !query +select special_chars_variant:`field-name`::string from variant_test_data +-- !query analysis +Project [cast(variant_get(special_chars_variant#x, $.field-name, VariantType, true, Some(America/Los_Angeles)) as string) AS field-name#x] ++- SubqueryAlias variant_test_data + +- View (`variant_test_data`, [int_price_variant#x, double_price_variant#x, multi_field_variant#x, array_value_variant#x, array_variant#x, nested_variant#x, special_chars_variant#x]) + +- Project [cast(int_price_variant#x as variant) AS int_price_variant#x, cast(double_price_variant#x as variant) AS double_price_variant#x, cast(multi_field_variant#x as variant) AS multi_field_variant#x, cast(array_value_variant#x as variant) AS array_value_variant#x, cast(array_variant#x as variant) AS array_variant#x, cast(nested_variant#x as variant) AS nested_variant#x, cast(special_chars_variant#x as variant) AS special_chars_variant#x] + +- Project [parse_json({ "price": 30 }, true) AS int_price_variant#x, parse_json({ "price": 12345.678 }, true) AS double_price_variant#x, parse_json({ "name": "John", "age": 30, "city": "New York", "active": true, "scores": [85, 92, 78] }, true) AS multi_field_variant#x, parse_json({ "item": [ { "model" : "basic", "price" : 6.12 }, { "model" : "medium", "price" : 9.24 } ] }, true) AS array_value_variant#x, parse_json([{ "id": 1, "name": "Alice" }, { "id": 2, "name": "Bob" }, { "id": 3, "name": "Charlie" }], true) AS array_variant#x, parse_json({ "metadata": { "version": "1.0", "tags": ["important", "urgent"], "nested": { "level": 2, "value": "deep" } } }, true) AS nested_variant#x, parse_json({ "field-name": "value1", "field.name": "value2", "field_name": "value3" }, true) 
AS special_chars_variant#x] + +- OneRowRelation + + +-- !query +select special_chars_variant:`field.name`::string from variant_test_data +-- !query analysis +Project [cast(variant_get(special_chars_variant#x, $.field.name, VariantType, true, Some(America/Los_Angeles)) as string) AS name#x] ++- SubqueryAlias variant_test_data + +- View (`variant_test_data`, [int_price_variant#x, double_price_variant#x, multi_field_variant#x, array_value_variant#x, array_variant#x, nested_variant#x, special_chars_variant#x]) + +- Project [cast(int_price_variant#x as variant) AS int_price_variant#x, cast(double_price_variant#x as variant) AS double_price_variant#x, cast(multi_field_variant#x as variant) AS multi_field_variant#x, cast(array_value_variant#x as variant) AS array_value_variant#x, cast(array_variant#x as variant) AS array_variant#x, cast(nested_variant#x as variant) AS nested_variant#x, cast(special_chars_variant#x as variant) AS special_chars_variant#x] + +- Project [parse_json({ "price": 30 }, true) AS int_price_variant#x, parse_json({ "price": 12345.678 }, true) AS double_price_variant#x, parse_json({ "name": "John", "age": 30, "city": "New York", "active": true, "scores": [85, 92, 78] }, true) AS multi_field_variant#x, parse_json({ "item": [ { "model" : "basic", "price" : 6.12 }, { "model" : "medium", "price" : 9.24 } ] }, true) AS array_value_variant#x, parse_json([{ "id": 1, "name": "Alice" }, { "id": 2, "name": "Bob" }, { "id": 3, "name": "Charlie" }], true) AS array_variant#x, parse_json({ "metadata": { "version": "1.0", "tags": ["important", "urgent"], "nested": { "level": 2, "value": "deep" } } }, true) AS nested_variant#x, parse_json({ "field-name": "value1", "field.name": "value2", "field_name": "value3" }, true) AS special_chars_variant#x] + +- OneRowRelation + + +-- !query +select special_chars_variant:field_name::string from variant_test_data +-- !query analysis +Project [cast(variant_get(special_chars_variant#x, $.field_name, VariantType, true, 
Some(America/Los_Angeles)) as string) AS field_name#x] ++- SubqueryAlias variant_test_data + +- View (`variant_test_data`, [int_price_variant#x, double_price_variant#x, multi_field_variant#x, array_value_variant#x, array_variant#x, nested_variant#x, special_chars_variant#x]) + +- Project [cast(int_price_variant#x as variant) AS int_price_variant#x, cast(double_price_variant#x as variant) AS double_price_variant#x, cast(multi_field_variant#x as variant) AS multi_field_variant#x, cast(array_value_variant#x as variant) AS array_value_variant#x, cast(array_variant#x as variant) AS array_variant#x, cast(nested_variant#x as variant) AS nested_variant#x, cast(special_chars_variant#x as variant) AS special_chars_variant#x] + +- Project [parse_json({ "price": 30 }, true) AS int_price_variant#x, parse_json({ "price": 12345.678 }, true) AS double_price_variant#x, parse_json({ "name": "John", "age": 30, "city": "New York", "active": true, "scores": [85, 92, 78] }, true) AS multi_field_variant#x, parse_json({ "item": [ { "model" : "basic", "price" : 6.12 }, { "model" : "medium", "price" : 9.24 } ] }, true) AS array_value_variant#x, parse_json([{ "id": 1, "name": "Alice" }, { "id": 2, "name": "Bob" }, { "id": 3, "name": "Charlie" }], true) AS array_variant#x, parse_json({ "metadata": { "version": "1.0", "tags": ["important", "urgent"], "nested": { "level": 2, "value": "deep" } } }, true) AS nested_variant#x, parse_json({ "field-name": "value1", "field.name": "value2", "field_name": "value3" }, true) AS special_chars_variant#x] + +- OneRowRelation + + +-- !query +select multi_field_variant:scores[0]::int + multi_field_variant:scores[1]::int from variant_test_data +-- !query analysis +Project [(cast(variant_get(multi_field_variant#x, $.scores[0], VariantType, true, Some(America/Los_Angeles)) as int) + cast(variant_get(multi_field_variant#x, $.scores[1], VariantType, true, Some(America/Los_Angeles)) as int)) AS (CAST(variant_get(multi_field_variant, $.scores[0]) AS scores AS INT) + 
CAST(variant_get(multi_field_variant, $.scores[1]) AS scores AS INT))#x] ++- SubqueryAlias variant_test_data + +- View (`variant_test_data`, [int_price_variant#x, double_price_variant#x, multi_field_variant#x, array_value_variant#x, array_variant#x, nested_variant#x, special_chars_variant#x]) + +- Project [cast(int_price_variant#x as variant) AS int_price_variant#x, cast(double_price_variant#x as variant) AS double_price_variant#x, cast(multi_field_variant#x as variant) AS multi_field_variant#x, cast(array_value_variant#x as variant) AS array_value_variant#x, cast(array_variant#x as variant) AS array_variant#x, cast(nested_variant#x as variant) AS nested_variant#x, cast(special_chars_variant#x as variant) AS special_chars_variant#x] + +- Project [parse_json({ "price": 30 }, true) AS int_price_variant#x, parse_json({ "price": 12345.678 }, true) AS double_price_variant#x, parse_json({ "name": "John", "age": 30, "city": "New York", "active": true, "scores": [85, 92, 78] }, true) AS multi_field_variant#x, parse_json({ "item": [ { "model" : "basic", "price" : 6.12 }, { "model" : "medium", "price" : 9.24 } ] }, true) AS array_value_variant#x, parse_json([{ "id": 1, "name": "Alice" }, { "id": 2, "name": "Bob" }, { "id": 3, "name": "Charlie" }], true) AS array_variant#x, parse_json({ "metadata": { "version": "1.0", "tags": ["important", "urgent"], "nested": { "level": 2, "value": "deep" } } }, true) AS nested_variant#x, parse_json({ "field-name": "value1", "field.name": "value2", "field_name": "value3" }, true) AS special_chars_variant#x] + +- OneRowRelation + + +-- !query +select count(*) from (select explode(cast(multi_field_variant:scores as array)) as score from variant_test_data) +-- !query analysis +Aggregate [count(1) AS count(1)#xL] ++- SubqueryAlias __auto_generated_subquery_name + +- Project [score#x] + +- Generate explode(cast(variant_get(multi_field_variant#x, $.scores, VariantType, true, Some(America/Los_Angeles)) as array)), false, [score#x] + +- 
SubqueryAlias variant_test_data + +- View (`variant_test_data`, [int_price_variant#x, double_price_variant#x, multi_field_variant#x, array_value_variant#x, array_variant#x, nested_variant#x, special_chars_variant#x]) + +- Project [cast(int_price_variant#x as variant) AS int_price_variant#x, cast(double_price_variant#x as variant) AS double_price_variant#x, cast(multi_field_variant#x as variant) AS multi_field_variant#x, cast(array_value_variant#x as variant) AS array_value_variant#x, cast(array_variant#x as variant) AS array_variant#x, cast(nested_variant#x as variant) AS nested_variant#x, cast(special_chars_variant#x as variant) AS special_chars_variant#x] + +- Project [parse_json({ "price": 30 }, true) AS int_price_variant#x, parse_json({ "price": 12345.678 }, true) AS double_price_variant#x, parse_json({ "name": "John", "age": 30, "city": "New York", "active": true, "scores": [85, 92, 78] }, true) AS multi_field_variant#x, parse_json({ "item": [ { "model" : "basic", "price" : 6.12 }, { "model" : "medium", "price" : 9.24 } ] }, true) AS array_value_variant#x, parse_json([{ "id": 1, "name": "Alice" }, { "id": 2, "name": "Bob" }, { "id": 3, "name": "Charlie" }], true) AS array_variant#x, parse_json({ "metadata": { "version": "1.0", "tags": ["important", "urgent"], "nested": { "level": 2, "value": "deep" } } }, true) AS nested_variant#x, parse_json({ "field-name": "value1", "field.name": "value2", "field_name": "value3" }, true) AS special_chars_variant#x] + +- OneRowRelation + + +-- !query +select * from variant_test_data +-- !query analysis +Project [int_price_variant#x, double_price_variant#x, multi_field_variant#x, array_value_variant#x, array_variant#x, nested_variant#x, special_chars_variant#x] ++- SubqueryAlias variant_test_data + +- View (`variant_test_data`, [int_price_variant#x, double_price_variant#x, multi_field_variant#x, array_value_variant#x, array_variant#x, nested_variant#x, special_chars_variant#x]) + +- Project [cast(int_price_variant#x as 
variant) AS int_price_variant#x, cast(double_price_variant#x as variant) AS double_price_variant#x, cast(multi_field_variant#x as variant) AS multi_field_variant#x, cast(array_value_variant#x as variant) AS array_value_variant#x, cast(array_variant#x as variant) AS array_variant#x, cast(nested_variant#x as variant) AS nested_variant#x, cast(special_chars_variant#x as variant) AS special_chars_variant#x] + +- Project [parse_json({ "price": 30 }, true) AS int_price_variant#x, parse_json({ "price": 12345.678 }, true) AS double_price_variant#x, parse_json({ "name": "John", "age": 30, "city": "New York", "active": true, "scores": [85, 92, 78] }, true) AS multi_field_variant#x, parse_json({ "item": [ { "model" : "basic", "price" : 6.12 }, { "model" : "medium", "price" : 9.24 } ] }, true) AS array_value_variant#x, parse_json([{ "id": 1, "name": "Alice" }, { "id": 2, "name": "Bob" }, { "id": 3, "name": "Charlie" }], true) AS array_variant#x, parse_json({ "metadata": { "version": "1.0", "tags": ["important", "urgent"], "nested": { "level": 2, "value": "deep" } } }, true) AS nested_variant#x, parse_json({ "field-name": "value1", "field.name": "value2", "field_name": "value3" }, true) AS special_chars_variant#x] + +- OneRowRelation + + +-- !query +select multi_field_variant:* from variant_test_data +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'*'", + "hint" : "" + } +} + + +-- !query +select typeof(multi_field_variant:name) from variant_test_data +-- !query analysis +Project [typeof(variant_get(multi_field_variant#x, $.name, VariantType, true, Some(America/Los_Angeles))) AS typeof(variant_get(multi_field_variant, $.name) AS name)#x] ++- SubqueryAlias variant_test_data + +- View (`variant_test_data`, [int_price_variant#x, double_price_variant#x, multi_field_variant#x, array_value_variant#x, array_variant#x, nested_variant#x, 
special_chars_variant#x]) + +- Project [cast(int_price_variant#x as variant) AS int_price_variant#x, cast(double_price_variant#x as variant) AS double_price_variant#x, cast(multi_field_variant#x as variant) AS multi_field_variant#x, cast(array_value_variant#x as variant) AS array_value_variant#x, cast(array_variant#x as variant) AS array_variant#x, cast(nested_variant#x as variant) AS nested_variant#x, cast(special_chars_variant#x as variant) AS special_chars_variant#x] + +- Project [parse_json({ "price": 30 }, true) AS int_price_variant#x, parse_json({ "price": 12345.678 }, true) AS double_price_variant#x, parse_json({ "name": "John", "age": 30, "city": "New York", "active": true, "scores": [85, 92, 78] }, true) AS multi_field_variant#x, parse_json({ "item": [ { "model" : "basic", "price" : 6.12 }, { "model" : "medium", "price" : 9.24 } ] }, true) AS array_value_variant#x, parse_json([{ "id": 1, "name": "Alice" }, { "id": 2, "name": "Bob" }, { "id": 3, "name": "Charlie" }], true) AS array_variant#x, parse_json({ "metadata": { "version": "1.0", "tags": ["important", "urgent"], "nested": { "level": 2, "value": "deep" } } }, true) AS nested_variant#x, parse_json({ "field-name": "value1", "field.name": "value2", "field_name": "value3" }, true) AS special_chars_variant#x] + +- OneRowRelation + + +-- !query +select typeof(multi_field_variant:age) from variant_test_data +-- !query analysis +Project [typeof(variant_get(multi_field_variant#x, $.age, VariantType, true, Some(America/Los_Angeles))) AS typeof(variant_get(multi_field_variant, $.age) AS age)#x] ++- SubqueryAlias variant_test_data + +- View (`variant_test_data`, [int_price_variant#x, double_price_variant#x, multi_field_variant#x, array_value_variant#x, array_variant#x, nested_variant#x, special_chars_variant#x]) + +- Project [cast(int_price_variant#x as variant) AS int_price_variant#x, cast(double_price_variant#x as variant) AS double_price_variant#x, cast(multi_field_variant#x as variant) AS 
multi_field_variant#x, cast(array_value_variant#x as variant) AS array_value_variant#x, cast(array_variant#x as variant) AS array_variant#x, cast(nested_variant#x as variant) AS nested_variant#x, cast(special_chars_variant#x as variant) AS special_chars_variant#x] + +- Project [parse_json({ "price": 30 }, true) AS int_price_variant#x, parse_json({ "price": 12345.678 }, true) AS double_price_variant#x, parse_json({ "name": "John", "age": 30, "city": "New York", "active": true, "scores": [85, 92, 78] }, true) AS multi_field_variant#x, parse_json({ "item": [ { "model" : "basic", "price" : 6.12 }, { "model" : "medium", "price" : 9.24 } ] }, true) AS array_value_variant#x, parse_json([{ "id": 1, "name": "Alice" }, { "id": 2, "name": "Bob" }, { "id": 3, "name": "Charlie" }], true) AS array_variant#x, parse_json({ "metadata": { "version": "1.0", "tags": ["important", "urgent"], "nested": { "level": 2, "value": "deep" } } }, true) AS nested_variant#x, parse_json({ "field-name": "value1", "field.name": "value2", "field_name": "value3" }, true) AS special_chars_variant#x] + +- OneRowRelation + + +-- !query +select typeof(multi_field_variant:active) from variant_test_data +-- !query analysis +Project [typeof(variant_get(multi_field_variant#x, $.active, VariantType, true, Some(America/Los_Angeles))) AS typeof(variant_get(multi_field_variant, $.active) AS active)#x] ++- SubqueryAlias variant_test_data + +- View (`variant_test_data`, [int_price_variant#x, double_price_variant#x, multi_field_variant#x, array_value_variant#x, array_variant#x, nested_variant#x, special_chars_variant#x]) + +- Project [cast(int_price_variant#x as variant) AS int_price_variant#x, cast(double_price_variant#x as variant) AS double_price_variant#x, cast(multi_field_variant#x as variant) AS multi_field_variant#x, cast(array_value_variant#x as variant) AS array_value_variant#x, cast(array_variant#x as variant) AS array_variant#x, cast(nested_variant#x as variant) AS nested_variant#x, 
cast(special_chars_variant#x as variant) AS special_chars_variant#x] + +- Project [parse_json({ "price": 30 }, true) AS int_price_variant#x, parse_json({ "price": 12345.678 }, true) AS double_price_variant#x, parse_json({ "name": "John", "age": 30, "city": "New York", "active": true, "scores": [85, 92, 78] }, true) AS multi_field_variant#x, parse_json({ "item": [ { "model" : "basic", "price" : 6.12 }, { "model" : "medium", "price" : 9.24 } ] }, true) AS array_value_variant#x, parse_json([{ "id": 1, "name": "Alice" }, { "id": 2, "name": "Bob" }, { "id": 3, "name": "Charlie" }], true) AS array_variant#x, parse_json({ "metadata": { "version": "1.0", "tags": ["important", "urgent"], "nested": { "level": 2, "value": "deep" } } }, true) AS nested_variant#x, parse_json({ "field-name": "value1", "field.name": "value2", "field_name": "value3" }, true) AS special_chars_variant#x] + +- OneRowRelation + + +-- !query +select typeof(multi_field_variant:scores) from variant_test_data +-- !query analysis +Project [typeof(variant_get(multi_field_variant#x, $.scores, VariantType, true, Some(America/Los_Angeles))) AS typeof(variant_get(multi_field_variant, $.scores) AS scores)#x] ++- SubqueryAlias variant_test_data + +- View (`variant_test_data`, [int_price_variant#x, double_price_variant#x, multi_field_variant#x, array_value_variant#x, array_variant#x, nested_variant#x, special_chars_variant#x]) + +- Project [cast(int_price_variant#x as variant) AS int_price_variant#x, cast(double_price_variant#x as variant) AS double_price_variant#x, cast(multi_field_variant#x as variant) AS multi_field_variant#x, cast(array_value_variant#x as variant) AS array_value_variant#x, cast(array_variant#x as variant) AS array_variant#x, cast(nested_variant#x as variant) AS nested_variant#x, cast(special_chars_variant#x as variant) AS special_chars_variant#x] + +- Project [parse_json({ "price": 30 }, true) AS int_price_variant#x, parse_json({ "price": 12345.678 }, true) AS double_price_variant#x, 
parse_json({ "name": "John", "age": 30, "city": "New York", "active": true, "scores": [85, 92, 78] }, true) AS multi_field_variant#x, parse_json({ "item": [ { "model" : "basic", "price" : 6.12 }, { "model" : "medium", "price" : 9.24 } ] }, true) AS array_value_variant#x, parse_json([{ "id": 1, "name": "Alice" }, { "id": 2, "name": "Bob" }, { "id": 3, "name": "Charlie" }], true) AS array_variant#x, parse_json({ "metadata": { "version": "1.0", "tags": ["important", "urgent"], "nested": { "level": 2, "value": "deep" } } }, true) AS nested_variant#x, parse_json({ "field-name": "value1", "field.name": "value2", "field_name": "value3" }, true) AS special_chars_variant#x] + +- OneRowRelation + + +-- !query +select isnull(multi_field_variant:missing_field) from variant_test_data +-- !query analysis +Project [isnull(variant_get(multi_field_variant#x, $.missing_field, VariantType, true, Some(America/Los_Angeles))) AS (variant_get(multi_field_variant, $.missing_field) AS missing_field IS NULL)#x] ++- SubqueryAlias variant_test_data + +- View (`variant_test_data`, [int_price_variant#x, double_price_variant#x, multi_field_variant#x, array_value_variant#x, array_variant#x, nested_variant#x, special_chars_variant#x]) + +- Project [cast(int_price_variant#x as variant) AS int_price_variant#x, cast(double_price_variant#x as variant) AS double_price_variant#x, cast(multi_field_variant#x as variant) AS multi_field_variant#x, cast(array_value_variant#x as variant) AS array_value_variant#x, cast(array_variant#x as variant) AS array_variant#x, cast(nested_variant#x as variant) AS nested_variant#x, cast(special_chars_variant#x as variant) AS special_chars_variant#x] + +- Project [parse_json({ "price": 30 }, true) AS int_price_variant#x, parse_json({ "price": 12345.678 }, true) AS double_price_variant#x, parse_json({ "name": "John", "age": 30, "city": "New York", "active": true, "scores": [85, 92, 78] }, true) AS multi_field_variant#x, parse_json({ "item": [ { "model" : "basic", "price" : 
6.12 }, { "model" : "medium", "price" : 9.24 } ] }, true) AS array_value_variant#x, parse_json([{ "id": 1, "name": "Alice" }, { "id": 2, "name": "Bob" }, { "id": 3, "name": "Charlie" }], true) AS array_variant#x, parse_json({ "metadata": { "version": "1.0", "tags": ["important", "urgent"], "nested": { "level": 2, "value": "deep" } } }, true) AS nested_variant#x, parse_json({ "field-name": "value1", "field.name": "value2", "field_name": "value3" }, true) AS special_chars_variant#x] + +- OneRowRelation + + +-- !query +select isnotnull(multi_field_variant:name) from variant_test_data +-- !query analysis +Project [isnotnull(variant_get(multi_field_variant#x, $.name, VariantType, true, Some(America/Los_Angeles))) AS (variant_get(multi_field_variant, $.name) AS name IS NOT NULL)#x] ++- SubqueryAlias variant_test_data + +- View (`variant_test_data`, [int_price_variant#x, double_price_variant#x, multi_field_variant#x, array_value_variant#x, array_variant#x, nested_variant#x, special_chars_variant#x]) + +- Project [cast(int_price_variant#x as variant) AS int_price_variant#x, cast(double_price_variant#x as variant) AS double_price_variant#x, cast(multi_field_variant#x as variant) AS multi_field_variant#x, cast(array_value_variant#x as variant) AS array_value_variant#x, cast(array_variant#x as variant) AS array_variant#x, cast(nested_variant#x as variant) AS nested_variant#x, cast(special_chars_variant#x as variant) AS special_chars_variant#x] + +- Project [parse_json({ "price": 30 }, true) AS int_price_variant#x, parse_json({ "price": 12345.678 }, true) AS double_price_variant#x, parse_json({ "name": "John", "age": 30, "city": "New York", "active": true, "scores": [85, 92, 78] }, true) AS multi_field_variant#x, parse_json({ "item": [ { "model" : "basic", "price" : 6.12 }, { "model" : "medium", "price" : 9.24 } ] }, true) AS array_value_variant#x, parse_json([{ "id": 1, "name": "Alice" }, { "id": 2, "name": "Bob" }, { "id": 3, "name": "Charlie" }], true) AS array_variant#x, 
parse_json({ "metadata": { "version": "1.0", "tags": ["important", "urgent"], "nested": { "level": 2, "value": "deep" } } }, true) AS nested_variant#x, parse_json({ "field-name": "value1", "field.name": "value2", "field_name": "value3" }, true) AS special_chars_variant#x] + +- OneRowRelation + + +-- !query +select coalesce(multi_field_variant:missing_field, 'default_value') from variant_test_data +-- !query analysis +Project [coalesce(variant_get(multi_field_variant#x, $.missing_field, VariantType, true, Some(America/Los_Angeles)), cast(default_value as variant)) AS coalesce(variant_get(multi_field_variant, $.missing_field) AS missing_field, default_value)#x] ++- SubqueryAlias variant_test_data + +- View (`variant_test_data`, [int_price_variant#x, double_price_variant#x, multi_field_variant#x, array_value_variant#x, array_variant#x, nested_variant#x, special_chars_variant#x]) + +- Project [cast(int_price_variant#x as variant) AS int_price_variant#x, cast(double_price_variant#x as variant) AS double_price_variant#x, cast(multi_field_variant#x as variant) AS multi_field_variant#x, cast(array_value_variant#x as variant) AS array_value_variant#x, cast(array_variant#x as variant) AS array_variant#x, cast(nested_variant#x as variant) AS nested_variant#x, cast(special_chars_variant#x as variant) AS special_chars_variant#x] + +- Project [parse_json({ "price": 30 }, true) AS int_price_variant#x, parse_json({ "price": 12345.678 }, true) AS double_price_variant#x, parse_json({ "name": "John", "age": 30, "city": "New York", "active": true, "scores": [85, 92, 78] }, true) AS multi_field_variant#x, parse_json({ "item": [ { "model" : "basic", "price" : 6.12 }, { "model" : "medium", "price" : 9.24 } ] }, true) AS array_value_variant#x, parse_json([{ "id": 1, "name": "Alice" }, { "id": 2, "name": "Bob" }, { "id": 3, "name": "Charlie" }], true) AS array_variant#x, parse_json({ "metadata": { "version": "1.0", "tags": ["important", "urgent"], "nested": { "level": 2, "value": "deep" } 
} }, true) AS nested_variant#x, parse_json({ "field-name": "value1", "field.name": "value2", "field_name": "value3" }, true) AS special_chars_variant#x] + +- OneRowRelation diff --git a/sql/core/src/test/resources/sql-tests/inputs/variant-field-extractions.sql b/sql/core/src/test/resources/sql-tests/inputs/variant-field-extractions.sql new file mode 100644 index 0000000000000..693fdacf2990c --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/variant-field-extractions.sql @@ -0,0 +1,66 @@ +-- Create temp view with Variant columns for testing field extraction and type casting. +CREATE TEMP VIEW variant_test_data AS +SELECT + parse_json('{ "price": 30 }') as int_price_variant, + parse_json('{ "price": 12345.678 }') as double_price_variant, + parse_json('{ "name": "John", "age": 30, "city": "New York", "active": true, "scores": [85, 92, 78] }') as multi_field_variant, + parse_json('{ "item": [ { "model" : "basic", "price" : 6.12 }, { "model" : "medium", "price" : 9.24 } ] }') as array_value_variant, + parse_json('[{ "id": 1, "name": "Alice" }, { "id": 2, "name": "Bob" }, { "id": 3, "name": "Charlie" }]') as array_variant, + parse_json('{ "metadata": { "version": "1.0", "tags": ["important", "urgent"], "nested": { "level": 2, "value": "deep" } } }') as nested_variant, + parse_json('{ "field-name": "value1", "field.name": "value2", "field_name": "value3" }') as special_chars_variant; + +-- Single field extraction and type casting. +select int_price_variant:price from variant_test_data; +select int_price_variant:price::decimal(5, 2) from variant_test_data; +select int_price_variant:price::string from variant_test_data; + +-- Casting to a decimal type too narrow for the value -- will throw an error. +select double_price_variant:price::decimal(3, 2) from variant_test_data; + +-- Multi-field access. 
+select multi_field_variant:name, multi_field_variant:age, multi_field_variant:city from variant_test_data; +select multi_field_variant:name::string, multi_field_variant:age::int, multi_field_variant:active::boolean from variant_test_data; + +-- Array value access. +select array_value_variant:item[0].model from variant_test_data; +select array_value_variant:item[0].price from variant_test_data; +select array_value_variant:item[1].model from variant_test_data; +select array_value_variant:item[1].price from variant_test_data; + +-- Array access. +select array_variant:[0].id from variant_test_data; +select array_variant:[0].name from variant_test_data; +select array_variant:[1].id from variant_test_data; +select array_variant:[1].name from variant_test_data; + +-- Nested field access. +select nested_variant:metadata.version from variant_test_data; +select nested_variant:metadata.tags[0] from variant_test_data; +select nested_variant:metadata.nested.level from variant_test_data; +select nested_variant:metadata.nested.value from variant_test_data; + +-- Special characters. +select special_chars_variant:`field-name`::string from variant_test_data; +-- Keys containing a dot are not supported, even when backquoted; will return NULL. +select special_chars_variant:`field.name`::string from variant_test_data; +select special_chars_variant:field_name::string from variant_test_data; + +-- Array operations on Variant arrays. +select multi_field_variant:scores[0]::int + multi_field_variant:scores[1]::int from variant_test_data; +select count(*) from (select explode(cast(multi_field_variant:scores as array)) as score from variant_test_data); + +-- ASTERISK syntax. +select * from variant_test_data; +-- A wildcard path is not supported; will throw a parse error. +select multi_field_variant:* from variant_test_data; + +-- Type checking: The results of the following are all 'variant'. 
+select typeof(multi_field_variant:name) from variant_test_data; +select typeof(multi_field_variant:age) from variant_test_data; +select typeof(multi_field_variant:active) from variant_test_data; +select typeof(multi_field_variant:scores) from variant_test_data; + +-- Variant field access with NULL handling. +select isnull(multi_field_variant:missing_field) from variant_test_data; +select isnotnull(multi_field_variant:name) from variant_test_data; +select coalesce(multi_field_variant:missing_field, 'default_value') from variant_test_data; diff --git a/sql/core/src/test/resources/sql-tests/results/variant-field-extractions.sql.out b/sql/core/src/test/resources/sql-tests/results/variant-field-extractions.sql.out new file mode 100644 index 0000000000000..d4f9dc2c35c9b --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/variant-field-extractions.sql.out @@ -0,0 +1,287 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +CREATE TEMP VIEW variant_test_data AS +SELECT + parse_json('{ "price": 30 }') as int_price_variant, + parse_json('{ "price": 12345.678 }') as double_price_variant, + parse_json('{ "name": "John", "age": 30, "city": "New York", "active": true, "scores": [85, 92, 78] }') as multi_field_variant, + parse_json('{ "item": [ { "model" : "basic", "price" : 6.12 }, { "model" : "medium", "price" : 9.24 } ] }') as array_value_variant, + parse_json('[{ "id": 1, "name": "Alice" }, { "id": 2, "name": "Bob" }, { "id": 3, "name": "Charlie" }]') as array_variant, + parse_json('{ "metadata": { "version": "1.0", "tags": ["important", "urgent"], "nested": { "level": 2, "value": "deep" } } }') as nested_variant, + parse_json('{ "field-name": "value1", "field.name": "value2", "field_name": "value3" }') as special_chars_variant +-- !query schema +struct<> +-- !query output + + + +-- !query +select int_price_variant:price from variant_test_data +-- !query schema +struct +-- !query output +30 + + +-- !query +select int_price_variant:price::decimal(5, 2) 
from variant_test_data +-- !query schema +struct +-- !query output +30.00 + + +-- !query +select int_price_variant:price::string from variant_test_data +-- !query schema +struct +-- !query output +30 + + +-- !query +select double_price_variant:price::decimal(3, 2) from variant_test_data +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkRuntimeException +{ + "errorClass" : "INVALID_VARIANT_CAST", + "sqlState" : "22023", + "messageParameters" : { + "dataType" : "\"DECIMAL(3,2)\"", + "value" : "12345.678" + } +} + + +-- !query +select multi_field_variant:name, multi_field_variant:age, multi_field_variant:city from variant_test_data +-- !query schema +struct +-- !query output +"John" 30 "New York" + + +-- !query +select multi_field_variant:name::string, multi_field_variant:age::int, multi_field_variant:active::boolean from variant_test_data +-- !query schema +struct +-- !query output +John 30 true + + +-- !query +select array_value_variant:item[0].model from variant_test_data +-- !query schema +struct +-- !query output +"basic" + + +-- !query +select array_value_variant:item[0].price from variant_test_data +-- !query schema +struct +-- !query output +6.12 + + +-- !query +select array_value_variant:item[1].model from variant_test_data +-- !query schema +struct +-- !query output +"medium" + + +-- !query +select array_value_variant:item[1].price from variant_test_data +-- !query schema +struct +-- !query output +9.24 + + +-- !query +select array_variant:[0].id from variant_test_data +-- !query schema +struct +-- !query output +1 + + +-- !query +select array_variant:[0].name from variant_test_data +-- !query schema +struct +-- !query output +"Alice" + + +-- !query +select array_variant:[1].id from variant_test_data +-- !query schema +struct +-- !query output +2 + + +-- !query +select array_variant:[1].name from variant_test_data +-- !query schema +struct +-- !query output +"Bob" + + +-- !query +select nested_variant:metadata.version from 
variant_test_data +-- !query schema +struct +-- !query output +"1.0" + + +-- !query +select nested_variant:metadata.tags[0] from variant_test_data +-- !query schema +struct +-- !query output +"important" + + +-- !query +select nested_variant:metadata.nested.level from variant_test_data +-- !query schema +struct +-- !query output +2 + + +-- !query +select nested_variant:metadata.nested.value from variant_test_data +-- !query schema +struct +-- !query output +"deep" + + +-- !query +select special_chars_variant:`field-name`::string from variant_test_data +-- !query schema +struct +-- !query output +value1 + + +-- !query +select special_chars_variant:`field.name`::string from variant_test_data +-- !query schema +struct +-- !query output +NULL + + +-- !query +select special_chars_variant:field_name::string from variant_test_data +-- !query schema +struct +-- !query output +value3 + + +-- !query +select multi_field_variant:scores[0]::int + multi_field_variant:scores[1]::int from variant_test_data +-- !query schema +struct<(CAST(variant_get(multi_field_variant, $.scores[0]) AS scores AS INT) + CAST(variant_get(multi_field_variant, $.scores[1]) AS scores AS INT)):int> +-- !query output +177 + + +-- !query +select count(*) from (select explode(cast(multi_field_variant:scores as array)) as score from variant_test_data) +-- !query schema +struct +-- !query output +3 + + +-- !query +select * from variant_test_data +-- !query schema +struct +-- !query output +{"price":30} {"price":12345.678} {"active":true,"age":30,"city":"New York","name":"John","scores":[85,92,78]} {"item":[{"model":"basic","price":6.12},{"model":"medium","price":9.24}]} [{"id":1,"name":"Alice"},{"id":2,"name":"Bob"},{"id":3,"name":"Charlie"}] {"metadata":{"nested":{"level":2,"value":"deep"},"tags":["important","urgent"],"version":"1.0"}} {"field-name":"value1","field.name":"value2","field_name":"value3"} + + +-- !query +select multi_field_variant:* from variant_test_data +-- !query schema +struct<> +-- 
!query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'*'", + "hint" : "" + } +} + + +-- !query +select typeof(multi_field_variant:name) from variant_test_data +-- !query schema +struct +-- !query output +variant + + +-- !query +select typeof(multi_field_variant:age) from variant_test_data +-- !query schema +struct +-- !query output +variant + + +-- !query +select typeof(multi_field_variant:active) from variant_test_data +-- !query schema +struct +-- !query output +variant + + +-- !query +select typeof(multi_field_variant:scores) from variant_test_data +-- !query schema +struct +-- !query output +variant + + +-- !query +select isnull(multi_field_variant:missing_field) from variant_test_data +-- !query schema +struct<(variant_get(multi_field_variant, $.missing_field) AS missing_field IS NULL):boolean> +-- !query output +true + + +-- !query +select isnotnull(multi_field_variant:name) from variant_test_data +-- !query schema +struct<(variant_get(multi_field_variant, $.name) AS name IS NOT NULL):boolean> +-- !query output +true + + +-- !query +select coalesce(multi_field_variant:missing_field, 'default_value') from variant_test_data +-- !query schema +struct +-- !query output +"default_value" diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala index 375eb55d2e482..89d34a3fa1977 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala @@ -28,7 +28,7 @@ import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.analysis.{Analyzer, EvalSubqueriesForTimeTravel, InvokeProcedures, ReplaceCharWithVarchar, ResolveDataSource, ResolveSessionCatalog, ResolveTranspose} import 
org.apache.spark.sql.catalyst.analysis.resolver.ResolverExtension import org.apache.spark.sql.catalyst.catalog.{ExternalCatalogWithListener, InvalidUDFClassException} -import org.apache.spark.sql.catalyst.expressions.Expression +import org.apache.spark.sql.catalyst.expressions.{Expression, ExtractSemiStructuredFields} import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.classic.{SparkSession, Strategy} @@ -133,6 +133,7 @@ class HiveSessionStateBuilder( new DetermineTableStats(session) +: new ResolveTranspose(session) +: new InvokeProcedures(session) +: + ExtractSemiStructuredFields +: customResolutionRules override val postHocResolutionRules: Seq[Rule[LogicalPlan]] =