From 74b47845ea08440058e5f93ff6135e94e78ae470 Mon Sep 17 00:00:00 2001 From: Takeshi Yamamuro <yamamuro@apache.org> Date: Sun, 6 Aug 2017 10:14:45 -0700 Subject: [PATCH] [SPARK-20963][SQL][FOLLOW-UP] Use UnresolvedSubqueryColumnAliases for visitTableName ## What changes were proposed in this pull request? This pr (follow-up of #18772) used `UnresolvedSubqueryColumnAliases` for `visitTableName` in `AstBuilder`, which is a new unresolved `LogicalPlan` implemented in #18185. ## How was this patch tested? Existing tests Author: Takeshi Yamamuro <yamamuro@apache.org> Closes #18857 from maropu/SPARK-20963-FOLLOWUP. --- .../sql/catalyst/analysis/Analyzer.scala | 20 +------------------ .../sql/catalyst/analysis/unresolved.scala | 11 +--------- .../sql/catalyst/parser/AstBuilder.scala | 13 ++---------- .../sql/catalyst/analysis/AnalysisSuite.scala | 14 +++++++------ .../sql/catalyst/parser/PlanParserSuite.scala | 6 ++++-- .../sql-tests/results/table-aliases.sql.out | 6 +++--- .../benchmark/TPCDSQueryBenchmark.scala | 4 ++-- .../apache/spark/sql/hive/test/TestHive.scala | 2 +- 8 files changed, 22 insertions(+), 54 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index a6d297cfd6..8628a89aa9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -592,25 +592,7 @@ class Analyzer( case u: UnresolvedRelation if !isRunningDirectlyOnFiles(u.tableIdentifier) => val defaultDatabase = AnalysisContext.get.defaultDatabase val foundRelation = lookupTableFromCatalog(u, defaultDatabase) - - // Add `Project` to rename output column names if a query has alias names: - // e.g., SELECT col1, col2 FROM testData AS t(col1, col2) - val relation = if (u.outputColumnNames.nonEmpty) { - val outputAttrs = foundRelation.output - // Checks if the number of the aliases equals to the number of columns in the table. - if (u.outputColumnNames.size != outputAttrs.size) { - u.failAnalysis(s"Number of column aliases does not match number of columns. " + - s"Table name: ${u.tableName}; number of column aliases: " + - s"${u.outputColumnNames.size}; number of columns: ${outputAttrs.size}.") - } - val aliases = outputAttrs.zip(u.outputColumnNames).map { - case (attr, name) => Alias(attr, name)() - } - Project(aliases, foundRelation) - } else { - foundRelation - } - resolveRelation(relation) + resolveRelation(foundRelation) // The view's child should be a logical plan parsed from the `desc.viewText`, the variable // `viewText` should be defined, or else we throw an error on the generation of the View // operator. diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala index b7a704dc84..d336f801d0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala @@ -37,19 +37,10 @@ class UnresolvedException[TreeType <: TreeNode[_]](tree: TreeType, function: Str /** * Holds the name of a relation that has yet to be looked up in a catalog. - * We could add alias names for columns in a relation: - * {{{ - * // Assign alias names - * SELECT col1, col2 FROM testData AS t(col1, col2); - * }}} * * @param tableIdentifier table name - * @param outputColumnNames alias names of columns. If these names given, an analyzer adds - * [[Project]] to rename the columns. */ -case class UnresolvedRelation( - tableIdentifier: TableIdentifier, - outputColumnNames: Seq[String] = Seq.empty) +case class UnresolvedRelation(tableIdentifier: TableIdentifier) extends LeafNode { /** Returns a `.` separated name for this relation. */ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 532d6ee3f4..22c5484b76 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -681,17 +681,8 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging */ override def visitTableName(ctx: TableNameContext): LogicalPlan = withOrigin(ctx) { val tableId = visitTableIdentifier(ctx.tableIdentifier) - val table = if (ctx.tableAlias.identifierList != null) { - UnresolvedRelation(tableId, visitIdentifierList(ctx.tableAlias.identifierList)) - } else { - UnresolvedRelation(tableId) - } - val tableWithAlias = if (ctx.tableAlias.strictIdentifier != null) { - SubqueryAlias(ctx.tableAlias.strictIdentifier.getText, table) - } else { - table - } - tableWithAlias.optionalMap(ctx.sample)(withSample) + val table = mayApplyAliasPlan(ctx.tableAlias, UnresolvedRelation(tableId)) + table.optionalMap(ctx.sample)(withSample) } /** diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala index 4195e955cb..e5fcd60b2d 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala @@ -457,18 +457,20 @@ class AnalysisSuite extends AnalysisTest with ShouldMatchers { test("SPARK-20841 Support table column aliases in FROM clause") { def tableColumnsWithAliases(outputNames: Seq[String]): LogicalPlan = { - SubqueryAlias("t", UnresolvedRelation(TableIdentifier("TaBlE3"), outputNames)) - .select(star()) + UnresolvedSubqueryColumnAliases( + outputNames, + SubqueryAlias("t", UnresolvedRelation(TableIdentifier("TaBlE3"))) + ).select(star()) } assertAnalysisSuccess(tableColumnsWithAliases("col1" :: "col2" :: "col3" :: "col4" :: Nil)) assertAnalysisError( tableColumnsWithAliases("col1" :: Nil), - Seq("Number of column aliases does not match number of columns. Table name: TaBlE3; " + - "number of column aliases: 1; number of columns: 4.")) + Seq("Number of column aliases does not match number of columns. " + + "Number of column aliases: 1; number of columns: 4.")) assertAnalysisError( tableColumnsWithAliases("col1" :: "col2" :: "col3" :: "col4" :: "col5" :: Nil), - Seq("Number of column aliases does not match number of columns. Table name: TaBlE3; " + - "number of column aliases: 5; number of columns: 4.")) + Seq("Number of column aliases does not match number of columns. " + + "Number of column aliases: 5; number of columns: 4.")) } test("SPARK-20962 Support subquery column aliases in FROM clause") { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala index 157d11dbd1..b0d2fb26a6 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala @@ -491,8 +491,10 @@ class PlanParserSuite extends AnalysisTest { test("SPARK-20841 Support table column aliases in FROM clause") { assertEqual( "SELECT * FROM testData AS t(col1, col2)", - SubqueryAlias("t", UnresolvedRelation(TableIdentifier("testData"), Seq("col1", "col2"))) - .select(star())) + UnresolvedSubqueryColumnAliases( + Seq("col1", "col2"), + SubqueryAlias("t", UnresolvedRelation(TableIdentifier("testData"))) + ).select(star())) } test("SPARK-20962 Support subquery column aliases in FROM clause") { diff --git a/sql/core/src/test/resources/sql-tests/results/table-aliases.sql.out b/sql/core/src/test/resources/sql-tests/results/table-aliases.sql.out index 53b069cd39..1a2bd5ea91 100644 --- a/sql/core/src/test/resources/sql-tests/results/table-aliases.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/table-aliases.sql.out @@ -42,7 +42,7 @@ SELECT * FROM testData AS t(col1, col2, col3) struct<> -- !query 4 output org.apache.spark.sql.AnalysisException -Number of column aliases does not match number of columns. Table name: testData; number of column aliases: 3; number of columns: 2.; line 1 pos 14 +Number of column aliases does not match number of columns. Number of column aliases: 3; number of columns: 2.; line 1 pos 14 -- !query 5 @@ -51,7 +51,7 @@ SELECT * FROM testData AS t(col1) struct<> -- !query 5 output org.apache.spark.sql.AnalysisException -Number of column aliases does not match number of columns. Table name: testData; number of column aliases: 1; number of columns: 2.; line 1 pos 14 +Number of column aliases does not match number of columns. Number of column aliases: 1; number of columns: 2.; line 1 pos 14 -- !query 6 @@ -60,7 +60,7 @@ SELECT a AS col1, b AS col2 FROM testData AS t(c, d) struct<> -- !query 6 output org.apache.spark.sql.AnalysisException -cannot resolve '`a`' given input columns: [t.c, t.d]; line 1 pos 7 +cannot resolve '`a`' given input columns: [c, d]; line 1 pos 7 -- !query 7 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala index 6a5b74b01d..d2d013682c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala @@ -74,13 +74,13 @@ object TPCDSQueryBenchmark { // per-row processing time for those cases. val queryRelations = scala.collection.mutable.HashSet[String]() spark.sql(queryString).queryExecution.logical.map { - case UnresolvedRelation(t: TableIdentifier, _) => + case UnresolvedRelation(t: TableIdentifier) => queryRelations.add(t.table) case lp: LogicalPlan => lp.expressions.foreach { _ foreach { case subquery: SubqueryExpression => subquery.plan.foreach { - case UnresolvedRelation(t: TableIdentifier, _) => + case UnresolvedRelation(t: TableIdentifier) => queryRelations.add(t.table) case _ => } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala index 3a5c0c397b..9e15baa4b2 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala @@ -554,7 +554,7 @@ private[hive] class TestHiveQueryExecution( // Make sure any test tables referenced are loaded. val referencedTables = describedTables ++ - logical.collect { case UnresolvedRelation(tableIdent, _) => tableIdent.table } + logical.collect { case UnresolvedRelation(tableIdent) => tableIdent.table } val resolver = sparkSession.sessionState.conf.resolver val referencedTestTables = sparkSession.testTables.keys.filter { testTable => referencedTables.exists(resolver(_, testTable)) -- GitLab