diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
index 5d4363f945bf86c868adec871b4bb912927d8e99..f741dcfbf2002ac6a08ac55b4f7bbcb694d6f01b 100644
--- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
+++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
@@ -81,6 +81,7 @@ statement
         (PARTITIONED BY partitionColumnNames=identifierList)?
         bucketSpec? locationSpec?
         (COMMENT comment=STRING)?
+        (TBLPROPERTIES tableProps=tablePropertyList)?
         (AS? query)?                                                   #createTable
     | createTableHeader ('(' columns=colTypeList ')')?
         (COMMENT comment=STRING)?
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
index 8379e740a0717e57aa8b6d78ea6bbc182acc921b..d3f6ab5654689b9e5fab5f96fae410408bde0362 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
@@ -385,7 +385,8 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) {
    *   ]
    *   [LOCATION path]
    *   [COMMENT table_comment]
-   *   [AS select_statement];
+   *   [TBLPROPERTIES (property_name=property_value, ...)]
+   *   [[AS] select_statement];
    * }}}
    */
   override def visitCreateTable(ctx: CreateTableContext): LogicalPlan = withOrigin(ctx) {
@@ -400,6 +401,7 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) {
       Option(ctx.partitionColumnNames)
         .map(visitIdentifierList(_).toArray)
         .getOrElse(Array.empty[String])
+    val properties = Option(ctx.tableProps).map(visitPropertyKeyValues).getOrElse(Map.empty)
     val bucketSpec = Option(ctx.bucketSpec()).map(visitBucketSpec)
     val location = Option(ctx.locationSpec).map(visitLocationSpec)
 
@@ -410,7 +412,7 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) {
         "LOCATION and 'path' in OPTIONS are both used to indicate the custom table path, " +
           "you can only specify one of them.", ctx)
     }
-    val customLocation = storage.locationUri.orElse(location.map(CatalogUtils.stringToURI(_)))
+    val customLocation = storage.locationUri.orElse(location.map(CatalogUtils.stringToURI))
 
     val tableType = if (customLocation.isDefined) {
       CatalogTableType.EXTERNAL
@@ -426,6 +428,7 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) {
       provider = Some(provider),
       partitionColumnNames = partitionColumnNames,
       bucketSpec = bucketSpec,
+      properties = properties,
       comment = Option(ctx.comment).map(string))
 
     // Determine the storage mode.
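The grammar and parser changes above add an optional TBLPROPERTIES clause to CREATE TABLE ... USING, with the parsed key/value pairs carried into the resulting CatalogTable. A minimal sketch of the user-facing behavior, assuming a local SparkSession on a build that includes this patch; the table name demo_tab and the property values are illustrative, not from the patch:

import org.apache.spark.sql.SparkSession

object TblPropertiesSketch {
  def main(args: Array[String]): Unit = {
    // Illustrative local session; any Spark build containing this patch works.
    val spark = SparkSession.builder()
      .appName("tblproperties-sketch")
      .master("local[*]")
      .getOrCreate()

    // The new clause parsed by visitCreateTable: the pairs land in
    // CatalogTable.properties, separate from the data source OPTIONS.
    spark.sql(
      """CREATE TABLE demo_tab (a INT, b STRING) USING parquet
        |TBLPROPERTIES ('t' = 'test')""".stripMargin)

    // DESCRIBE TABLE EXTENDED lists them under "Table Properties",
    // which is what the describe.sql golden files below now check.
    spark.sql("DESCRIBE TABLE EXTENDED demo_tab").show(100, truncate = false)

    spark.stop()
  }
}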
diff --git a/sql/core/src/test/resources/sql-tests/inputs/describe.sql b/sql/core/src/test/resources/sql-tests/inputs/describe.sql
index a222e11916cda9727120c61b054f05e6221b2bdc..f26d5efec076cce90fd434c02ac0a44535f9753f 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/describe.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/describe.sql
@@ -1,7 +1,8 @@
 CREATE TABLE t (a STRING, b INT, c STRING, d STRING) USING parquet
   OPTIONS (a '1', b '2')
   PARTITIONED BY (c, d) CLUSTERED BY (a) SORTED BY (b ASC) INTO 2 BUCKETS
-  COMMENT 'table_comment';
+  COMMENT 'table_comment'
+  TBLPROPERTIES (t 'test');
 
 CREATE TEMPORARY VIEW temp_v AS SELECT * FROM t;
 
diff --git a/sql/core/src/test/resources/sql-tests/results/cross-join.sql.out b/sql/core/src/test/resources/sql-tests/results/cross-join.sql.out
index e75cc4448a1ea8208644ee5b596cd82546ee4d40..3833c42bdfecf25765597133cfdc1352ad16ea8c 100644
--- a/sql/core/src/test/resources/sql-tests/results/cross-join.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/cross-join.sql.out
@@ -128,6 +128,7 @@ two	2	two	2
 one	1	two	2
 two	2	two	2
 three	3	two	2
 two	2	two	2
 two	2	two	2
+
 -- !query 12
 SELECT * FROM nt1 CROSS JOIN nt2 ON (nt1.k > nt2.k)
 -- !query 12 schema
diff --git a/sql/core/src/test/resources/sql-tests/results/describe.sql.out b/sql/core/src/test/resources/sql-tests/results/describe.sql.out
index b91f2c09f3cd4036c99da9470aa122e9b7b59138..8c908b7625056471695b6b4c57589ac199a0af56 100644
--- a/sql/core/src/test/resources/sql-tests/results/describe.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/describe.sql.out
@@ -7,6 +7,7 @@ CREATE TABLE t (a STRING, b INT, c STRING, d STRING) USING parquet
   OPTIONS (a '1', b '2')
   PARTITIONED BY (c, d) CLUSTERED BY (a) SORTED BY (b ASC) INTO 2 BUCKETS
   COMMENT 'table_comment'
+  TBLPROPERTIES (t 'test')
 -- !query 0 schema
 struct<>
 -- !query 0 output
@@ -129,7 +130,7 @@ Num Buckets	2
 Bucket Columns	[`a`]
 Sort Columns	[`b`]
 Comment	table_comment
-Table Properties	[e=3]
+Table Properties	[t=test, e=3]
 Location [not included in comparison]sql/core/spark-warehouse/t
 Storage Properties	[a=1, b=2]
 Partition Provider	Catalog
@@ -161,7 +162,7 @@ Num Buckets	2
 Bucket Columns	[`a`]
 Sort Columns	[`b`]
 Comment	table_comment
-Table Properties	[e=3]
+Table Properties	[t=test, e=3]
 Location [not included in comparison]sql/core/spark-warehouse/t
 Storage Properties	[a=1, b=2]
 Partition Provider	Catalog
@@ -201,6 +202,7 @@ Num Buckets	2
 Bucket Columns	[`a`]
 Sort Columns	[`b`]
 Comment	table_comment
+Table Properties	[t=test]
 Location [not included in comparison]sql/core/spark-warehouse/t
 Storage Properties	[a=1, b=2]
 Partition Provider	Catalog
@@ -239,6 +241,7 @@ Provider	parquet
 Num Buckets	2
 Bucket Columns	[`a`]
 Sort Columns	[`b`]
+Table Properties	[t=test]
 Location [not included in comparison]sql/core/spark-warehouse/t
 Storage Properties	[a=1, b=2]
 Partition Provider	Catalog
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/OptimizeMetadataOnlyQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/OptimizeMetadataOnlyQuerySuite.scala
index 223c3d7729a5075c8b0d620327acbd162406975c..78c1e5dae566dbf4bd948835fb5c9af96689a16c 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/OptimizeMetadataOnlyQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/OptimizeMetadataOnlyQuerySuite.scala
@@ -117,4 +117,12 @@ class OptimizeMetadataOnlyQuerySuite extends QueryTest with SharedSQLContext {
     "select partcol1, max(partcol2) from srcpart where partcol1 = 0 group by rollup (partcol1)",
     "select partcol2 from (select partcol2 from srcpart where partcol1 = 0 union all " +
       "select partcol2 from srcpart where partcol1 = 1) t group by partcol2")
+
+  test("SPARK-21884 Fix StackOverflowError on MetadataOnlyQuery") {
+    withTable("t_1000") {
+      sql("CREATE TABLE t_1000 (a INT, p INT) USING PARQUET PARTITIONED BY (p)")
+      (1 to 1000).foreach(p => sql(s"ALTER TABLE t_1000 ADD PARTITION (p=$p)"))
+      sql("SELECT COUNT(DISTINCT p) FROM t_1000").collect()
+    }
+  }
 }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala
index 70df7607a713f248ece0f4c5f03a24ff341c97af..4ee38215f5973a2c80b92adcd03a94376ab1f6b4 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala
@@ -473,6 +473,26 @@ class DDLParserSuite extends PlanTest with SharedSQLContext {
     }
   }
 
+  test("create table - with table properties") {
+    val sql = "CREATE TABLE my_tab(a INT, b STRING) USING parquet TBLPROPERTIES('test' = 'test')"
+
+    val expectedTableDesc = CatalogTable(
+      identifier = TableIdentifier("my_tab"),
+      tableType = CatalogTableType.MANAGED,
+      storage = CatalogStorageFormat.empty,
+      schema = new StructType().add("a", IntegerType).add("b", StringType),
+      provider = Some("parquet"),
+      properties = Map("test" -> "test"))
+
+    parser.parsePlan(sql) match {
+      case CreateTable(tableDesc, _, None) =>
+        assert(tableDesc == expectedTableDesc.copy(createTime = tableDesc.createTime))
+      case other =>
+        fail(s"Expected to parse ${classOf[CreateTableCommand].getClass.getName} from query," +
+          s"got ${other.getClass.getName}: $sql")
+    }
+  }
+
   test("create table - with location") {
     val v1 = "CREATE TABLE my_tab(a INT, b STRING) USING parquet LOCATION '/tmp/file'"
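The new OptimizeMetadataOnlyQuerySuite test above covers SPARK-21884, where planning a metadata-only aggregate over a table with many partitions could fail with a StackOverflowError. A standalone sketch of the same scenario, assuming a local session; spark.sql.optimizer.metadataOnly is enabled by default in this Spark line, and the explicit .config call below is only for clarity:

import org.apache.spark.sql.SparkSession

object MetadataOnlySketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("spark-21884-sketch")
      .master("local[*]")
      .config("spark.sql.optimizer.metadataOnly", "true")
      .getOrCreate()

    // Partitioned table with 1000 partitions, mirroring the regression test.
    spark.sql("CREATE TABLE t_1000 (a INT, p INT) USING PARQUET PARTITIONED BY (p)")
    (1 to 1000).foreach(p => spark.sql(s"ALTER TABLE t_1000 ADD PARTITION (p=$p)"))

    // COUNT(DISTINCT p) needs only partition metadata; before the fix this
    // query could overflow the stack during planning, afterwards it returns 1000.
    spark.sql("SELECT COUNT(DISTINCT p) FROM t_1000").show()

    spark.stop()
  }
}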