diff options
author | Xiao Li <gatorsmile@gmail.com> | 2017-04-03 23:30:12 -0700 |
---|---|---|
committer | Xiao Li <gatorsmile@gmail.com> | 2017-04-03 23:30:12 -0700 |
commit | 51d3c854c54369aec1bfd55cefcd080dcd178d5f (patch) | |
tree | 8c83dc8698f16b4af724fe4f8a1a7901224bdffb /sql/hive | |
parent | 3bfb639cb7352aec572ef6686d3471bd78748ffa (diff) | |
download | spark-51d3c854c54369aec1bfd55cefcd080dcd178d5f.tar.gz spark-51d3c854c54369aec1bfd55cefcd080dcd178d5f.tar.bz2 spark-51d3c854c54369aec1bfd55cefcd080dcd178d5f.zip |
[SPARK-20067][SQL] Unify and Clean Up Desc Commands Using Catalog Interface
### What changes were proposed in this pull request?
This PR is to unify and clean up the outputs of `DESC EXTENDED/FORMATTED` and `SHOW TABLE EXTENDED` by moving the logics into the Catalog interface. The output formats are improved. We also add the missing attributes. It impacts the DDL commands like `SHOW TABLE EXTENDED`, `DESC EXTENDED` and `DESC FORMATTED`.
In addition, by following what we did in Dataset API `printSchema`, we can use `treeString` to show the schema in the more readable way.
Below is the current way:
```
Schema: STRUCT<`a`: STRING (nullable = true), `b`: INT (nullable = true), `c`: STRING (nullable = true), `d`: STRING (nullable = true)>
```
After the change, it should look like
```
Schema: root
|-- a: string (nullable = true)
|-- b: integer (nullable = true)
|-- c: string (nullable = true)
|-- d: string (nullable = true)
```
### How was this patch tested?
`describe.sql` and `show-tables.sql`
Author: Xiao Li <gatorsmile@gmail.com>
Closes #17394 from gatorsmile/descFollowUp.
Diffstat (limited to 'sql/hive')
6 files changed, 32 insertions, 313 deletions
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala index 55e02acfa4..b554694815 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala @@ -767,9 +767,6 @@ class MetastoreDataSourcesSuite extends QueryTest with SQLTestUtils with TestHiv sessionState.refreshTable(tableName) val actualSchema = table(tableName).schema assert(schema === actualSchema) - - // Checks the DESCRIBE output. - checkAnswer(sql("DESCRIBE spark6655"), Row("int", "int", null) :: Nil) } } @@ -1381,7 +1378,10 @@ class MetastoreDataSourcesSuite extends QueryTest with SQLTestUtils with TestHiv checkAnswer(spark.table("old"), Row(1, "a")) - checkAnswer(sql("DESC old"), Row("i", "int", null) :: Row("j", "string", null) :: Nil) + val expectedSchema = StructType(Seq( + StructField("i", IntegerType, nullable = true), + StructField("j", StringType, nullable = true))) + assert(table("old").schema === expectedSchema) } } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala index 536ca8fd9d..e45cf977bf 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala @@ -207,6 +207,7 @@ abstract class HiveComparisonTest // This list contains indicators for those lines which do not have actual results and we // want to ignore. lazy val ignoredLineIndicators = Seq( + "# Detailed Table Information", "# Partition Information", "# col_name" ) @@ -358,7 +359,7 @@ abstract class HiveComparisonTest stringToFile(new File(failedDirectory, testCaseName), errorMessage + consoleTestCase) fail(errorMessage) } - }.toSeq + } (queryList, hiveResults, catalystResults).zipped.foreach { case (query, hive, (hiveQuery, catalyst)) => @@ -369,6 +370,7 @@ abstract class HiveComparisonTest if ((!hiveQuery.logical.isInstanceOf[ExplainCommand]) && (!hiveQuery.logical.isInstanceOf[ShowFunctionsCommand]) && (!hiveQuery.logical.isInstanceOf[DescribeFunctionCommand]) && + (!hiveQuery.logical.isInstanceOf[DescribeTableCommand]) && preparedHive != catalyst) { val hivePrintOut = s"== HIVE - ${preparedHive.size} row(s) ==" +: preparedHive diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index f0a995c274..3906968aaf 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -708,23 +708,6 @@ class HiveDDLSuite } } - test("desc table for Hive table") { - withTable("tab1") { - val tabName = "tab1" - sql(s"CREATE TABLE $tabName(c1 int)") - - assert(sql(s"DESC $tabName").collect().length == 1) - - assert( - sql(s"DESC FORMATTED $tabName").collect() - .exists(_.getString(0) == "# Storage Information")) - - assert( - sql(s"DESC EXTENDED $tabName").collect() - .exists(_.getString(0) == "# Detailed Table Information")) - } - } - test("desc table for Hive table - partitioned table") { withTable("tbl") { sql("CREATE TABLE tbl(a int) PARTITIONED BY (b int)") @@ -741,23 +724,6 @@ class HiveDDLSuite } } - test("desc formatted table for permanent view") { - withTable("tbl") { - withView("view1") { - sql("CREATE TABLE tbl(a int)") - sql("CREATE VIEW view1 AS SELECT * FROM tbl") - assert(sql("DESC FORMATTED view1").collect().containsSlice( - Seq( - Row("# View Information", "", ""), - Row("View Text:", "SELECT * FROM tbl", ""), - Row("View Default Database:", "default", ""), - Row("View Query Output Columns:", "[a]", "") - ) - )) - } - } - } - test("desc table for data source table using Hive Metastore") { assume(spark.sparkContext.conf.get(CATALOG_IMPLEMENTATION) == "hive") val tabName = "tab1" @@ -766,7 +732,7 @@ class HiveDDLSuite checkAnswer( sql(s"DESC $tabName").select("col_name", "data_type", "comment"), - Row("a", "int", "test") + Row("# col_name", "data_type", "comment") :: Row("a", "int", "test") :: Nil ) } } @@ -1218,23 +1184,6 @@ class HiveDDLSuite sql(s"SELECT * FROM ${targetTable.identifier}")) } - test("desc table for data source table") { - withTable("tab1") { - val tabName = "tab1" - spark.range(1).write.format("json").saveAsTable(tabName) - - assert(sql(s"DESC $tabName").collect().length == 1) - - assert( - sql(s"DESC FORMATTED $tabName").collect() - .exists(_.getString(0) == "# Storage Information")) - - assert( - sql(s"DESC EXTENDED $tabName").collect() - .exists(_.getString(0) == "# Detailed Table Information")) - } - } - test("create table with the same name as an index table") { val tabName = "tab1" val indexName = tabName + "_index" @@ -1320,46 +1269,6 @@ class HiveDDLSuite } } - test("desc table for data source table - partitioned bucketed table") { - withTable("t1") { - spark - .range(1).select('id as 'a, 'id as 'b, 'id as 'c, 'id as 'd).write - .bucketBy(2, "b").sortBy("c").partitionBy("d") - .saveAsTable("t1") - - val formattedDesc = sql("DESC FORMATTED t1").collect() - - assert(formattedDesc.containsSlice( - Seq( - Row("a", "bigint", null), - Row("b", "bigint", null), - Row("c", "bigint", null), - Row("d", "bigint", null), - Row("# Partition Information", "", ""), - Row("# col_name", "data_type", "comment"), - Row("d", "bigint", null), - Row("", "", ""), - Row("# Detailed Table Information", "", ""), - Row("Database:", "default", "") - ) - )) - - assert(formattedDesc.containsSlice( - Seq( - Row("Table Type:", "MANAGED", "") - ) - )) - - assert(formattedDesc.containsSlice( - Seq( - Row("Num Buckets:", "2", ""), - Row("Bucket Columns:", "[b]", ""), - Row("Sort Columns:", "[c]", "") - ) - )) - } - } - test("datasource and statistics table property keys are not allowed") { import org.apache.spark.sql.hive.HiveExternalCatalog.DATASOURCE_PREFIX import org.apache.spark.sql.hive.HiveExternalCatalog.STATISTICS_PREFIX diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveOperatorQueryableSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveOperatorQueryableSuite.scala deleted file mode 100644 index 0e89e990e5..0000000000 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveOperatorQueryableSuite.scala +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.hive.execution - -import org.apache.spark.sql.{QueryTest, Row} -import org.apache.spark.sql.hive.test.TestHiveSingleton - -/** - * A set of tests that validates commands can also be queried by like a table - */ -class HiveOperatorQueryableSuite extends QueryTest with TestHiveSingleton { - import spark._ - - test("SPARK-5324 query result of describe command") { - hiveContext.loadTestTable("src") - - // Creates a temporary view with the output of a describe command - sql("desc src").createOrReplaceTempView("mydesc") - checkAnswer( - sql("desc mydesc"), - Seq( - Row("col_name", "string", "name of the column"), - Row("data_type", "string", "data type of the column"), - Row("comment", "string", "comment of the column"))) - - checkAnswer( - sql("select * from mydesc"), - Seq( - Row("key", "int", null), - Row("value", "string", null))) - - checkAnswer( - sql("select col_name, data_type, comment from mydesc"), - Seq( - Row("key", "int", null), - Row("value", "string", null))) - } -} diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala index dd278f683a..65a902fc54 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala @@ -789,62 +789,6 @@ class HiveQuerySuite extends HiveComparisonTest with SQLTestUtils with BeforeAnd assert(Try(q0.count()).isSuccess) } - test("DESCRIBE commands") { - sql(s"CREATE TABLE test_describe_commands1 (key INT, value STRING) PARTITIONED BY (dt STRING)") - - sql( - """FROM src INSERT OVERWRITE TABLE test_describe_commands1 PARTITION (dt='2008-06-08') - |SELECT key, value - """.stripMargin) - - // Describe a table - assertResult( - Array( - Row("key", "int", null), - Row("value", "string", null), - Row("dt", "string", null), - Row("# Partition Information", "", ""), - Row("# col_name", "data_type", "comment"), - Row("dt", "string", null)) - ) { - sql("DESCRIBE test_describe_commands1") - .select('col_name, 'data_type, 'comment) - .collect() - } - - // Describe a table with a fully qualified table name - assertResult( - Array( - Row("key", "int", null), - Row("value", "string", null), - Row("dt", "string", null), - Row("# Partition Information", "", ""), - Row("# col_name", "data_type", "comment"), - Row("dt", "string", null)) - ) { - sql("DESCRIBE default.test_describe_commands1") - .select('col_name, 'data_type, 'comment) - .collect() - } - - // Describe a temporary view. - val testData = - TestHive.sparkContext.parallelize( - TestData(1, "str1") :: - TestData(1, "str2") :: Nil) - testData.toDF().createOrReplaceTempView("test_describe_commands2") - - assertResult( - Array( - Row("a", "int", null), - Row("b", "string", null)) - ) { - sql("DESCRIBE test_describe_commands2") - .select('col_name, 'data_type, 'comment) - .collect() - } - } - test("SPARK-2263: Insert Map<K, V> values") { sql("CREATE TABLE m(value MAP<INT, STRING>)") sql("INSERT OVERWRITE TABLE m SELECT MAP(key, value) FROM src LIMIT 10") diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index 55ff4bb115..d012797e19 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -363,79 +363,6 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton { } } - test("describe partition") { - withTable("partitioned_table") { - sql("CREATE TABLE partitioned_table (a STRING, b INT) PARTITIONED BY (c STRING, d STRING)") - sql("ALTER TABLE partitioned_table ADD PARTITION (c='Us', d=1)") - - checkKeywordsExist(sql("DESC partitioned_table PARTITION (c='Us', d=1)"), - "# Partition Information", - "# col_name") - - checkKeywordsExist(sql("DESC EXTENDED partitioned_table PARTITION (c='Us', d=1)"), - "# Partition Information", - "# col_name", - "Detailed Partition Information CatalogPartition(", - "Partition Values: [c=Us, d=1]", - "Storage(Location:", - "Partition Parameters") - - checkKeywordsExist(sql("DESC FORMATTED partitioned_table PARTITION (c='Us', d=1)"), - "# Partition Information", - "# col_name", - "# Detailed Partition Information", - "Partition Value:", - "Database:", - "Table:", - "Location:", - "Partition Parameters:", - "# Storage Information") - } - } - - test("describe partition - error handling") { - withTable("partitioned_table", "datasource_table") { - sql("CREATE TABLE partitioned_table (a STRING, b INT) PARTITIONED BY (c STRING, d STRING)") - sql("ALTER TABLE partitioned_table ADD PARTITION (c='Us', d=1)") - - val m = intercept[NoSuchPartitionException] { - sql("DESC partitioned_table PARTITION (c='Us', d=2)") - }.getMessage() - assert(m.contains("Partition not found in table")) - - val m2 = intercept[AnalysisException] { - sql("DESC partitioned_table PARTITION (c='Us')") - }.getMessage() - assert(m2.contains("Partition spec is invalid")) - - val m3 = intercept[ParseException] { - sql("DESC partitioned_table PARTITION (c='Us', d)") - }.getMessage() - assert(m3.contains("PARTITION specification is incomplete: `d`")) - - spark - .range(1).select('id as 'a, 'id as 'b, 'id as 'c, 'id as 'd).write - .partitionBy("d") - .saveAsTable("datasource_table") - - sql("DESC datasource_table PARTITION (d=0)") - - val m5 = intercept[AnalysisException] { - spark.range(10).select('id as 'a, 'id as 'b).createTempView("view1") - sql("DESC view1 PARTITION (c='Us', d=1)") - }.getMessage() - assert(m5.contains("DESC PARTITION is not allowed on a temporary view")) - - withView("permanent_view") { - val m = intercept[AnalysisException] { - sql("CREATE VIEW permanent_view AS SELECT * FROM partitioned_table") - sql("DESC permanent_view PARTITION (c='Us', d=1)") - }.getMessage() - assert(m.contains("DESC PARTITION is not allowed on a view")) - } - } - } - test("SPARK-5371: union with null and sum") { val df = Seq((1, 1)).toDF("c1", "c2") df.createOrReplaceTempView("table1") @@ -676,7 +603,7 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton { } test("CTAS with serde") { - sql("CREATE TABLE ctas1 AS SELECT key k, value FROM src ORDER BY k, value").collect() + sql("CREATE TABLE ctas1 AS SELECT key k, value FROM src ORDER BY k, value") sql( """CREATE TABLE ctas2 | ROW FORMAT SERDE "org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe" @@ -686,86 +613,76 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton { | AS | SELECT key, value | FROM src - | ORDER BY key, value""".stripMargin).collect() + | ORDER BY key, value""".stripMargin) + + val storageCtas2 = spark.sessionState.catalog.getTableMetadata(TableIdentifier("ctas2")).storage + assert(storageCtas2.inputFormat == Some("org.apache.hadoop.hive.ql.io.RCFileInputFormat")) + assert(storageCtas2.outputFormat == Some("org.apache.hadoop.hive.ql.io.RCFileOutputFormat")) + assert(storageCtas2.serde == Some("org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe")) + sql( """CREATE TABLE ctas3 | ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' LINES TERMINATED BY '\012' | STORED AS textfile AS | SELECT key, value | FROM src - | ORDER BY key, value""".stripMargin).collect() + | ORDER BY key, value""".stripMargin) // the table schema may like (key: integer, value: string) sql( """CREATE TABLE IF NOT EXISTS ctas4 AS - | SELECT 1 AS key, value FROM src LIMIT 1""".stripMargin).collect() + | SELECT 1 AS key, value FROM src LIMIT 1""".stripMargin) // do nothing cause the table ctas4 already existed. sql( """CREATE TABLE IF NOT EXISTS ctas4 AS - | SELECT key, value FROM src ORDER BY key, value""".stripMargin).collect() + | SELECT key, value FROM src ORDER BY key, value""".stripMargin) checkAnswer( sql("SELECT k, value FROM ctas1 ORDER BY k, value"), - sql("SELECT key, value FROM src ORDER BY key, value").collect().toSeq) + sql("SELECT key, value FROM src ORDER BY key, value")) checkAnswer( sql("SELECT key, value FROM ctas2 ORDER BY key, value"), sql( """ SELECT key, value FROM src - ORDER BY key, value""").collect().toSeq) + ORDER BY key, value""")) checkAnswer( sql("SELECT key, value FROM ctas3 ORDER BY key, value"), sql( """ SELECT key, value FROM src - ORDER BY key, value""").collect().toSeq) + ORDER BY key, value""")) intercept[AnalysisException] { sql( """CREATE TABLE ctas4 AS - | SELECT key, value FROM src ORDER BY key, value""".stripMargin).collect() + | SELECT key, value FROM src ORDER BY key, value""".stripMargin) } checkAnswer( sql("SELECT key, value FROM ctas4 ORDER BY key, value"), sql("SELECT key, value FROM ctas4 LIMIT 1").collect().toSeq) - /* - Disabled because our describe table does not output the serde information right now. - checkKeywordsExist(sql("DESC EXTENDED ctas2"), - "name:key", "type:string", "name:value", "ctas2", - "org.apache.hadoop.hive.ql.io.RCFileInputFormat", - "org.apache.hadoop.hive.ql.io.RCFileOutputFormat", - "org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe", - "serde_p1=p1", "serde_p2=p2", "tbl_p1=p11", "tbl_p2=p22", "MANAGED_TABLE" - ) - */ - sql( """CREATE TABLE ctas5 | STORED AS parquet AS | SELECT key, value | FROM src - | ORDER BY key, value""".stripMargin).collect() + | ORDER BY key, value""".stripMargin) + val storageCtas5 = spark.sessionState.catalog.getTableMetadata(TableIdentifier("ctas5")).storage + assert(storageCtas5.inputFormat == + Some("org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat")) + assert(storageCtas5.outputFormat == + Some("org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat")) + assert(storageCtas5.serde == + Some("org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe")) - /* - Disabled because our describe table does not output the serde information right now. - withSQLConf(HiveUtils.CONVERT_METASTORE_PARQUET.key -> "false") { - checkKeywordsExist(sql("DESC EXTENDED ctas5"), - "name:key", "type:string", "name:value", "ctas5", - "org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat", - "org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat", - "org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe", - "MANAGED_TABLE" - ) - } - */ // use the Hive SerDe for parquet tables withSQLConf(HiveUtils.CONVERT_METASTORE_PARQUET.key -> "false") { checkAnswer( sql("SELECT key, value FROM ctas5 ORDER BY key, value"), - sql("SELECT key, value FROM src ORDER BY key, value").collect().toSeq) + sql("SELECT key, value FROM src ORDER BY key, value")) } } |