diff options
author | Wenchen Fan <wenchen@databricks.com> | 2016-07-26 18:46:12 +0800 |
---|---|---|
committer | Cheng Lian <lian@databricks.com> | 2016-07-26 18:46:12 +0800 |
commit | a2abb583caaec9a2cecd5d65b05d172fc096c125 (patch) | |
tree | edaf8913f4f586a85be22e728fc41bb7bbb8066f /sql | |
parent | 4c9695598ee00f68aff4eb32d4629edf6facb29f (diff) | |
download | spark-a2abb583caaec9a2cecd5d65b05d172fc096c125.tar.gz spark-a2abb583caaec9a2cecd5d65b05d172fc096c125.tar.bz2 spark-a2abb583caaec9a2cecd5d65b05d172fc096c125.zip |
[SPARK-16663][SQL] desc table should be consistent between data source and hive serde tables
## What changes were proposed in this pull request?
Currently there are 2 inconsistencies:
1. for data source tables, we only print partition names; for hive tables, we also print the partition schema. After this PR, we will always print the schema
2. if a column doesn't have a comment, data source tables will print an empty string, while hive tables will print null. After this PR, we will always print null
## How was this patch tested?
new test in `HiveDDLSuite`
Author: Wenchen Fan <wenchen@databricks.com>
Closes #14302 from cloud-fan/minor3.
Diffstat (limited to 'sql')
5 files changed, 47 insertions, 31 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala index c6daa95286..82633803fa 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala @@ -439,11 +439,12 @@ case class DescribeTableCommand(table: TableIdentifier, isExtended: Boolean, isF private def describePartitionInfo(table: CatalogTable, buffer: ArrayBuffer[Row]): Unit = { if (DDLUtils.isDatasourceTable(table)) { - val partCols = DDLUtils.getPartitionColumnsFromTableProperties(table) - if (partCols.nonEmpty) { + val userSpecifiedSchema = DDLUtils.getSchemaFromTableProperties(table) + val partColNames = DDLUtils.getPartitionColumnsFromTableProperties(table) + for (schema <- userSpecifiedSchema if partColNames.nonEmpty) { append(buffer, "# Partition Information", "", "") - append(buffer, s"# ${output.head.name}", "", "") - partCols.foreach(col => append(buffer, col, "", "")) + append(buffer, s"# ${output.head.name}", output(1).name, output(2).name) + describeSchema(StructType(partColNames.map(schema(_))), buffer) } } else { if (table.partitionColumns.nonEmpty) { @@ -525,8 +526,7 @@ case class DescribeTableCommand(table: TableIdentifier, isExtended: Boolean, isF private def describeSchema(schema: StructType, buffer: ArrayBuffer[Row]): Unit = { schema.foreach { column => - val comment = column.getComment().getOrElse("") - append(buffer, column.name, column.dataType.simpleString, comment) + append(buffer, column.name, column.dataType.simpleString, column.getComment().orNull) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/DDLTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/DDLTestSuite.scala index d0ad3190e0..e535d4dc88 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/DDLTestSuite.scala +++ 
b/sql/core/src/test/scala/org/apache/spark/sql/sources/DDLTestSuite.scala @@ -97,21 +97,21 @@ class DDLTestSuite extends DataSourceTest with SharedSQLContext { "describe ddlPeople", Seq( Row("intType", "int", "test comment test1"), - Row("stringType", "string", ""), - Row("dateType", "date", ""), - Row("timestampType", "timestamp", ""), - Row("doubleType", "double", ""), - Row("bigintType", "bigint", ""), - Row("tinyintType", "tinyint", ""), - Row("decimalType", "decimal(10,0)", ""), - Row("fixedDecimalType", "decimal(5,1)", ""), - Row("binaryType", "binary", ""), - Row("booleanType", "boolean", ""), - Row("smallIntType", "smallint", ""), - Row("floatType", "float", ""), - Row("mapType", "map<string,string>", ""), - Row("arrayType", "array<string>", ""), - Row("structType", "struct<f1:string,f2:int>", "") + Row("stringType", "string", null), + Row("dateType", "date", null), + Row("timestampType", "timestamp", null), + Row("doubleType", "double", null), + Row("bigintType", "bigint", null), + Row("tinyintType", "tinyint", null), + Row("decimalType", "decimal(10,0)", null), + Row("fixedDecimalType", "decimal(5,1)", null), + Row("binaryType", "binary", null), + Row("booleanType", "boolean", null), + Row("smallIntType", "smallint", null), + Row("floatType", "float", null), + Row("mapType", "map<string,string>", null), + Row("arrayType", "array<string>", null), + Row("structType", "struct<f1:string,f2:int>", null) )) test("SPARK-7686 DescribeCommand should have correct physical plan output attributes") { diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala index 22f8c0f19c..111fb8b348 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala @@ -748,7 +748,7 @@ class MetastoreDataSourcesSuite extends QueryTest with 
SQLTestUtils with TestHiv assert(schema === actualSchema) // Checks the DESCRIBE output. - checkAnswer(sql("DESCRIBE spark6655"), Row("int", "int", "") :: Nil) + checkAnswer(sql("DESCRIBE spark6655"), Row("int", "int", null) :: Nil) } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index fb5c9948a5..d15e11a7ff 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -431,6 +431,22 @@ class HiveDDLSuite } } + test("desc table for Hive table - partitioned table") { + withTable("tbl") { + sql("CREATE TABLE tbl(a int) PARTITIONED BY (b int)") + + assert(sql("DESC tbl").collect().containsSlice( + Seq( + Row("a", "int", null), + Row("b", "int", null), + Row("# Partition Information", "", ""), + Row("# col_name", "data_type", "comment"), + Row("b", "int", null) + ) + )) + } + } + test("desc table for data source table using Hive Metastore") { assume(spark.sparkContext.conf.get(CATALOG_IMPLEMENTATION) == "hive") val tabName = "tab1" @@ -621,7 +637,7 @@ class HiveDDLSuite val desc = sql("DESC FORMATTED t1").collect().toSeq - assert(desc.contains(Row("id", "bigint", ""))) + assert(desc.contains(Row("id", "bigint", null))) } } } @@ -638,13 +654,13 @@ class HiveDDLSuite assert(formattedDesc.containsSlice( Seq( - Row("a", "bigint", ""), - Row("b", "bigint", ""), - Row("c", "bigint", ""), - Row("d", "bigint", ""), + Row("a", "bigint", null), + Row("b", "bigint", null), + Row("c", "bigint", null), + Row("d", "bigint", null), Row("# Partition Information", "", ""), - Row("# col_name", "", ""), - Row("d", "", ""), + Row("# col_name", "data_type", "comment"), + Row("d", "bigint", null), Row("", "", ""), Row("# Detailed Table Information", "", ""), Row("Database:", "default", "") diff --git 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala index f8c55ec456..31283b9fd6 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala @@ -834,8 +834,8 @@ class HiveQuerySuite extends HiveComparisonTest with BeforeAndAfter { assertResult( Array( - Row("a", "int", ""), - Row("b", "string", "")) + Row("a", "int", null), + Row("b", "string", null)) ) { sql("DESCRIBE test_describe_commands2") .select('col_name, 'data_type, 'comment) |