author    gatorsmile <gatorsmile@gmail.com>    2016-07-13 15:23:37 -0700
committer Yin Huai <yhuai@databricks.com>      2016-07-13 15:23:37 -0700
commit    c5ec879828369ec1d21acd7f18a792306634ff74 (patch)
tree      4a50a0a09e2143e7163f7854009cbec4c959bcdb
parent    fb2e8eeb0b1e56bea535165f7a3bec6558b3f4a3 (diff)
[SPARK-16482][SQL] Describe Table Command for Tables Requiring Runtime Inferred Schema
#### What changes were proposed in this pull request?

If we create a table pointing to a parquet/json dataset without specifying the schema, the `DESCRIBE TABLE` command does not show the schema at all. It only shows `# Schema of this table is inferred at runtime`. In 1.6, `DESCRIBE TABLE` does show the schema of such a table.

~~For data source tables, to infer the schema, we need to load the data source tables at runtime. Thus, this PR calls the function `lookupRelation`.~~

For data source tables, we infer the schema before table creation. Thus, this PR sets the inferred schema as the table schema at table creation.

#### How was this patch tested?

Added test cases.

Author: gatorsmile <gatorsmile@gmail.com>

Closes #14148 from gatorsmile/describeSchema.
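As an illustration of the behavior change, here is a minimal, hypothetical reproduction sketch (the table name `t1` and the path are made up for this example, not taken from the patch):

```scala
// Hypothetical spark-shell sketch (Spark 2.0-era API); table name and
// path are illustrative only.
spark.range(1).write.parquet("/tmp/t1_data")

// Create a data source table without a user-specified schema; the schema
// must therefore be inferred from the Parquet files.
spark.sql("CREATE TABLE t1 USING parquet OPTIONS (PATH '/tmp/t1_data')")

// Before this patch the output contains only:
//   # Schema of this table is inferred at runtime
// After this patch it lists the inferred columns, e.g.:
//   id    bigint
spark.sql("DESC t1").show(truncate = false)
```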
-rw-r--r-- sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala    | 28 +++++++++++++---------------
-rw-r--r-- sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala | 16 +++++++++-------
2 files changed, 22 insertions(+), 22 deletions(-)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
index 5c815df0de..6651c33a3a 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
@@ -413,29 +413,29 @@ case class DescribeTableCommand(table: TableIdentifier, isExtended: Boolean, isF
     } else {
       val metadata = catalog.getTableMetadata(table)
 
+      if (DDLUtils.isDatasourceTable(metadata)) {
+        DDLUtils.getSchemaFromTableProperties(metadata) match {
+          case Some(userSpecifiedSchema) => describeSchema(userSpecifiedSchema, result)
+          case None => describeSchema(catalog.lookupRelation(table).schema, result)
+        }
+      } else {
+        describeSchema(metadata.schema, result)
+      }
+
       if (isExtended) {
         describeExtended(metadata, result)
       } else if (isFormatted) {
         describeFormatted(metadata, result)
       } else {
-        describe(metadata, result)
+        describePartitionInfo(metadata, result)
       }
     }
 
     result
   }
 
-  // Shows data columns and partitioned columns (if any)
-  private def describe(table: CatalogTable, buffer: ArrayBuffer[Row]): Unit = {
+  private def describePartitionInfo(table: CatalogTable, buffer: ArrayBuffer[Row]): Unit = {
     if (DDLUtils.isDatasourceTable(table)) {
-      val schema = DDLUtils.getSchemaFromTableProperties(table)
-
-      if (schema.isEmpty) {
-        append(buffer, "# Schema of this table is inferred at runtime", "", "")
-      } else {
-        schema.foreach(describeSchema(_, buffer))
-      }
-
       val partCols = DDLUtils.getPartitionColumnsFromTableProperties(table)
       if (partCols.nonEmpty) {
         append(buffer, "# Partition Information", "", "")
@@ -443,8 +443,6 @@ case class DescribeTableCommand(table: TableIdentifier, isExtended: Boolean, isF
         partCols.foreach(col => append(buffer, col, "", ""))
       }
     } else {
-      describeSchema(table.schema, buffer)
-
       if (table.partitionColumns.nonEmpty) {
         append(buffer, "# Partition Information", "", "")
         append(buffer, s"# ${output.head.name}", output(1).name, output(2).name)
@@ -454,14 +452,14 @@ case class DescribeTableCommand(table: TableIdentifier, isExtended: Boolean, isF
   }
 
   private def describeExtended(table: CatalogTable, buffer: ArrayBuffer[Row]): Unit = {
-    describe(table, buffer)
+    describePartitionInfo(table, buffer)
 
     append(buffer, "", "", "")
     append(buffer, "# Detailed Table Information", table.toString, "")
   }
 
   private def describeFormatted(table: CatalogTable, buffer: ArrayBuffer[Row]): Unit = {
-    describe(table, buffer)
+    describePartitionInfo(table, buffer)
 
     append(buffer, "", "", "")
     append(buffer, "# Detailed Table Information", "", "")
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
index 343d7bae98..9228242021 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
@@ -612,15 +612,17 @@ class HiveDDLSuite
   }
 
   test("desc table for data source table - no user-defined schema") {
-    withTable("t1") {
-      withTempPath { dir =>
-        val path = dir.getCanonicalPath
-        spark.range(1).write.parquet(path)
-        sql(s"CREATE TABLE t1 USING parquet OPTIONS (PATH '$path')")
+    Seq("parquet", "json", "orc").foreach { fileFormat =>
+      withTable("t1") {
+        withTempPath { dir =>
+          val path = dir.getCanonicalPath
+          spark.range(1).write.format(fileFormat).save(path)
+          sql(s"CREATE TABLE t1 USING $fileFormat OPTIONS (PATH '$path')")
 
-        val desc = sql("DESC FORMATTED t1").collect().toSeq
+          val desc = sql("DESC FORMATTED t1").collect().toSeq
 
-        assert(desc.contains(Row("# Schema of this table is inferred at runtime", "", "")))
+          assert(desc.contains(Row("id", "bigint", "")))
+        }
       }
     }
   }