author    gatorsmile <gatorsmile@gmail.com>    2016-07-13 15:23:37 -0700
committer Yin Huai <yhuai@databricks.com>      2016-07-13 15:23:37 -0700
commit    c5ec879828369ec1d21acd7f18a792306634ff74 (patch)
tree      4a50a0a09e2143e7163f7854009cbec4c959bcdb
parent    fb2e8eeb0b1e56bea535165f7a3bec6558b3f4a3 (diff)
[SPARK-16482][SQL] Describe Table Command for Tables Requiring Runtime Inferred Schema
#### What changes were proposed in this pull request?

If we create a table pointing to a parquet/json dataset without specifying the schema, the `DESCRIBE TABLE` command does not show the schema at all. It only shows `# Schema of this table is inferred at runtime`. In 1.6, `DESCRIBE TABLE` does show the schema of such a table.

~~For data source tables, to infer the schema, we need to load the data source tables at runtime. Thus, this PR calls the function `lookupRelation`.~~

For data source tables, we infer the schema before table creation. Thus, this PR sets the inferred schema as the table schema at table creation.

#### How was this patch tested?

Added test cases.

Author: gatorsmile <gatorsmile@gmail.com>

Closes #14148 from gatorsmile/describeSchema.
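As an illustration of the behavior change, here is a minimal, hypothetical reproduction sketch (the table name `t1` and the path are made up for this example, not taken from the patch):

```scala
// Hypothetical spark-shell sketch (Spark 2.0-era API); table name and
// path are illustrative only.
spark.range(1).write.parquet("/tmp/t1_data")

// Create a data source table without a user-specified schema; the schema
// must therefore be inferred from the Parquet files.
spark.sql("CREATE TABLE t1 USING parquet OPTIONS (PATH '/tmp/t1_data')")

// Before this patch the output contains only:
//   # Schema of this table is inferred at runtime
// After this patch it lists the inferred columns, e.g.:
//   id    bigint
spark.sql("DESC t1").show(truncate = false)
```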
-rw-r--r-- sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala    | 28 +++++++++++++---------------
-rw-r--r-- sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala | 16 +++++++++-------
2 files changed, 22 insertions(+), 22 deletions(-)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
index 5c815df0de..6651c33a3a 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
@@ -413,29 +413,29 @@ case class DescribeTableCommand(table: TableIdentifier, isExtended: Boolean, isF
     } else {
       val metadata = catalog.getTableMetadata(table)
 
+      if (DDLUtils.isDatasourceTable(metadata)) {
+        DDLUtils.getSchemaFromTableProperties(metadata) match {
+          case Some(userSpecifiedSchema) => describeSchema(userSpecifiedSchema, result)
+          case None => describeSchema(catalog.lookupRelation(table).schema, result)
+        }
+      } else {
+        describeSchema(metadata.schema, result)
+      }
+
       if (isExtended) {
         describeExtended(metadata, result)
       } else if (isFormatted) {
         describeFormatted(metadata, result)
       } else {
-        describe(metadata, result)
+        describePartitionInfo(metadata, result)
       }
     }
 
     result
   }
 
-  // Shows data columns and partitioned columns (if any)
-  private def describe(table: CatalogTable, buffer: ArrayBuffer[Row]): Unit = {
+  private def describePartitionInfo(table: CatalogTable, buffer: ArrayBuffer[Row]): Unit = {
     if (DDLUtils.isDatasourceTable(table)) {
-      val schema = DDLUtils.getSchemaFromTableProperties(table)
-
-      if (schema.isEmpty) {
-        append(buffer, "# Schema of this table is inferred at runtime", "", "")
-      } else {
-        schema.foreach(describeSchema(_, buffer))
-      }
-
       val partCols = DDLUtils.getPartitionColumnsFromTableProperties(table)
       if (partCols.nonEmpty) {
         append(buffer, "# Partition Information", "", "")
@@ -443,8 +443,6 @@ case class DescribeTableCommand(table: TableIdentifier, isExtended: Boolean, isF
         partCols.foreach(col => append(buffer, col, "", ""))
       }
     } else {
-      describeSchema(table.schema, buffer)
-
       if (table.partitionColumns.nonEmpty) {
         append(buffer, "# Partition Information", "", "")
         append(buffer, s"# ${output.head.name}", output(1).name, output(2).name)
@@ -454,14 +452,14 @@ case class DescribeTableCommand(table: TableIdentifier, isExtended: Boolean, isF
   }
 
   private def describeExtended(table: CatalogTable, buffer: ArrayBuffer[Row]): Unit = {
-    describe(table, buffer)
+    describePartitionInfo(table, buffer)
 
     append(buffer, "", "", "")
     append(buffer, "# Detailed Table Information", table.toString, "")
   }
 
   private def describeFormatted(table: CatalogTable, buffer: ArrayBuffer[Row]): Unit = {
-    describe(table, buffer)
+    describePartitionInfo(table, buffer)
 
     append(buffer, "", "", "")
     append(buffer, "# Detailed Table Information", "", "")
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
index 343d7bae98..9228242021 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
@@ -612,15 +612,17 @@ class HiveDDLSuite
   }
 
   test("desc table for data source table - no user-defined schema") {
-    withTable("t1") {
-      withTempPath { dir =>
-        val path = dir.getCanonicalPath
-        spark.range(1).write.parquet(path)
-        sql(s"CREATE TABLE t1 USING parquet OPTIONS (PATH '$path')")
+    Seq("parquet", "json", "orc").foreach { fileFormat =>
+      withTable("t1") {
+        withTempPath { dir =>
+          val path = dir.getCanonicalPath
+          spark.range(1).write.format(fileFormat).save(path)
+          sql(s"CREATE TABLE t1 USING $fileFormat OPTIONS (PATH '$path')")
 
-        val desc = sql("DESC FORMATTED t1").collect().toSeq
+          val desc = sql("DESC FORMATTED t1").collect().toSeq
 
-        assert(desc.contains(Row("# Schema of this table is inferred at runtime", "", "")))
+          assert(desc.contains(Row("id", "bigint", "")))
+        }
       }
     }
   }