author     Dongjoon Hyun <dongjoon@apache.org>            2016-09-29 15:30:18 -0700
committer  Herman van Hovell <hvanhovell@databricks.com>  2016-09-29 15:30:18 -0700
commit     4ecc648ad713f9d618adf0406b5d39981779059d (patch)
tree       d9dbfa340c81ebce37ae2d593aa2ce4302367bf0
parent     566d7f28275f90f7b9bed6a75e90989ad0c59931 (diff)
[SPARK-17612][SQL] Support `DESCRIBE table PARTITION` SQL syntax
## What changes were proposed in this pull request?

This PR implements `DESCRIBE table PARTITION` SQL syntax again. It was supported until Spark 1.6.2, but was dropped in 2.0.0.

**Spark 1.6.2**

```scala
scala> sql("CREATE TABLE partitioned_table (a STRING, b INT) PARTITIONED BY (c STRING, d STRING)")
res1: org.apache.spark.sql.DataFrame = [result: string]

scala> sql("ALTER TABLE partitioned_table ADD PARTITION (c='Us', d=1)")
res2: org.apache.spark.sql.DataFrame = [result: string]

scala> sql("DESC partitioned_table PARTITION (c='Us', d=1)").show(false)
+----------------------------------------------------------------+
|result                                                          |
+----------------------------------------------------------------+
|a                      string                                   |
|b                      int                                      |
|c                      string                                   |
|d                      string                                   |
|                                                                |
|# Partition Information                                         |
|# col_name             data_type               comment          |
|                                                                |
|c                      string                                   |
|d                      string                                   |
+----------------------------------------------------------------+
```

**Spark 2.0**

- **Before**

```scala
scala> sql("CREATE TABLE partitioned_table (a STRING, b INT) PARTITIONED BY (c STRING, d STRING)")
res0: org.apache.spark.sql.DataFrame = []

scala> sql("ALTER TABLE partitioned_table ADD PARTITION (c='Us', d=1)")
res1: org.apache.spark.sql.DataFrame = []

scala> sql("DESC partitioned_table PARTITION (c='Us', d=1)").show(false)
org.apache.spark.sql.catalyst.parser.ParseException:
Unsupported SQL statement
```

- **After**

```scala
scala> sql("CREATE TABLE partitioned_table (a STRING, b INT) PARTITIONED BY (c STRING, d STRING)")
res0: org.apache.spark.sql.DataFrame = []

scala> sql("ALTER TABLE partitioned_table ADD PARTITION (c='Us', d=1)")
res1: org.apache.spark.sql.DataFrame = []

scala> sql("DESC partitioned_table PARTITION (c='Us', d=1)").show(false)
+-----------------------+---------+-------+
|col_name               |data_type|comment|
+-----------------------+---------+-------+
|a                      |string   |null   |
|b                      |int      |null   |
|c                      |string   |null   |
|d                      |string   |null   |
|# Partition Information|         |       |
|# col_name             |data_type|comment|
|c                      |string   |null   |
|d                      |string   |null   |
+-----------------------+---------+-------+

scala> sql("DESC EXTENDED partitioned_table PARTITION (c='Us', d=1)").show(100,false)
+------------------------------------------------+---------+-------+
|col_name                                        |data_type|comment|
+------------------------------------------------+---------+-------+
|a                                               |string   |null   |
|b                                               |int      |null   |
|c                                               |string   |null   |
|d                                               |string   |null   |
|# Partition Information                         |         |       |
|# col_name                                      |data_type|comment|
|c                                               |string   |null   |
|d                                               |string   |null   |
|                                                |         |       |
|Detailed Partition Information CatalogPartition( Partition Values: [Us, 1] Storage(Location: file:/Users/dhyun/SPARK-17612-DESC-PARTITION/spark-warehouse/partitioned_table/c=Us/d=1, InputFormat: org.apache.hadoop.mapred.TextInputFormat, OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat, Serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, Properties: [serialization.format=1]) Partition Parameters:{transient_lastDdlTime=1475001066})|         |       |
+------------------------------------------------+---------+-------+

scala> sql("DESC FORMATTED partitioned_table PARTITION (c='Us', d=1)").show(100,false)
+--------------------------------+---------------------------------------------------------------------------------------+-------+
|col_name                        |data_type                                                                              |comment|
+--------------------------------+---------------------------------------------------------------------------------------+-------+
|a                               |string                                                                                 |null   |
|b                               |int                                                                                    |null   |
|c                               |string                                                                                 |null   |
|d                               |string                                                                                 |null   |
|# Partition Information         |                                                                                       |       |
|# col_name                      |data_type                                                                              |comment|
|c                               |string                                                                                 |null   |
|d                               |string                                                                                 |null   |
|                                |                                                                                       |       |
|# Detailed Partition Information|                                                                                       |       |
|Partition Value:                |[Us, 1]                                                                                |       |
|Database:                       |default                                                                                |       |
|Table:                          |partitioned_table                                                                      |       |
|Location:                       |file:/Users/dhyun/SPARK-17612-DESC-PARTITION/spark-warehouse/partitioned_table/c=Us/d=1|       |
|Partition Parameters:           |                                                                                       |       |
|  transient_lastDdlTime         |1475001066                                                                             |       |
|                                |                                                                                       |       |
|# Storage Information           |                                                                                       |       |
|SerDe Library:                  |org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe                                     |       |
|InputFormat:                    |org.apache.hadoop.mapred.TextInputFormat                                               |       |
|OutputFormat:                   |org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat                             |       |
|Compressed:                     |No                                                                                     |       |
|Storage Desc Parameters:        |                                                                                       |       |
|  serialization.format          |1                                                                                      |       |
+--------------------------------+---------------------------------------------------------------------------------------+-------+
```

## How was this patch tested?

Pass the Jenkins tests with a new test case.

Author: Dongjoon Hyun <dongjoon@apache.org>

Closes #15168 from dongjoon-hyun/SPARK-17612.
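Beyond the happy path above, the patch also pins down the failure modes, which the new tests and golden files below exercise. A quick spark-shell sketch of the three error paths (exception names and messages abbreviated from the golden output; assumes the `partitioned_table` from the session above):

```scala
// Each statement fails fast, before any partition data is touched.
sql("DESC partitioned_table PARTITION (c='Us', d=2)")  // NoSuchPartitionException: Partition not found in table
sql("DESC partitioned_table PARTITION (c='Us')")       // AnalysisException: Partition spec is invalid
sql("DESC partitioned_table PARTITION (c='Us', d)")    // ParseException: PARTITION specification is incomplete: `d`
```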
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala  13
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala        15
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala        83
-rw-r--r--  sql/core/src/test/resources/sql-tests/inputs/describe.sql                          27
-rw-r--r--  sql/core/src/test/resources/sql-tests/results/describe.sql.out                     90
-rw-r--r--  sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala    77
6 files changed, 287 insertions, 18 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
index e52251f960..51326ca25e 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
@@ -86,7 +86,18 @@ object CatalogStorageFormat {
case class CatalogTablePartition(
spec: CatalogTypes.TablePartitionSpec,
storage: CatalogStorageFormat,
- parameters: Map[String, String] = Map.empty)
+ parameters: Map[String, String] = Map.empty) {
+
+ override def toString: String = {
+ val output =
+ Seq(
+ s"Partition Values: [${spec.values.mkString(", ")}]",
+ s"$storage",
+ s"Partition Parameters:{${parameters.map(p => p._1 + "=" + p._2).mkString(", ")}}")
+
+ output.filter(_.nonEmpty).mkString("CatalogPartition(\n\t", "\n\t", ")")
+ }
+}
/**
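To see what the new `toString` renders, here is a minimal standalone sketch; `StorageFormat` and `PartitionInfo` are simplified hypothetical stand-ins for `CatalogStorageFormat` and `CatalogTablePartition`, not the real catalyst classes:

```scala
// Hypothetical simplified stand-ins, for illustration only.
case class StorageFormat(location: Option[String]) {
  override def toString: String = s"Storage(Location: ${location.getOrElse("<none>")})"
}

case class PartitionInfo(
    spec: Map[String, String],
    storage: StorageFormat,
    parameters: Map[String, String] = Map.empty) {
  // Same shape as the toString added above: one entry per tab-indented line,
  // wrapped in "CatalogPartition(...)".
  override def toString: String = {
    val output = Seq(
      s"Partition Values: [${spec.values.mkString(", ")}]",
      s"$storage",
      s"Partition Parameters:{${parameters.map(p => p._1 + "=" + p._2).mkString(", ")}}")
    output.filter(_.nonEmpty).mkString("CatalogPartition(\n\t", "\n\t", ")")
  }
}

object ToStringDemo extends App {
  // Prints:
  // CatalogPartition(
  //     Partition Values: [Us, 1]
  //     Storage(Location: file:/tmp/warehouse/partitioned_table/c=Us/d=1)
  //     Partition Parameters:{transient_lastDdlTime=1475001066})
  println(PartitionInfo(
    Map("c" -> "Us", "d" -> "1"),
    StorageFormat(Some("file:/tmp/warehouse/partitioned_table/c=Us/d=1")),
    Map("transient_lastDdlTime" -> "1475001066")))
}
```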
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
index 5359cedc80..3f34d0f253 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
@@ -276,13 +276,24 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder {
* Create a [[DescribeTableCommand]] logical plan.
*/
override def visitDescribeTable(ctx: DescribeTableContext): LogicalPlan = withOrigin(ctx) {
- // Describe partition and column are not supported yet. Return null and let the parser decide
+ // Describe column is not supported yet. Return null and let the parser decide
// what to do with this (create an exception or pass it on to a different system).
- if (ctx.describeColName != null || ctx.partitionSpec != null) {
+ if (ctx.describeColName != null) {
null
} else {
+ val partitionSpec = if (ctx.partitionSpec != null) {
+ // According to the syntax, visitPartitionSpec returns `Map[String, Option[String]]`.
+ visitPartitionSpec(ctx.partitionSpec).map {
+ case (key, Some(value)) => key -> value
+ case (key, _) =>
+ throw new ParseException(s"PARTITION specification is incomplete: `$key`", ctx)
+ }
+ } else {
+ Map.empty[String, String]
+ }
DescribeTableCommand(
visitTableIdentifier(ctx.tableIdentifier),
+ partitionSpec,
ctx.EXTENDED != null,
ctx.FORMATTED != null)
}
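The essential parsing step is collapsing the `Map[String, Option[String]]` returned by `visitPartitionSpec` into a complete `Map[String, String]`, rejecting any key that arrives without a value. A standalone sketch of that check (a plain exception stands in for Spark's `ParseException`, which requires a parser context):

```scala
// Hypothetical standalone version of the completeness check in visitDescribeTable.
def toCompleteSpec(raw: Map[String, Option[String]]): Map[String, String] =
  raw.map {
    case (key, Some(value)) => key -> value
    case (key, None) =>
      throw new IllegalArgumentException(s"PARTITION specification is incomplete: `$key`")
  }

toCompleteSpec(Map("c" -> Some("Us"), "d" -> Some("1")))  // Map(c -> Us, d -> 1)
// toCompleteSpec(Map("c" -> Some("Us"), "d" -> None))    // throws: ...incomplete: `d`
```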
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
index 6a91c997ba..08de6cd424 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
@@ -29,7 +29,7 @@ import org.apache.hadoop.fs.Path
import org.apache.spark.sql.{AnalysisException, Row, SparkSession}
import org.apache.spark.sql.catalyst.TableIdentifier
-import org.apache.spark.sql.catalyst.catalog.{BucketSpec, CatalogStorageFormat, CatalogTable, CatalogTableType}
+import org.apache.spark.sql.catalyst.catalog._
import org.apache.spark.sql.catalyst.catalog.CatalogTableType._
import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec
import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference}
@@ -390,10 +390,14 @@ case class TruncateTableCommand(
/**
* Command that looks like
* {{{
- * DESCRIBE [EXTENDED|FORMATTED] table_name;
+ * DESCRIBE [EXTENDED|FORMATTED] table_name partitionSpec?;
* }}}
*/
-case class DescribeTableCommand(table: TableIdentifier, isExtended: Boolean, isFormatted: Boolean)
+case class DescribeTableCommand(
+ table: TableIdentifier,
+ partitionSpec: TablePartitionSpec,
+ isExtended: Boolean,
+ isFormatted: Boolean)
extends RunnableCommand {
override val output: Seq[Attribute] = Seq(
@@ -411,17 +415,25 @@ case class DescribeTableCommand(table: TableIdentifier, isExtended: Boolean, isF
val catalog = sparkSession.sessionState.catalog
if (catalog.isTemporaryTable(table)) {
+ if (partitionSpec.nonEmpty) {
+ throw new AnalysisException(
+ s"DESC PARTITION is not allowed on a temporary view: ${table.identifier}")
+ }
describeSchema(catalog.lookupRelation(table).schema, result)
} else {
val metadata = catalog.getTableMetadata(table)
describeSchema(metadata.schema, result)
- if (isExtended) {
- describeExtended(metadata, result)
- } else if (isFormatted) {
- describeFormatted(metadata, result)
+ describePartitionInfo(metadata, result)
+
+ if (partitionSpec.isEmpty) {
+ if (isExtended) {
+ describeExtendedTableInfo(metadata, result)
+ } else if (isFormatted) {
+ describeFormattedTableInfo(metadata, result)
+ }
} else {
- describePartitionInfo(metadata, result)
+ describeDetailedPartitionInfo(catalog, metadata, result)
}
}
@@ -436,16 +448,12 @@ case class DescribeTableCommand(table: TableIdentifier, isExtended: Boolean, isF
}
}
- private def describeExtended(table: CatalogTable, buffer: ArrayBuffer[Row]): Unit = {
- describePartitionInfo(table, buffer)
-
+ private def describeExtendedTableInfo(table: CatalogTable, buffer: ArrayBuffer[Row]): Unit = {
append(buffer, "", "", "")
append(buffer, "# Detailed Table Information", table.toString, "")
}
- private def describeFormatted(table: CatalogTable, buffer: ArrayBuffer[Row]): Unit = {
- describePartitionInfo(table, buffer)
-
+ private def describeFormattedTableInfo(table: CatalogTable, buffer: ArrayBuffer[Row]): Unit = {
append(buffer, "", "", "")
append(buffer, "# Detailed Table Information", "", "")
append(buffer, "Database:", table.database, "")
@@ -499,6 +507,53 @@ case class DescribeTableCommand(table: TableIdentifier, isExtended: Boolean, isF
}
}
+ private def describeDetailedPartitionInfo(
+ catalog: SessionCatalog,
+ metadata: CatalogTable,
+ result: ArrayBuffer[Row]): Unit = {
+ if (metadata.tableType == CatalogTableType.VIEW) {
+ throw new AnalysisException(
+ s"DESC PARTITION is not allowed on a view: ${table.identifier}")
+ }
+ if (DDLUtils.isDatasourceTable(metadata)) {
+ throw new AnalysisException(
+ s"DESC PARTITION is not allowed on a datasource table: ${table.identifier}")
+ }
+ val partition = catalog.getPartition(table, partitionSpec)
+ if (isExtended) {
+ describeExtendedDetailedPartitionInfo(table, metadata, partition, result)
+ } else if (isFormatted) {
+ describeFormattedDetailedPartitionInfo(table, metadata, partition, result)
+ describeStorageInfo(metadata, result)
+ }
+ }
+
+ private def describeExtendedDetailedPartitionInfo(
+ tableIdentifier: TableIdentifier,
+ table: CatalogTable,
+ partition: CatalogTablePartition,
+ buffer: ArrayBuffer[Row]): Unit = {
+ append(buffer, "", "", "")
+ append(buffer, "Detailed Partition Information " + partition.toString, "", "")
+ }
+
+ private def describeFormattedDetailedPartitionInfo(
+ tableIdentifier: TableIdentifier,
+ table: CatalogTable,
+ partition: CatalogTablePartition,
+ buffer: ArrayBuffer[Row]): Unit = {
+ append(buffer, "", "", "")
+ append(buffer, "# Detailed Partition Information", "", "")
+ append(buffer, "Partition Value:", s"[${partition.spec.values.mkString(", ")}]", "")
+ append(buffer, "Database:", table.database, "")
+ append(buffer, "Table:", tableIdentifier.table, "")
+ append(buffer, "Location:", partition.storage.locationUri.getOrElse(""), "")
+ append(buffer, "Partition Parameters:", "", "")
+ partition.parameters.foreach { case (key, value) =>
+ append(buffer, s" $key", value, "")
+ }
+ }
+
private def describeSchema(schema: StructType, buffer: ArrayBuffer[Row]): Unit = {
schema.foreach { column =>
append(buffer, column.name, column.dataType.simpleString, column.getComment().orNull)
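Taken together, the guards above reject DESC PARTITION before any catalog lookup for three kinds of relation. A condensed, hypothetical sketch of that dispatch (plain exceptions stand in for Spark's `AnalysisException`; the real code derives the kind from the `SessionCatalog` metadata and `DDLUtils.isDatasourceTable`):

```scala
// Hypothetical condensation of the guard clauses in DescribeTableCommand.
sealed trait TableKind
case object TempView        extends TableKind
case object PermanentView   extends TableKind
case object DatasourceTable extends TableKind
case object HiveTable       extends TableKind

def checkDescPartition(kind: TableKind, name: String): Unit = kind match {
  case TempView =>
    throw new IllegalStateException(s"DESC PARTITION is not allowed on a temporary view: $name")
  case PermanentView =>
    throw new IllegalStateException(s"DESC PARTITION is not allowed on a view: $name")
  case DatasourceTable =>
    throw new IllegalStateException(s"DESC PARTITION is not allowed on a datasource table: $name")
  case HiveTable =>
    () // OK: proceed to catalog.getPartition(table, partitionSpec)
}
```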
diff --git a/sql/core/src/test/resources/sql-tests/inputs/describe.sql b/sql/core/src/test/resources/sql-tests/inputs/describe.sql
new file mode 100644
index 0000000000..3f0ae902e0
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/inputs/describe.sql
@@ -0,0 +1,27 @@
+CREATE TABLE t (a STRING, b INT) PARTITIONED BY (c STRING, d STRING);
+
+ALTER TABLE t ADD PARTITION (c='Us', d=1);
+
+DESC t;
+
+-- Ignore these because the output contains timestamp results, e.g., `Create Table`.
+-- DESC EXTENDED t;
+-- DESC FORMATTED t;
+
+DESC t PARTITION (c='Us', d=1);
+
+-- Ignore these because the output contains timestamp results, e.g., transient_lastDdlTime.
+-- DESC EXTENDED t PARTITION (c='Us', d=1);
+-- DESC FORMATTED t PARTITION (c='Us', d=1);
+
+-- NoSuchPartitionException: Partition not found in table
+DESC t PARTITION (c='Us', d=2);
+
+-- AnalysisException: Partition spec is invalid
+DESC t PARTITION (c='Us');
+
+-- ParseException: PARTITION specification is incomplete
+DESC t PARTITION (c='Us', d);
+
+-- DROP TEST TABLE
+DROP TABLE t;
diff --git a/sql/core/src/test/resources/sql-tests/results/describe.sql.out b/sql/core/src/test/resources/sql-tests/results/describe.sql.out
new file mode 100644
index 0000000000..37bf303f1b
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/results/describe.sql.out
@@ -0,0 +1,90 @@
+-- Automatically generated by SQLQueryTestSuite
+-- Number of queries: 8
+
+
+-- !query 0
+CREATE TABLE t (a STRING, b INT) PARTITIONED BY (c STRING, d STRING)
+-- !query 0 schema
+struct<>
+-- !query 0 output
+
+
+
+-- !query 1
+ALTER TABLE t ADD PARTITION (c='Us', d=1)
+-- !query 1 schema
+struct<>
+-- !query 1 output
+
+
+
+-- !query 2
+DESC t
+-- !query 2 schema
+struct<col_name:string,data_type:string,comment:string>
+-- !query 2 output
+# Partition Information
+# col_name data_type comment
+a string
+b int
+c string
+c string
+d string
+d string
+
+
+-- !query 3
+DESC t PARTITION (c='Us', d=1)
+-- !query 3 schema
+struct<col_name:string,data_type:string,comment:string>
+-- !query 3 output
+# Partition Information
+# col_name data_type comment
+a string
+b int
+c string
+c string
+d string
+d string
+
+
+-- !query 4
+DESC t PARTITION (c='Us', d=2)
+-- !query 4 schema
+struct<>
+-- !query 4 output
+org.apache.spark.sql.catalyst.analysis.NoSuchPartitionException
+Partition not found in table 't' database 'default':
+c -> Us
+d -> 2;
+
+
+-- !query 5
+DESC t PARTITION (c='Us')
+-- !query 5 schema
+struct<>
+-- !query 5 output
+org.apache.spark.sql.AnalysisException
+Partition spec is invalid. The spec (c) must match the partition spec (c, d) defined in table '`default`.`t`';
+
+
+-- !query 6
+DESC t PARTITION (c='Us', d)
+-- !query 6 schema
+struct<>
+-- !query 6 output
+org.apache.spark.sql.catalyst.parser.ParseException
+
+PARTITION specification is incomplete: `d`(line 1, pos 0)
+
+== SQL ==
+DESC t PARTITION (c='Us', d)
+^^^
+
+
+-- !query 7
+DROP TABLE t
+-- !query 7 schema
+struct<>
+-- !query 7 output
+
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
index dc4d099f0f..6c77a0deb5 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
@@ -26,7 +26,7 @@ import org.apache.hadoop.fs.Path
import org.apache.spark.sql._
import org.apache.spark.sql.catalyst.TableIdentifier
-import org.apache.spark.sql.catalyst.analysis.{EliminateSubqueryAliases, FunctionRegistry}
+import org.apache.spark.sql.catalyst.analysis.{EliminateSubqueryAliases, FunctionRegistry, NoSuchPartitionException}
import org.apache.spark.sql.catalyst.catalog.CatalogTableType
import org.apache.spark.sql.catalyst.parser.ParseException
import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, LogicalRelation}
@@ -341,6 +341,81 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
}
}
+ test("describe partition") {
+ withTable("partitioned_table") {
+ sql("CREATE TABLE partitioned_table (a STRING, b INT) PARTITIONED BY (c STRING, d STRING)")
+ sql("ALTER TABLE partitioned_table ADD PARTITION (c='Us', d=1)")
+
+ checkKeywordsExist(sql("DESC partitioned_table PARTITION (c='Us', d=1)"),
+ "# Partition Information",
+ "# col_name")
+
+ checkKeywordsExist(sql("DESC EXTENDED partitioned_table PARTITION (c='Us', d=1)"),
+ "# Partition Information",
+ "# col_name",
+ "Detailed Partition Information CatalogPartition(",
+ "Partition Values: [Us, 1]",
+ "Storage(Location:",
+ "Partition Parameters")
+
+ checkKeywordsExist(sql("DESC FORMATTED partitioned_table PARTITION (c='Us', d=1)"),
+ "# Partition Information",
+ "# col_name",
+ "# Detailed Partition Information",
+ "Partition Value:",
+ "Database:",
+ "Table:",
+ "Location:",
+ "Partition Parameters:",
+ "# Storage Information")
+ }
+ }
+
+ test("describe partition - error handling") {
+ withTable("partitioned_table", "datasource_table") {
+ sql("CREATE TABLE partitioned_table (a STRING, b INT) PARTITIONED BY (c STRING, d STRING)")
+ sql("ALTER TABLE partitioned_table ADD PARTITION (c='Us', d=1)")
+
+ val m = intercept[NoSuchPartitionException] {
+ sql("DESC partitioned_table PARTITION (c='Us', d=2)")
+ }.getMessage()
+ assert(m.contains("Partition not found in table"))
+
+ val m2 = intercept[AnalysisException] {
+ sql("DESC partitioned_table PARTITION (c='Us')")
+ }.getMessage()
+ assert(m2.contains("Partition spec is invalid"))
+
+ val m3 = intercept[ParseException] {
+ sql("DESC partitioned_table PARTITION (c='Us', d)")
+ }.getMessage()
+ assert(m3.contains("PARTITION specification is incomplete: `d`"))
+
+ spark
+ .range(1).select('id as 'a, 'id as 'b, 'id as 'c, 'id as 'd).write
+ .partitionBy("d")
+ .saveAsTable("datasource_table")
+ val m4 = intercept[AnalysisException] {
+ sql("DESC datasource_table PARTITION (d=2)")
+ }.getMessage()
+ assert(m4.contains("DESC PARTITION is not allowed on a datasource table"))
+
+ val m5 = intercept[AnalysisException] {
+ spark.range(10).select('id as 'a, 'id as 'b).createTempView("view1")
+ sql("DESC view1 PARTITION (c='Us', d=1)")
+ }.getMessage()
+ assert(m5.contains("DESC PARTITION is not allowed on a temporary view"))
+
+ withView("permanent_view") {
+ val m = intercept[AnalysisException] {
+ sql("CREATE VIEW permanent_view AS SELECT * FROM partitioned_table")
+ sql("DESC permanent_view PARTITION (c='Us', d=1)")
+ }.getMessage()
+ assert(m.contains("DESC PARTITION is not allowed on a view"))
+ }
+ }
+ }
+
test("SPARK-5371: union with null and sum") {
val df = Seq((1, 1)).toDF("c1", "c2")
df.createOrReplaceTempView("table1")