author     Xiao Li <gatorsmile@gmail.com>    2017-04-03 23:30:12 -0700
committer  Xiao Li <gatorsmile@gmail.com>    2017-04-03 23:30:12 -0700
commit     51d3c854c54369aec1bfd55cefcd080dcd178d5f (patch)
tree       8c83dc8698f16b4af724fe4f8a1a7901224bdffb /sql/hive
parent     3bfb639cb7352aec572ef6686d3471bd78748ffa (diff)
[SPARK-20067][SQL] Unify and Clean Up Desc Commands Using Catalog Interface
### What changes were proposed in this pull request?

This PR unifies and cleans up the outputs of `DESC EXTENDED/FORMATTED` and `SHOW TABLE EXTENDED` by moving the logic into the Catalog interface. The output formats are improved and the missing attributes are added. This impacts the DDL commands `SHOW TABLE EXTENDED`, `DESC EXTENDED`, and `DESC FORMATTED`. In addition, following what the Dataset API does in `printSchema`, we can use `treeString` to show the schema in a more readable way.

Below is the current output:

```
Schema: STRUCT<`a`: STRING (nullable = true), `b`: INT (nullable = true), `c`: STRING (nullable = true), `d`: STRING (nullable = true)>
```

After the change, it looks like:

```
Schema: root
 |-- a: string (nullable = true)
 |-- b: integer (nullable = true)
 |-- c: string (nullable = true)
 |-- d: string (nullable = true)
```

### How was this patch tested?

`describe.sql` and `show-tables.sql`

Author: Xiao Li <gatorsmile@gmail.com>

Closes #17394 from gatorsmile/descFollowUp.
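As a side note, here is a minimal sketch of the `treeString` rendering the message refers to, using the four-column example schema above (the imports and schema construction are illustrative, not part of this patch):

```scala
import org.apache.spark.sql.types._

// Rebuild the example schema from the commit message.
val schema = StructType(Seq(
  StructField("a", StringType, nullable = true),
  StructField("b", IntegerType, nullable = true),
  StructField("c", StringType, nullable = true),
  StructField("d", StringType, nullable = true)))

// treeString is the same rendering Dataset.printSchema uses;
// it produces the indented "root |-- ..." form shown above.
println(schema.treeString)
```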
Diffstat (limited to 'sql/hive')
-rw-r--r--  sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala    8
-rw-r--r--  sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala    4
-rw-r--r--  sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala    93
-rw-r--r--  sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveOperatorQueryableSuite.scala    53
-rw-r--r--  sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala    56
-rw-r--r--  sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala    131
6 files changed, 32 insertions(+), 313 deletions(-)
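The tests below drop string matching on `DESC`/`DESC FORMATTED` output in favor of direct assertions against catalog metadata. A hedged sketch of that pattern, assuming a test `SparkSession` named `spark` and an existing Hive table `t` (neither is defined here):

```scala
import org.apache.spark.sql.catalyst.TableIdentifier

// Read the table metadata through the session catalog rather than
// parsing the textual output of DESC FORMATTED.
val metadata = spark.sessionState.catalog.getTableMetadata(TableIdentifier("t"))

// Storage attributes can then be asserted directly, as the updated
// CTAS tests in SQLQuerySuite do.
assert(metadata.storage.serde ==
  Some("org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe"))
```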
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala
index 55e02acfa4..b554694815 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala
@@ -767,9 +767,6 @@ class MetastoreDataSourcesSuite extends QueryTest with SQLTestUtils with TestHiv
sessionState.refreshTable(tableName)
val actualSchema = table(tableName).schema
assert(schema === actualSchema)
-
- // Checks the DESCRIBE output.
- checkAnswer(sql("DESCRIBE spark6655"), Row("int", "int", null) :: Nil)
}
}
@@ -1381,7 +1378,10 @@ class MetastoreDataSourcesSuite extends QueryTest with SQLTestUtils with TestHiv
checkAnswer(spark.table("old"), Row(1, "a"))
- checkAnswer(sql("DESC old"), Row("i", "int", null) :: Row("j", "string", null) :: Nil)
+ val expectedSchema = StructType(Seq(
+ StructField("i", IntegerType, nullable = true),
+ StructField("j", StringType, nullable = true)))
+ assert(table("old").schema === expectedSchema)
}
}
}
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala
index 536ca8fd9d..e45cf977bf 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala
@@ -207,6 +207,7 @@ abstract class HiveComparisonTest
// This list contains indicators for those lines which do not have actual
// results and which we want to ignore.
lazy val ignoredLineIndicators = Seq(
+ "# Detailed Table Information",
"# Partition Information",
"# col_name"
)
@@ -358,7 +359,7 @@ abstract class HiveComparisonTest
stringToFile(new File(failedDirectory, testCaseName), errorMessage + consoleTestCase)
fail(errorMessage)
}
- }.toSeq
+ }
(queryList, hiveResults, catalystResults).zipped.foreach {
case (query, hive, (hiveQuery, catalyst)) =>
@@ -369,6 +370,7 @@ abstract class HiveComparisonTest
if ((!hiveQuery.logical.isInstanceOf[ExplainCommand]) &&
(!hiveQuery.logical.isInstanceOf[ShowFunctionsCommand]) &&
(!hiveQuery.logical.isInstanceOf[DescribeFunctionCommand]) &&
+ (!hiveQuery.logical.isInstanceOf[DescribeTableCommand]) &&
preparedHive != catalyst) {
val hivePrintOut = s"== HIVE - ${preparedHive.size} row(s) ==" +: preparedHive
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
index f0a995c274..3906968aaf 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
@@ -708,23 +708,6 @@ class HiveDDLSuite
}
}
- test("desc table for Hive table") {
- withTable("tab1") {
- val tabName = "tab1"
- sql(s"CREATE TABLE $tabName(c1 int)")
-
- assert(sql(s"DESC $tabName").collect().length == 1)
-
- assert(
- sql(s"DESC FORMATTED $tabName").collect()
- .exists(_.getString(0) == "# Storage Information"))
-
- assert(
- sql(s"DESC EXTENDED $tabName").collect()
- .exists(_.getString(0) == "# Detailed Table Information"))
- }
- }
-
test("desc table for Hive table - partitioned table") {
withTable("tbl") {
sql("CREATE TABLE tbl(a int) PARTITIONED BY (b int)")
@@ -741,23 +724,6 @@ class HiveDDLSuite
}
}
- test("desc formatted table for permanent view") {
- withTable("tbl") {
- withView("view1") {
- sql("CREATE TABLE tbl(a int)")
- sql("CREATE VIEW view1 AS SELECT * FROM tbl")
- assert(sql("DESC FORMATTED view1").collect().containsSlice(
- Seq(
- Row("# View Information", "", ""),
- Row("View Text:", "SELECT * FROM tbl", ""),
- Row("View Default Database:", "default", ""),
- Row("View Query Output Columns:", "[a]", "")
- )
- ))
- }
- }
- }
-
test("desc table for data source table using Hive Metastore") {
assume(spark.sparkContext.conf.get(CATALOG_IMPLEMENTATION) == "hive")
val tabName = "tab1"
@@ -766,7 +732,7 @@ class HiveDDLSuite
checkAnswer(
sql(s"DESC $tabName").select("col_name", "data_type", "comment"),
- Row("a", "int", "test")
+ Row("# col_name", "data_type", "comment") :: Row("a", "int", "test") :: Nil
)
}
}
@@ -1218,23 +1184,6 @@ class HiveDDLSuite
sql(s"SELECT * FROM ${targetTable.identifier}"))
}
- test("desc table for data source table") {
- withTable("tab1") {
- val tabName = "tab1"
- spark.range(1).write.format("json").saveAsTable(tabName)
-
- assert(sql(s"DESC $tabName").collect().length == 1)
-
- assert(
- sql(s"DESC FORMATTED $tabName").collect()
- .exists(_.getString(0) == "# Storage Information"))
-
- assert(
- sql(s"DESC EXTENDED $tabName").collect()
- .exists(_.getString(0) == "# Detailed Table Information"))
- }
- }
-
test("create table with the same name as an index table") {
val tabName = "tab1"
val indexName = tabName + "_index"
@@ -1320,46 +1269,6 @@ class HiveDDLSuite
}
}
- test("desc table for data source table - partitioned bucketed table") {
- withTable("t1") {
- spark
- .range(1).select('id as 'a, 'id as 'b, 'id as 'c, 'id as 'd).write
- .bucketBy(2, "b").sortBy("c").partitionBy("d")
- .saveAsTable("t1")
-
- val formattedDesc = sql("DESC FORMATTED t1").collect()
-
- assert(formattedDesc.containsSlice(
- Seq(
- Row("a", "bigint", null),
- Row("b", "bigint", null),
- Row("c", "bigint", null),
- Row("d", "bigint", null),
- Row("# Partition Information", "", ""),
- Row("# col_name", "data_type", "comment"),
- Row("d", "bigint", null),
- Row("", "", ""),
- Row("# Detailed Table Information", "", ""),
- Row("Database:", "default", "")
- )
- ))
-
- assert(formattedDesc.containsSlice(
- Seq(
- Row("Table Type:", "MANAGED", "")
- )
- ))
-
- assert(formattedDesc.containsSlice(
- Seq(
- Row("Num Buckets:", "2", ""),
- Row("Bucket Columns:", "[b]", ""),
- Row("Sort Columns:", "[c]", "")
- )
- ))
- }
- }
-
test("datasource and statistics table property keys are not allowed") {
import org.apache.spark.sql.hive.HiveExternalCatalog.DATASOURCE_PREFIX
import org.apache.spark.sql.hive.HiveExternalCatalog.STATISTICS_PREFIX
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveOperatorQueryableSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveOperatorQueryableSuite.scala
deleted file mode 100644
index 0e89e990e5..0000000000
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveOperatorQueryableSuite.scala
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.hive.execution
-
-import org.apache.spark.sql.{QueryTest, Row}
-import org.apache.spark.sql.hive.test.TestHiveSingleton
-
-/**
- * A set of tests that validates that commands can also be queried like a table
- */
-class HiveOperatorQueryableSuite extends QueryTest with TestHiveSingleton {
- import spark._
-
- test("SPARK-5324 query result of describe command") {
- hiveContext.loadTestTable("src")
-
- // Creates a temporary view with the output of a describe command
- sql("desc src").createOrReplaceTempView("mydesc")
- checkAnswer(
- sql("desc mydesc"),
- Seq(
- Row("col_name", "string", "name of the column"),
- Row("data_type", "string", "data type of the column"),
- Row("comment", "string", "comment of the column")))
-
- checkAnswer(
- sql("select * from mydesc"),
- Seq(
- Row("key", "int", null),
- Row("value", "string", null)))
-
- checkAnswer(
- sql("select col_name, data_type, comment from mydesc"),
- Seq(
- Row("key", "int", null),
- Row("value", "string", null)))
- }
-}
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
index dd278f683a..65a902fc54 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
@@ -789,62 +789,6 @@ class HiveQuerySuite extends HiveComparisonTest with SQLTestUtils with BeforeAnd
assert(Try(q0.count()).isSuccess)
}
- test("DESCRIBE commands") {
- sql(s"CREATE TABLE test_describe_commands1 (key INT, value STRING) PARTITIONED BY (dt STRING)")
-
- sql(
- """FROM src INSERT OVERWRITE TABLE test_describe_commands1 PARTITION (dt='2008-06-08')
- |SELECT key, value
- """.stripMargin)
-
- // Describe a table
- assertResult(
- Array(
- Row("key", "int", null),
- Row("value", "string", null),
- Row("dt", "string", null),
- Row("# Partition Information", "", ""),
- Row("# col_name", "data_type", "comment"),
- Row("dt", "string", null))
- ) {
- sql("DESCRIBE test_describe_commands1")
- .select('col_name, 'data_type, 'comment)
- .collect()
- }
-
- // Describe a table with a fully qualified table name
- assertResult(
- Array(
- Row("key", "int", null),
- Row("value", "string", null),
- Row("dt", "string", null),
- Row("# Partition Information", "", ""),
- Row("# col_name", "data_type", "comment"),
- Row("dt", "string", null))
- ) {
- sql("DESCRIBE default.test_describe_commands1")
- .select('col_name, 'data_type, 'comment)
- .collect()
- }
-
- // Describe a temporary view.
- val testData =
- TestHive.sparkContext.parallelize(
- TestData(1, "str1") ::
- TestData(1, "str2") :: Nil)
- testData.toDF().createOrReplaceTempView("test_describe_commands2")
-
- assertResult(
- Array(
- Row("a", "int", null),
- Row("b", "string", null))
- ) {
- sql("DESCRIBE test_describe_commands2")
- .select('col_name, 'data_type, 'comment)
- .collect()
- }
- }
-
test("SPARK-2263: Insert Map<K, V> values") {
sql("CREATE TABLE m(value MAP<INT, STRING>)")
sql("INSERT OVERWRITE TABLE m SELECT MAP(key, value) FROM src LIMIT 10")
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
index 55ff4bb115..d012797e19 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
@@ -363,79 +363,6 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
}
}
- test("describe partition") {
- withTable("partitioned_table") {
- sql("CREATE TABLE partitioned_table (a STRING, b INT) PARTITIONED BY (c STRING, d STRING)")
- sql("ALTER TABLE partitioned_table ADD PARTITION (c='Us', d=1)")
-
- checkKeywordsExist(sql("DESC partitioned_table PARTITION (c='Us', d=1)"),
- "# Partition Information",
- "# col_name")
-
- checkKeywordsExist(sql("DESC EXTENDED partitioned_table PARTITION (c='Us', d=1)"),
- "# Partition Information",
- "# col_name",
- "Detailed Partition Information CatalogPartition(",
- "Partition Values: [c=Us, d=1]",
- "Storage(Location:",
- "Partition Parameters")
-
- checkKeywordsExist(sql("DESC FORMATTED partitioned_table PARTITION (c='Us', d=1)"),
- "# Partition Information",
- "# col_name",
- "# Detailed Partition Information",
- "Partition Value:",
- "Database:",
- "Table:",
- "Location:",
- "Partition Parameters:",
- "# Storage Information")
- }
- }
-
- test("describe partition - error handling") {
- withTable("partitioned_table", "datasource_table") {
- sql("CREATE TABLE partitioned_table (a STRING, b INT) PARTITIONED BY (c STRING, d STRING)")
- sql("ALTER TABLE partitioned_table ADD PARTITION (c='Us', d=1)")
-
- val m = intercept[NoSuchPartitionException] {
- sql("DESC partitioned_table PARTITION (c='Us', d=2)")
- }.getMessage()
- assert(m.contains("Partition not found in table"))
-
- val m2 = intercept[AnalysisException] {
- sql("DESC partitioned_table PARTITION (c='Us')")
- }.getMessage()
- assert(m2.contains("Partition spec is invalid"))
-
- val m3 = intercept[ParseException] {
- sql("DESC partitioned_table PARTITION (c='Us', d)")
- }.getMessage()
- assert(m3.contains("PARTITION specification is incomplete: `d`"))
-
- spark
- .range(1).select('id as 'a, 'id as 'b, 'id as 'c, 'id as 'd).write
- .partitionBy("d")
- .saveAsTable("datasource_table")
-
- sql("DESC datasource_table PARTITION (d=0)")
-
- val m5 = intercept[AnalysisException] {
- spark.range(10).select('id as 'a, 'id as 'b).createTempView("view1")
- sql("DESC view1 PARTITION (c='Us', d=1)")
- }.getMessage()
- assert(m5.contains("DESC PARTITION is not allowed on a temporary view"))
-
- withView("permanent_view") {
- val m = intercept[AnalysisException] {
- sql("CREATE VIEW permanent_view AS SELECT * FROM partitioned_table")
- sql("DESC permanent_view PARTITION (c='Us', d=1)")
- }.getMessage()
- assert(m.contains("DESC PARTITION is not allowed on a view"))
- }
- }
- }
-
test("SPARK-5371: union with null and sum") {
val df = Seq((1, 1)).toDF("c1", "c2")
df.createOrReplaceTempView("table1")
@@ -676,7 +603,7 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
}
test("CTAS with serde") {
- sql("CREATE TABLE ctas1 AS SELECT key k, value FROM src ORDER BY k, value").collect()
+ sql("CREATE TABLE ctas1 AS SELECT key k, value FROM src ORDER BY k, value")
sql(
"""CREATE TABLE ctas2
| ROW FORMAT SERDE "org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe"
@@ -686,86 +613,76 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
| AS
| SELECT key, value
| FROM src
- | ORDER BY key, value""".stripMargin).collect()
+ | ORDER BY key, value""".stripMargin)
+
+ val storageCtas2 = spark.sessionState.catalog.getTableMetadata(TableIdentifier("ctas2")).storage
+ assert(storageCtas2.inputFormat == Some("org.apache.hadoop.hive.ql.io.RCFileInputFormat"))
+ assert(storageCtas2.outputFormat == Some("org.apache.hadoop.hive.ql.io.RCFileOutputFormat"))
+ assert(storageCtas2.serde == Some("org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe"))
+
sql(
"""CREATE TABLE ctas3
| ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' LINES TERMINATED BY '\012'
| STORED AS textfile AS
| SELECT key, value
| FROM src
- | ORDER BY key, value""".stripMargin).collect()
+ | ORDER BY key, value""".stripMargin)
// the table schema may look like (key: integer, value: string)
sql(
"""CREATE TABLE IF NOT EXISTS ctas4 AS
- | SELECT 1 AS key, value FROM src LIMIT 1""".stripMargin).collect()
+ | SELECT 1 AS key, value FROM src LIMIT 1""".stripMargin)
// do nothing because the table ctas4 already exists.
sql(
"""CREATE TABLE IF NOT EXISTS ctas4 AS
- | SELECT key, value FROM src ORDER BY key, value""".stripMargin).collect()
+ | SELECT key, value FROM src ORDER BY key, value""".stripMargin)
checkAnswer(
sql("SELECT k, value FROM ctas1 ORDER BY k, value"),
- sql("SELECT key, value FROM src ORDER BY key, value").collect().toSeq)
+ sql("SELECT key, value FROM src ORDER BY key, value"))
checkAnswer(
sql("SELECT key, value FROM ctas2 ORDER BY key, value"),
sql(
"""
SELECT key, value
FROM src
- ORDER BY key, value""").collect().toSeq)
+ ORDER BY key, value"""))
checkAnswer(
sql("SELECT key, value FROM ctas3 ORDER BY key, value"),
sql(
"""
SELECT key, value
FROM src
- ORDER BY key, value""").collect().toSeq)
+ ORDER BY key, value"""))
intercept[AnalysisException] {
sql(
"""CREATE TABLE ctas4 AS
- | SELECT key, value FROM src ORDER BY key, value""".stripMargin).collect()
+ | SELECT key, value FROM src ORDER BY key, value""".stripMargin)
}
checkAnswer(
sql("SELECT key, value FROM ctas4 ORDER BY key, value"),
sql("SELECT key, value FROM ctas4 LIMIT 1").collect().toSeq)
- /*
- Disabled because our describe table does not output the serde information right now.
- checkKeywordsExist(sql("DESC EXTENDED ctas2"),
- "name:key", "type:string", "name:value", "ctas2",
- "org.apache.hadoop.hive.ql.io.RCFileInputFormat",
- "org.apache.hadoop.hive.ql.io.RCFileOutputFormat",
- "org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe",
- "serde_p1=p1", "serde_p2=p2", "tbl_p1=p11", "tbl_p2=p22", "MANAGED_TABLE"
- )
- */
-
sql(
"""CREATE TABLE ctas5
| STORED AS parquet AS
| SELECT key, value
| FROM src
- | ORDER BY key, value""".stripMargin).collect()
+ | ORDER BY key, value""".stripMargin)
+ val storageCtas5 = spark.sessionState.catalog.getTableMetadata(TableIdentifier("ctas5")).storage
+ assert(storageCtas5.inputFormat ==
+ Some("org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat"))
+ assert(storageCtas5.outputFormat ==
+ Some("org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat"))
+ assert(storageCtas5.serde ==
+ Some("org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe"))
- /*
- Disabled because our describe table does not output the serde information right now.
- withSQLConf(HiveUtils.CONVERT_METASTORE_PARQUET.key -> "false") {
- checkKeywordsExist(sql("DESC EXTENDED ctas5"),
- "name:key", "type:string", "name:value", "ctas5",
- "org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat",
- "org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat",
- "org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe",
- "MANAGED_TABLE"
- )
- }
- */
// use the Hive SerDe for parquet tables
withSQLConf(HiveUtils.CONVERT_METASTORE_PARQUET.key -> "false") {
checkAnswer(
sql("SELECT key, value FROM ctas5 ORDER BY key, value"),
- sql("SELECT key, value FROM src ORDER BY key, value").collect().toSeq)
+ sql("SELECT key, value FROM src ORDER BY key, value"))
}
}