path: root/sql/hive
author    Cheng Lian <lian@databricks.com>  2016-05-04 16:44:09 +0800
committer Cheng Lian <lian@databricks.com>  2016-05-04 16:44:09 +0800
commit    f152fae306dc75565cb4648ee1211416d7c0bb23 (patch)
tree      f5e7075d75968f037e8f670e1488bbb10a2c1c33 /sql/hive
parent    6c12e801e84565265d2939b920901d1805d5935f (diff)
[SPARK-14127][SQL] Native "DESC [EXTENDED | FORMATTED] <table>" DDL command
## What changes were proposed in this pull request?

This PR implements a native `DESC [EXTENDED | FORMATTED] <table>` DDL command. Sample output:

```
scala> spark.sql("desc extended src").show(100, truncate = false)
+----------------------------+---------------------------------+-------+
|col_name                    |data_type                        |comment|
+----------------------------+---------------------------------+-------+
|key                         |int                              |       |
|value                       |string                           |       |
|                            |                                 |       |
|# Detailed Table Information|CatalogTable(`default`.`src`, ...|       |
+----------------------------+---------------------------------+-------+

scala> spark.sql("desc formatted src").show(100, truncate = false)
+----------------------------+----------------------------------------------------------+-------+
|col_name                    |data_type                                                 |comment|
+----------------------------+----------------------------------------------------------+-------+
|key                         |int                                                       |       |
|value                       |string                                                    |       |
|                            |                                                          |       |
|# Detailed Table Information|                                                          |       |
|Database:                   |default                                                   |       |
|Owner:                      |lian                                                      |       |
|Create Time:                |Mon Jan 04 17:06:00 CST 2016                              |       |
|Last Access Time:           |Thu Jan 01 08:00:00 CST 1970                              |       |
|Location:                   |hdfs://localhost:9000/user/hive/warehouse_hive121/src     |       |
|Table Type:                 |MANAGED                                                   |       |
|Table Parameters:           |                                                          |       |
|  transient_lastDdlTime     |1451898360                                                |       |
|                            |                                                          |       |
|# Storage Information       |                                                          |       |
|SerDe Library:              |org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe        |       |
|InputFormat:                |org.apache.hadoop.mapred.TextInputFormat                  |       |
|OutputFormat:               |org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat|       |
|Num Buckets:                |-1                                                        |       |
|Bucket Columns:             |[]                                                        |       |
|Sort Columns:               |[]                                                        |       |
|Storage Desc Parameters:    |                                                          |       |
|  serialization.format      |1                                                         |       |
+----------------------------+----------------------------------------------------------+-------+
```

## How was this patch tested?

A test case is added to `HiveDDLSuite` to check the command output.

Author: Cheng Lian <lian@databricks.com>

Closes #12844 from liancheng/spark-14127-desc-table.
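As an aside for readers of the patch below, the `FORMATTED` variant is rendered as extra (col_name, data_type, comment) rows appended after the column listing. Here is a minimal sketch of that row-building pattern, using a hypothetical `TableMeta` stand-in rather than Spark's actual `CatalogTable` and `DescribeTableCommand`:

```
// Minimal sketch only: TableMeta is a hypothetical stand-in for Spark's
// CatalogTable; the real DescribeTableCommand has more fields and rules.
case class TableMeta(
    database: String,
    owner: String,
    location: String,
    tableType: String,
    columns: Seq[(String, String)]) // (column name, data type)

def describeFormatted(meta: TableMeta): Seq[(String, String, String)] = {
  // Plain column rows first, mirroring the plain DESC output.
  val columnRows = meta.columns.map { case (name, dataType) => (name, dataType, "") }
  // Then a blank separator row and the "# Detailed Table Information" section,
  // each detail rendered as a (label, value, empty comment) row.
  val detailRows = Seq(
    ("", "", ""),
    ("# Detailed Table Information", "", ""),
    ("Database:", meta.database, ""),
    ("Owner:", meta.owner, ""),
    ("Location:", meta.location, ""),
    ("Table Type:", meta.tableType, ""))
  columnRows ++ detailRows
}
```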
Diffstat (limited to 'sql/hive')
-rw-r--r--  sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala         | 4
-rw-r--r--  sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateTableAsSelect.scala | 3
-rw-r--r--  sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala           | 2
-rw-r--r--  sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala     | 1
-rw-r--r--  sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala          | 1
-rw-r--r--  sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala        | 17
6 files changed, 25 insertions, 3 deletions
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
index 47d9546c4f..cddc0b6e34 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
@@ -351,6 +351,7 @@ private[hive] class HiveClientImpl(
sortColumnNames = Seq(), // TODO: populate this
bucketColumnNames = h.getBucketCols.asScala,
numBuckets = h.getNumBuckets,
+ owner = h.getOwner,
createTime = h.getTTable.getCreateTime.toLong * 1000,
lastAccessTime = h.getLastAccessTime.toLong * 1000,
storage = CatalogStorageFormat(
@@ -358,6 +359,7 @@ private[hive] class HiveClientImpl(
inputFormat = Option(h.getInputFormatClass).map(_.getName),
outputFormat = Option(h.getOutputFormatClass).map(_.getName),
serde = Option(h.getSerializationLib),
+ compressed = h.getTTable.getSd.isCompressed,
serdeProperties = h.getTTable.getSd.getSerdeInfo.getParameters.asScala.toMap
),
properties = h.getParameters.asScala.toMap,
@@ -788,7 +790,7 @@ private[hive] class HiveClientImpl(
inputFormat = Option(apiPartition.getSd.getInputFormat),
outputFormat = Option(apiPartition.getSd.getOutputFormat),
serde = Option(apiPartition.getSd.getSerdeInfo.getSerializationLib),
+ compressed = apiPartition.getSd.isCompressed,
serdeProperties = apiPartition.getSd.getSerdeInfo.getParameters.asScala.toMap))
}
-
}
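Note the two access paths in the hunks above: convenience getters on Hive's `Table` wrapper (`h.getOwner`) versus the underlying Thrift objects for fields the wrapper does not expose (`h.getTTable.getSd.isCompressed`). A hedged sketch of that extraction, assuming the Hive client libraries are on the classpath and using a hypothetical `StorageInfo` holder in place of Spark's `CatalogStorageFormat`:

```
import org.apache.hadoop.hive.ql.metadata.Table

// Hypothetical holder; Spark's real type here is CatalogStorageFormat.
case class StorageInfo(
    inputFormat: Option[String],
    outputFormat: Option[String],
    serde: Option[String],
    compressed: Boolean)

def extractStorage(h: Table): StorageInfo =
  StorageInfo(
    // The wrapper getters may return null when a format class is unset,
    // hence Option(...) rather than Some(...).
    inputFormat = Option(h.getInputFormatClass).map(_.getName),
    outputFormat = Option(h.getOutputFormatClass).map(_.getName),
    serde = Option(h.getSerializationLib),
    // No wrapper getter for compression, so read the Thrift StorageDescriptor.
    compressed = h.getTTable.getSd.isCompressed)
```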
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateTableAsSelect.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateTableAsSelect.scala
index 08d4b99d30..9dfbafae87 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateTableAsSelect.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateTableAsSelect.scala
@@ -56,7 +56,8 @@ case class CreateTableAsSelect(
outputFormat =
tableDesc.storage.outputFormat
.orElse(Some(classOf[HiveIgnoreKeyTextOutputFormat[Text, Text]].getName)),
- serde = tableDesc.storage.serde.orElse(Some(classOf[LazySimpleSerDe].getName)))
+ serde = tableDesc.storage.serde.orElse(Some(classOf[LazySimpleSerDe].getName)),
+ compressed = tableDesc.storage.compressed)
val withSchema = if (withFormat.schema.isEmpty) {
// Hive doesn't support specifying the column list for target table in CTAS
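The hunk keeps CTAS's existing `orElse` defaulting for unspecified format fields and simply threads `compressed` through. A minimal sketch of that defaulting pattern, with a hypothetical `Storage` type standing in for `CatalogStorageFormat` (the default class names mirror the Hive text format seen above):

```
// Hypothetical simplified storage descriptor, sketch only.
case class Storage(
    inputFormat: Option[String] = None,
    outputFormat: Option[String] = None,
    serde: Option[String] = None,
    compressed: Boolean = false)

def withTextFormatDefaults(s: Storage): Storage = s.copy(
  inputFormat = s.inputFormat
    .orElse(Some("org.apache.hadoop.mapred.TextInputFormat")),
  outputFormat = s.outputFormat
    .orElse(Some("org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat")),
  serde = s.serde
    .orElse(Some("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe")),
  compressed = s.compressed) // pass through unchanged; no default overrides it
```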
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala
index 8dc3c64353..c4ebc604dc 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala
@@ -579,7 +579,7 @@ class HiveDDLCommandSuite extends PlanTest {
assert(source2.table == "table2")
}
- test("load data") {
+ test("load data") {
val v1 = "LOAD DATA INPATH 'path' INTO TABLE table1"
val (table, path, isLocal, isOverwrite, partition) = parser.parsePlan(v1).collect {
case LoadData(t, path, l, o, partition) => (t, path, l, o, partition)
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala
index c3a9f2479c..4bdcb96feb 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala
@@ -732,6 +732,7 @@ class MetastoreDataSourcesSuite extends QueryTest with SQLTestUtils with TestHiv
inputFormat = None,
outputFormat = None,
serde = None,
+ compressed = false,
serdeProperties = Map(
"path" -> sessionState.catalog.hiveDefaultTableFilePath(TableIdentifier(tableName)))
),
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala
index 9341b3816f..a6a5ab3988 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala
@@ -157,6 +157,7 @@ class VersionsSuite extends SparkFunSuite with Logging {
outputFormat = Some(
classOf[org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat[_, _]].getName),
serde = Some(classOf[org.apache.hadoop.hive.serde2.`lazy`.LazySimpleSerDe].getName()),
+ compressed = false,
serdeProperties = Map.empty
))
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
index 687a4a7e51..373d1a1e0e 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
@@ -348,4 +348,21 @@ class HiveDDLSuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
}
}
}
+
+ test("desc table") {
+ withTable("tab1") {
+ val tabName = "tab1"
+ sql(s"CREATE TABLE $tabName(c1 int)")
+
+ assert(sql(s"DESC $tabName").collect().length == 1)
+
+ assert(
+ sql(s"DESC FORMATTED $tabName").collect()
+ .exists(_.getString(0) == "# Storage Information"))
+
+ assert(
+ sql(s"DESC EXTENDED $tabName").collect()
+ .exists(_.getString(0) == "# Detailed Table Information"))
+ }
+ }
}