diff options
author | Cheng Lian <lian@databricks.com> | 2016-05-04 16:44:09 +0800 |
---|---|---|
committer | Cheng Lian <lian@databricks.com> | 2016-05-04 16:44:09 +0800 |
commit | f152fae306dc75565cb4648ee1211416d7c0bb23 (patch) | |
tree | f5e7075d75968f037e8f670e1488bbb10a2c1c33 /sql/hive | |
parent | 6c12e801e84565265d2939b920901d1805d5935f (diff) | |
download | spark-f152fae306dc75565cb4648ee1211416d7c0bb23.tar.gz spark-f152fae306dc75565cb4648ee1211416d7c0bb23.tar.bz2 spark-f152fae306dc75565cb4648ee1211416d7c0bb23.zip |
[SPARK-14127][SQL] Native "DESC [EXTENDED | FORMATTED] <table>" DDL command
## What changes were proposed in this pull request?
This PR implements the native `DESC [EXTENDED | FORMATTED] <table>` DDL command. Sample output:
```
scala> spark.sql("desc extended src").show(100, truncate = false)
+----------------------------+---------------------------------+-------+
|col_name |data_type |comment|
+----------------------------+---------------------------------+-------+
|key |int | |
|value |string | |
| | | |
|# Detailed Table Information|CatalogTable(`default`.`src`, ...| |
+----------------------------+---------------------------------+-------+
scala> spark.sql("desc formatted src").show(100, truncate = false)
+----------------------------+----------------------------------------------------------+-------+
|col_name |data_type |comment|
+----------------------------+----------------------------------------------------------+-------+
|key |int | |
|value |string | |
| | | |
|# Detailed Table Information| | |
|Database: |default | |
|Owner: |lian | |
|Create Time: |Mon Jan 04 17:06:00 CST 2016 | |
|Last Access Time: |Thu Jan 01 08:00:00 CST 1970 | |
|Location: |hdfs://localhost:9000/user/hive/warehouse_hive121/src | |
|Table Type: |MANAGED | |
|Table Parameters: | | |
| transient_lastDdlTime |1451898360 | |
| | | |
|# Storage Information | | |
|SerDe Library: |org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe | |
|InputFormat: |org.apache.hadoop.mapred.TextInputFormat | |
|OutputFormat: |org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat| |
|Num Buckets: |-1 | |
|Bucket Columns: |[] | |
|Sort Columns: |[] | |
|Storage Desc Parameters: | | |
| serialization.format |1 | |
+----------------------------+----------------------------------------------------------+-------+
```
## How was this patch tested?
A test case is added to `HiveDDLSuite` to check command output.
Author: Cheng Lian <lian@databricks.com>
Closes #12844 from liancheng/spark-14127-desc-table.
Diffstat (limited to 'sql/hive')
6 files changed, 25 insertions, 3 deletions
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala index 47d9546c4f..cddc0b6e34 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala @@ -351,6 +351,7 @@ private[hive] class HiveClientImpl( sortColumnNames = Seq(), // TODO: populate this bucketColumnNames = h.getBucketCols.asScala, numBuckets = h.getNumBuckets, + owner = h.getOwner, createTime = h.getTTable.getCreateTime.toLong * 1000, lastAccessTime = h.getLastAccessTime.toLong * 1000, storage = CatalogStorageFormat( @@ -358,6 +359,7 @@ private[hive] class HiveClientImpl( inputFormat = Option(h.getInputFormatClass).map(_.getName), outputFormat = Option(h.getOutputFormatClass).map(_.getName), serde = Option(h.getSerializationLib), + compressed = h.getTTable.getSd.isCompressed, serdeProperties = h.getTTable.getSd.getSerdeInfo.getParameters.asScala.toMap ), properties = h.getParameters.asScala.toMap, @@ -788,7 +790,7 @@ private[hive] class HiveClientImpl( inputFormat = Option(apiPartition.getSd.getInputFormat), outputFormat = Option(apiPartition.getSd.getOutputFormat), serde = Option(apiPartition.getSd.getSerdeInfo.getSerializationLib), + compressed = apiPartition.getSd.isCompressed, serdeProperties = apiPartition.getSd.getSerdeInfo.getParameters.asScala.toMap)) } - } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateTableAsSelect.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateTableAsSelect.scala index 08d4b99d30..9dfbafae87 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateTableAsSelect.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateTableAsSelect.scala @@ -56,7 +56,8 @@ case class CreateTableAsSelect( outputFormat = tableDesc.storage.outputFormat 
.orElse(Some(classOf[HiveIgnoreKeyTextOutputFormat[Text, Text]].getName)), - serde = tableDesc.storage.serde.orElse(Some(classOf[LazySimpleSerDe].getName))) + serde = tableDesc.storage.serde.orElse(Some(classOf[LazySimpleSerDe].getName)), + compressed = tableDesc.storage.compressed) val withSchema = if (withFormat.schema.isEmpty) { // Hive doesn't support specifying the column list for target table in CTAS diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala index 8dc3c64353..c4ebc604dc 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala @@ -579,7 +579,7 @@ class HiveDDLCommandSuite extends PlanTest { assert(source2.table == "table2") } - test("load data") { + test("load data") { val v1 = "LOAD DATA INPATH 'path' INTO TABLE table1" val (table, path, isLocal, isOverwrite, partition) = parser.parsePlan(v1).collect { case LoadData(t, path, l, o, partition) => (t, path, l, o, partition) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala index c3a9f2479c..4bdcb96feb 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala @@ -732,6 +732,7 @@ class MetastoreDataSourcesSuite extends QueryTest with SQLTestUtils with TestHiv inputFormat = None, outputFormat = None, serde = None, + compressed = false, serdeProperties = Map( "path" -> sessionState.catalog.hiveDefaultTableFilePath(TableIdentifier(tableName))) ), diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala index 
9341b3816f..a6a5ab3988 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala @@ -157,6 +157,7 @@ class VersionsSuite extends SparkFunSuite with Logging { outputFormat = Some( classOf[org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat[_, _]].getName), serde = Some(classOf[org.apache.hadoop.hive.serde2.`lazy`.LazySimpleSerDe].getName()), + compressed = false, serdeProperties = Map.empty )) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index 687a4a7e51..373d1a1e0e 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -348,4 +348,21 @@ class HiveDDLSuite extends QueryTest with SQLTestUtils with TestHiveSingleton { } } } + + test("desc table") { + withTable("tab1") { + val tabName = "tab1" + sql(s"CREATE TABLE $tabName(c1 int)") + + assert(sql(s"DESC $tabName").collect().length == 1) + + assert( + sql(s"DESC FORMATTED $tabName").collect() + .exists(_.getString(0) == "# Storage Information")) + + assert( + sql(s"DESC EXTENDED $tabName").collect() + .exists(_.getString(0) == "# Detailed Table Information")) + } + } } |