diff options
author | gatorsmile <gatorsmile@gmail.com> | 2017-02-08 10:11:44 -0500 |
---|---|---|
committer | gatorsmile <gatorsmile@gmail.com> | 2017-02-08 10:11:44 -0500 |
commit | 4d4d0de7f64cefbca28dc532b7864de9626aa241 (patch) | |
tree | cb18df849f06b90cdaae81c0a89ab0f55c4edb41 /sql/hive/src/test | |
parent | 0077bfcb93832d93009f73f4b80f2e3d98fd2fa4 (diff) | |
download | spark-4d4d0de7f64cefbca28dc532b7864de9626aa241.tar.gz spark-4d4d0de7f64cefbca28dc532b7864de9626aa241.tar.bz2 spark-4d4d0de7f64cefbca28dc532b7864de9626aa241.zip |
[SPARK-19279][SQL][FOLLOW-UP] Infer Schema for Hive Serde Tables
### What changes were proposed in this pull request?
`table.schema` is never empty for partitioned tables, because `table.schema` also contains the partition columns, even if the original table does not have any columns. This PR fixes the issue.
### How was this patch tested?
Added a test case
Author: gatorsmile <gatorsmile@gmail.com>
Closes #16848 from gatorsmile/inferHiveSerdeSchema.
Diffstat (limited to 'sql/hive/src/test')
-rw-r--r-- | sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala | 44 |
1 file changed, 44 insertions, 0 deletions
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala
index c262095df6..cf1fe2bc70 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala
@@ -27,6 +27,7 @@ import org.apache.spark.SparkContext
 import org.apache.spark.sql._
 import org.apache.spark.sql.catalyst.TableIdentifier
 import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType}
+import org.apache.spark.sql.execution.command.CreateTableCommand
 import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, LogicalRelation}
 import org.apache.spark.sql.hive.HiveExternalCatalog._
 import org.apache.spark.sql.hive.client.HiveClient
@@ -1308,6 +1309,49 @@ class MetastoreDataSourcesSuite extends QueryTest with SQLTestUtils with TestHiv
     }
   }

+  test("Infer schema for Hive serde tables") {
+    val tableName = "tab1"
+    val avroSchema =
+      """{
+        | "name": "test_record",
+        | "type": "record",
+        | "fields": [ {
+        | "name": "f0",
+        | "type": "int"
+        | }]
+        |}
+      """.stripMargin
+
+    Seq(true, false).foreach { isPartitioned =>
+      withTable(tableName) {
+        val partitionClause = if (isPartitioned) "PARTITIONED BY (ds STRING)" else ""
+        // Creates the (non-)partitioned Avro table
+        val plan = sql(
+          s"""
+             |CREATE TABLE $tableName
+             |$partitionClause
+             |ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
+             |STORED AS
+             |  INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
+             |  OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
+             |TBLPROPERTIES ('avro.schema.literal' = '$avroSchema')
+           """.stripMargin
+        ).queryExecution.analyzed
+
+        assert(plan.isInstanceOf[CreateTableCommand] &&
+          plan.asInstanceOf[CreateTableCommand].table.dataSchema.nonEmpty)
+
+        if (isPartitioned) {
+          sql(s"INSERT OVERWRITE TABLE $tableName partition (ds='a') SELECT 1")
+          checkAnswer(spark.table(tableName), Row(1, "a"))
+        } else {
+          sql(s"INSERT OVERWRITE TABLE $tableName SELECT 1")
+          checkAnswer(spark.table(tableName), Row(1))
+        }
+      }
+    }
+  }
+
   private def withDebugMode(f: => Unit): Unit = {
     val previousValue = sparkSession.sparkContext.conf.get(DEBUG_MODE)
     try {