path: root/sql/hive/src/test
author     Wenchen Fan <wenchen@databricks.com>    2016-11-30 09:47:30 -0800
committer  Reynold Xin <rxin@databricks.com>       2016-11-30 09:47:30 -0800
commit     3f03c90a807872d47588f3c3920769b8978033bf (patch)
tree       863261a3c70532d2f9fdf770b11ede4ab9825810 /sql/hive/src/test
parent     c24076dcf867f8d7bb328055ca817bc09ad0c1d1 (diff)
[SPARK-18220][SQL] read Hive orc table with varchar column should not fail
## What changes were proposed in this pull request?

Spark SQL only has `StringType`, so when reading a Hive table with a varchar column, we read that column as `StringType`. However, we still need to use a varchar `ObjectInspector` to read a varchar column in a Hive table, which means we need to know the actual column type on the Hive side. In Spark 2.1, after https://github.com/apache/spark/pull/14363, we parse the Hive type string to a catalyst type, which means the actual column type on the Hive side is erased. We may then use a string `ObjectInspector` to read the varchar column and fail.

This PR keeps the original Hive column type string in the metadata of `StructField`, and uses it when converting the field back to a Hive column.

## How was this patch tested?

Newly added regression test.

Author: Wenchen Fan <wenchen@databricks.com>

Closes #16060 from cloud-fan/varchar.
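To make the mechanism concrete, here is a minimal sketch of the approach, assuming a metadata key named `HIVE_TYPE_STRING` and a `hiveColumnType` helper; both names are illustrative, not necessarily the exact identifiers the patch introduces:

```scala
import org.apache.spark.sql.types._

// Illustrative key name; the real patch defines its own constant.
val HIVE_TYPE_STRING = "HIVE_TYPE_STRING"

// When reading a Hive schema, record the raw Hive type string alongside the
// catalyst type, so e.g. VARCHAR(10) is not silently erased to StringType.
val varcharField = StructField(
  name = "a",
  dataType = StringType, // catalyst has no varchar type
  nullable = true,
  metadata = new MetadataBuilder()
    .putString(HIVE_TYPE_STRING, "varchar(10)")
    .build())

// When converting a field back to a Hive column, prefer the recorded Hive
// type string over the (lossy) catalyst type, so a varchar ObjectInspector
// can be chosen when the ORC data is read back.
def hiveColumnType(f: StructField): String =
  if (f.metadata.contains(HIVE_TYPE_STRING)) f.metadata.getString(HIVE_TYPE_STRING)
  else f.dataType.catalogString
```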
Diffstat (limited to 'sql/hive/src/test')
-rw-r--r--  sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogBackwardCompatibilitySuite.scala | 4
-rw-r--r--  sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcSourceSuite.scala | 12
2 files changed, 14 insertions, 2 deletions
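The test assertions below switch from `==` to `sameType` because the schema read back from the metastore now carries the extra Hive type string in field metadata: `StructType` equality compares field metadata, while `sameType` compares only the structure (field names and data types, ignoring nullability and metadata). A small sketch of the difference, reusing the illustrative metadata key from above:

```scala
import org.apache.spark.sql.types._

val expected = StructType(Seq(StructField("a", StringType)))

// Same shape, but with the Hive type string recorded in field metadata.
val readBack = StructType(Seq(StructField("a", StringType, nullable = true,
  new MetadataBuilder().putString("HIVE_TYPE_STRING", "varchar(10)").build())))

assert(readBack != expected)        // case-class equality sees the metadata
assert(readBack.sameType(expected)) // sameType compares structure only
```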
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogBackwardCompatibilitySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogBackwardCompatibilitySuite.scala
index cca4480c44..c5753cec80 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogBackwardCompatibilitySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogBackwardCompatibilitySuite.scala
@@ -205,7 +205,7 @@ class HiveExternalCatalogBackwardCompatibilitySuite extends QueryTest
   test("make sure we can read table created by old version of Spark") {
     for ((tbl, expectedSchema) <- rawTablesAndExpectations) {
       val readBack = getTableMetadata(tbl.identifier.table)
-      assert(readBack.schema == expectedSchema)
+      assert(readBack.schema.sameType(expectedSchema))
 
       if (tbl.tableType == CatalogTableType.EXTERNAL) {
         // trim the URI prefix
@@ -235,7 +235,7 @@
       sql(s"ALTER TABLE ${tbl.identifier} RENAME TO $newName")
 
       val readBack = getTableMetadata(newName)
-      assert(readBack.schema == expectedSchema)
+      assert(readBack.schema.sameType(expectedSchema))
 
       // trim the URI prefix
       val actualTableLocation = new URI(readBack.storage.locationUri.get).getPath
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcSourceSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcSourceSuite.scala
index 12f948041a..2b40469051 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcSourceSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcSourceSuite.scala
@@ -22,6 +22,7 @@ import java.io.File
 import org.scalatest.BeforeAndAfterAll
 
 import org.apache.spark.sql.{QueryTest, Row}
+import org.apache.spark.sql.hive.HiveExternalCatalog
 import org.apache.spark.sql.hive.test.TestHiveSingleton
 import org.apache.spark.sql.sources._
 import org.apache.spark.sql.types._
@@ -150,6 +151,17 @@ abstract class OrcSuite extends QueryTest with TestHiveSingleton with BeforeAndAfterAll
   test("SPARK-18433: Improve DataSource option keys to be more case-insensitive") {
     assert(new OrcOptions(Map("Orc.Compress" -> "NONE")).compressionCodec == "NONE")
   }
+
+  test("SPARK-18220: read Hive orc table with varchar column") {
+    val hiveClient = spark.sharedState.externalCatalog.asInstanceOf[HiveExternalCatalog].client
+    try {
+      hiveClient.runSqlHive("CREATE TABLE orc_varchar(a VARCHAR(10)) STORED AS orc")
+      hiveClient.runSqlHive("INSERT INTO TABLE orc_varchar SELECT 'a' FROM (SELECT 1) t")
+      checkAnswer(spark.table("orc_varchar"), Row("a"))
+    } finally {
+      hiveClient.runSqlHive("DROP TABLE IF EXISTS orc_varchar")
+    }
+  }
 }
 
 class OrcSourceSuite extends OrcSuite {