[SQL] Add test case with workaround for reading partitioned Avro files

In order to read from partitioned Avro files, we also need to set the `SERDEPROPERTIES`, since `TBLPROPERTIES` are not passed to the SerDe during initialization. This PR simply adds a test to make sure we don't break this workaround. Author: Michael Armbrust <michael@databricks.com> Closes #2340 from marmbrus/avroPartitioned and squashes the following commits: 6b969d6 [Michael Armbrust] fix style; fea2124 [Michael Armbrust] Add test case with workaround for reading partitioned avro files.
author: Michael Armbrust <michael@databricks.com> 2014-09-10 20:57:38 -0700
committer: Michael Armbrust <michael@databricks.com> 2014-09-10 20:57:38 -0700
commit: 84e2c8bfe41837baf2aeffa9741e4dbd14351981 (patch)
tree: ee307abd6291da4da321a7fa8f1212c18d810a8f
parent: 79cdb9b64ad2fa3ab7f2c221766d36658b917c40 (diff)
download: spark-84e2c8bfe41837baf2aeffa9741e4dbd14351981.tar.gz
spark-84e2c8bfe41837baf2aeffa9741e4dbd14351981.tar.bz2
spark-84e2c8bfe41837baf2aeffa9741e4dbd14351981.zip
3 files changed, 78 insertions, 1 deletions
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/TestHive.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/TestHive.scala
index a013f3f7a8..6974f3e581 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/TestHive.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/TestHive.scala
@@ -269,7 +269,74 @@ class TestHiveContext(sc: SparkContext) extends HiveContext(sc) {
          |)
        """.stripMargin.cmd,
       s"LOAD DATA LOCAL INPATH '${getHiveFile("data/files/episodes.avro")}' INTO TABLE episodes".cmd
-    )
+    ),
+    // THIS TABLE IS NOT THE SAME AS THE HIVE TEST TABLE episodes_partitioned AS DYNAMIC
+    // PARTITIONING IS NOT YET SUPPORTED
+    TestTable("episodes_part",
+      s"""CREATE TABLE episodes_part (title STRING, air_date STRING, doctor INT)
+         |PARTITIONED BY (doctor_pt INT)
+         |ROW FORMAT SERDE '${classOf[AvroSerDe].getCanonicalName}'
+         |STORED AS
+         |INPUTFORMAT '${classOf[AvroContainerInputFormat].getCanonicalName}'
+         |OUTPUTFORMAT '${classOf[AvroContainerOutputFormat].getCanonicalName}'
+         |TBLPROPERTIES (
+         |  'avro.schema.literal'='{
+         |    "type": "record",
+         |    "name": "episodes",
+         |    "namespace": "testing.hive.avro.serde",
+         |    "fields": [
+         |      {
+         |          "name": "title",
+         |          "type": "string",
+         |          "doc": "episode title"
+         |      },
+         |      {
+         |          "name": "air_date",
+         |          "type": "string",
+         |          "doc": "initial date"
+         |      },
+         |      {
+         |          "name": "doctor",
+         |          "type": "int",
+         |          "doc": "main actor playing the Doctor in episode"
+         |      }
+         |    ]
+         |  }'
+         |)
+       """.stripMargin.cmd,
+      // WORKAROUND: Required to pass schema to SerDe for partitioned tables.
+      // TODO: Pass this automatically from the table to partitions.
+      s"""
+         |ALTER TABLE episodes_part SET SERDEPROPERTIES (
+         |  'avro.schema.literal'='{
+         |    "type": "record",
+         |    "name": "episodes",
+         |    "namespace": "testing.hive.avro.serde",
+         |    "fields": [
+         |      {
+         |          "name": "title",
+         |          "type": "string",
+         |          "doc": "episode title"
+         |      },
+         |      {
+         |          "name": "air_date",
+         |          "type": "string",
+         |          "doc": "initial date"
+         |      },
+         |      {
+         |          "name": "doctor",
+         |          "type": "int",
+         |          "doc": "main actor playing the Doctor in episode"
+         |      }
+         |    ]
+         |  }'
+         |)
+        """.stripMargin.cmd,
+      s"""
+        INSERT OVERWRITE TABLE episodes_part PARTITION (doctor_pt=1)
+        SELECT title, air_date, doctor FROM episodes
+      """.cmd
+      )
   )
 
   hiveQTestUtilTables.foreach(registerTestTable)
diff --git a/sql/hive/src/test/resources/golden/Read Partitioned with AvroSerDe-0-e4501461c855cc9071a872a64186c3de b/sql/hive/src/test/resources/golden/Read Partitioned with AvroSerDe-0-e4501461c855cc9071a872a64186c3de
new file mode 100644
index 0000000000..49c8434730
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/Read Partitioned with AvroSerDe-0-e4501461c855cc9071a872a64186c3de
@@ -0,0 +1,8 @@
+The Eleventh Hour	3 April 2010	11	1
+The Doctor's Wife	14 May 2011	11	1
+Horror of Fang Rock	3 September 1977	4	1
+An Unearthly Child	23 November 1963	1	1
+The Mysterious Planet	6 September 1986	6	1
+Rose	26 March 2005	9	1
+The Power of the Daleks	5 November 1966	2	1
+Castrolava	4 January 1982	5	1
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeSuite.scala
index 8bc72384a6..7486bfa82b 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeSuite.scala
@@ -37,4 +37,6 @@ class HiveSerDeSuite extends HiveComparisonTest with BeforeAndAfterAll {
   createQueryTest("Read with RegexSerDe", "SELECT * FROM sales")
 
   createQueryTest("Read with AvroSerDe", "SELECT * FROM episodes")
+
+  createQueryTest("Read Partitioned with AvroSerDe", "SELECT * FROM episodes_part")
 }
author	Michael Armbrust <michael@databricks.com>	2014-09-10 20:57:38 -0700
committer	Michael Armbrust <michael@databricks.com>	2014-09-10 20:57:38 -0700
commit	84e2c8bfe41837baf2aeffa9741e4dbd14351981 (patch)
tree	ee307abd6291da4da321a7fa8f1212c18d810a8f
parent	79cdb9b64ad2fa3ab7f2c221766d36658b917c40 (diff)
download	spark-84e2c8bfe41837baf2aeffa9741e4dbd14351981.tar.gz spark-84e2c8bfe41837baf2aeffa9741e4dbd14351981.tar.bz2 spark-84e2c8bfe41837baf2aeffa9741e4dbd14351981.zip