about summary refs log tree commit diff
path: root/sql
diff options
context:
space:
mode:
authorMarcelo Vanzin <vanzin@cloudera.com>2015-08-18 14:45:19 -0700
committerMichael Armbrust <michael@databricks.com>2015-08-18 14:45:19 -0700
commit492ac1facbc79ee251d45cff315598ec9935a0e2 (patch)
tree4895ffb9a9bda7b8984b5a17f35c7b2b0f5f8a01 /sql
parentfa41e0242f075843beff7dc600d1a6bac004bdc7 (diff)
downloadspark-492ac1facbc79ee251d45cff315598ec9935a0e2.tar.gz
spark-492ac1facbc79ee251d45cff315598ec9935a0e2.tar.bz2
spark-492ac1facbc79ee251d45cff315598ec9935a0e2.zip
[SPARK-10088] [SQL] Add support for "stored as avro" in HiveQL parser.
Author: Marcelo Vanzin <vanzin@cloudera.com> Closes #8282 from vanzin/SPARK-10088.
Diffstat (limited to 'sql')
-rw-r--r--sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala11
-rw-r--r--sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala12
2 files changed, 13 insertions, 10 deletions
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
index c3f2935010..ad33dee555 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
@@ -729,6 +729,17 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C
inputFormat = Option("org.apache.hadoop.mapred.SequenceFileInputFormat"),
outputFormat = Option("org.apache.hadoop.mapred.SequenceFileOutputFormat"))
+ case "avro" =>
+ tableDesc = tableDesc.copy(
+ inputFormat =
+ Option("org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat"),
+ outputFormat =
+ Option("org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat"))
+ if (tableDesc.serde.isEmpty) {
+ tableDesc = tableDesc.copy(
+ serde = Option("org.apache.hadoop.hive.serde2.avro.AvroSerDe"))
+ }
+
case _ =>
throw new SemanticException(
s"Unrecognized file format in STORED AS clause: ${child.getText}")
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
index 4eae699ac3..4da86636ac 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
@@ -25,10 +25,8 @@ import scala.language.implicitConversions
import org.apache.hadoop.hive.conf.HiveConf.ConfVars
import org.apache.hadoop.hive.ql.exec.FunctionRegistry
-import org.apache.hadoop.hive.ql.io.avro.{AvroContainerInputFormat, AvroContainerOutputFormat}
import org.apache.hadoop.hive.ql.processors._
import org.apache.hadoop.hive.serde2.`lazy`.LazySimpleSerDe
-import org.apache.hadoop.hive.serde2.avro.AvroSerDe
import org.apache.spark.sql.SQLConf
import org.apache.spark.sql.catalyst.analysis._
@@ -276,10 +274,7 @@ class TestHiveContext(sc: SparkContext) extends HiveContext(sc) {
"INSERT OVERWRITE TABLE serdeins SELECT * FROM src".cmd),
TestTable("episodes",
s"""CREATE TABLE episodes (title STRING, air_date STRING, doctor INT)
- |ROW FORMAT SERDE '${classOf[AvroSerDe].getCanonicalName}'
- |STORED AS
- |INPUTFORMAT '${classOf[AvroContainerInputFormat].getCanonicalName}'
- |OUTPUTFORMAT '${classOf[AvroContainerOutputFormat].getCanonicalName}'
+ |STORED AS avro
|TBLPROPERTIES (
| 'avro.schema.literal'='{
| "type": "record",
@@ -312,10 +307,7 @@ class TestHiveContext(sc: SparkContext) extends HiveContext(sc) {
TestTable("episodes_part",
s"""CREATE TABLE episodes_part (title STRING, air_date STRING, doctor INT)
|PARTITIONED BY (doctor_pt INT)
- |ROW FORMAT SERDE '${classOf[AvroSerDe].getCanonicalName}'
- |STORED AS
- |INPUTFORMAT '${classOf[AvroContainerInputFormat].getCanonicalName}'
- |OUTPUTFORMAT '${classOf[AvroContainerOutputFormat].getCanonicalName}'
+ |STORED AS avro
|TBLPROPERTIES (
| 'avro.schema.literal'='{
| "type": "record",