author     Dilip Biswal <dbiswal@us.ibm.com>      2016-03-07 09:46:28 -0800
committer  Davies Liu <davies.liu@gmail.com>      2016-03-07 09:46:28 -0800
commit     d7eac9d7951c19302ed41fe03eaa38394aeb9c1a
tree       aa1fbead555f56801a30f888849a6be9e093768c
parent     03f57a6c2dd6ffd4038ca9cecbfc221deaf52393
[SPARK-13651] Generator outputs are not resolved correctly, resulting in a runtime error
## What changes were proposed in this pull request?

```
Seq(("id1", "value1")).toDF("key", "value").registerTempTable("src")
sqlContext.sql("SELECT t1.* FROM src LATERAL VIEW explode(map('key1', 100, 'key2', 200)) t1 AS key, value")
```

This produces the following logical plan:

```
Project [key#2,value#3]
+- Generate explode(HiveGenericUDF#org.apache.hadoop.hive.ql.udf.generic.GenericUDFMap(key1,100,key2,200)), true, false, Some(genoutput), [key#2,value#3]
   +- SubqueryAlias src
      +- Project [_1#0 AS key#2,_2#1 AS value#3]
         +- LocalRelation [_1#0,_2#1], [[id1,value1]]
```

The query then fails at run time with the following error:

```
java.lang.ClassCastException: java.lang.Integer cannot be cast to org.apache.spark.unsafe.types.UTF8String
  at org.apache.spark.sql.catalyst.expressions.BaseGenericInternalRow$class.getUTF8String(rows.scala:46)
  at org.apache.spark.sql.catalyst.expressions.GenericInternalRow.getUTF8String(rows.scala:221)
  at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificUnsafeProjection.apply(generated.java:42)
  at org.apache.spark.sql.execution.Generate$$anonfun$doExecute$1$$anonfun$apply$9.apply(Generate.scala:98)
  at org.apache.spark.sql.execution.Generate$$anonfun$doExecute$1$$anonfun$apply$9.apply(Generate.scala:96)
  at scala.collection.Iterator$$anon$11.next(Iterator.scala:370)
  at scala.collection.Iterator$$anon$11.next(Iterator.scala:370)
  at scala.collection.Iterator$class.foreach(Iterator.scala:742)
  at scala.collection.AbstractIterator.foreach(Iterator.scala:1194)
  <stack-trace omitted.....>
```

The Generate operator's output attributes ([key#2,value#3]) are wrongly resolved against its child (the LocalRelation), due to the case at
https://github.com/apache/spark/blob/master/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala#L537-L548
Since the child's `value` column is string-typed while the generator yields integer values, the generated unsafe projection tries to read an `Integer` as a `UTF8String` and fails.

## How was this patch tested?

Added unit tests in hive/SQLQuerySuite and AnalysisSuite.

Author: Dilip Biswal <dbiswal@us.ibm.com>

Closes #11497 from dilipbiswal/spark-13651.
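For quick verification, here is a minimal spark-shell sketch of the same reproduction (assuming a pre-2.0 `sqlContext`, as used in the description); the expected rows after the fix are the ones asserted by the new SQLQuerySuite test below.

```scala
import sqlContext.implicits._

// Reproduction from the description above.
Seq(("id1", "value1")).toDF("key", "value").registerTempTable("src")

val df = sqlContext.sql(
  "SELECT t1.* FROM src " +
  "LATERAL VIEW explode(map('key1', 100, 'key2', 200)) t1 AS key, value")

// Before this patch: java.lang.ClassCastException (Integer cannot be cast to UTF8String).
// After this patch:  [key1,100] and [key2,200], matching the new test case.
df.collect().foreach(println)
```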
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala |  5 +++--
-rw-r--r--  sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala   | 10 ++++++++++
2 files changed, 13 insertions(+), 2 deletions(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index fbbc3ee891..b5fa372643 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -512,8 +512,9 @@ class Analyzer(
       // A special case for Generate, because the output of Generate should not be resolved by
       // ResolveReferences. Attributes in the output will be resolved by ResolveGenerate.
-      case g @ Generate(generator, join, outer, qualifier, output, child)
-        if child.resolved && !generator.resolved =>
+      case g @ Generate(generator, _, _, _, _, _) if generator.resolved => g
+
+      case g @ Generate(generator, join, outer, qualifier, output, child) =>
         val newG = resolveExpression(generator, child, throws = true)
         if (newG.fastEquals(generator)) {
           g
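To make the intent of the hunk easier to follow, below is a toy model of the rule's control flow using simplified stand-in types (hypothetical classes, not the real Catalyst ones). The old single case only fired while the generator was unresolved, so a Generate whose generator was already resolved fell through to the generic reference resolution and had its output bound to the child's (wrongly typed) columns; the new first case short-circuits instead and leaves the output for ResolveGenerate.

```scala
object GenerateResolutionSketch {

  // Simplified stand-ins for the Catalyst types referenced in the hunk above.
  case class Attribute(name: String, dataType: String)
  case class Generator(resolved: Boolean, elementAttrs: Seq[Attribute])
  case class Generate(generator: Generator, output: Seq[Attribute], childOutput: Seq[Attribute])

  // Old rule (sketch): only handled the unresolved-generator case, so a Generate with a
  // resolved generator was left to generic resolution, which bound `output` to the
  // child's columns by name -- the bug described in the commit message.
  def resolveOld(g: Generate): Generate =
    if (!g.generator.resolved) {
      g // resolveExpression(generator, child) -- elided in this sketch
    } else {
      g.copy(output = g.output.map(a => g.childOutput.find(_.name == a.name).getOrElse(a)))
    }

  // New rule (sketch): a resolved generator short-circuits and the output is left
  // untouched, to be derived later from the generator's own element schema.
  def resolveNew(g: Generate): Generate =
    if (g.generator.resolved) g
    else g // resolveExpression(generator, child) -- elided in this sketch

  def main(args: Array[String]): Unit = {
    val child = Seq(Attribute("key", "string"), Attribute("value", "string")) // src table
    val gen = Generator(resolved = true,
      Seq(Attribute("key", "string"), Attribute("value", "int")))             // explode(map(...))
    val node = Generate(gen, output = gen.elementAttrs, childOutput = child)

    println(resolveOld(node).output) // value picks up the child's string type (bug)
    println(resolveNew(node).output) // value keeps the generator's int type
  }
}
```

Running `main` prints a string-typed `value` attribute for the old path (which is what later leads to the Integer-to-UTF8String cast failure) and the unchanged, integer-typed output for the new one.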
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
index e478bcd0ed..2f8c2beb17 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
@@ -92,6 +92,16 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
     checkAnswer(query, Row(1, 1) :: Row(1, 2) :: Row(1, 3) :: Nil)
   }
 
+  test("SPARK-13651: generator outputs shouldn't be resolved from its child's output") {
+    withTempTable("src") {
+      Seq(("id1", "value1")).toDF("key", "value").registerTempTable("src")
+      val query =
+        sql("SELECT genoutput.* FROM src " +
+          "LATERAL VIEW explode(map('key1', 100, 'key2', 200)) genoutput AS key, value")
+      checkAnswer(query, Row("key1", 100) :: Row("key2", 200) :: Nil)
+    }
+  }
+
   test("SPARK-6851: Self-joined converted parquet tables") {
     val orders = Seq(
       Order(1, "Atlas", "MTB", 234, "2015-01-07", "John D", "Pacifica", "CA", 20151),