diff options
author | Dilip Biswal <dbiswal@us.ibm.com> | 2016-03-07 09:46:28 -0800 |
---|---|---|
committer | Davies Liu <davies.liu@gmail.com> | 2016-03-07 09:46:28 -0800 |
commit | d7eac9d7951c19302ed41fe03eaa38394aeb9c1a (patch) | |
tree | aa1fbead555f56801a30f888849a6be9e093768c /sql | |
parent | 03f57a6c2dd6ffd4038ca9cecbfc221deaf52393 (diff) | |
download | spark-d7eac9d7951c19302ed41fe03eaa38394aeb9c1a.tar.gz spark-d7eac9d7951c19302ed41fe03eaa38394aeb9c1a.tar.bz2 spark-d7eac9d7951c19302ed41fe03eaa38394aeb9c1a.zip |
[SPARK-13651] Generator outputs are not resolved correctly resulting in run time error
## What changes were proposed in this pull request?
```
Seq(("id1", "value1")).toDF("key", "value").registerTempTable("src")
sqlContext.sql("SELECT t1.* FROM src LATERAL VIEW explode(map('key1', 100, 'key2', 200)) t1 AS key, value")
```
Results in following logical plan
```
Project [key#2,value#3]
+- Generate explode(HiveGenericUDF#org.apache.hadoop.hive.ql.udf.generic.GenericUDFMap(key1,100,key2,200)), true, false, Some(genoutput), [key#2,value#3]
+- SubqueryAlias src
+- Project [_1#0 AS key#2,_2#1 AS value#3]
+- LocalRelation [_1#0,_2#1], [[id1,value1]]
```
The above query fails with following runtime error.
```
java.lang.ClassCastException: java.lang.Integer cannot be cast to org.apache.spark.unsafe.types.UTF8String
at org.apache.spark.sql.catalyst.expressions.BaseGenericInternalRow$class.getUTF8String(rows.scala:46)
at org.apache.spark.sql.catalyst.expressions.GenericInternalRow.getUTF8String(rows.scala:221)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificUnsafeProjection.apply(generated.java:42)
at org.apache.spark.sql.execution.Generate$$anonfun$doExecute$1$$anonfun$apply$9.apply(Generate.scala:98)
at org.apache.spark.sql.execution.Generate$$anonfun$doExecute$1$$anonfun$apply$9.apply(Generate.scala:96)
at scala.collection.Iterator$$anon$11.next(Iterator.scala:370)
at scala.collection.Iterator$$anon$11.next(Iterator.scala:370)
at scala.collection.Iterator$class.foreach(Iterator.scala:742)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1194)
<stack-trace omitted.....>
```
In this case the generated outputs are wrongly resolved from its child (LocalRelation) due to
https://github.com/apache/spark/blob/master/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala#L537-L548
## How was this patch tested?
(Please explain how this patch was tested. E.g. unit tests, integration tests, manual tests)
Added unit tests in hive/SQLQuerySuite and AnalysisSuite
Author: Dilip Biswal <dbiswal@us.ibm.com>
Closes #11497 from dilipbiswal/spark-13651.
Diffstat (limited to 'sql')
-rw-r--r-- | sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala | 5 | ||||
-rw-r--r-- | sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala | 10 |
2 files changed, 13 insertions, 2 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index fbbc3ee891..b5fa372643 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -512,8 +512,9 @@ class Analyzer( // A special case for Generate, because the output of Generate should not be resolved by // ResolveReferences. Attributes in the output will be resolved by ResolveGenerate. - case g @ Generate(generator, join, outer, qualifier, output, child) - if child.resolved && !generator.resolved => + case g @ Generate(generator, _, _, _, _, _) if generator.resolved => g + + case g @ Generate(generator, join, outer, qualifier, output, child) => val newG = resolveExpression(generator, child, throws = true) if (newG.fastEquals(generator)) { g diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index e478bcd0ed..2f8c2beb17 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -92,6 +92,16 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton { checkAnswer(query, Row(1, 1) :: Row(1, 2) :: Row(1, 3) :: Nil) } + test("SPARK-13651: generator outputs shouldn't be resolved from its child's output") { + withTempTable("src") { + Seq(("id1", "value1")).toDF("key", "value").registerTempTable("src") + val query = + sql("SELECT genoutput.* FROM src " + + "LATERAL VIEW explode(map('key1', 100, 'key2', 200)) genoutput AS key, value") + checkAnswer(query, Row("key1", 100) :: Row("key2", 200) :: Nil) + } + } + test("SPARK-6851: Self-joined converted parquet tables") { val orders = Seq( Order(1, "Atlas", "MTB", 234, "2015-01-07", "John D", "Pacifica", "CA", 20151), |