aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHerman van Hovell <hvanhovell@questtec.nl>2016-05-10 12:47:31 -0700
committerYin Huai <yhuai@databricks.com>2016-05-10 12:47:31 -0700
commitd28c67544b26c38d51a31d1f8dac3fc23860e1ef (patch)
treeccf2d39c55bf8e2c25177f470c1c4c839bb411f1
parent89f73f674126bbc1cc101f0f5731b5750f1c90c8 (diff)
downloadspark-d28c67544b26c38d51a31d1f8dac3fc23860e1ef.tar.gz
spark-d28c67544b26c38d51a31d1f8dac3fc23860e1ef.tar.bz2
spark-d28c67544b26c38d51a31d1f8dac3fc23860e1ef.zip
[SPARK-14986][SQL] Return correct result for empty LATERAL VIEW OUTER
## What changes were proposed in this pull request?

A Generate with the `outer` flag enabled should always return one or more rows for every input row. The optimizer currently violates this by rewriting `outer` Generates that do not contain columns of the child plan into an unjoined generate, for example:

```sql
select e from a lateral view outer explode(a.b) as e
```

The result of this is that the `outer` Generate does not produce any output at all when the generator's input expression is empty. This PR fixes this.

## How was this patch tested?

Added test case to `SQLQuerySuite`.

Author: Herman van Hovell <hvanhovell@questtec.nl>

Closes #12906 from hvanhovell/SPARK-14986.
-rw-r--r-- sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala | 3
-rw-r--r-- sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala | 7
2 files changed, 9 insertions, 1 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
index a3ab89dc71..350b60134e 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
@@ -362,7 +362,8 @@ object ColumnPruning extends Rule[LogicalPlan] {
g.copy(child = prunedChild(g.child, g.references))
// Turn off `join` for Generate if no column from it's child is used
- case p @ Project(_, g: Generate) if g.join && p.references.subsetOf(g.generatedSet) =>
+ case p @ Project(_, g: Generate)
+ if g.join && !g.outer && p.references.subsetOf(g.generatedSet) =>
p.copy(child = g.copy(join = false))
// Eliminate unneeded attributes from right side of a Left Existence Join.
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index e401abef29..4ef4b4865f 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -2473,4 +2473,11 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
Row("r3c1x", "r3c2", "t1r3c3", "r3c2", "t1r3c3") :: Nil)
}
}
+
+ test("SPARK-14986: Outer lateral view with empty generate expression") {
+ checkAnswer(
+ sql("select nil from (select 1 as x ) x lateral view outer explode(array()) n as nil"),
+ Row(null) :: Nil
+ )
+ }
}