aboutsummaryrefslogtreecommitdiff
path: root/sql/catalyst
diff options
context:
space:
mode:
authorNattavut Sutyanyong <nsy.can@gmail.com>2016-11-24 12:07:55 -0800
committerHerman van Hovell <hvanhovell@databricks.com>2016-11-24 12:07:55 -0800
commita367d5ff005884322fb8bb43a1cfa4d4bf54b31a (patch)
treea33293ffc84123dfff1f357ad6714eff7a7ac7b3 /sql/catalyst
parent2dfabec38c24174e7f747c27c7144f7738483ec1 (diff)
downloadspark-a367d5ff005884322fb8bb43a1cfa4d4bf54b31a.tar.gz
spark-a367d5ff005884322fb8bb43a1cfa4d4bf54b31a.tar.bz2
spark-a367d5ff005884322fb8bb43a1cfa4d4bf54b31a.zip
[SPARK-18578][SQL] Full outer join in correlated subquery returns incorrect results
## What changes were proposed in this pull request? - Raise Analysis exception when correlated predicates exist in the descendant operators of either operand of a Full outer join in a subquery as well as in a FOJ operator itself - Raise Analysis exception when correlated predicates exists in a Window operator (a side effect inadvertently introduced by SPARK-17348) ## How was this patch tested? Run sql/test catalyst/test and new test cases, added to SubquerySuite, showing the reported incorrect results. Author: Nattavut Sutyanyong <nsy.can@gmail.com> Closes #16005 from nsyca/FOJ-incorrect.1.
Diffstat (limited to 'sql/catalyst')
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala10
1 files changed, 10 insertions, 0 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index 0155741ddb..1db44496e6 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -1076,6 +1076,10 @@ class Analyzer(
// Simplify the predicates before pulling them out.
val transformed = BooleanSimplification(sub) transformUp {
+ // WARNING:
+ // Only Filter can host correlated expressions at this time
+ // Anyone adding a new "case" below needs to add the call to
+ // "failOnOuterReference" to disallow correlated expressions in it.
case f @ Filter(cond, child) =>
// Find all predicates with an outer reference.
val (correlated, local) = splitConjunctivePredicates(cond).partition(containsOuter)
@@ -1116,12 +1120,18 @@ class Analyzer(
a
}
case w : Window =>
+ failOnOuterReference(w)
failOnNonEqualCorrelatedPredicate(foundNonEqualCorrelatedPred, w)
w
case j @ Join(left, _, RightOuter, _) =>
failOnOuterReference(j)
failOnOuterReferenceInSubTree(left, "a RIGHT OUTER JOIN")
j
+ // SPARK-18578: Do not allow any correlated predicate
+ // in a Full (Outer) Join operator and its descendants
+ case j @ Join(_, _, FullOuter, _) =>
+ failOnOuterReferenceInSubTree(j, "a FULL OUTER JOIN")
+ j
case j @ Join(_, right, jt, _) if !jt.isInstanceOf[InnerLike] =>
failOnOuterReference(j)
failOnOuterReferenceInSubTree(right, "a LEFT (OUTER) JOIN")