aboutsummaryrefslogtreecommitdiff
path: root/sql
diff options
context:
space:
mode:
authorLiang-Chi Hsieh <viirya@gmail.com>2017-04-11 20:33:10 +0800
committerWenchen Fan <wenchen@databricks.com>2017-04-11 20:33:10 +0800
commitcd91f967145909852d9af09b10b80f86ed05edb5 (patch)
tree3868d4a96aefb86daedebe5c6e2aa950bcbe16d1 /sql
parentc8706980ae07362ae5963829e9ada5007eada46b (diff)
downloadspark-cd91f967145909852d9af09b10b80f86ed05edb5.tar.gz
spark-cd91f967145909852d9af09b10b80f86ed05edb5.tar.bz2
spark-cd91f967145909852d9af09b10b80f86ed05edb5.zip
[SPARK-20175][SQL] Exists should not be evaluated in Join operator
## What changes were proposed in this pull request? Similar to `ListQuery`, `Exists` should not be evaluated in `Join` operator too. ## How was this patch tested? Jenkins tests. Please review http://spark.apache.org/contributing.html before opening a pull request. Author: Liang-Chi Hsieh <viirya@gmail.com> Closes #17491 from viirya/dont-push-exists-to-join.
Diffstat (limited to 'sql')
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala3
-rw-r--r--sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala10
2 files changed, 12 insertions, 1 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala
index 8acb740f8d..5034566132 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala
@@ -92,11 +92,12 @@ trait PredicateHelper {
protected def canEvaluateWithinJoin(expr: Expression): Boolean = expr match {
// Non-deterministic expressions are not allowed as join conditions.
case e if !e.deterministic => false
- case l: ListQuery =>
+ case _: ListQuery | _: Exists =>
// A ListQuery defines the query which we want to search in an IN subquery expression.
// Currently the only way to evaluate an IN subquery is to convert it to a
// LeftSemi/LeftAnti/ExistenceJoin by `RewritePredicateSubquery` rule.
// It cannot be evaluated as part of a Join operator.
+ // An Exists shouldn't be push into a Join operator too.
false
case e: SubqueryExpression =>
// non-correlated subquery will be replaced as literal
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala
index 5fe6667cec..0f0199cbe2 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala
@@ -844,4 +844,14 @@ class SubquerySuite extends QueryTest with SharedSQLContext {
Row(0) :: Row(1) :: Nil)
}
}
+
+ test("ListQuery and Exists should work even no correlated references") {
+ checkAnswer(
+ sql("select * from l, r where l.a = r.c AND (r.d in (select d from r) OR l.a >= 1)"),
+ Row(2, 1.0, 2, 3.0) :: Row(2, 1.0, 2, 3.0) :: Row(2, 1.0, 2, 3.0) ::
+ Row(2, 1.0, 2, 3.0) :: Row(3.0, 3.0, 3, 2.0) :: Row(6, null, 6, null) :: Nil)
+ checkAnswer(
+ sql("select * from l, r where l.a = r.c + 1 AND (exists (select * from r) OR l.a = r.c)"),
+ Row(3, 3.0, 2, 3.0) :: Row(3, 3.0, 2, 3.0) :: Nil)
+ }
}