aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala13
-rw-r--r--sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/simple-in.sql24
-rw-r--r--sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/simple-in.sql.out50
3 files changed, 82 insertions, 5 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala
index ba3fd1d5f8..2a3e07aebe 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala
@@ -80,14 +80,19 @@ object RewritePredicateSubquery extends Rule[LogicalPlan] with PredicateHelper {
// Note that will almost certainly be planned as a Broadcast Nested Loop join.
// Use EXISTS if performance matters to you.
val inConditions = getValueExpression(value).zip(sub.output).map(EqualTo.tupled)
- val (joinCond, outerPlan) = rewriteExistentialExpr(inConditions ++ conditions, p)
+ val (joinCond, outerPlan) = rewriteExistentialExpr(inConditions, p)
// Expand the NOT IN expression with the NULL-aware semantic
// to its full form. That is from:
- // (a1,b1,...) = (a2,b2,...)
+ // (a1,a2,...) = (b1,b2,...)
// to
- // (a1=a2 OR isnull(a1=a2)) AND (b1=b2 OR isnull(b1=b2)) AND ...
+ // (a1=b1 OR isnull(a1=b1)) AND (a2=b2 OR isnull(a2=b2)) AND ...
val joinConds = splitConjunctivePredicates(joinCond.get)
- val pairs = joinConds.map(c => Or(c, IsNull(c))).reduceLeft(And)
+ // After that, add back the correlated join predicate(s) in the subquery
+ // Example:
+ // SELECT ... FROM A WHERE A.A1 NOT IN (SELECT B.B1 FROM B WHERE B.B2 = A.A2 AND B.B3 > 1)
+ // will have the final conditions in the LEFT ANTI as
+ // (A.A1 = B.B1 OR ISNULL(A.A1 = B.B1)) AND (B.B2 = A.A2)
+ val pairs = (joinConds.map(c => Or(c, IsNull(c))) ++ conditions).reduceLeft(And)
Join(outerPlan, sub, LeftAnti, Option(pairs))
case (p, predicate) =>
val (newCond, inputPlan) = rewriteExistentialExpr(Seq(predicate), p)
diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/simple-in.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/simple-in.sql
index 20370b045e..f19567d2fa 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/simple-in.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/simple-in.sql
@@ -109,4 +109,28 @@ FROM t1
WHERE t1a NOT IN (SELECT t2a
FROM t2);
+-- DDLs
+create temporary view a as select * from values
+ (1, 1), (2, 1), (null, 1), (1, 3), (null, 3), (1, null), (null, 2)
+ as a(a1, a2);
+create temporary view b as select * from values
+ (1, 1, 2), (null, 3, 2), (1, null, 2), (1, 2, null)
+ as b(b1, b2, b3);
+
+-- TC 02.01
+SELECT a1, a2
+FROM a
+WHERE a1 NOT IN (SELECT b.b1
+ FROM b
+ WHERE a.a2 = b.b2)
+;
+
+-- TC 02.02
+SELECT a1, a2
+FROM a
+WHERE a1 NOT IN (SELECT b.b1
+ FROM b
+ WHERE a.a2 = b.b2
+ AND b.b3 > 1)
+;
diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/simple-in.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/simple-in.sql.out
index 66493d7fcc..d69b4bcf18 100644
--- a/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/simple-in.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/simple-in.sql.out
@@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
--- Number of queries: 10
+-- Number of queries: 14
-- !query 0
@@ -174,3 +174,51 @@ t1a 6 2014-04-04 01:02:00.001
t1d 10 2015-05-04 01:01:00
t1d NULL 2014-06-04 01:01:00
t1d NULL 2014-07-04 01:02:00.001
+
+
+-- !query 10
+create temporary view a as select * from values
+ (1, 1), (2, 1), (null, 1), (1, 3), (null, 3), (1, null), (null, 2)
+ as a(a1, a2)
+-- !query 10 schema
+struct<>
+-- !query 10 output
+
+
+
+-- !query 11
+create temporary view b as select * from values
+ (1, 1, 2), (null, 3, 2), (1, null, 2), (1, 2, null)
+ as b(b1, b2, b3)
+-- !query 11 schema
+struct<>
+-- !query 11 output
+
+
+
+-- !query 12
+SELECT a1, a2
+FROM a
+WHERE a1 NOT IN (SELECT b.b1
+ FROM b
+ WHERE a.a2 = b.b2)
+-- !query 12 schema
+struct<a1:int,a2:int>
+-- !query 12 output
+1 NULL
+2 1
+
+
+-- !query 13
+SELECT a1, a2
+FROM a
+WHERE a1 NOT IN (SELECT b.b1
+ FROM b
+ WHERE a.a2 = b.b2
+ AND b.b3 > 1)
+-- !query 13 schema
+struct<a1:int,a2:int>
+-- !query 13 output
+1 NULL
+2 1
+NULL 2