diff options
author | Herman van Hovell <hvanhovell@questtec.nl> | 2016-05-06 21:06:03 -0700 |
---|---|---|
committer | Davies Liu <davies.liu@gmail.com> | 2016-05-06 21:06:03 -0700 |
commit | df89f1d43d4eaa1dd8a439a8e48bca16b67d5b48 (patch) | |
tree | 560158382acf87ae098ed1b79ca357d5701c9143 /sql | |
parent | 607a27a0d149be049091bcf274a73b8476b36c90 (diff) | |
download | spark-df89f1d43d4eaa1dd8a439a8e48bca16b67d5b48.tar.gz spark-df89f1d43d4eaa1dd8a439a8e48bca16b67d5b48.tar.bz2 spark-df89f1d43d4eaa1dd8a439a8e48bca16b67d5b48.zip |
[SPARK-15122] [SQL] Fix TPC-DS 41 - Normalize predicates before pulling them out
## What changes were proposed in this pull request?
The official TPC-DS 41 query currently fails because it contains a scalar subquery with a disjunctive correlated predicate (the correlated predicates were nested in ORs). This makes the `Analyzer` pull out the entire predicate which is wrong and causes the following (correct) analysis exception: `The correlated scalar subquery can only contain equality predicates`
This PR fixes this by first simplifying (or normalizing) the correlated predicates before pulling them out of the subquery.
## How was this patch tested?
Manual testing on TPC-DS 41, and added a test to SubquerySuite.
Author: Herman van Hovell <hvanhovell@questtec.nl>
Closes #12954 from hvanhovell/SPARK-15122.
Diffstat (limited to 'sql')
-rw-r--r-- | sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala | 4 | ||||
-rw-r--r-- | sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala | 12 |
2 files changed, 15 insertions, 1 deletion
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index 527d5b635a..9e9a856286 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -26,6 +26,7 @@
 import org.apache.spark.sql.catalyst.catalog.{InMemoryCatalog, SessionCatalog}
 import org.apache.spark.sql.catalyst.encoders.OuterScopes
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.expressions.aggregate._
+import org.apache.spark.sql.catalyst.optimizer.BooleanSimplification
 import org.apache.spark.sql.catalyst.planning.IntegerIndex
 import org.apache.spark.sql.catalyst.plans._
 import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, _}
@@ -958,7 +959,8 @@
           localPredicateReferences -- p.outputSet
         }

-        val transformed = sub transformUp {
+        // Simplify the predicates before pulling them out.
+        val transformed = BooleanSimplification(sub) transformUp {
           case f @ Filter(cond, child) =>
             // Find all predicates with an outer reference.
             val (correlated, local) = splitConjunctivePredicates(cond).partition(containsOuter)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala
index 80bb4e0538..17ac0c8c6e 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala
@@ -281,4 +281,16 @@ class SubquerySuite extends QueryTest with SharedSQLContext {
     assert(msg1.getMessage.contains(
       "The correlated scalar subquery can only contain equality predicates"))
   }
+
+  test("disjunctive correlated scalar subquery") {
+    checkAnswer(
+      sql("""
+        |select a
+        |from l
+        |where (select count(*)
+        |       from r
+        |       where (a = c and d = 2.0) or (a = c and d = 1.0)) > 0
+        """.stripMargin),
+      Row(3) :: Nil)
+  }
 }