aboutsummaryrefslogtreecommitdiff
path: root/sql
diff options
context:
space:
mode:
authorHerman van Hovell <hvanhovell@questtec.nl>2016-05-06 21:06:03 -0700
committerDavies Liu <davies.liu@gmail.com>2016-05-06 21:06:03 -0700
commitdf89f1d43d4eaa1dd8a439a8e48bca16b67d5b48 (patch)
tree560158382acf87ae098ed1b79ca357d5701c9143 /sql
parent607a27a0d149be049091bcf274a73b8476b36c90 (diff)
downloadspark-df89f1d43d4eaa1dd8a439a8e48bca16b67d5b48.tar.gz
spark-df89f1d43d4eaa1dd8a439a8e48bca16b67d5b48.tar.bz2
spark-df89f1d43d4eaa1dd8a439a8e48bca16b67d5b48.zip
[SPARK-15122] [SQL] Fix TPC-DS 41 - Normalize predicates before pulling them out
## What changes were proposed in this pull request? The official TPC-DS 41 query currently fails because it contains a scalar subquery with a disjunctive correlated predicate (the correlated predicates were nested in ORs). This makes the `Analyzer` pull out the entire predicate which is wrong and causes the following (correct) analysis exception: `The correlated scalar subquery can only contain equality predicates` This PR fixes this by first simplifying (or normalizing) the correlated predicates before pulling them out of the subquery. ## How was this patch tested? Manual testing on TPC-DS 41, and added a test to SubquerySuite. Author: Herman van Hovell <hvanhovell@questtec.nl> Closes #12954 from hvanhovell/SPARK-15122.
Diffstat (limited to 'sql')
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala4
-rw-r--r--sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala12
2 files changed, 15 insertions, 1 deletion
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index 527d5b635a..9e9a856286 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -26,6 +26,7 @@ import org.apache.spark.sql.catalyst.catalog.{InMemoryCatalog, SessionCatalog}
import org.apache.spark.sql.catalyst.encoders.OuterScopes
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.expressions.aggregate._
+import org.apache.spark.sql.catalyst.optimizer.BooleanSimplification
import org.apache.spark.sql.catalyst.planning.IntegerIndex
import org.apache.spark.sql.catalyst.plans._
import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, _}
@@ -958,7 +959,8 @@ class Analyzer(
localPredicateReferences -- p.outputSet
}
- val transformed = sub transformUp {
+ // Simplify the predicates before pulling them out.
+ val transformed = BooleanSimplification(sub) transformUp {
case f @ Filter(cond, child) =>
// Find all predicates with an outer reference.
val (correlated, local) = splitConjunctivePredicates(cond).partition(containsOuter)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala
index 80bb4e0538..17ac0c8c6e 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala
@@ -281,4 +281,16 @@ class SubquerySuite extends QueryTest with SharedSQLContext {
assert(msg1.getMessage.contains(
"The correlated scalar subquery can only contain equality predicates"))
}
+
+ test("disjunctive correlated scalar subquery") {
+ checkAnswer(
+ sql("""
+ |select a
+ |from l
+ |where (select count(*)
+ | from r
+ | where (a = c and d = 2.0) or (a = c and d = 1.0)) > 0
+ """.stripMargin),
+ Row(3) :: Nil)
+ }
}