Diffstat (limited to 'sql/catalyst')
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala  16
1 file changed, 15 insertions(+), 1 deletion(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
index b6ad5db74e..6ba8b33b3f 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
@@ -689,7 +689,7 @@ object PushDownPredicate extends Rule[LogicalPlan] with PredicateHelper {
// state and all the input rows processed before. In other words, the order of input rows
// matters for non-deterministic expressions, while pushing down predicates changes the order.
case filter @ Filter(condition, project @ Project(fields, grandChild))
- if fields.forall(_.deterministic) =>
+ if fields.forall(_.deterministic) && canPushThroughCondition(grandChild, condition) =>
// Create a map of Aliases to their values from the child projection.
// e.g., 'SELECT a + b AS c, d ...' produces Map(c -> a + b).
@@ -830,6 +830,20 @@ object PushDownPredicate extends Rule[LogicalPlan] with PredicateHelper {
filter
}
}
+
+ /**
+ * Check if we can safely push a filter through a projection, by making sure that predicate
+ * subqueries in the condition do not contain the same attributes as the plan they are moved
+ * into. This can happen when the plan and predicate subquery have the same source.
+ */
+ private def canPushThroughCondition(plan: LogicalPlan, condition: Expression): Boolean = {
+ val attributes = plan.outputSet
+ val matched = condition.find {
+ case PredicateSubquery(p, _, _, _) => p.outputSet.intersect(attributes).nonEmpty
+ case _ => false
+ }
+ matched.isEmpty
+ }
}
/**
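
For context on the first hunk: before pushing a filter through a Project, the rule rewrites the filter condition using the projection's alias map, e.g. 'SELECT a + b AS c, d ...' produces Map(c -> a + b), so a predicate on c becomes a predicate on a + b that the grandchild can evaluate. Below is a minimal, self-contained sketch of that substitution; Col, Add, Gt and Lit are hypothetical stand-in expression classes, not Catalyst's real ones.

object AliasSubstitutionSketch {
  sealed trait Expr
  final case class Col(name: String) extends Expr
  final case class Add(left: Expr, right: Expr) extends Expr
  final case class Gt(left: Expr, right: Expr) extends Expr
  final case class Lit(value: Int) extends Expr

  // 'SELECT a + b AS c, d ...' produces Map("c" -> Add(Col("a"), Col("b"))).
  val aliases: Map[String, Expr] = Map("c" -> Add(Col("a"), Col("b")))

  // Replace references to projected aliases so the predicate can be evaluated
  // below the Project, against the grandchild's output.
  def substitute(e: Expr): Expr = e match {
    case Col(n)    => aliases.getOrElse(n, Col(n))
    case Add(l, r) => Add(substitute(l), substitute(r))
    case Gt(l, r)  => Gt(substitute(l), substitute(r))
    case lit: Lit  => lit
  }

  def main(args: Array[String]): Unit = {
    // Filter 'c > 10' becomes 'a + b > 10' once pushed through the projection.
    println(substitute(Gt(Col("c"), Lit(10))))
  }
}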
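
And a minimal, self-contained sketch of the new canPushThroughCondition guard itself; Attr, InSubquery and the simplified find below are hypothetical stand-ins for Catalyst's Attribute, PredicateSubquery and Expression.find, so this is an illustration of the check under those assumptions, not Spark's implementation.

object PushdownGuardSketch {
  // Stand-in for Catalyst's attribute identity (exprId), not the real class.
  final case class Attr(exprId: Long, name: String)

  sealed trait Cond { def children: Seq[Cond] = Nil }
  final case class InSubquery(subqueryOutput: Set[Attr]) extends Cond
  final case class And(left: Cond, right: Cond) extends Cond {
    override def children: Seq[Cond] = Seq(left, right)
  }
  final case class TrueLit() extends Cond

  // Depth-first search over the condition tree, analogous to Expression.find.
  private def find(c: Cond)(p: Cond => Boolean): Option[Cond] =
    if (p(c)) Some(c)
    else c.children.foldLeft(Option.empty[Cond])((acc, ch) => acc.orElse(find(ch)(p)))

  // Mirrors the patched guard: refuse to push the filter if any predicate
  // subquery in the condition produces attributes that the target plan also
  // produces, which can happen when both read the same source relation.
  def canPushThroughCondition(planOutput: Set[Attr], condition: Cond): Boolean =
    find(condition) {
      case InSubquery(out) => out.intersect(planOutput).nonEmpty
      case _               => false
    }.isEmpty

  def main(args: Array[String]): Unit = {
    val a = Attr(1L, "a")
    val cond = And(InSubquery(Set(a)), TrueLit())
    println(canPushThroughCondition(Set(a), cond))             // false: skip pushdown
    println(canPushThroughCondition(Set(Attr(2L, "b")), cond)) // true: safe to push
  }
}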