diff options
author | Sameer Agarwal <sameerag@cs.berkeley.edu> | 2016-08-24 21:24:24 -0700 |
---|---|---|
committer | Reynold Xin <rxin@databricks.com> | 2016-08-24 21:24:24 -0700 |
commit | ac27557eb622a257abeb3e8551f06ebc72f87133 (patch) | |
tree | bfcfa76a78e11debdf17e7302c40525334dae783 /sql/catalyst/src | |
parent | 3a60be4b15a5ab9b6e0c4839df99dac7738aa7fe (diff) | |
download | spark-ac27557eb622a257abeb3e8551f06ebc72f87133.tar.gz spark-ac27557eb622a257abeb3e8551f06ebc72f87133.tar.bz2 spark-ac27557eb622a257abeb3e8551f06ebc72f87133.zip |
[SPARK-17228][SQL] Not infer/propagate non-deterministic constraints
## What changes were proposed in this pull request?
Given that filters based on non-deterministic constraints shouldn't be pushed down in the query plan, unnecessarily inferring them is confusing and a source of potential bugs. This patch simplifies the inferring logic by simply ignoring them.
## How was this patch tested?
Added a new test in `ConstraintPropagationSuite`.
Author: Sameer Agarwal <sameerag@cs.berkeley.edu>
Closes #14795 from sameeragarwal/deterministic-constraints.
Diffstat (limited to 'sql/catalyst/src')
-rw-r--r-- | sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala | 3 | ||||
-rw-r--r-- | sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/ConstraintPropagationSuite.scala | 17 |
2 files changed, 19 insertions, 1 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala index 8ee31f42ad..0fb6e7d2e7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala @@ -35,7 +35,8 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]] extends TreeNode[PlanT .union(inferAdditionalConstraints(constraints)) .union(constructIsNotNullConstraints(constraints)) .filter(constraint => - constraint.references.nonEmpty && constraint.references.subsetOf(outputSet)) + constraint.references.nonEmpty && constraint.references.subsetOf(outputSet) && + constraint.deterministic) } /** diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/ConstraintPropagationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/ConstraintPropagationSuite.scala index 5a76969235..8d6a49a8a3 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/ConstraintPropagationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/ConstraintPropagationSuite.scala @@ -352,4 +352,21 @@ class ConstraintPropagationSuite extends SparkFunSuite { verifyConstraints(tr.analyze.constraints, ExpressionSet(Seq(IsNotNull(resolveColumn(tr, "b")), IsNotNull(resolveColumn(tr, "c"))))) } + + test("not infer non-deterministic constraints") { + val tr = LocalRelation('a.int, 'b.string, 'c.int) + + verifyConstraints(tr + .where('a.attr === Rand(0)) + .analyze.constraints, + ExpressionSet(Seq(IsNotNull(resolveColumn(tr, "a"))))) + + verifyConstraints(tr + .where('a.attr === InputFileName()) + .where('a.attr =!= 'c.attr) + .analyze.constraints, + ExpressionSet(Seq(resolveColumn(tr, "a") =!= resolveColumn(tr, "c"), + IsNotNull(resolveColumn(tr, "a")), + IsNotNull(resolveColumn(tr, "c"))))) + } } |