aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSameer Agarwal <sameerag@cs.berkeley.edu>2016-08-24 21:24:24 -0700
committerReynold Xin <rxin@databricks.com>2016-08-24 21:24:24 -0700
commitac27557eb622a257abeb3e8551f06ebc72f87133 (patch)
treebfcfa76a78e11debdf17e7302c40525334dae783
parent3a60be4b15a5ab9b6e0c4839df99dac7738aa7fe (diff)
downloadspark-ac27557eb622a257abeb3e8551f06ebc72f87133.tar.gz
spark-ac27557eb622a257abeb3e8551f06ebc72f87133.tar.bz2
spark-ac27557eb622a257abeb3e8551f06ebc72f87133.zip
[SPARK-17228][SQL] Not infer/propagate non-deterministic constraints
## What changes were proposed in this pull request?

Given that filters based on non-deterministic constraints shouldn't be pushed down in the query plan, unnecessarily inferring them is confusing and a source of potential bugs. This patch simplifies the inferring logic by simply ignoring them.

## How was this patch tested?

Added a new test in `ConstraintPropagationSuite`.

Author: Sameer Agarwal <sameerag@cs.berkeley.edu>

Closes #14795 from sameeragarwal/deterministic-constraints.
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala3
-rw-r--r--sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/ConstraintPropagationSuite.scala17
2 files changed, 19 insertions(+), 1 deletion(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala
index 8ee31f42ad..0fb6e7d2e7 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala
@@ -35,7 +35,8 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]] extends TreeNode[PlanT
.union(inferAdditionalConstraints(constraints))
.union(constructIsNotNullConstraints(constraints))
.filter(constraint =>
- constraint.references.nonEmpty && constraint.references.subsetOf(outputSet))
+ constraint.references.nonEmpty && constraint.references.subsetOf(outputSet) &&
+ constraint.deterministic)
}
/**
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/ConstraintPropagationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/ConstraintPropagationSuite.scala
index 5a76969235..8d6a49a8a3 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/ConstraintPropagationSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/ConstraintPropagationSuite.scala
@@ -352,4 +352,21 @@ class ConstraintPropagationSuite extends SparkFunSuite {
verifyConstraints(tr.analyze.constraints,
ExpressionSet(Seq(IsNotNull(resolveColumn(tr, "b")), IsNotNull(resolveColumn(tr, "c")))))
}
+
+ test("not infer non-deterministic constraints") {
+ val tr = LocalRelation('a.int, 'b.string, 'c.int)
+
+ verifyConstraints(tr
+ .where('a.attr === Rand(0))
+ .analyze.constraints,
+ ExpressionSet(Seq(IsNotNull(resolveColumn(tr, "a")))))
+
+ verifyConstraints(tr
+ .where('a.attr === InputFileName())
+ .where('a.attr =!= 'c.attr)
+ .analyze.constraints,
+ ExpressionSet(Seq(resolveColumn(tr, "a") =!= resolveColumn(tr, "c"),
+ IsNotNull(resolveColumn(tr, "a")),
+ IsNotNull(resolveColumn(tr, "c")))))
+ }
}