diff options
author | Wenchen Fan <cloud0fan@outlook.com> | 2015-09-08 12:05:41 -0700 |
---|---|---|
committer | Michael Armbrust <michael@databricks.com> | 2015-09-08 12:05:41 -0700 |
commit | 5fd57955ef477347408f68eb1cb6ad1881fdb6e0 (patch) | |
tree | 5680c9e6446883bd2cdb90ff5a0e67be1d3b4681 /sql/core/src/test | |
parent | 5b2192e846b843d8a0cb9427d19bb677431194a0 (diff) | |
download | spark-5fd57955ef477347408f68eb1cb6ad1881fdb6e0.tar.gz spark-5fd57955ef477347408f68eb1cb6ad1881fdb6e0.tar.bz2 spark-5fd57955ef477347408f68eb1cb6ad1881fdb6e0.zip |
[SPARK-10316] [SQL] respect nondeterministic expressions in PhysicalOperation
We did a lot of special handling for non-deterministic expressions in `Optimizer`. However, `PhysicalOperation` just collects all Projects and Filters and messes them up. We should respect the operator order caused by non-deterministic expressions in `PhysicalOperation`.
Author: Wenchen Fan <cloud0fan@outlook.com>
Closes #8486 from cloud-fan/fix.
Diffstat (limited to 'sql/core/src/test')
-rw-r--r-- | sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala | 12 |
1 file changed, 12 insertions(+), 0 deletions(-)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
index b5b9f11785..dbed4fc247 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
@@ -22,6 +22,8 @@ import java.io.File
 import scala.language.postfixOps
 import scala.util.Random
 
+import org.scalatest.Matchers._
+
 import org.apache.spark.sql.catalyst.plans.logical.OneRowRelation
 import org.apache.spark.sql.functions._
 import org.apache.spark.sql.types._
@@ -895,4 +897,14 @@ class DataFrameSuite extends QueryTest with SharedSQLContext {
       .orderBy(sum('j))
     checkAnswer(query, Row(1, 2))
   }
+
+  test("SPARK-10316: respect non-deterministic expressions in PhysicalOperation") {
+    val input = sqlContext.read.json(sqlContext.sparkContext.makeRDD(
+      (1 to 10).map(i => s"""{"id": $i}""")))
+
+    val df = input.select($"id", rand(0).as('r))
+    df.as("a").join(df.filter($"r" < 0.5).as("b"), $"a.id" === $"b.id").collect().foreach { row =>
+      assert(row.getDouble(1) - row.getDouble(3) === 0.0 +- 0.001)
+    }
+  }
 }