diff options
author | Timothy Hunter <timhunter@databricks.com> | 2016-09-11 08:03:45 +0100 |
---|---|---|
committer | Sean Owen <sowen@cloudera.com> | 2016-09-11 08:03:45 +0100 |
commit | 180796ecb3a00facde2d98affdb5aa38dd258875 (patch) | |
tree | c082d3431c1a266e9526b480188324a19186b7da /sql/catalyst/src/test | |
parent | 29ba9578f44c7caa8451386cee1f03f4e0ed8fc7 (diff) | |
download | spark-180796ecb3a00facde2d98affdb5aa38dd258875.tar.gz spark-180796ecb3a00facde2d98affdb5aa38dd258875.tar.bz2 spark-180796ecb3a00facde2d98affdb5aa38dd258875.zip |
[SPARK-17439][SQL] Fixing compression issues with approximate quantiles and adding more tests
## What changes were proposed in this pull request?
This PR build on #14976 and fixes a correctness bug that would cause the wrong quantile to be returned for small target errors.
## How was this patch tested?
This PR adds 8 unit tests that were failing without the fix.
Author: Timothy Hunter <timhunter@databricks.com>
Author: Sean Owen <sowen@cloudera.com>
Closes #15002 from thunterdb/ml-1783.
Diffstat (limited to 'sql/catalyst/src/test')
-rw-r--r-- | sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/QuantileSummariesSuite.scala | 29 |
1 files changed, 27 insertions, 2 deletions
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/QuantileSummariesSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/QuantileSummariesSuite.scala index 89b2a22a3d..5e90970b1b 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/QuantileSummariesSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/QuantileSummariesSuite.scala @@ -40,6 +40,20 @@ class QuantileSummariesSuite extends SparkFunSuite { summary.compress() } + /** + * Interleaves compression and insertions. + */ + private def buildCompressSummary( + data: Seq[Double], + epsi: Double, + threshold: Int): QuantileSummaries = { + var summary = new QuantileSummaries(threshold, epsi) + data.foreach { x => + summary = summary.insert(x).compress() + } + summary + } + private def checkQuantile(quant: Double, data: Seq[Double], summary: QuantileSummaries): Unit = { val approx = summary.query(quant) // The rank of the approximation. @@ -54,8 +68,8 @@ class QuantileSummariesSuite extends SparkFunSuite { for { (seq_name, data) <- Seq(increasing, decreasing, random) - epsi <- Seq(0.1, 0.0001) - compression <- Seq(1000, 10) + epsi <- Seq(0.1, 0.0001) // With a significant value and with full precision + compression <- Seq(1000, 10) // This interleaves n so that we test without and with compression } { test(s"Extremas with epsi=$epsi and seq=$seq_name, compression=$compression") { @@ -75,6 +89,17 @@ class QuantileSummariesSuite extends SparkFunSuite { checkQuantile(0.1, data, s) checkQuantile(0.001, data, s) } + + test(s"Some quantile values with epsi=$epsi and seq=$seq_name, compression=$compression " + + s"(interleaved)") { + val s = buildCompressSummary(data, epsi, compression) + assert(s.count == data.size, s"Found count=${s.count} but data size=${data.size}") + checkQuantile(0.9999, data, s) + checkQuantile(0.9, data, s) + checkQuantile(0.5, data, s) + checkQuantile(0.1, data, s) + checkQuantile(0.001, data, s) + } } // Tests for merging procedure |