diff options
author | Davies Liu <davies.liu@gmail.com> | 2014-08-06 21:22:13 -0700 |
---|---|---|
committer | Patrick Wendell <pwendell@gmail.com> | 2014-08-06 21:22:13 -0700 |
commit | ffd1f59a62a9dd9a4d5a7b09490b9d01ff1cd42d (patch) | |
tree | 9ba6e575bcc92dc1ebcc65b1739ff89ed3335dfd /core | |
parent | a263a7e9f060b3017142cdae5f1270db9458d8d3 (diff) | |
download | spark-ffd1f59a62a9dd9a4d5a7b09490b9d01ff1cd42d.tar.gz spark-ffd1f59a62a9dd9a4d5a7b09490b9d01ff1cd42d.tar.bz2 spark-ffd1f59a62a9dd9a4d5a7b09490b9d01ff1cd42d.zip |
[SPARK-2887] fix bug of countApproxDistinct() when there is more than one partition
fix bug of countApproxDistinct() when there is more than one partition: the HyperLogLogPlus merge function returned the wrong accumulator (h2 instead of h1 after h1.addAll(h2)), so merged partition counters were discarded
Author: Davies Liu <davies.liu@gmail.com>
Closes #1812 from davies/approx and squashes the following commits:
bf757ce [Davies Liu] fix bug of countApproxDistinct() when have more than one partition
Diffstat (limited to 'core')
-rw-r--r-- | core/src/main/scala/org/apache/spark/rdd/RDD.scala | 2 | ||||
-rw-r--r-- | core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala | 10 |
2 files changed, 6 insertions, 6 deletions
diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala index e1c49e35ab..0159003c88 100644 --- a/core/src/main/scala/org/apache/spark/rdd/RDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala @@ -1004,7 +1004,7 @@ abstract class RDD[T: ClassTag]( }, (h1: HyperLogLogPlus, h2: HyperLogLogPlus) => { h1.addAll(h2) - h2 + h1 }).cardinality() } diff --git a/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala b/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala index b31e3a09e5..4a7dc8dca2 100644 --- a/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala +++ b/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala @@ -81,11 +81,11 @@ class RDDSuite extends FunSuite with SharedSparkContext { def error(est: Long, size: Long) = math.abs(est - size) / size.toDouble - val size = 100 - val uniformDistro = for (i <- 1 to 100000) yield i % size - val simpleRdd = sc.makeRDD(uniformDistro) - assert(error(simpleRdd.countApproxDistinct(4, 0), size) < 0.4) - assert(error(simpleRdd.countApproxDistinct(8, 0), size) < 0.1) + val size = 1000 + val uniformDistro = for (i <- 1 to 5000) yield i % size + val simpleRdd = sc.makeRDD(uniformDistro, 10) + assert(error(simpleRdd.countApproxDistinct(8, 0), size) < 0.2) + assert(error(simpleRdd.countApproxDistinct(12, 0), size) < 0.1) } test("SparkContext.union") { |