aboutsummaryrefslogtreecommitdiff
path: root/core
diff options
context:
space:
mode:
authorDavies Liu <davies.liu@gmail.com>2014-08-06 21:22:13 -0700
committerPatrick Wendell <pwendell@gmail.com>2014-08-06 21:22:13 -0700
commitffd1f59a62a9dd9a4d5a7b09490b9d01ff1cd42d (patch)
tree9ba6e575bcc92dc1ebcc65b1739ff89ed3335dfd /core
parenta263a7e9f060b3017142cdae5f1270db9458d8d3 (diff)
downloadspark-ffd1f59a62a9dd9a4d5a7b09490b9d01ff1cd42d.tar.gz
spark-ffd1f59a62a9dd9a4d5a7b09490b9d01ff1cd42d.tar.bz2
spark-ffd1f59a62a9dd9a4d5a7b09490b9d01ff1cd42d.zip
[SPARK-2887] fix bug of countApproxDistinct() when there is more than one partition
fix bug of countApproxDistinct() when there is more than one partition Author: Davies Liu <davies.liu@gmail.com> Closes #1812 from davies/approx and squashes the following commits: bf757ce [Davies Liu] fix bug of countApproxDistinct() when there is more than one partition
Diffstat (limited to 'core')
-rw-r--r--core/src/main/scala/org/apache/spark/rdd/RDD.scala | 2
-rw-r--r--core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala | 10
2 files changed, 6 insertions, 6 deletions
diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala
index e1c49e35ab..0159003c88 100644
--- a/core/src/main/scala/org/apache/spark/rdd/RDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala
@@ -1004,7 +1004,7 @@ abstract class RDD[T: ClassTag](
},
(h1: HyperLogLogPlus, h2: HyperLogLogPlus) => {
h1.addAll(h2)
- h2
+ h1
}).cardinality()
}
diff --git a/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala b/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala
index b31e3a09e5..4a7dc8dca2 100644
--- a/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala
+++ b/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala
@@ -81,11 +81,11 @@ class RDDSuite extends FunSuite with SharedSparkContext {
def error(est: Long, size: Long) = math.abs(est - size) / size.toDouble
- val size = 100
- val uniformDistro = for (i <- 1 to 100000) yield i % size
- val simpleRdd = sc.makeRDD(uniformDistro)
- assert(error(simpleRdd.countApproxDistinct(4, 0), size) < 0.4)
- assert(error(simpleRdd.countApproxDistinct(8, 0), size) < 0.1)
+ val size = 1000
+ val uniformDistro = for (i <- 1 to 5000) yield i % size
+ val simpleRdd = sc.makeRDD(uniformDistro, 10)
+ assert(error(simpleRdd.countApproxDistinct(8, 0), size) < 0.2)
+ assert(error(simpleRdd.countApproxDistinct(12, 0), size) < 0.1)
}
test("SparkContext.union") {