diff options
author | Matei Zaharia <matei@eecs.berkeley.edu> | 2013-06-17 15:26:36 -0700 |
---|---|---|
committer | Matei Zaharia <matei@eecs.berkeley.edu> | 2013-06-17 15:26:36 -0700 |
commit | db42451a52ed5b0f228c87ddeb07c118e9d56ef6 (patch) | |
tree | 27a49c91e3c61dd7eacc2c903513e906be64d6c0 /core/src/test | |
parent | e82a2ffcc93fb72ac04b29d1ed13f26f6ac52f6c (diff) | |
parent | f91195cc150a3ead122046d14bd35b4fcf28c9cb (diff) | |
download | spark-db42451a52ed5b0f228c87ddeb07c118e9d56ef6.tar.gz spark-db42451a52ed5b0f228c87ddeb07c118e9d56ef6.tar.bz2 spark-db42451a52ed5b0f228c87ddeb07c118e9d56ef6.zip |
Merge pull request #643 from adatao/master
Bug fix: Zero-length partitions result in NaN for overall mean & variance
Diffstat (limited to 'core/src/test')
-rw-r--r-- | core/src/test/scala/spark/PartitioningSuite.scala | 21 |
1 files changed, 19 insertions, 2 deletions
diff --git a/core/src/test/scala/spark/PartitioningSuite.scala b/core/src/test/scala/spark/PartitioningSuite.scala index 60db759c25..16f93e71a3 100644 --- a/core/src/test/scala/spark/PartitioningSuite.scala +++ b/core/src/test/scala/spark/PartitioningSuite.scala @@ -1,10 +1,10 @@ package spark import org.scalatest.FunSuite - import scala.collection.mutable.ArrayBuffer - import SparkContext._ +import spark.util.StatCounter +import scala.math.abs class PartitioningSuite extends FunSuite with LocalSparkContext { @@ -120,4 +120,21 @@ class PartitioningSuite extends FunSuite with LocalSparkContext { assert(intercept[SparkException]{ arrPairs.reduceByKeyLocally(_ + _) }.getMessage.contains("array")) assert(intercept[SparkException]{ arrPairs.reduceByKey(_ + _) }.getMessage.contains("array")) } + + test("Zero-length partitions should be correctly handled") { + // Create RDD with some consecutive empty partitions (including the "first" one) + sc = new SparkContext("local", "test") + val rdd: RDD[Double] = sc + .parallelize(Array(-1.0, -1.0, -1.0, -1.0, 2.0, 4.0, -1.0, -1.0), 8) + .filter(_ >= 0.0) + + // Run the partitions, including the consecutive empty ones, through StatCounter + val stats: StatCounter = rdd.stats(); + assert(abs(6.0 - stats.sum) < 0.01); + assert(abs(6.0/2 - rdd.mean) < 0.01); + assert(abs(1.0 - rdd.variance) < 0.01); + assert(abs(1.0 - rdd.stdev) < 0.01); + + // Add other tests here for classes that should be able to handle empty partitions correctly + } } |