author     MechCoder <manojkumarsivaraj334@gmail.com>  2015-05-26 13:21:00 -0700
committer  Xiangrui Meng <meng@databricks.com>  2015-05-26 13:21:00 -0700
commit     61664732b25b35f94be35a42cde651cbfd0e02b7 (patch)
tree       6a58765a9f004709f8cb469bbcde25c452e188f1 /mllib
parent     b7d8085942c564d6c5b81a14d31789e1b215e62b (diff)
download   spark-61664732b25b35f94be35a42cde651cbfd0e02b7.tar.gz
           spark-61664732b25b35f94be35a42cde651cbfd0e02b7.tar.bz2
           spark-61664732b25b35f94be35a42cde651cbfd0e02b7.zip
[SPARK-7844] [MLLIB] Fix broken tests in KernelDensity
The densities in KernelDensity are scaled down by (number of parallel processes X number of points), when they should be scaled down by just the number of samples. This results in broken tests in KernelDensitySuite, which had not been asserting the values properly.

Author: MechCoder <manojkumarsivaraj334@gmail.com>

Closes #6383 from MechCoder/spark-7844 and squashes the following commits:

ab81302 [MechCoder] Math->math
9b8ed50 [MechCoder] Make one pass to update count
a92fe50 [MechCoder] [SPARK-7844] Fix broken tests in KernelDensity
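For intuition, here is a minimal standalone sketch of the intended normalization (an illustration only, not the Spark implementation): the estimate at each evaluation point is the mean of the Gaussian kernel over the samples, so the divisor must be the sample count alone.

object KdeNormalizationSketch {
  // Gaussian kernel centered at `mean` with standard deviation `sd`.
  def normPdf(mean: Double, sd: Double, x: Double): Double = {
    val z = (x - mean) / sd
    math.exp(-0.5 * z * z) / (sd * math.sqrt(2.0 * math.Pi))
  }

  // Correct estimate: average the kernel over the samples, i.e. divide by
  // samples.size only, never by (number of partitions x number of points).
  def estimate(samples: Seq[Double], bandwidth: Double, points: Seq[Double]): Seq[Double] =
    points.map(p => samples.map(s => normPdf(s, bandwidth, p)).sum / samples.size)

  def main(args: Array[String]): Unit = {
    // With the single sample 5.0 and bandwidth 3.0, the estimate at 5.0 should
    // match NormalDistribution(5.0, 3.0).density(5.0).
    println(estimate(Seq(5.0), 3.0, Seq(5.0, 6.0)))
  }
}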
Diffstat (limited to 'mllib')
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/stat/KernelDensity.scala       |  2
-rw-r--r--  mllib/src/test/scala/org/apache/spark/mllib/stat/KernelDensitySuite.scala  | 10
2 files changed, 7 insertions, 5 deletions
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/KernelDensity.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/KernelDensity.scala
index a6bfe26e1e..58a50f9c19 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/stat/KernelDensity.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/stat/KernelDensity.scala
@@ -93,7 +93,7 @@ class KernelDensity extends Serializable {
x._1(i) += normPdf(y, bandwidth, logStandardDeviationPlusHalfLog2Pi, points(i))
i += 1
}
- (x._1, n)
+ (x._1, x._2 + 1)
},
(x, y) => {
blas.daxpy(n, 1.0, y._1, 1, x._1, 1)
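The key change above is in the sequence operator of the aggregation: instead of pairing the per-point sums with the fixed constant n, it increments a running count per sample, so the counts can be added across partitions and the final division uses the true number of samples. A hedged sketch of that one-pass pattern on a plain Scala collection (names here are illustrative, not the actual Spark internals):

// seqOp analogue: update every per-point kernel sum and bump the count for
// this sample in a single pass, mirroring `(x._1, x._2 + 1)` in the fix.
def onePassPartial(samples: Seq[Double], bandwidth: Double,
    points: Array[Double]): (Array[Double], Long) =
  samples.foldLeft((new Array[Double](points.length), 0L)) { case ((acc, cnt), y) =>
    var i = 0
    while (i < points.length) {
      val z = (points(i) - y) / bandwidth
      acc(i) += math.exp(-0.5 * z * z) / (bandwidth * math.sqrt(2.0 * math.Pi))
      i += 1
    }
    (acc, cnt + 1) // sums and count updated together
  }

// combOp analogue across partitions: add the partial sums element-wise and add the counts.
def combine(a: (Array[Double], Long), b: (Array[Double], Long)): (Array[Double], Long) = {
  var i = 0
  while (i < a._1.length) { a._1(i) += b._1(i); i += 1 }
  (a._1, a._2 + b._2)
}

// Final estimate: divide each accumulated sum by the total sample count.
def finish(p: (Array[Double], Long)): Array[Double] = p._1.map(_ / p._2)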
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/stat/KernelDensitySuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/stat/KernelDensitySuite.scala
index 14bb1cebf0..a309c942cf 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/stat/KernelDensitySuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/stat/KernelDensitySuite.scala
@@ -29,8 +29,8 @@ class KernelDensitySuite extends FunSuite with MLlibTestSparkContext {
val densities = new KernelDensity().setSample(rdd).setBandwidth(3.0).estimate(evaluationPoints)
val normal = new NormalDistribution(5.0, 3.0)
val acceptableErr = 1e-6
- assert(densities(0) - normal.density(5.0) < acceptableErr)
- assert(densities(0) - normal.density(6.0) < acceptableErr)
+ assert(math.abs(densities(0) - normal.density(5.0)) < acceptableErr)
+ assert(math.abs(densities(1) - normal.density(6.0)) < acceptableErr)
}
test("kernel density multiple samples") {
@@ -40,7 +40,9 @@ class KernelDensitySuite extends FunSuite with MLlibTestSparkContext {
val normal1 = new NormalDistribution(5.0, 3.0)
val normal2 = new NormalDistribution(10.0, 3.0)
val acceptableErr = 1e-6
- assert(densities(0) - (normal1.density(5.0) + normal2.density(5.0)) / 2 < acceptableErr)
- assert(densities(0) - (normal1.density(6.0) + normal2.density(6.0)) / 2 < acceptableErr)
+ assert(math.abs(
+ densities(0) - (normal1.density(5.0) + normal2.density(5.0)) / 2) < acceptableErr)
+ assert(math.abs(
+ densities(1) - (normal1.density(6.0) + normal2.density(6.0)) / 2) < acceptableErr)
}
}
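A hedged standalone illustration of why the old one-sided assertions were effectively no-ops: when the estimate undershoots, `estimate - expected` is a large negative number and still satisfies `< acceptableErr`, so the tests passed even with the normalization bug. Wrapping the difference in math.abs makes the check symmetric. The under-scaled estimate below is illustrative only.

object OneSidedAssertionSketch {
  def main(args: Array[String]): Unit = {
    val acceptableErr = 1e-6
    // Density of NormalDistribution(5.0, 3.0) at its mean: 1 / (sigma * sqrt(2 * Pi)).
    val expected = 1.0 / (3.0 * math.sqrt(2.0 * math.Pi))
    val badEstimate = 0.0066 // an illustrative, badly under-scaled estimate

    // The old, one-sided check: a large negative difference still passes.
    assert(badEstimate - expected < acceptableErr)

    // The fixed, symmetric check would (correctly) fail for badEstimate:
    // assert(math.abs(badEstimate - expected) < acceptableErr)
    println("one-sided check passed despite the bad estimate")
  }
}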