path: root/mllib
author    Zheng RuiFeng <ruifengz@foxmail.com>    2016-10-24 10:25:24 +0100
committer Sean Owen <sowen@cloudera.com>          2016-10-24 10:25:24 +0100
commit    c64a8ff39794d60c596c0d34130019c09c9c8012 (patch)
tree      41a90c70412425b9283b375b268feb473b791b81 /mllib
parent    3a423f5a0373de87ddfb4744852b2fda14fcc3cb (diff)
[SPARK-18049][MLLIB][TEST] Add missing tests for truePositiveRate and weightedTruePositiveRate
## What changes were proposed in this pull request?

Add missing tests for `truePositiveRate` and `weightedTruePositiveRate` in `MulticlassMetricsSuite`.

## How was this patch tested?

Added tests.

Author: Zheng RuiFeng <ruifengz@foxmail.com>

Closes #15585 from zhengruifeng/mc_missing_test.
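For context, a minimal self-contained sketch (not part of this patch) of how the two metrics exercised by the new tests are read off a `MulticlassMetrics` instance; the object name and the (prediction, label) pairs below are illustrative only:

```scala
import org.apache.spark.SparkContext
import org.apache.spark.mllib.evaluation.MulticlassMetrics

// Minimal sketch, not part of this patch: read the two metrics the new tests cover.
object TruePositiveRateSketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext("local[2]", "tpr-sketch")
    // (prediction, label) pairs; the values are illustrative only.
    val predictionAndLabels = sc.parallelize(Seq(
      (0.0, 0.0), (0.0, 1.0), (1.0, 1.0), (2.0, 2.0), (2.0, 0.0)))
    val metrics = new MulticlassMetrics(predictionAndLabels)
    println(metrics.truePositiveRate(0.0))    // per-label TPR (equals per-label recall)
    println(metrics.weightedTruePositiveRate) // TPR averaged with label-frequency weights
    sc.stop()
  }
}
```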
Diffstat (limited to 'mllib')
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/util/ReadWrite.scala                       | 2
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/evaluation/RegressionMetrics.scala      | 2
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala    | 4
-rw-r--r--  mllib/src/test/scala/org/apache/spark/mllib/evaluation/MulticlassMetricsSuite.scala | 8
-rw-r--r--  mllib/src/test/scala/org/apache/spark/mllib/evaluation/MultilabelMetricsSuite.scala | 2
5 files changed, 13 insertions(+), 5 deletions(-)
diff --git a/mllib/src/main/scala/org/apache/spark/ml/util/ReadWrite.scala b/mllib/src/main/scala/org/apache/spark/ml/util/ReadWrite.scala
index 4413fefdea..bc4f9e6716 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/util/ReadWrite.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/util/ReadWrite.scala
@@ -474,7 +474,7 @@ private[ml] object MetaAlgorithmReadWrite {
case ovr: OneVsRest => Array(ovr.getClassifier)
case ovrModel: OneVsRestModel => Array(ovrModel.getClassifier) ++ ovrModel.models
case rformModel: RFormulaModel => Array(rformModel.pipelineModel)
- case _: Params => Array()
+ case _: Params => Array.empty[Params]
}
val subStageMaps = subStages.flatMap(getUidMapImpl)
List((instance.uid, instance)) ++ subStageMaps
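As a side note on the `Array()` → `Array.empty[Params]` change above: with no expected type in scope, a bare `Array()` is inferred as `Array[Nothing]`, while `Array.empty[T]` states the element type explicitly. A tiny stand-alone illustration (hypothetical object name, not Spark code):

```scala
// Stand-alone illustration of the empty-array style change; not Spark code.
object EmptyArrayDemo {
  def main(args: Array[String]): Unit = {
    val untyped = Array()          // element type inferred as Nothing
    val typed   = Array.empty[Int] // element type stated explicitly
    println(untyped.length)        // 0
    println(typed.length)          // 0
  }
}
```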
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RegressionMetrics.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RegressionMetrics.scala
index ce44215151..8f777cc35b 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RegressionMetrics.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RegressionMetrics.scala
@@ -73,7 +73,7 @@ class RegressionMetrics @Since("2.0.0") (
/**
* Returns the variance explained by regression.
- * explainedVariance = $\sum_i (\hat{y_i} - \bar{y})^2 / n$
+ * explainedVariance = $\sum_i (\hat{y_i} - \bar{y})^2^ / n$
* @see [[https://en.wikipedia.org/wiki/Fraction_of_variance_unexplained]]
*/
@Since("1.2.0")
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala
index ff1068417d..377be6bfb9 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala
@@ -429,14 +429,14 @@ class BlockMatrix @Since("1.3.0") (
val rightCounterpartsHelper = rightMatrix.groupBy(_._1).mapValues(_.map(_._2))
val leftDestinations = leftMatrix.map { case (rowIndex, colIndex) =>
- val rightCounterparts = rightCounterpartsHelper.getOrElse(colIndex, Array())
+ val rightCounterparts = rightCounterpartsHelper.getOrElse(colIndex, Array.empty[Int])
val partitions = rightCounterparts.map(b => partitioner.getPartition((rowIndex, b)))
((rowIndex, colIndex), partitions.toSet)
}.toMap
val leftCounterpartsHelper = leftMatrix.groupBy(_._2).mapValues(_.map(_._1))
val rightDestinations = rightMatrix.map { case (rowIndex, colIndex) =>
- val leftCounterparts = leftCounterpartsHelper.getOrElse(rowIndex, Array())
+ val leftCounterparts = leftCounterpartsHelper.getOrElse(rowIndex, Array.empty[Int])
val partitions = leftCounterparts.map(b => partitioner.getPartition((b, colIndex)))
((rowIndex, colIndex), partitions.toSet)
}.toMap
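The two changed lines above sit inside the shuffle-destination computation: block coordinates are grouped by one index, and each block looks up its counterparts with a typed empty default. A much simplified, self-contained sketch of that lookup pattern (illustrative names and data, not the real `BlockMatrix` code):

```scala
// Simplified illustration of the grouped-lookup pattern above; not BlockMatrix itself.
object DestinationLookupSketch {
  def main(args: Array[String]): Unit = {
    // (rowIndex, colIndex) coordinates of non-empty blocks in a right-hand matrix.
    val rightBlocks = Array((0, 0), (0, 1), (2, 1))
    // Group column indices by row index: Map(0 -> Array(0, 1), 2 -> Array(1)).
    val byRow: Map[Int, Array[Int]] =
      rightBlocks.groupBy(_._1).mapValues(_.map(_._2)).toMap
    // Look up counterparts for a left block's column index, with a typed empty default.
    val counterparts = byRow.getOrElse(1, Array.empty[Int])
    println(counterparts.length) // 0: no right-hand block has row index 1
  }
}
```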
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/evaluation/MulticlassMetricsSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/evaluation/MulticlassMetricsSuite.scala
index f316c67234..142d1e9812 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/evaluation/MulticlassMetricsSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/evaluation/MulticlassMetricsSuite.scala
@@ -36,6 +36,9 @@ class MulticlassMetricsSuite extends SparkFunSuite with MLlibTestSparkContext {
(1.0, 1.0), (1.0, 1.0), (2.0, 2.0), (2.0, 0.0)), 2)
val metrics = new MulticlassMetrics(predictionAndLabels)
val delta = 0.0000001
+ val tpRate0 = 2.0 / (2 + 2)
+ val tpRate1 = 3.0 / (3 + 1)
+ val tpRate2 = 1.0 / (1 + 0)
val fpRate0 = 1.0 / (9 - 4)
val fpRate1 = 1.0 / (9 - 4)
val fpRate2 = 1.0 / (9 - 1)
@@ -53,6 +56,9 @@ class MulticlassMetricsSuite extends SparkFunSuite with MLlibTestSparkContext {
val f2measure2 = (1 + 2 * 2) * precision2 * recall2 / (2 * 2 * precision2 + recall2)
assert(metrics.confusionMatrix.toArray.sameElements(confusionMatrix.toArray))
+ assert(math.abs(metrics.truePositiveRate(0.0) - tpRate0) < delta)
+ assert(math.abs(metrics.truePositiveRate(1.0) - tpRate1) < delta)
+ assert(math.abs(metrics.truePositiveRate(2.0) - tpRate2) < delta)
assert(math.abs(metrics.falsePositiveRate(0.0) - fpRate0) < delta)
assert(math.abs(metrics.falsePositiveRate(1.0) - fpRate1) < delta)
assert(math.abs(metrics.falsePositiveRate(2.0) - fpRate2) < delta)
@@ -75,6 +81,8 @@ class MulticlassMetricsSuite extends SparkFunSuite with MLlibTestSparkContext {
assert(math.abs(metrics.accuracy - metrics.recall) < delta)
assert(math.abs(metrics.accuracy - metrics.fMeasure) < delta)
assert(math.abs(metrics.accuracy - metrics.weightedRecall) < delta)
+ assert(math.abs(metrics.weightedTruePositiveRate -
+ ((4.0 / 9) * tpRate0 + (4.0 / 9) * tpRate1 + (1.0 / 9) * tpRate2)) < delta)
assert(math.abs(metrics.weightedFalsePositiveRate -
((4.0 / 9) * fpRate0 + (4.0 / 9) * fpRate1 + (1.0 / 9) * fpRate2)) < delta)
assert(math.abs(metrics.weightedPrecision -
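As a quick sanity check of the new weighted assertion: the weights 4/9, 4/9 and 1/9 correspond to the label counts in the test data (labels 0.0 and 1.0 each appear four times and label 2.0 once among the nine samples), so the expected value is (4/9)·0.5 + (4/9)·0.75 + (1/9)·1.0 = 2/9 + 3/9 + 1/9 = 6/9 ≈ 0.667, which is what the assertion checks against `metrics.weightedTruePositiveRate` up to `delta`.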
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/evaluation/MultilabelMetricsSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/evaluation/MultilabelMetricsSuite.scala
index f3b19aeb42..a660492c7a 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/evaluation/MultilabelMetricsSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/evaluation/MultilabelMetricsSuite.scala
@@ -47,7 +47,7 @@ class MultilabelMetricsSuite extends SparkFunSuite with MLlibTestSparkContext {
val scoreAndLabels: RDD[(Array[Double], Array[Double])] = sc.parallelize(
Seq((Array(0.0, 1.0), Array(0.0, 2.0)),
(Array(0.0, 2.0), Array(0.0, 1.0)),
- (Array(), Array(0.0)),
+ (Array.empty[Double], Array(0.0)),
(Array(2.0), Array(2.0)),
(Array(2.0, 0.0), Array(2.0, 0.0)),
(Array(0.0, 1.0, 2.0), Array(0.0, 1.0)),