author    Reynold Xin <rxin@databricks.com>  2015-04-08 11:32:44 -0700
committer Reynold Xin <rxin@databricks.com>  2015-04-08 11:32:44 -0700
commit    66159c35010af35098dd1ec75475bb5d4d0fd6ca (patch)
tree      4abd612335ecd42affea9a5795523a40baf9e3b9 /mllib
parent    8d812f9986f2edf420a18ca822711c9765f480e2 (diff)
[SPARK-6765] Fix test code style for mllib.
So we can turn the style checker on for test code.

Author: Reynold Xin <rxin@databricks.com>

Closes #5411 from rxin/test-style-mllib and squashes the following commits:

d8a2569 [Reynold Xin] [SPARK-6765] Fix test code style for mllib.
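Most of the hunks below make the same kind of change: they add an explicit result type to a public member, a convention the style checker enforces once it is turned on for test code. A minimal sketch of the before/after pattern, reusing maxWaitTimeMillis from the streaming suites in this patch (the enclosing objects are only there so the fragment compiles on its own):

    object Before {
      // Result type is inferred as Int; flagged once the checker is on.
      def maxWaitTimeMillis = 30000
    }

    object After {
      // Result type declared explicitly.
      def maxWaitTimeMillis: Int = 30000
    }

The same rule produces the `: Unit` annotations added to side-effecting helpers such as validateModelFit and setSeed.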
Diffstat (limited to 'mllib')
-rw-r--r--  mllib/src/test/scala/org/apache/spark/ml/param/TestParams.scala  2
-rw-r--r--  mllib/src/test/scala/org/apache/spark/mllib/classification/NaiveBayesSuite.scala  2
-rw-r--r--  mllib/src/test/scala/org/apache/spark/mllib/classification/StreamingLogisticRegressionSuite.scala  2
-rw-r--r--  mllib/src/test/scala/org/apache/spark/mllib/clustering/KMeansSuite.scala  10
-rw-r--r--  mllib/src/test/scala/org/apache/spark/mllib/clustering/LDASuite.scala  4
-rw-r--r--  mllib/src/test/scala/org/apache/spark/mllib/clustering/StreamingKMeansSuite.scala  4
-rw-r--r--  mllib/src/test/scala/org/apache/spark/mllib/random/RandomRDDsSuite.scala  5
-rw-r--r--  mllib/src/test/scala/org/apache/spark/mllib/recommendation/ALSSuite.scala  3
-rw-r--r--  mllib/src/test/scala/org/apache/spark/mllib/regression/RidgeRegressionSuite.scala  2
-rw-r--r--  mllib/src/test/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionSuite.scala  2
-rw-r--r--  mllib/src/test/scala/org/apache/spark/mllib/util/TestingUtils.scala  14
-rw-r--r--  mllib/src/test/scala/org/apache/spark/mllib/util/TestingUtilsSuite.scala  18
12 files changed, 40 insertions, 28 deletions
diff --git a/mllib/src/test/scala/org/apache/spark/ml/param/TestParams.scala b/mllib/src/test/scala/org/apache/spark/ml/param/TestParams.scala
index 1a65883d78..ce52f2f230 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/param/TestParams.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/param/TestParams.scala
@@ -28,7 +28,7 @@ class TestParams extends Params {
def setInputCol(value: String): this.type = { set(inputCol, value); this }
def getInputCol: String = get(inputCol)
- override def validate(paramMap: ParamMap) = {
+ override def validate(paramMap: ParamMap): Unit = {
val m = this.paramMap ++ paramMap
require(m(maxIter) >= 0)
require(m.contains(inputCol))
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/classification/NaiveBayesSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/classification/NaiveBayesSuite.scala
index f9fe3e006c..ea89b17b7c 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/classification/NaiveBayesSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/classification/NaiveBayesSuite.scala
@@ -102,7 +102,7 @@ class NaiveBayesSuite extends FunSuite with MLlibTestSparkContext {
def validateModelFit(
piData: Array[Double],
thetaData: Array[Array[Double]],
- model: NaiveBayesModel) = {
+ model: NaiveBayesModel): Unit = {
def closeFit(d1: Double, d2: Double, precision: Double): Boolean = {
(d1 - d2).abs <= precision
}
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/classification/StreamingLogisticRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/classification/StreamingLogisticRegressionSuite.scala
index d50c43d439..5683b55e85 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/classification/StreamingLogisticRegressionSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/classification/StreamingLogisticRegressionSuite.scala
@@ -30,7 +30,7 @@ import org.apache.spark.streaming.TestSuiteBase
class StreamingLogisticRegressionSuite extends FunSuite with TestSuiteBase {
// use longer wait time to ensure job completion
- override def maxWaitTimeMillis = 30000
+ override def maxWaitTimeMillis: Int = 30000
// Test if we can accurately learn B for Y = logistic(BX) on streaming data
test("parameter accuracy") {
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/clustering/KMeansSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/clustering/KMeansSuite.scala
index 7bf250eb5a..0f2b26d462 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/clustering/KMeansSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/clustering/KMeansSuite.scala
@@ -199,9 +199,13 @@ class KMeansSuite extends FunSuite with MLlibTestSparkContext {
test("k-means|| initialization") {
case class VectorWithCompare(x: Vector) extends Ordered[VectorWithCompare] {
- @Override def compare(that: VectorWithCompare): Int = {
- if(this.x.toArray.foldLeft[Double](0.0)((acc, x) => acc + x * x) >
- that.x.toArray.foldLeft[Double](0.0)((acc, x) => acc + x * x)) -1 else 1
+ override def compare(that: VectorWithCompare): Int = {
+ if (this.x.toArray.foldLeft[Double](0.0)((acc, x) => acc + x * x) >
+ that.x.toArray.foldLeft[Double](0.0)((acc, x) => acc + x * x)) {
+ -1
+ } else {
+ 1
+ }
}
}
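Besides adding braces around multi-line if/else branches, this hunk swaps Java's @Override annotation for Scala's override modifier. scalac accepts the Java annotation but does not act on it, whereas the override keyword is actually checked by the compiler: misspell the method or change its signature and compilation fails. A compilable sketch of both fixes (the Num class is illustrative, not from the patch):

    class Num(val n: Int) extends Ordered[Num] {
      // `override` is enforced by the compiler, unlike the inert
      // @Override annotation it replaces.
      override def compare(that: Num): Int = {
        // Multi-line branches get explicit braces, per the style fix above.
        if (this.n > that.n) {
          -1
        } else {
          1
        }
      }
    }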
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/clustering/LDASuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/clustering/LDASuite.scala
index 302d751eb8..15de10fd13 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/clustering/LDASuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/clustering/LDASuite.scala
@@ -19,7 +19,7 @@ package org.apache.spark.mllib.clustering
import org.scalatest.FunSuite
-import org.apache.spark.mllib.linalg.{DenseMatrix, Matrix, Vectors}
+import org.apache.spark.mllib.linalg.{Vector, DenseMatrix, Matrix, Vectors}
import org.apache.spark.mllib.util.MLlibTestSparkContext
import org.apache.spark.mllib.util.TestingUtils._
@@ -141,7 +141,7 @@ private[clustering] object LDASuite {
(terms.toArray, termWeights.toArray)
}
- def tinyCorpus = Array(
+ def tinyCorpus: Array[(Long, Vector)] = Array(
Vectors.dense(1, 3, 0, 2, 8),
Vectors.dense(0, 2, 1, 0, 4),
Vectors.dense(2, 3, 12, 3, 1),
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/clustering/StreamingKMeansSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/clustering/StreamingKMeansSuite.scala
index 850c9fce50..f90025d535 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/clustering/StreamingKMeansSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/clustering/StreamingKMeansSuite.scala
@@ -27,7 +27,7 @@ import org.apache.spark.util.random.XORShiftRandom
class StreamingKMeansSuite extends FunSuite with TestSuiteBase {
- override def maxWaitTimeMillis = 30000
+ override def maxWaitTimeMillis: Int = 30000
test("accuracy for single center and equivalence to grand average") {
// set parameters
@@ -59,7 +59,7 @@ class StreamingKMeansSuite extends FunSuite with TestSuiteBase {
// estimated center from streaming should exactly match the arithmetic mean of all data points
// because the decay factor is set to 1.0
val grandMean =
- input.flatten.map(x => x.toBreeze).reduce(_+_) / (numBatches * numPoints).toDouble
+ input.flatten.map(x => x.toBreeze).reduce(_ + _) / (numBatches * numPoints).toDouble
assert(model.latestModel().clusterCenters(0) ~== Vectors.dense(grandMean.toArray) absTol 1E-5)
}
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/random/RandomRDDsSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/random/RandomRDDsSuite.scala
index 6395188a08..63f2ea916d 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/random/RandomRDDsSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/random/RandomRDDsSuite.scala
@@ -181,7 +181,8 @@ class RandomRDDsSuite extends FunSuite with MLlibTestSparkContext with Serializa
val poisson = RandomRDDs.poissonVectorRDD(sc, poissonMean, rows, cols, parts, seed)
testGeneratedVectorRDD(poisson, rows, cols, parts, poissonMean, math.sqrt(poissonMean), 0.1)
- val exponential = RandomRDDs.exponentialVectorRDD(sc, exponentialMean, rows, cols, parts, seed)
+ val exponential =
+ RandomRDDs.exponentialVectorRDD(sc, exponentialMean, rows, cols, parts, seed)
testGeneratedVectorRDD(exponential, rows, cols, parts, exponentialMean, exponentialMean, 0.1)
val gamma = RandomRDDs.gammaVectorRDD(sc, gammaShape, gammaScale, rows, cols, parts, seed)
@@ -197,7 +198,7 @@ private[random] class MockDistro extends RandomDataGenerator[Double] {
// This allows us to check that each partition has a different seed
override def nextValue(): Double = seed.toDouble
- override def setSeed(seed: Long) = this.seed = seed
+ override def setSeed(seed: Long): Unit = this.seed = seed
override def copy(): MockDistro = new MockDistro
}
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/recommendation/ALSSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/recommendation/ALSSuite.scala
index 8775c0ca9d..b3798940dd 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/recommendation/ALSSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/recommendation/ALSSuite.scala
@@ -203,6 +203,7 @@ class ALSSuite extends FunSuite with MLlibTestSparkContext {
* @param numProductBlocks number of product blocks to partition products into
* @param negativeFactors whether the generated user/product factors can have negative entries
*/
+ // scalastyle:off
def testALS(
users: Int,
products: Int,
@@ -216,6 +217,8 @@ class ALSSuite extends FunSuite with MLlibTestSparkContext {
numUserBlocks: Int = -1,
numProductBlocks: Int = -1,
negativeFactors: Boolean = true) {
+ // scalastyle:on
+
val (sampledRatings, trueRatings, truePrefs) = ALSSuite.generateRatings(users, products,
features, samplingRate, implicitPrefs, negativeWeights, negativeFactors)
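The // scalastyle:off and // scalastyle:on comments are scalastyle's escape hatch: they suspend and then restore all checks for the region between them, here exempting testALS (presumably because its long default-argument signature trips a rule that reformatting alone cannot satisfy). A minimal sketch of the directive on an illustrative method:

    // scalastyle:off
    // All checks are suspended here, so a signature that would normally
    // violate a rule (line length, parameter count, ...) passes unflagged.
    def sumOfFive(a: Int, b: Int, c: Int, d: Int, e: Int): Int = a + b + c + d + e
    // scalastyle:on

When the offending rule is known, the narrower form // scalastyle:off line.size.limit disables just that one check and is generally preferable.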
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/regression/RidgeRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/regression/RidgeRegressionSuite.scala
index 43d61151e2..d6c93cc0e4 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/regression/RidgeRegressionSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/regression/RidgeRegressionSuite.scala
@@ -35,7 +35,7 @@ private object RidgeRegressionSuite {
class RidgeRegressionSuite extends FunSuite with MLlibTestSparkContext {
- def predictionError(predictions: Seq[Double], input: Seq[LabeledPoint]) = {
+ def predictionError(predictions: Seq[Double], input: Seq[LabeledPoint]): Double = {
predictions.zip(input).map { case (prediction, expected) =>
(prediction - expected.label) * (prediction - expected.label)
}.reduceLeft(_ + _) / predictions.size
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionSuite.scala
index 24fd8df691..26604dbe6c 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionSuite.scala
@@ -29,7 +29,7 @@ import org.apache.spark.streaming.TestSuiteBase
class StreamingLinearRegressionSuite extends FunSuite with TestSuiteBase {
// use longer wait time to ensure job completion
- override def maxWaitTimeMillis = 20000
+ override def maxWaitTimeMillis: Int = 20000
// Assert that two values are equal within tolerance epsilon
def assertEqual(v1: Double, v2: Double, epsilon: Double) {
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/util/TestingUtils.scala b/mllib/src/test/scala/org/apache/spark/mllib/util/TestingUtils.scala
index e957fa5d25..352193a678 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/util/TestingUtils.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/util/TestingUtils.scala
@@ -95,16 +95,16 @@ object TestingUtils {
/**
* Comparison using absolute tolerance.
*/
- def absTol(eps: Double): CompareDoubleRightSide = CompareDoubleRightSide(AbsoluteErrorComparison,
- x, eps, ABS_TOL_MSG)
+ def absTol(eps: Double): CompareDoubleRightSide =
+ CompareDoubleRightSide(AbsoluteErrorComparison, x, eps, ABS_TOL_MSG)
/**
* Comparison using relative tolerance.
*/
- def relTol(eps: Double): CompareDoubleRightSide = CompareDoubleRightSide(RelativeErrorComparison,
- x, eps, REL_TOL_MSG)
+ def relTol(eps: Double): CompareDoubleRightSide =
+ CompareDoubleRightSide(RelativeErrorComparison, x, eps, REL_TOL_MSG)
- override def toString = x.toString
+ override def toString: String = x.toString
}
case class CompareVectorRightSide(
@@ -166,7 +166,7 @@ object TestingUtils {
x.toArray.zip(y.toArray).forall(x => x._1 ~= x._2 relTol eps)
}, x, eps, REL_TOL_MSG)
- override def toString = x.toString
+ override def toString: String = x.toString
}
case class CompareMatrixRightSide(
@@ -229,7 +229,7 @@ object TestingUtils {
x.toArray.zip(y.toArray).forall(x => x._1 ~= x._2 relTol eps)
}, x, eps, REL_TOL_MSG)
- override def toString = x.toString
+ override def toString: String = x.toString
}
}
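For context, TestingUtils supplies the approximate-comparison DSL used throughout these suites: x ~= y absTol eps yields a Boolean, while x ~== y absTol eps throws a descriptive test failure when the bound does not hold, and relTol does the same with relative error. A short usage sketch, adapted from the assertions in TestingUtilsSuite below:

    import org.apache.spark.mllib.linalg.Vectors
    import org.apache.spark.mllib.util.TestingUtils._

    // Absolute tolerance on scalars: passes since |17.8 - 17.99| = 0.19 <= 0.2.
    assert(17.8 ~== 17.99 absTol 0.2)

    // Relative tolerance on vectors: element pairs must agree to within
    // about 1% of each other.
    assert(Vectors.dense(Array(3.1, 3.5)) ~==
      Vectors.dense(Array(3.130, 3.534)) relTol 0.01)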
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/util/TestingUtilsSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/util/TestingUtilsSuite.scala
index b0ecb33c28..59e6c77880 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/util/TestingUtilsSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/util/TestingUtilsSuite.scala
@@ -88,16 +88,20 @@ class TestingUtilsSuite extends FunSuite {
assert(!(17.8 ~= 17.59 absTol 0.2))
// Comparisons of numbers very close to zero, and both side of zeros
- assert(Double.MinPositiveValue ~== 4 * Double.MinPositiveValue absTol 5 * Double.MinPositiveValue)
- assert(Double.MinPositiveValue !~== 6 * Double.MinPositiveValue absTol 5 * Double.MinPositiveValue)
-
- assert(-Double.MinPositiveValue ~== 3 * Double.MinPositiveValue absTol 5 * Double.MinPositiveValue)
- assert(Double.MinPositiveValue !~== -4 * Double.MinPositiveValue absTol 5 * Double.MinPositiveValue)
+ assert(
+ Double.MinPositiveValue ~== 4 * Double.MinPositiveValue absTol 5 * Double.MinPositiveValue)
+ assert(
+ Double.MinPositiveValue !~== 6 * Double.MinPositiveValue absTol 5 * Double.MinPositiveValue)
+
+ assert(
+ -Double.MinPositiveValue ~== 3 * Double.MinPositiveValue absTol 5 * Double.MinPositiveValue)
+ assert(
+ Double.MinPositiveValue !~== -4 * Double.MinPositiveValue absTol 5 * Double.MinPositiveValue)
}
test("Comparing vectors using relative error.") {
- //Comparisons of two dense vectors
+ // Comparisons of two dense vectors
assert(Vectors.dense(Array(3.1, 3.5)) ~== Vectors.dense(Array(3.130, 3.534)) relTol 0.01)
assert(Vectors.dense(Array(3.1, 3.5)) !~== Vectors.dense(Array(3.135, 3.534)) relTol 0.01)
assert(Vectors.dense(Array(3.1, 3.5)) ~= Vectors.dense(Array(3.130, 3.534)) relTol 0.01)
@@ -130,7 +134,7 @@ class TestingUtilsSuite extends FunSuite {
test("Comparing vectors using absolute error.") {
- //Comparisons of two dense vectors
+ // Comparisons of two dense vectors
assert(Vectors.dense(Array(3.1, 3.5, 0.0)) ~==
Vectors.dense(Array(3.1 + 1E-8, 3.5 + 2E-7, 1E-8)) absTol 1E-6)