author    Xiangrui Meng <meng@databricks.com>  2015-05-28 20:09:12 -0700
committer Reynold Xin <rxin@databricks.com>  2015-05-28 20:09:12 -0700
commit    04616b1a2f5244710b07ecbb404384ded893292c (patch)
tree      c2c1ce750c6d4f3dbd51c6a6bd89c25d4ce9f82c /mllib
parent    9b692bfdfcc91b32498865d21138cf215a378665 (diff)
download  spark-04616b1a2f5244710b07ecbb404384ded893292c.tar.gz
          spark-04616b1a2f5244710b07ecbb404384ded893292c.tar.bz2
          spark-04616b1a2f5244710b07ecbb404384ded893292c.zip
[SPARK-7927] [MLLIB] Enforce whitespace for more tokens in style checker
rxin

Author: Xiangrui Meng <meng@databricks.com>

Closes #6481 from mengxr/mllib-scalastyle and squashes the following commits:

3ca4d61 [Xiangrui Meng] revert scalastyle config
30961ba [Xiangrui Meng] adjust spaces in mllib/test
571b5c5 [Xiangrui Meng] fix spaces in mllib
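The patch is whitespace-only. As a rough illustration (using made-up identifiers, not code taken from this diff), the conventions the updated style checker now enforces look like this in ordinary Scala: a space after each comma, spaces around `=` in named and default arguments, a space after the colon in type ascriptions, and a space before the `: _*` varargs expansion.

// Minimal, self-contained sketch of the spacing conventions; illustrative only.
object WhitespaceStyleExample {
  // Space after the colon in parameter types and around `=` in the default value.
  def scaled(values: Seq[Double], factor: Double = 1.0): Seq[Double] =
    values.map(_ * factor)

  def main(args: Array[String]): Unit = {
    val xs = Seq(1.0, 2.0, 3.0)          // space after each comma
    val ys = scaled(xs, factor = 2.0)    // spaces around `=` in a named argument
    val total = List(ys : _*).sum        // space before the `: _*` varargs ascription
    println(s"ys = $ys, total = $total")
  }
}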
Diffstat (limited to 'mllib')
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala  |  10
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/tree/Node.scala  |  4
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala  |  8
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixture.scala  |  2
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala  |  6
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/feature/IDF.scala  |  2
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/feature/StandardScaler.scala  |  2
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala  |  6
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala  |  2
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/random/RandomRDDs.scala  |  2
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala  |  2
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizer.scala  |  2
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/stat/test/ChiSqTest.scala  |  2
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala  |  4
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/tree/GradientBoostedTrees.scala  |  12
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/tree/RandomForest.scala  |  2
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/tree/model/Node.scala  |  8
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/util/MFDataGenerator.scala  |  7
-rw-r--r--  mllib/src/test/scala/org/apache/spark/ml/feature/Word2VecSuite.scala  |  6
-rw-r--r--  mllib/src/test/scala/org/apache/spark/ml/tuning/CrossValidatorSuite.scala  |  12
-rw-r--r--  mllib/src/test/scala/org/apache/spark/mllib/api/python/PythonMLLibAPISuite.scala  |  2
-rw-r--r--  mllib/src/test/scala/org/apache/spark/mllib/classification/NaiveBayesSuite.scala  |  2
-rw-r--r--  mllib/src/test/scala/org/apache/spark/mllib/classification/SVMSuite.scala  |  18
-rw-r--r--  mllib/src/test/scala/org/apache/spark/mllib/clustering/KMeansSuite.scala  |  4
-rw-r--r--  mllib/src/test/scala/org/apache/spark/mllib/clustering/PowerIterationClusteringSuite.scala  |  2
-rw-r--r--  mllib/src/test/scala/org/apache/spark/mllib/evaluation/RegressionMetricsSuite.scala  |  4
-rw-r--r--  mllib/src/test/scala/org/apache/spark/mllib/feature/StandardScalerSuite.scala  |  2
-rw-r--r--  mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrixSuite.scala  |  2
-rw-r--r--  mllib/src/test/scala/org/apache/spark/mllib/optimization/GradientDescentSuite.scala  |  2
-rw-r--r--  mllib/src/test/scala/org/apache/spark/mllib/optimization/NNLSSuite.scala  |  2
-rw-r--r--  mllib/src/test/scala/org/apache/spark/mllib/regression/LassoSuite.scala  |  10
-rw-r--r--  mllib/src/test/scala/org/apache/spark/mllib/stat/CorrelationSuite.scala  |  6
-rw-r--r--  mllib/src/test/scala/org/apache/spark/mllib/util/MLUtilsSuite.scala  |  2
33 files changed, 88 insertions, 71 deletions
diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala
index 36735cd834..b8c7f3c5bc 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala
@@ -70,7 +70,7 @@ private[ml] trait OneVsRestParams extends PredictorParams {
final class OneVsRestModel private[ml] (
override val uid: String,
labelMetadata: Metadata,
- val models: Array[_ <: ClassificationModel[_,_]])
+ val models: Array[_ <: ClassificationModel[_, _]])
extends Model[OneVsRestModel] with OneVsRestParams {
override def transformSchema(schema: StructType): StructType = {
@@ -104,17 +104,17 @@ final class OneVsRestModel private[ml] (
// add temporary column to store intermediate scores and update
val tmpColName = "mbc$tmp" + UUID.randomUUID().toString
- val update: (Map[Int, Double], Vector) => Map[Int, Double] =
+ val update: (Map[Int, Double], Vector) => Map[Int, Double] =
(predictions: Map[Int, Double], prediction: Vector) => {
predictions + ((index, prediction(1)))
}
val updateUdf = callUDF(update, mapType, col(accColName), col(rawPredictionCol))
- val transformedDataset = model.transform(df).select(columns:_*)
+ val transformedDataset = model.transform(df).select(columns : _*)
val updatedDataset = transformedDataset.withColumn(tmpColName, updateUdf)
val newColumns = origCols ++ List(col(tmpColName))
// switch out the intermediate column with the accumulator column
- updatedDataset.select(newColumns:_*).withColumnRenamed(tmpColName, accColName)
+ updatedDataset.select(newColumns : _*).withColumnRenamed(tmpColName, accColName)
}
if (handlePersistence) {
@@ -190,7 +190,7 @@ final class OneVsRest(override val uid: String)
val trainingDataset = multiclassLabeled.withColumn(labelColName, labelUDFWithNewMeta)
val classifier = getClassifier
classifier.fit(trainingDataset, classifier.labelCol -> labelColName)
- }.toArray[ClassificationModel[_,_]]
+ }.toArray[ClassificationModel[_, _]]
if (handlePersistence) {
multiclassLabeled.unpersist()
diff --git a/mllib/src/main/scala/org/apache/spark/ml/tree/Node.scala b/mllib/src/main/scala/org/apache/spark/ml/tree/Node.scala
index 6a84176efb..4242154be1 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/tree/Node.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/tree/Node.scala
@@ -159,9 +159,9 @@ final class InternalNode private[ml] (
override private[tree] def subtreeToString(indentFactor: Int = 0): String = {
val prefix: String = " " * indentFactor
- prefix + s"If (${InternalNode.splitToString(split, left=true)})\n" +
+ prefix + s"If (${InternalNode.splitToString(split, left = true)})\n" +
leftChild.subtreeToString(indentFactor + 1) +
- prefix + s"Else (${InternalNode.splitToString(split, left=false)})\n" +
+ prefix + s"Else (${InternalNode.splitToString(split, left = false)})\n" +
rightChild.subtreeToString(indentFactor + 1)
}
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
index 2fa54df6fc..65f30fdba7 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
@@ -392,7 +392,7 @@ private[python] class PythonMLLibAPI extends Serializable {
data: JavaRDD[Vector],
wt: Vector,
mu: Array[Object],
- si: Array[Object]): RDD[Vector] = {
+ si: Array[Object]): RDD[Vector] = {
val weight = wt.toArray
val mean = mu.map(_.asInstanceOf[DenseVector])
@@ -428,7 +428,7 @@ private[python] class PythonMLLibAPI extends Serializable {
if (seed != null) als.setSeed(seed)
- val model = als.run(ratingsJRDD.rdd)
+ val model = als.run(ratingsJRDD.rdd)
new MatrixFactorizationModelWrapper(model)
}
@@ -459,7 +459,7 @@ private[python] class PythonMLLibAPI extends Serializable {
if (seed != null) als.setSeed(seed)
- val model = als.run(ratingsJRDD.rdd)
+ val model = als.run(ratingsJRDD.rdd)
new MatrixFactorizationModelWrapper(model)
}
@@ -1242,7 +1242,7 @@ private[spark] object SerDe extends Serializable {
}
/* convert RDD[Tuple2[,]] to RDD[Array[Any]] */
- def fromTuple2RDD(rdd: RDD[(Any, Any)]): RDD[Array[Any]] = {
+ def fromTuple2RDD(rdd: RDD[(Any, Any)]): RDD[Array[Any]] = {
rdd.map(x => Array(x._1, x._2))
}
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixture.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixture.scala
index c88410ac0f..e9a23e40cc 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixture.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixture.scala
@@ -211,7 +211,7 @@ class GaussianMixture private (
private object ExpectationSum {
def zero(k: Int, d: Int): ExpectationSum = {
new ExpectationSum(0.0, Array.fill(k)(0.0),
- Array.fill(k)(BDV.zeros(d)), Array.fill(k)(BreezeMatrix.zeros(d,d)))
+ Array.fill(k)(BDV.zeros(d)), Array.fill(k)(BreezeMatrix.zeros(d, d)))
}
// compute cluster contributions for each input point
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala
index 6fa2fe053c..8e5154b902 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala
@@ -273,7 +273,7 @@ final class OnlineLDAOptimizer extends LDAOptimizer {
* Default: 1024, following the original Online LDA paper.
*/
def setTau0(tau0: Double): this.type = {
- require(tau0 > 0, s"LDA tau0 must be positive, but was set to $tau0")
+ require(tau0 > 0, s"LDA tau0 must be positive, but was set to $tau0")
this.tau0 = tau0
this
}
@@ -339,7 +339,7 @@ final class OnlineLDAOptimizer extends LDAOptimizer {
override private[clustering] def initialize(
docs: RDD[(Long, Vector)],
- lda: LDA): OnlineLDAOptimizer = {
+ lda: LDA): OnlineLDAOptimizer = {
this.k = lda.getK
this.corpusSize = docs.count()
this.vocabSize = docs.first()._2.size
@@ -458,7 +458,7 @@ final class OnlineLDAOptimizer extends LDAOptimizer {
* uses digamma which is accurate but expensive.
*/
private def dirichletExpectation(alpha: BDM[Double]): BDM[Double] = {
- val rowSum = sum(alpha(breeze.linalg.*, ::))
+ val rowSum = sum(alpha(breeze.linalg.*, ::))
val digAlpha = digamma(alpha)
val digRowSum = digamma(rowSum)
val result = digAlpha(::, breeze.linalg.*) - digRowSum
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/IDF.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/IDF.scala
index a89eea0e21..efbfeb4059 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/feature/IDF.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/IDF.scala
@@ -144,7 +144,7 @@ private object IDF {
* Since arrays are initialized to 0 by default,
* we just omit changing those entries.
*/
- if(df(j) >= minDocFreq) {
+ if (df(j) >= minDocFreq) {
inv(j) = math.log((m + 1.0) / (df(j) + 1.0))
}
j += 1
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/StandardScaler.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/StandardScaler.scala
index 6ae6917eae..c73b8f2580 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/feature/StandardScaler.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/StandardScaler.scala
@@ -90,7 +90,7 @@ class StandardScalerModel (
@DeveloperApi
def setWithMean(withMean: Boolean): this.type = {
- require(!(withMean && this.mean == null),"cannot set withMean to true while mean is null")
+ require(!(withMean && this.mean == null), "cannot set withMean to true while mean is null")
this.withMean = withMean
this
}
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala
index 9106b73dfc..466ae95859 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala
@@ -49,7 +49,7 @@ private case class VocabWord(
var cn: Int,
var point: Array[Int],
var code: Array[Int],
- var codeLen:Int
+ var codeLen: Int
)
/**
@@ -469,7 +469,7 @@ class Word2VecModel private[mllib] (
val norm1 = blas.snrm2(n, v1, 1)
val norm2 = blas.snrm2(n, v2, 1)
if (norm1 == 0 || norm2 == 0) return 0.0
- blas.sdot(n, v1, 1, v2,1) / norm1 / norm2
+ blas.sdot(n, v1, 1, v2, 1) / norm1 / norm2
}
override protected def formatVersion = "1.0"
@@ -500,7 +500,7 @@ class Word2VecModel private[mllib] (
*/
def findSynonyms(word: String, num: Int): Array[(String, Double)] = {
val vector = transform(word)
- findSynonyms(vector,num)
+ findSynonyms(vector, num)
}
/**
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala
index 9a89a6f3a5..1626da9c3d 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala
@@ -219,7 +219,7 @@ class RowMatrix(
val computeMode = mode match {
case "auto" =>
- if(k > 5000) {
+ if (k > 5000) {
logWarning(s"computing svd with k=$k and n=$n, please check necessity")
}
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/random/RandomRDDs.scala b/mllib/src/main/scala/org/apache/spark/mllib/random/RandomRDDs.scala
index 8341bb86af..7db5a14fd4 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/random/RandomRDDs.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/random/RandomRDDs.scala
@@ -52,7 +52,7 @@ object RandomRDDs {
numPartitions: Int = 0,
seed: Long = Utils.random.nextLong()): RDD[Double] = {
val uniform = new UniformGenerator()
- randomRDD(sc, uniform, size, numPartitionsOrDefault(sc, numPartitions), seed)
+ randomRDD(sc, uniform, size, numPartitionsOrDefault(sc, numPartitions), seed)
}
/**
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala
index 3ea63dd8c0..96e50faca2 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala
@@ -203,7 +203,7 @@ object IsotonicRegressionModel extends Loader[IsotonicRegressionModel] {
override def load(sc: SparkContext, path: String): IsotonicRegressionModel = {
implicit val formats = DefaultFormats
val (loadedClassName, version, metadata) = loadMetadata(sc, path)
- val isotonic = (metadata \ "isotonic").extract[Boolean]
+ val isotonic = (metadata \ "isotonic").extract[Boolean]
val classNameV1_0 = SaveLoadV1_0.thisClassName
(loadedClassName, version) match {
case (className, "1.0") if className == classNameV1_0 =>
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizer.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizer.scala
index 0b1755613a..d321cc554c 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizer.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizer.scala
@@ -70,7 +70,7 @@ class MultivariateOnlineSummarizer extends MultivariateStatisticalSummary with S
require(n == sample.size, s"Dimensions mismatch when adding new sample." +
s" Expecting $n but got ${sample.size}.")
- val localCurrMean= currMean
+ val localCurrMean = currMean
val localCurrM2n = currM2n
val localCurrM2 = currM2
val localCurrL1 = currL1
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/test/ChiSqTest.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/test/ChiSqTest.scala
index e597fce2ba..23c8d7c7c8 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/stat/test/ChiSqTest.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/stat/test/ChiSqTest.scala
@@ -196,7 +196,7 @@ private[stat] object ChiSqTest extends Logging {
* Pearson's independence test on the input contingency matrix.
* TODO: optimize for SparseMatrix when it becomes supported.
*/
- def chiSquaredMatrix(counts: Matrix, methodName:String = PEARSON.name): ChiSqTestResult = {
+ def chiSquaredMatrix(counts: Matrix, methodName: String = PEARSON.name): ChiSqTestResult = {
val method = methodFromString(methodName)
val numRows = counts.numRows
val numCols = counts.numCols
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala
index dfe3a0b691..cecd1fed89 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala
@@ -169,7 +169,7 @@ object DecisionTree extends Serializable with Logging {
numClasses: Int,
maxBins: Int,
quantileCalculationStrategy: QuantileStrategy,
- categoricalFeaturesInfo: Map[Int,Int]): DecisionTreeModel = {
+ categoricalFeaturesInfo: Map[Int, Int]): DecisionTreeModel = {
val strategy = new Strategy(algo, impurity, maxDepth, numClasses, maxBins,
quantileCalculationStrategy, categoricalFeaturesInfo)
new DecisionTree(strategy).run(input)
@@ -768,7 +768,7 @@ object DecisionTree extends Serializable with Logging {
*/
private def calculatePredictImpurity(
leftImpurityCalculator: ImpurityCalculator,
- rightImpurityCalculator: ImpurityCalculator): (Predict, Double) = {
+ rightImpurityCalculator: ImpurityCalculator): (Predict, Double) = {
val parentNodeAgg = leftImpurityCalculator.copy
parentNodeAgg.add(rightImpurityCalculator)
val predict = calculatePredict(parentNodeAgg)
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/GradientBoostedTrees.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/GradientBoostedTrees.scala
index 1f779584dc..e3ddc70536 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/GradientBoostedTrees.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/GradientBoostedTrees.scala
@@ -60,12 +60,12 @@ class GradientBoostedTrees(private val boostingStrategy: BoostingStrategy)
def run(input: RDD[LabeledPoint]): GradientBoostedTreesModel = {
val algo = boostingStrategy.treeStrategy.algo
algo match {
- case Regression => GradientBoostedTrees.boost(input, input, boostingStrategy, validate=false)
+ case Regression =>
+ GradientBoostedTrees.boost(input, input, boostingStrategy, validate = false)
case Classification =>
// Map labels to -1, +1 so binary classification can be treated as regression.
val remappedInput = input.map(x => new LabeledPoint((x.label * 2) - 1, x.features))
- GradientBoostedTrees.boost(remappedInput,
- remappedInput, boostingStrategy, validate=false)
+ GradientBoostedTrees.boost(remappedInput, remappedInput, boostingStrategy, validate = false)
case _ =>
throw new IllegalArgumentException(s"$algo is not supported by the gradient boosting.")
}
@@ -93,8 +93,8 @@ class GradientBoostedTrees(private val boostingStrategy: BoostingStrategy)
validationInput: RDD[LabeledPoint]): GradientBoostedTreesModel = {
val algo = boostingStrategy.treeStrategy.algo
algo match {
- case Regression => GradientBoostedTrees.boost(
- input, validationInput, boostingStrategy, validate=true)
+ case Regression =>
+ GradientBoostedTrees.boost(input, validationInput, boostingStrategy, validate = true)
case Classification =>
// Map labels to -1, +1 so binary classification can be treated as regression.
val remappedInput = input.map(
@@ -102,7 +102,7 @@ class GradientBoostedTrees(private val boostingStrategy: BoostingStrategy)
val remappedValidationInput = validationInput.map(
x => new LabeledPoint((x.label * 2) - 1, x.features))
GradientBoostedTrees.boost(remappedInput, remappedValidationInput, boostingStrategy,
- validate=true)
+ validate = true)
case _ =>
throw new IllegalArgumentException(s"$algo is not supported by the gradient boosting.")
}
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/RandomForest.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/RandomForest.scala
index b347c450c1..99d0e3cf2f 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/RandomForest.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/RandomForest.scala
@@ -249,7 +249,7 @@ private class RandomForest (
try {
nodeIdCache.get.deleteAllCheckpoints()
} catch {
- case e:IOException =>
+ case e: IOException =>
logWarning(s"delete all checkpoints failed. Error reason: ${e.getMessage}")
}
}
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Node.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Node.scala
index 431a839817..ee710fc1ed 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Node.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Node.scala
@@ -151,9 +151,9 @@ class Node (
s"(feature ${split.feature} > ${split.threshold})"
}
case Categorical => if (left) {
- s"(feature ${split.feature} in ${split.categories.mkString("{",",","}")})"
+ s"(feature ${split.feature} in ${split.categories.mkString("{", ",", "}")})"
} else {
- s"(feature ${split.feature} not in ${split.categories.mkString("{",",","}")})"
+ s"(feature ${split.feature} not in ${split.categories.mkString("{", ",", "}")})"
}
}
}
@@ -161,9 +161,9 @@ class Node (
if (isLeaf) {
prefix + s"Predict: ${predict.predict}\n"
} else {
- prefix + s"If ${splitToString(split.get, left=true)}\n" +
+ prefix + s"If ${splitToString(split.get, left = true)}\n" +
leftNode.get.subtreeToString(indentFactor + 1) +
- prefix + s"Else ${splitToString(split.get, left=false)}\n" +
+ prefix + s"Else ${splitToString(split.get, left = false)}\n" +
rightNode.get.subtreeToString(indentFactor + 1)
}
}
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/MFDataGenerator.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/MFDataGenerator.scala
index 0c5b4f9d04..bd73a866c8 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/util/MFDataGenerator.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/util/MFDataGenerator.scala
@@ -82,8 +82,7 @@ object MFDataGenerator {
BLAS.gemm(z, A, B, 1.0, fullData)
val df = rank * (m + n - rank)
- val sampSize = scala.math.min(scala.math.round(trainSampFact * df),
- scala.math.round(.99 * m * n)).toInt
+ val sampSize = math.min(math.round(trainSampFact * df), math.round(.99 * m * n)).toInt
val rand = new Random()
val mn = m * n
val shuffled = rand.shuffle((0 until mn).toList)
@@ -102,8 +101,8 @@ object MFDataGenerator {
// optionally generate testing data
if (test) {
- val testSampSize = scala.math
- .min(scala.math.round(sampSize * testSampFact),scala.math.round(mn - sampSize)).toInt
+ val testSampSize = math.min(
+ math.round(sampSize * testSampFact), math.round(mn - sampSize)).toInt
val testOmega = shuffled.slice(sampSize, sampSize + testSampSize)
val testOrdered = testOmega.sortWith(_ < _).toArray
val testData: RDD[(Int, Int, Double)] = sc.parallelize(testOrdered)
diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/Word2VecSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/Word2VecSuite.scala
index 43a09cc418..df446d0c22 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/feature/Word2VecSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/feature/Word2VecSuite.scala
@@ -35,9 +35,9 @@ class Word2VecSuite extends FunSuite with MLlibTestSparkContext {
val doc = sc.parallelize(Seq(sentence, sentence)).map(line => line.split(" "))
val codes = Map(
- "a" -> Array(-0.2811822295188904,-0.6356269121170044,-0.3020961284637451),
- "b" -> Array(1.0309048891067505,-1.29472815990448,0.22276712954044342),
- "c" -> Array(-0.08456747233867645,0.5137411952018738,0.11731560528278351)
+ "a" -> Array(-0.2811822295188904, -0.6356269121170044, -0.3020961284637451),
+ "b" -> Array(1.0309048891067505, -1.29472815990448, 0.22276712954044342),
+ "c" -> Array(-0.08456747233867645, 0.5137411952018738, 0.11731560528278351)
)
val expected = doc.map { sentence =>
diff --git a/mllib/src/test/scala/org/apache/spark/ml/tuning/CrossValidatorSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/tuning/CrossValidatorSuite.scala
index 65972ec79b..60d8bfe38f 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/tuning/CrossValidatorSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/tuning/CrossValidatorSuite.scala
@@ -90,14 +90,20 @@ object CrossValidatorSuite {
override def validateParams(): Unit = require($(inputCol).nonEmpty)
- override def fit(dataset: DataFrame): MyModel = ???
+ override def fit(dataset: DataFrame): MyModel = {
+ throw new UnsupportedOperationException
+ }
- override def transformSchema(schema: StructType): StructType = ???
+ override def transformSchema(schema: StructType): StructType = {
+ throw new UnsupportedOperationException
+ }
}
class MyEvaluator extends Evaluator {
- override def evaluate(dataset: DataFrame): Double = ???
+ override def evaluate(dataset: DataFrame): Double = {
+ throw new UnsupportedOperationException
+ }
override val uid: String = "eval"
}
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/api/python/PythonMLLibAPISuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/api/python/PythonMLLibAPISuite.scala
index a629dba8a4..3d362b5ee5 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/api/python/PythonMLLibAPISuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/api/python/PythonMLLibAPISuite.scala
@@ -84,7 +84,7 @@ class PythonMLLibAPISuite extends FunSuite {
val smt = new SparseMatrix(
3, 3, Array(0, 2, 3, 5), Array(0, 2, 1, 0, 2), Array(0.9, 1.2, 3.4, 5.7, 8.9),
- isTransposed=true)
+ isTransposed = true)
val nsmt = SerDe.loads(SerDe.dumps(smt)).asInstanceOf[SparseMatrix]
assert(smt.toArray === nsmt.toArray)
}
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/classification/NaiveBayesSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/classification/NaiveBayesSuite.scala
index c111a78a55..ea40b41bbb 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/classification/NaiveBayesSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/classification/NaiveBayesSuite.scala
@@ -163,7 +163,7 @@ class NaiveBayesSuite extends FunSuite with MLlibTestSparkContext {
val theta = Array(
Array(0.50, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.40), // label 0
Array(0.02, 0.70, 0.10, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02), // label 1
- Array(0.02, 0.02, 0.60, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.30) // label 2
+ Array(0.02, 0.02, 0.60, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.30) // label 2
).map(_.map(math.log))
val testData = NaiveBayesSuite.generateNaiveBayesInput(
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/classification/SVMSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/classification/SVMSuite.scala
index 6de098b383..90f9cec685 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/classification/SVMSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/classification/SVMSuite.scala
@@ -46,7 +46,7 @@ object SVMSuite {
nPoints: Int,
seed: Int): Seq[LabeledPoint] = {
val rnd = new Random(seed)
- val weightsMat = new DoubleMatrix(1, weights.length, weights:_*)
+ val weightsMat = new DoubleMatrix(1, weights.length, weights : _*)
val x = Array.fill[Array[Double]](nPoints)(
Array.fill[Double](weights.length)(rnd.nextDouble() * 2.0 - 1.0))
val y = x.map { xi =>
@@ -91,7 +91,7 @@ class SVMSuite extends FunSuite with MLlibTestSparkContext {
val model = svm.run(testRDD)
val validationData = SVMSuite.generateSVMInput(A, Array[Double](B, C), nPoints, 17)
- val validationRDD = sc.parallelize(validationData, 2)
+ val validationRDD = sc.parallelize(validationData, 2)
// Test prediction on RDD.
@@ -117,7 +117,7 @@ class SVMSuite extends FunSuite with MLlibTestSparkContext {
val B = -1.5
val C = 1.0
- val testData = SVMSuite.generateSVMInput(A, Array[Double](B,C), nPoints, 42)
+ val testData = SVMSuite.generateSVMInput(A, Array[Double](B, C), nPoints, 42)
val testRDD = sc.parallelize(testData, 2)
testRDD.cache()
@@ -127,8 +127,8 @@ class SVMSuite extends FunSuite with MLlibTestSparkContext {
val model = svm.run(testRDD)
- val validationData = SVMSuite.generateSVMInput(A, Array[Double](B,C), nPoints, 17)
- val validationRDD = sc.parallelize(validationData, 2)
+ val validationData = SVMSuite.generateSVMInput(A, Array[Double](B, C), nPoints, 17)
+ val validationRDD = sc.parallelize(validationData, 2)
// Test prediction on RDD.
validatePrediction(model.predict(validationRDD.map(_.features)).collect(), validationData)
@@ -145,7 +145,7 @@ class SVMSuite extends FunSuite with MLlibTestSparkContext {
val B = -1.5
val C = 1.0
- val testData = SVMSuite.generateSVMInput(A, Array[Double](B,C), nPoints, 42)
+ val testData = SVMSuite.generateSVMInput(A, Array[Double](B, C), nPoints, 42)
val initialB = -1.0
val initialC = -1.0
@@ -159,8 +159,8 @@ class SVMSuite extends FunSuite with MLlibTestSparkContext {
val model = svm.run(testRDD, initialWeights)
- val validationData = SVMSuite.generateSVMInput(A, Array[Double](B,C), nPoints, 17)
- val validationRDD = sc.parallelize(validationData,2)
+ val validationData = SVMSuite.generateSVMInput(A, Array[Double](B, C), nPoints, 17)
+ val validationRDD = sc.parallelize(validationData, 2)
// Test prediction on RDD.
validatePrediction(model.predict(validationRDD.map(_.features)).collect(), validationData)
@@ -177,7 +177,7 @@ class SVMSuite extends FunSuite with MLlibTestSparkContext {
val B = -1.5
val C = 1.0
- val testData = SVMSuite.generateSVMInput(A, Array[Double](B,C), nPoints, 42)
+ val testData = SVMSuite.generateSVMInput(A, Array[Double](B, C), nPoints, 42)
val testRDD = sc.parallelize(testData, 2)
val testRDDInvalid = testRDD.map { lp =>
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/clustering/KMeansSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/clustering/KMeansSuite.scala
index 0f2b26d462..877e6dc699 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/clustering/KMeansSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/clustering/KMeansSuite.scala
@@ -75,7 +75,7 @@ class KMeansSuite extends FunSuite with MLlibTestSparkContext {
val center = Vectors.dense(1.0, 2.0, 3.0)
// Make sure code runs.
- var model = KMeans.train(data, k=2, maxIterations=1)
+ var model = KMeans.train(data, k = 2, maxIterations = 1)
assert(model.clusterCenters.size === 2)
}
@@ -87,7 +87,7 @@ class KMeansSuite extends FunSuite with MLlibTestSparkContext {
2)
// Make sure code runs.
- var model = KMeans.train(data, k=3, maxIterations=1)
+ var model = KMeans.train(data, k = 3, maxIterations = 1)
assert(model.clusterCenters.size === 3)
}
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/clustering/PowerIterationClusteringSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/clustering/PowerIterationClusteringSuite.scala
index 6d6fe6fe46..556842f312 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/clustering/PowerIterationClusteringSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/clustering/PowerIterationClusteringSuite.scala
@@ -94,11 +94,13 @@ class PowerIterationClusteringSuite extends FunSuite with MLlibTestSparkContext
*/
val similarities = Seq[(Long, Long, Double)](
(0, 1, 1.0), (0, 2, 1.0), (0, 3, 1.0), (1, 2, 1.0), (2, 3, 1.0))
+ // scalastyle:off
val expected = Array(
Array(0.0, 1.0/3.0, 1.0/3.0, 1.0/3.0),
Array(1.0/2.0, 0.0, 1.0/2.0, 0.0),
Array(1.0/3.0, 1.0/3.0, 0.0, 1.0/3.0),
Array(1.0/2.0, 0.0, 1.0/2.0, 0.0))
+ // scalastyle:on
val w = normalize(sc.parallelize(similarities, 2))
w.edges.collect().foreach { case Edge(i, j, x) =>
assert(x ~== expected(i.toInt)(j.toInt) absTol 1e-14)
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/evaluation/RegressionMetricsSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/evaluation/RegressionMetricsSuite.scala
index 670b4c34e6..3aa732474e 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/evaluation/RegressionMetricsSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/evaluation/RegressionMetricsSuite.scala
@@ -26,7 +26,7 @@ class RegressionMetricsSuite extends FunSuite with MLlibTestSparkContext {
test("regression metrics") {
val predictionAndObservations = sc.parallelize(
- Seq((2.5,3.0),(0.0,-0.5),(2.0,2.0),(8.0,7.0)), 2)
+ Seq((2.5, 3.0), (0.0, -0.5), (2.0, 2.0), (8.0, 7.0)), 2)
val metrics = new RegressionMetrics(predictionAndObservations)
assert(metrics.explainedVariance ~== 0.95717 absTol 1E-5,
"explained variance regression score mismatch")
@@ -39,7 +39,7 @@ class RegressionMetricsSuite extends FunSuite with MLlibTestSparkContext {
test("regression metrics with complete fitting") {
val predictionAndObservations = sc.parallelize(
- Seq((3.0,3.0),(0.0,0.0),(2.0,2.0),(8.0,8.0)), 2)
+ Seq((3.0, 3.0), (0.0, 0.0), (2.0, 2.0), (8.0, 8.0)), 2)
val metrics = new RegressionMetrics(predictionAndObservations)
assert(metrics.explainedVariance ~== 1.0 absTol 1E-5,
"explained variance regression score mismatch")
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/feature/StandardScalerSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/feature/StandardScalerSuite.scala
index 7f94564b2a..1eb991869d 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/feature/StandardScalerSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/feature/StandardScalerSuite.scala
@@ -360,7 +360,7 @@ class StandardScalerSuite extends FunSuite with MLlibTestSparkContext {
}
withClue("model needs std and mean vectors to be equal size when both are provided") {
intercept[IllegalArgumentException] {
- val model = new StandardScalerModel(Vectors.dense(0.0), Vectors.dense(0.0,1.0))
+ val model = new StandardScalerModel(Vectors.dense(0.0), Vectors.dense(0.0, 1.0))
}
}
}
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrixSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrixSuite.scala
index 949d1c9939..a583361758 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrixSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrixSuite.scala
@@ -57,11 +57,13 @@ class BlockMatrixSuite extends FunSuite with MLlibTestSparkContext {
val random = new ju.Random()
// This should generate a 4x4 grid of 1x2 blocks.
val part0 = GridPartitioner(4, 7, suggestedNumPartitions = 12)
+ // scalastyle:off
val expected0 = Array(
Array(0, 0, 4, 4, 8, 8, 12),
Array(1, 1, 5, 5, 9, 9, 13),
Array(2, 2, 6, 6, 10, 10, 14),
Array(3, 3, 7, 7, 11, 11, 15))
+ // scalastyle:on
for (i <- 0 until 4; j <- 0 until 7) {
assert(part0.getPartition((i, j)) === expected0(i)(j))
assert(part0.getPartition((i, j, random.nextInt())) === expected0(i)(j))
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/optimization/GradientDescentSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/optimization/GradientDescentSuite.scala
index 86481c6e66..e110506d57 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/optimization/GradientDescentSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/optimization/GradientDescentSuite.scala
@@ -42,7 +42,7 @@ object GradientDescentSuite {
offset: Double,
scale: Double,
nPoints: Int,
- seed: Int): Seq[LabeledPoint] = {
+ seed: Int): Seq[LabeledPoint] = {
val rnd = new Random(seed)
val x1 = Array.fill[Double](nPoints)(rnd.nextGaussian())
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/optimization/NNLSSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/optimization/NNLSSuite.scala
index 22855e4e8f..bb723fc471 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/optimization/NNLSSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/optimization/NNLSSuite.scala
@@ -68,12 +68,14 @@ class NNLSSuite extends FunSuite {
test("NNLS: nonnegativity constraint active") {
val n = 5
+ // scalastyle:off
val ata = new DoubleMatrix(Array(
Array( 4.377, -3.531, -1.306, -0.139, 3.418),
Array(-3.531, 4.344, 0.934, 0.305, -2.140),
Array(-1.306, 0.934, 2.644, -0.203, -0.170),
Array(-0.139, 0.305, -0.203, 5.883, 1.428),
Array( 3.418, -2.140, -0.170, 1.428, 4.684)))
+ // scalastyle:on
val atb = new DoubleMatrix(Array(-1.632, 2.115, 1.094, -1.025, -0.636))
val goodx = Array(0.13025, 0.54506, 0.2874, 0.0, 0.028628)
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/regression/LassoSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/regression/LassoSuite.scala
index c9f5dc069e..71dce50922 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/regression/LassoSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/regression/LassoSuite.scala
@@ -67,11 +67,12 @@ class LassoSuite extends FunSuite with MLlibTestSparkContext {
assert(weight1 >= -1.60 && weight1 <= -1.40, weight1 + " not in [-1.6, -1.4]")
assert(weight2 >= -1.0e-3 && weight2 <= 1.0e-3, weight2 + " not in [-0.001, 0.001]")
- val validationData = LinearDataGenerator.generateLinearInput(A, Array[Double](B,C), nPoints, 17)
+ val validationData = LinearDataGenerator
+ .generateLinearInput(A, Array[Double](B, C), nPoints, 17)
.map { case LabeledPoint(label, features) =>
LabeledPoint(label, Vectors.dense(1.0 +: features.toArray))
}
- val validationRDD = sc.parallelize(validationData, 2)
+ val validationRDD = sc.parallelize(validationData, 2)
// Test prediction on RDD.
validatePrediction(model.predict(validationRDD.map(_.features)).collect(), validationData)
@@ -110,11 +111,12 @@ class LassoSuite extends FunSuite with MLlibTestSparkContext {
assert(weight1 >= -1.60 && weight1 <= -1.40, weight1 + " not in [-1.6, -1.4]")
assert(weight2 >= -1.0e-3 && weight2 <= 1.0e-3, weight2 + " not in [-0.001, 0.001]")
- val validationData = LinearDataGenerator.generateLinearInput(A, Array[Double](B,C), nPoints, 17)
+ val validationData = LinearDataGenerator
+ .generateLinearInput(A, Array[Double](B, C), nPoints, 17)
.map { case LabeledPoint(label, features) =>
LabeledPoint(label, Vectors.dense(1.0 +: features.toArray))
}
- val validationRDD = sc.parallelize(validationData,2)
+ val validationRDD = sc.parallelize(validationData, 2)
// Test prediction on RDD.
validatePrediction(model.predict(validationRDD.map(_.features)).collect(), validationData)
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/stat/CorrelationSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/stat/CorrelationSuite.scala
index d20a09b4b4..a7e6fce31f 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/stat/CorrelationSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/stat/CorrelationSuite.scala
@@ -96,11 +96,13 @@ class CorrelationSuite extends FunSuite with MLlibTestSparkContext {
val X = sc.parallelize(data)
val defaultMat = Statistics.corr(X)
val pearsonMat = Statistics.corr(X, "pearson")
+ // scalastyle:off
val expected = BDM(
(1.00000000, 0.05564149, Double.NaN, 0.4004714),
(0.05564149, 1.00000000, Double.NaN, 0.9135959),
(Double.NaN, Double.NaN, 1.00000000, Double.NaN),
- (0.40047142, 0.91359586, Double.NaN,1.0000000))
+ (0.40047142, 0.91359586, Double.NaN, 1.0000000))
+ // scalastyle:on
assert(matrixApproxEqual(defaultMat.toBreeze, expected))
assert(matrixApproxEqual(pearsonMat.toBreeze, expected))
}
@@ -108,11 +110,13 @@ class CorrelationSuite extends FunSuite with MLlibTestSparkContext {
test("corr(X) spearman") {
val X = sc.parallelize(data)
val spearmanMat = Statistics.corr(X, "spearman")
+ // scalastyle:off
val expected = BDM(
(1.0000000, 0.1054093, Double.NaN, 0.4000000),
(0.1054093, 1.0000000, Double.NaN, 0.9486833),
(Double.NaN, Double.NaN, 1.00000000, Double.NaN),
(0.4000000, 0.9486833, Double.NaN, 1.0000000))
+ // scalastyle:on
assert(matrixApproxEqual(spearmanMat.toBreeze, expected))
}
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/util/MLUtilsSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/util/MLUtilsSuite.scala
index 668fc1d43c..cdece2c174 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/util/MLUtilsSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/util/MLUtilsSuite.scala
@@ -168,7 +168,7 @@ class MLUtilsSuite extends FunSuite with MLlibTestSparkContext {
"Each training+validation set combined should contain all of the data.")
}
// K fold cross validation should only have each element in the validation set exactly once
- assert(foldedRdds.map(_._2).reduce((x,y) => x.union(y)).collect().sorted ===
+ assert(foldedRdds.map(_._2).reduce((x, y) => x.union(y)).collect().sorted ===
data.collect().sorted)
}
}