From de67deeaabc12a62dadee5ec302fe58bee0b3498 Mon Sep 17 00:00:00 2001 From: Matei Zaharia Date: Fri, 5 Jul 2013 17:16:49 -0700 Subject: Addressed style comments from Ryan LeCompte --- mllib/src/main/scala/spark/ml/clustering/KMeans.scala | 14 ++++++-------- mllib/src/main/scala/spark/ml/recommendation/ALS.scala | 4 +++- .../main/scala/spark/ml/regression/RidgeRegression.scala | 8 ++++---- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/mllib/src/main/scala/spark/ml/clustering/KMeans.scala b/mllib/src/main/scala/spark/ml/clustering/KMeans.scala index 6d78f926c2..b0e141ff32 100644 --- a/mllib/src/main/scala/spark/ml/clustering/KMeans.scala +++ b/mllib/src/main/scala/spark/ml/clustering/KMeans.scala @@ -100,7 +100,7 @@ class KMeans private ( val sc = data.sparkContext - var centers = if (initializationMode == KMeans.RANDOM) { + val centers = if (initializationMode == KMeans.RANDOM) { initRandom(data) } else { initKMeansParallel(data) @@ -131,13 +131,11 @@ class KMeans private ( val sums = Array.fill(runs, k)(new DoubleMatrix(dims)) val counts = Array.fill(runs, k)(0L) - for (point <- points) { - for ((centers, runIndex) <- activeCenters.zipWithIndex) { - val (bestCenter, cost) = KMeans.findClosest(centers, point) - costAccums(runIndex) += cost - sums(runIndex)(bestCenter).addi(new DoubleMatrix(point)) - counts(runIndex)(bestCenter) += 1 - } + for (point <- points; (centers, runIndex) <- activeCenters.zipWithIndex) { + val (bestCenter, cost) = KMeans.findClosest(centers, point) + costAccums(runIndex) += cost + sums(runIndex)(bestCenter).addi(new DoubleMatrix(point)) + counts(runIndex)(bestCenter) += 1 } val contribs = for (i <- 0 until runs; j <- 0 until k) yield { diff --git a/mllib/src/main/scala/spark/ml/recommendation/ALS.scala b/mllib/src/main/scala/spark/ml/recommendation/ALS.scala index 0c6fa6f741..6c9fb2359c 100644 --- a/mllib/src/main/scala/spark/ml/recommendation/ALS.scala +++ b/mllib/src/main/scala/spark/ml/recommendation/ALS.scala @@ -168,7 +168,9 @@ class ALS private (var numBlocks: Int, var rank: Int, var iterations: Int, var l val grouped = ratings.partitionBy(new HashPartitioner(numBlocks)) val links = grouped.mapPartitionsWithIndex((blockId, elements) => { val ratings = elements.map(_._2).toArray - Iterator((blockId, (makeInLinkBlock(numBlocks, ratings), makeOutLinkBlock(numBlocks, ratings)))) + val inLinkBlock = makeInLinkBlock(numBlocks, ratings) + val outLinkBlock = makeOutLinkBlock(numBlocks, ratings) + Iterator.single((blockId, (inLinkBlock, outLinkBlock))) }, true) links.persist(StorageLevel.MEMORY_AND_DISK) (links.mapValues(_._1), links.mapValues(_._2)) diff --git a/mllib/src/main/scala/spark/ml/regression/RidgeRegression.scala b/mllib/src/main/scala/spark/ml/regression/RidgeRegression.scala index 2d07c77141..a6ececbeb6 100644 --- a/mllib/src/main/scala/spark/ml/regression/RidgeRegression.scala +++ b/mllib/src/main/scala/spark/ml/regression/RidgeRegression.scala @@ -140,8 +140,8 @@ class RidgeRegression private (var lambdaLow: Double, var lambdaHigh: Double) object RidgeRegression { /** - * Train a ridge regression model given an RDD of (response, features) pairs. - * We use the closed form solution to compute the cross-validation score for + * Train a ridge regression model given an RDD of (response, features) pairs. + * We use the closed form solution to compute the cross-validation score for * a given lambda. The optimal lambda is computed by performing binary search * between the provided bounds of lambda. * @@ -159,8 +159,8 @@ object RidgeRegression { } /** - * Train a ridge regression model given an RDD of (response, features) pairs. - * We use the closed form solution to compute the cross-validation score for + * Train a ridge regression model given an RDD of (response, features) pairs. + * We use the closed form solution to compute the cross-validation score for * a given lambda. The optimal lambda is computed by performing binary search * between lambda values of 0 and 100. * -- cgit v1.2.3