aboutsummaryrefslogtreecommitdiff
path: root/mllib
diff options
context:
space:
mode:
author Matei Zaharia <matei@eecs.berkeley.edu> 2013-07-05 17:16:49 -0700
committer Matei Zaharia <matei@eecs.berkeley.edu> 2013-07-05 17:16:49 -0700
commit de67deeaabc12a62dadee5ec302fe58bee0b3498 (patch)
tree f32ab44882937eeeaf8b34c269fae337241976b5 /mllib
parent 43b24635ee45a845f2432bc13c11fcf2eb02f2f3 (diff)
download spark-de67deeaabc12a62dadee5ec302fe58bee0b3498.tar.gz
spark-de67deeaabc12a62dadee5ec302fe58bee0b3498.tar.bz2
spark-de67deeaabc12a62dadee5ec302fe58bee0b3498.zip
Addressed style comments from Ryan LeCompte
Diffstat (limited to 'mllib')
-rw-r--r-- mllib/src/main/scala/spark/ml/clustering/KMeans.scala 14
-rw-r--r-- mllib/src/main/scala/spark/ml/recommendation/ALS.scala 4
-rw-r--r-- mllib/src/main/scala/spark/ml/regression/RidgeRegression.scala 8
3 files changed, 13 insertions, 13 deletions
diff --git a/mllib/src/main/scala/spark/ml/clustering/KMeans.scala b/mllib/src/main/scala/spark/ml/clustering/KMeans.scala
index 6d78f926c2..b0e141ff32 100644
--- a/mllib/src/main/scala/spark/ml/clustering/KMeans.scala
+++ b/mllib/src/main/scala/spark/ml/clustering/KMeans.scala
@@ -100,7 +100,7 @@ class KMeans private (
val sc = data.sparkContext
- var centers = if (initializationMode == KMeans.RANDOM) {
+ val centers = if (initializationMode == KMeans.RANDOM) {
initRandom(data)
} else {
initKMeansParallel(data)
@@ -131,13 +131,11 @@ class KMeans private (
val sums = Array.fill(runs, k)(new DoubleMatrix(dims))
val counts = Array.fill(runs, k)(0L)
- for (point <- points) {
- for ((centers, runIndex) <- activeCenters.zipWithIndex) {
- val (bestCenter, cost) = KMeans.findClosest(centers, point)
- costAccums(runIndex) += cost
- sums(runIndex)(bestCenter).addi(new DoubleMatrix(point))
- counts(runIndex)(bestCenter) += 1
- }
+ for (point <- points; (centers, runIndex) <- activeCenters.zipWithIndex) {
+ val (bestCenter, cost) = KMeans.findClosest(centers, point)
+ costAccums(runIndex) += cost
+ sums(runIndex)(bestCenter).addi(new DoubleMatrix(point))
+ counts(runIndex)(bestCenter) += 1
}
val contribs = for (i <- 0 until runs; j <- 0 until k) yield {
diff --git a/mllib/src/main/scala/spark/ml/recommendation/ALS.scala b/mllib/src/main/scala/spark/ml/recommendation/ALS.scala
index 0c6fa6f741..6c9fb2359c 100644
--- a/mllib/src/main/scala/spark/ml/recommendation/ALS.scala
+++ b/mllib/src/main/scala/spark/ml/recommendation/ALS.scala
@@ -168,7 +168,9 @@ class ALS private (var numBlocks: Int, var rank: Int, var iterations: Int, var l
val grouped = ratings.partitionBy(new HashPartitioner(numBlocks))
val links = grouped.mapPartitionsWithIndex((blockId, elements) => {
val ratings = elements.map(_._2).toArray
- Iterator((blockId, (makeInLinkBlock(numBlocks, ratings), makeOutLinkBlock(numBlocks, ratings))))
+ val inLinkBlock = makeInLinkBlock(numBlocks, ratings)
+ val outLinkBlock = makeOutLinkBlock(numBlocks, ratings)
+ Iterator.single((blockId, (inLinkBlock, outLinkBlock)))
}, true)
links.persist(StorageLevel.MEMORY_AND_DISK)
(links.mapValues(_._1), links.mapValues(_._2))
diff --git a/mllib/src/main/scala/spark/ml/regression/RidgeRegression.scala b/mllib/src/main/scala/spark/ml/regression/RidgeRegression.scala
index 2d07c77141..a6ececbeb6 100644
--- a/mllib/src/main/scala/spark/ml/regression/RidgeRegression.scala
+++ b/mllib/src/main/scala/spark/ml/regression/RidgeRegression.scala
@@ -140,8 +140,8 @@ class RidgeRegression private (var lambdaLow: Double, var lambdaHigh: Double)
object RidgeRegression {
/**
- * Train a ridge regression model given an RDD of (response, features) pairs.
- * We use the closed form solution to compute the cross-validation score for
+ * Train a ridge regression model given an RDD of (response, features) pairs.
+ * We use the closed form solution to compute the cross-validation score for
* a given lambda. The optimal lambda is computed by performing binary search
* between the provided bounds of lambda.
*
@@ -159,8 +159,8 @@ object RidgeRegression {
}
/**
- * Train a ridge regression model given an RDD of (response, features) pairs.
- * We use the closed form solution to compute the cross-validation score for
+ * Train a ridge regression model given an RDD of (response, features) pairs.
+ * We use the closed form solution to compute the cross-validation score for
* a given lambda. The optimal lambda is computed by performing binary search
* between lambda values of 0 and 100.
*