author     Reynold Xin <reynoldx@gmail.com>  2013-07-30 13:59:32 -0700
committer  Reynold Xin <reynoldx@gmail.com>  2013-07-30 13:59:32 -0700
commit     366f7735ebe1004acf113df257950d287c50471a (patch)
tree       9f4076a34be3ba28a32d973cdc55db4d040cfe11 /mllib
parent     47011e685449edfe9f91d8f937a5d23d3b359baf (diff)
Minor style cleanup of mllib.
Diffstat (limited to 'mllib')
-rw-r--r--  mllib/src/main/scala/spark/mllib/classification/LogisticRegression.scala  29
-rw-r--r--  mllib/src/main/scala/spark/mllib/classification/SVM.scala                 15
-rw-r--r--  mllib/src/main/scala/spark/mllib/optimization/Updater.scala               10
-rw-r--r--  mllib/src/main/scala/spark/mllib/recommendation/ALS.scala                  5
-rw-r--r--  mllib/src/main/scala/spark/mllib/regression/Lasso.scala                   15
5 files changed, 39 insertions(+), 35 deletions(-)
diff --git a/mllib/src/main/scala/spark/mllib/classification/LogisticRegression.scala b/mllib/src/main/scala/spark/mllib/classification/LogisticRegression.scala
index bf3b05dedb..203aa8fdd4 100644
--- a/mllib/src/main/scala/spark/mllib/classification/LogisticRegression.scala
+++ b/mllib/src/main/scala/spark/mllib/classification/LogisticRegression.scala
@@ -135,8 +135,8 @@ class LogisticRegressionLocalRandomSGD private (var stepSize: Double, var miniBa
object LogisticRegressionLocalRandomSGD {
/**
- * Train a logistic regression model given an RDD of (label, features) pairs. We run a fixed number
- * of iterations of gradient descent using the specified step size. Each iteration uses
+ * Train a logistic regression model given an RDD of (label, features) pairs. We run a fixed
+ * number of iterations of gradient descent using the specified step size. Each iteration uses
* `miniBatchFraction` fraction of the data to calculate the gradient. The weights used in
* gradient descent are initialized using the initial weights provided.
*
@@ -155,12 +155,13 @@ object LogisticRegressionLocalRandomSGD {
initialWeights: Array[Double])
: LogisticRegressionModel =
{
- new LogisticRegressionLocalRandomSGD(stepSize, miniBatchFraction, numIterations).train(input, initialWeights)
+ new LogisticRegressionLocalRandomSGD(stepSize, miniBatchFraction, numIterations).train(
+ input, initialWeights)
}
/**
- * Train a logistic regression model given an RDD of (label, features) pairs. We run a fixed number
- * of iterations of gradient descent using the specified step size. Each iteration uses
+ * Train a logistic regression model given an RDD of (label, features) pairs. We run a fixed
+ * number of iterations of gradient descent using the specified step size. Each iteration uses
* `miniBatchFraction` fraction of the data to calculate the gradient.
*
* @param input RDD of (label, array of features) pairs.
@@ -180,9 +181,9 @@ object LogisticRegressionLocalRandomSGD {
}
/**
- * Train a logistic regression model given an RDD of (label, features) pairs. We run a fixed number
- * of iterations of gradient descent using the specified step size. We use the entire data set to update
- * the gradient in each iteration.
+ * Train a logistic regression model given an RDD of (label, features) pairs. We run a fixed
+ * number of iterations of gradient descent using the specified step size. We use the entire data
+ * set to update the gradient in each iteration.
*
* @param input RDD of (label, array of features) pairs.
* @param stepSize Step size to be used for each iteration of Gradient Descent.
@@ -200,9 +201,9 @@ object LogisticRegressionLocalRandomSGD {
}
/**
- * Train a logistic regression model given an RDD of (label, features) pairs. We run a fixed number
- * of iterations of gradient descent using a step size of 1.0. We use the entire data set to update
- * the gradient in each iteration.
+ * Train a logistic regression model given an RDD of (label, features) pairs. We run a fixed
+ * number of iterations of gradient descent using a step size of 1.0. We use the entire data set
+ * to update the gradient in each iteration.
*
* @param input RDD of (label, array of features) pairs.
* @param numIterations Number of iterations of gradient descent to run.
@@ -218,12 +219,14 @@ object LogisticRegressionLocalRandomSGD {
def main(args: Array[String]) {
if (args.length != 5) {
- println("Usage: LogisticRegression <master> <input_dir> <step_size> <regularization_parameter> <niters>")
+ println("Usage: LogisticRegression <master> <input_dir> <step_size> " +
+ "<regularization_parameter> <niters>")
System.exit(1)
}
val sc = new SparkContext(args(0), "LogisticRegression")
val data = MLUtils.loadLabeledData(sc, args(1)).map(yx => (yx._1.toInt, yx._2))
- val model = LogisticRegressionLocalRandomSGD.train(data, args(4).toInt, args(2).toDouble, args(3).toDouble)
+ val model = LogisticRegressionLocalRandomSGD.train(
+ data, args(4).toInt, args(2).toDouble, args(3).toDouble)
sc.stop()
}
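
For reference, the wrapped train() call above is reached from a driver like the following. This is a minimal sketch against the spark.mllib API of this era, not part of the patch: the app name, input path, and parameter values are placeholders, and the four-argument overload is assumed to take (input, numIterations, stepSize, miniBatchFraction) as the scaladoc above describes.

  import spark.SparkContext
  import spark.mllib.classification.LogisticRegressionLocalRandomSGD
  import spark.mllib.util.MLUtils

  object LogisticRegressionExample {
    def main(args: Array[String]) {
      val sc = new SparkContext("local", "LogisticRegressionExample")
      // loadLabeledData yields (label: Double, features: Array[Double]) pairs;
      // the classifier expects integer labels, hence the toInt.
      val data = MLUtils.loadLabeledData(sc, "data/lr-input.txt")
        .map(yx => (yx._1.toInt, yx._2))
      // Fixed number of SGD iterations, step size 1.0, full mini-batch fraction.
      val model = LogisticRegressionLocalRandomSGD.train(data, 200, 1.0, 1.0)
      sc.stop()
    }
  }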
diff --git a/mllib/src/main/scala/spark/mllib/classification/SVM.scala b/mllib/src/main/scala/spark/mllib/classification/SVM.scala
index 15b689e7e0..3a6a12814a 100644
--- a/mllib/src/main/scala/spark/mllib/classification/SVM.scala
+++ b/mllib/src/main/scala/spark/mllib/classification/SVM.scala
@@ -53,8 +53,8 @@ class SVMModel(
-class SVMLocalRandomSGD private (var stepSize: Double, var regParam: Double, var miniBatchFraction: Double,
- var numIters: Int)
+class SVMLocalRandomSGD private (var stepSize: Double, var regParam: Double,
+ var miniBatchFraction: Double, var numIters: Int)
extends Logging {
/**
@@ -163,7 +163,8 @@ object SVMLocalRandomSGD {
initialWeights: Array[Double])
: SVMModel =
{
- new SVMLocalRandomSGD(stepSize, regParam, miniBatchFraction, numIterations).train(input, initialWeights)
+ new SVMLocalRandomSGD(stepSize, regParam, miniBatchFraction, numIterations).train(
+ input, initialWeights)
}
/**
@@ -190,8 +191,8 @@ object SVMLocalRandomSGD {
/**
* Train a SVM model given an RDD of (label, features) pairs. We run a fixed number
- * of iterations of gradient descent using the specified step size. We use the entire data set to update
- * the gradient in each iteration.
+ * of iterations of gradient descent using the specified step size. We use the entire data set to
+ * update the gradient in each iteration.
*
* @param input RDD of (label, array of features) pairs.
* @param stepSize Step size to be used for each iteration of Gradient Descent.
@@ -211,8 +212,8 @@ object SVMLocalRandomSGD {
/**
* Train a SVM model given an RDD of (label, features) pairs. We run a fixed number
- * of iterations of gradient descent using a step size of 1.0. We use the entire data set to update
- * the gradient in each iteration.
+ * of iterations of gradient descent using a step size of 1.0. We use the entire data set to
+ * update the gradient in each iteration.
*
* @param input RDD of (label, array of features) pairs.
* @param numIterations Number of iterations of gradient descent to run.
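
SVMLocalRandomSGD mirrors the logistic-regression object with one extra knob, regParam. Continuing the driver sketch above, and assuming the full overload places the arguments in the constructor's order (stepSize, regParam, miniBatchFraction) after numIterations:

  // Sketch only: overload shape inferred from the constructor above; values
  // are placeholders (200 iterations, step 1.0, regParam 0.1, full batch).
  val svmModel = SVMLocalRandomSGD.train(data, 200, 1.0, 0.1, 1.0)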
diff --git a/mllib/src/main/scala/spark/mllib/optimization/Updater.scala b/mllib/src/main/scala/spark/mllib/optimization/Updater.scala
index bf506d2f24..3ebc1409b6 100644
--- a/mllib/src/main/scala/spark/mllib/optimization/Updater.scala
+++ b/mllib/src/main/scala/spark/mllib/optimization/Updater.scala
@@ -25,7 +25,7 @@ abstract class Updater extends Serializable {
* Compute an updated value for weights given the gradient, stepSize and iteration number.
* Also returns the regularization value computed using the *updated* weights.
*
- * @param weightsOlds - Column matrix of size nx1 where n is the number of features.
+ * @param weightsOld - Column matrix of size nx1 where n is the number of features.
* @param gradient - Column matrix of size nx1 where n is the number of features.
* @param stepSize - step size across iterations
* @param iter - Iteration number
@@ -34,8 +34,8 @@ abstract class Updater extends Serializable {
* @return A tuple of 2 elements. The first element is a column matrix containing updated weights,
* and the second element is the regularization value computed using updated weights.
*/
- def compute(weightsOld: DoubleMatrix, gradient: DoubleMatrix, stepSize: Double, iter: Int, regParam: Double):
- (DoubleMatrix, Double)
+ def compute(weightsOld: DoubleMatrix, gradient: DoubleMatrix, stepSize: Double, iter: Int,
+ regParam: Double): (DoubleMatrix, Double)
}
class SimpleUpdater extends Updater {
@@ -64,10 +64,10 @@ class L1Updater extends Updater {
val newWeights = weightsOld.sub(normGradient)
// Soft thresholding
val shrinkageVal = regParam * thisIterStepSize
- (0 until newWeights.length).foreach(i => {
+ (0 until newWeights.length).foreach { i =>
val wi = newWeights.get(i)
newWeights.put(i, signum(wi) * max(0.0, abs(wi) - shrinkageVal))
- })
+ }
(newWeights, newWeights.norm1 * regParam)
}
}
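
The rewritten loop in L1Updater is elementwise soft thresholding, the L1 proximal step: each weight becomes signum(w) * max(0.0, abs(w) - shrinkageVal), shrinking toward zero and clipping to exactly zero below the threshold. A self-contained sketch of the same operation on a plain array (a hypothetical helper, not part of this patch):

  import math.{abs, max, signum}

  object SoftThreshold {
    /** Shrinks each weight toward zero by `shrinkage`, zeroing any weight
      * whose magnitude falls below the threshold. */
    def apply(weights: Array[Double], shrinkage: Double): Array[Double] =
      weights.map(w => signum(w) * max(0.0, abs(w) - shrinkage))

    def main(args: Array[String]) {
      // The -0.05 entry falls under the 0.1 threshold and is zeroed out.
      println(SoftThreshold(Array(0.8, -0.05, -1.2), 0.1).mkString(", "))
    }
  }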
diff --git a/mllib/src/main/scala/spark/mllib/recommendation/ALS.scala b/mllib/src/main/scala/spark/mllib/recommendation/ALS.scala
index 7da96397a6..7281b2fcb9 100644
--- a/mllib/src/main/scala/spark/mllib/recommendation/ALS.scala
+++ b/mllib/src/main/scala/spark/mllib/recommendation/ALS.scala
@@ -35,8 +35,7 @@ import org.jblas.{DoubleMatrix, SimpleBlas, Solve}
* of the elements within this block, and the list of destination blocks that each user or
* product will need to send its feature vector to.
*/
-private[recommendation] case class OutLinkBlock(
- elementIds: Array[Int], shouldSend: Array[BitSet])
+private[recommendation] case class OutLinkBlock(elementIds: Array[Int], shouldSend: Array[BitSet])
/**
@@ -105,7 +104,7 @@ class ALS private (var numBlocks: Int, var rank: Int, var iterations: Int, var l
}
/**
- * Run ALS with the configured parmeters on an input RDD of (user, product, rating) triples.
+ * Run ALS with the configured parameters on an input RDD of (user, product, rating) triples.
* Returns a MatrixFactorizationModel with feature vectors for each user and product.
*/
def train(ratings: RDD[(Int, Int, Double)]): MatrixFactorizationModel = {
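
The instance method above is normally reached through the ALS companion object. A minimal sketch, assuming this era's ALS.train(ratings, rank, iterations, lambda) overload and a hypothetical comma-separated ratings file:

  import spark.mllib.recommendation.ALS

  // (user, product, rating) triples, matching the RDD type of train() above.
  val ratings = sc.textFile("data/ratings.csv").map { line =>
    val Array(user, product, rating) = line.split(',')
    (user.toInt, product.toInt, rating.toDouble)
  }
  val alsModel = ALS.train(ratings, 10, 20, 0.01)  // rank, iterations, lambda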
diff --git a/mllib/src/main/scala/spark/mllib/regression/Lasso.scala b/mllib/src/main/scala/spark/mllib/regression/Lasso.scala
index 1952658bb2..e8b1ed8a48 100644
--- a/mllib/src/main/scala/spark/mllib/regression/Lasso.scala
+++ b/mllib/src/main/scala/spark/mllib/regression/Lasso.scala
@@ -53,8 +53,8 @@ class LassoModel(
}
-class LassoLocalRandomSGD private (var stepSize: Double, var regParam: Double, var miniBatchFraction: Double,
- var numIters: Int)
+class LassoLocalRandomSGD private (var stepSize: Double, var regParam: Double,
+ var miniBatchFraction: Double, var numIters: Int)
extends Logging {
/**
@@ -163,7 +163,8 @@ object LassoLocalRandomSGD {
initialWeights: Array[Double])
: LassoModel =
{
- new LassoLocalRandomSGD(stepSize, regParam, miniBatchFraction, numIterations).train(input, initialWeights)
+ new LassoLocalRandomSGD(stepSize, regParam, miniBatchFraction, numIterations).train(
+ input, initialWeights)
}
/**
@@ -190,8 +191,8 @@ object LassoLocalRandomSGD {
/**
* Train a Lasso model given an RDD of (label, features) pairs. We run a fixed number
- * of iterations of gradient descent using the specified step size. We use the entire data set to update
- * the gradient in each iteration.
+ * of iterations of gradient descent using the specified step size. We use the entire data set to
+ * update the gradient in each iteration.
*
* @param input RDD of (label, array of features) pairs.
* @param stepSize Step size to be used for each iteration of Gradient Descent.
@@ -211,8 +212,8 @@ object LassoLocalRandomSGD {
/**
* Train a Lasso model given an RDD of (label, features) pairs. We run a fixed number
- * of iterations of gradient descent using a step size of 1.0. We use the entire data set to update
- * the gradient in each iteration.
+ * of iterations of gradient descent using a step size of 1.0. We use the entire data set to
+ * update the gradient in each iteration.
*
* @param input RDD of (label, array of features) pairs.
* @param numIterations Number of iterations of gradient descent to run.
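
LassoLocalRandomSGD pairs the same SGD driver with the L1Updater shown earlier. A final hedged sketch, assuming the same inferred overload shape as SVMLocalRandomSGD; note that regression labels stay Double, so no toInt mapping is applied:

  // Sketch only: placeholders throughout (200 iterations, step 1.0,
  // regParam 0.1, full mini-batch fraction).
  val regressionData = MLUtils.loadLabeledData(sc, "data/lasso-input.txt")
  val lassoModel = LassoLocalRandomSGD.train(regressionData, 200, 1.0, 0.1, 1.0)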