author     Reynold Xin <reynoldx@gmail.com>  2013-07-30 13:59:32 -0700
committer  Reynold Xin <reynoldx@gmail.com>  2013-07-30 13:59:32 -0700
commit     366f7735ebe1004acf113df257950d287c50471a (patch)
tree       9f4076a34be3ba28a32d973cdc55db4d040cfe11 /mllib
parent     47011e685449edfe9f91d8f937a5d23d3b359baf (diff)
Minor style cleanup of mllib.
Diffstat (limited to 'mllib')
-rw-r--r--  mllib/src/main/scala/spark/mllib/classification/LogisticRegression.scala  29
-rw-r--r--  mllib/src/main/scala/spark/mllib/classification/SVM.scala                 15
-rw-r--r--  mllib/src/main/scala/spark/mllib/optimization/Updater.scala               10
-rw-r--r--  mllib/src/main/scala/spark/mllib/recommendation/ALS.scala                  5
-rw-r--r--  mllib/src/main/scala/spark/mllib/regression/Lasso.scala                   15
5 files changed, 39 insertions(+), 35 deletions(-)
diff --git a/mllib/src/main/scala/spark/mllib/classification/LogisticRegression.scala b/mllib/src/main/scala/spark/mllib/classification/LogisticRegression.scala
index bf3b05dedb..203aa8fdd4 100644
--- a/mllib/src/main/scala/spark/mllib/classification/LogisticRegression.scala
+++ b/mllib/src/main/scala/spark/mllib/classification/LogisticRegression.scala
@@ -135,8 +135,8 @@ class LogisticRegressionLocalRandomSGD private (var stepSize: Double, var miniBa
object LogisticRegressionLocalRandomSGD {
/**
- * Train a logistic regression model given an RDD of (label, features) pairs. We run a fixed number
- * of iterations of gradient descent using the specified step size. Each iteration uses
+ * Train a logistic regression model given an RDD of (label, features) pairs. We run a fixed
+ * number of iterations of gradient descent using the specified step size. Each iteration uses
* `miniBatchFraction` fraction of the data to calculate the gradient. The weights used in
* gradient descent are initialized using the initial weights provided.
*
@@ -155,12 +155,13 @@ object LogisticRegressionLocalRandomSGD {
initialWeights: Array[Double])
: LogisticRegressionModel =
{
- new LogisticRegressionLocalRandomSGD(stepSize, miniBatchFraction, numIterations).train(input, initialWeights)
+ new LogisticRegressionLocalRandomSGD(stepSize, miniBatchFraction, numIterations).train(
+ input, initialWeights)
}
/**
- * Train a logistic regression model given an RDD of (label, features) pairs. We run a fixed number
- * of iterations of gradient descent using the specified step size. Each iteration uses
+ * Train a logistic regression model given an RDD of (label, features) pairs. We run a fixed
+ * number of iterations of gradient descent using the specified step size. Each iteration uses
* `miniBatchFraction` fraction of the data to calculate the gradient.
*
* @param input RDD of (label, array of features) pairs.
@@ -180,9 +181,9 @@ object LogisticRegressionLocalRandomSGD {
}
/**
- * Train a logistic regression model given an RDD of (label, features) pairs. We run a fixed number
- * of iterations of gradient descent using the specified step size. We use the entire data set to update
- * the gradient in each iteration.
+ * Train a logistic regression model given an RDD of (label, features) pairs. We run a fixed
+ * number of iterations of gradient descent using the specified step size. We use the entire data
+ * set to update the gradient in each iteration.
*
* @param input RDD of (label, array of features) pairs.
* @param stepSize Step size to be used for each iteration of Gradient Descent.
@@ -200,9 +201,9 @@ object LogisticRegressionLocalRandomSGD {
}
/**
- * Train a logistic regression model given an RDD of (label, features) pairs. We run a fixed number
- * of iterations of gradient descent using a step size of 1.0. We use the entire data set to update
- * the gradient in each iteration.
+ * Train a logistic regression model given an RDD of (label, features) pairs. We run a fixed
+ * number of iterations of gradient descent using a step size of 1.0. We use the entire data set
+ * to update the gradient in each iteration.
*
* @param input RDD of (label, array of features) pairs.
* @param numIterations Number of iterations of gradient descent to run.
@@ -218,12 +219,14 @@ object LogisticRegressionLocalRandomSGD {
def main(args: Array[String]) {
if (args.length != 5) {
- println("Usage: LogisticRegression <master> <input_dir> <step_size> <regularization_parameter> <niters>")
+ println("Usage: LogisticRegression <master> <input_dir> <step_size> " +
+ "<regularization_parameter> <niters>")
System.exit(1)
}
val sc = new SparkContext(args(0), "LogisticRegression")
val data = MLUtils.loadLabeledData(sc, args(1)).map(yx => (yx._1.toInt, yx._2))
- val model = LogisticRegressionLocalRandomSGD.train(data, args(4).toInt, args(2).toDouble, args(3).toDouble)
+ val model = LogisticRegressionLocalRandomSGD.train(
+ data, args(4).toInt, args(2).toDouble, args(3).toDouble)
sc.stop()
}
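
For reference, the wrapped train() call above is reached from a driver like the following. This is a minimal sketch against the spark.mllib API of this era, not part of the patch: the app name, input path, and parameter values are placeholders, and the four-argument overload is assumed to take (input, numIterations, stepSize, miniBatchFraction) as the scaladoc above describes.

  import spark.SparkContext
  import spark.mllib.classification.LogisticRegressionLocalRandomSGD
  import spark.mllib.util.MLUtils

  object LogisticRegressionExample {
    def main(args: Array[String]) {
      val sc = new SparkContext("local", "LogisticRegressionExample")
      // loadLabeledData yields (label: Double, features: Array[Double]) pairs;
      // the classifier expects integer labels, hence the toInt.
      val data = MLUtils.loadLabeledData(sc, "data/lr-input.txt")
        .map(yx => (yx._1.toInt, yx._2))
      // Fixed number of SGD iterations, step size 1.0, full mini-batch fraction.
      val model = LogisticRegressionLocalRandomSGD.train(data, 200, 1.0, 1.0)
      sc.stop()
    }
  }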
diff --git a/mllib/src/main/scala/spark/mllib/classification/SVM.scala b/mllib/src/main/scala/spark/mllib/classification/SVM.scala
index 15b689e7e0..3a6a12814a 100644
--- a/mllib/src/main/scala/spark/mllib/classification/SVM.scala
+++ b/mllib/src/main/scala/spark/mllib/classification/SVM.scala
@@ -53,8 +53,8 @@ class SVMModel(
-class SVMLocalRandomSGD private (var stepSize: Double, var regParam: Double, var miniBatchFraction: Double,
- var numIters: Int)
+class SVMLocalRandomSGD private (var stepSize: Double, var regParam: Double,
+ var miniBatchFraction: Double, var numIters: Int)
extends Logging {
/**
@@ -163,7 +163,8 @@ object SVMLocalRandomSGD {
initialWeights: Array[Double])
: SVMModel =
{
- new SVMLocalRandomSGD(stepSize, regParam, miniBatchFraction, numIterations).train(input, initialWeights)
+ new SVMLocalRandomSGD(stepSize, regParam, miniBatchFraction, numIterations).train(
+ input, initialWeights)
}
/**
@@ -190,8 +191,8 @@ object SVMLocalRandomSGD {
/**
* Train a SVM model given an RDD of (label, features) pairs. We run a fixed number
- * of iterations of gradient descent using the specified step size. We use the entire data set to update
- * the gradient in each iteration.
+ * of iterations of gradient descent using the specified step size. We use the entire data set to
+ * update the gradient in each iteration.
*
* @param input RDD of (label, array of features) pairs.
* @param stepSize Step size to be used for each iteration of Gradient Descent.
@@ -211,8 +212,8 @@ object SVMLocalRandomSGD {
/**
* Train a SVM model given an RDD of (label, features) pairs. We run a fixed number
- * of iterations of gradient descent using a step size of 1.0. We use the entire data set to update
- * the gradient in each iteration.
+ * of iterations of gradient descent using a step size of 1.0. We use the entire data set to
+ * update the gradient in each iteration.
*
* @param input RDD of (label, array of features) pairs.
* @param numIterations Number of iterations of gradient descent to run.
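
SVMLocalRandomSGD mirrors the logistic-regression object with one extra knob, regParam. Continuing the driver sketch above, and assuming the full overload places the arguments in the constructor's order (stepSize, regParam, miniBatchFraction) after numIterations:

  // Sketch only: overload shape inferred from the constructor above; values
  // are placeholders (200 iterations, step 1.0, regParam 0.1, full batch).
  val svmModel = SVMLocalRandomSGD.train(data, 200, 1.0, 0.1, 1.0)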
diff --git a/mllib/src/main/scala/spark/mllib/optimization/Updater.scala b/mllib/src/main/scala/spark/mllib/optimization/Updater.scala
index bf506d2f24..3ebc1409b6 100644
--- a/mllib/src/main/scala/spark/mllib/optimization/Updater.scala
+++ b/mllib/src/main/scala/spark/mllib/optimization/Updater.scala
@@ -25,7 +25,7 @@ abstract class Updater extends Serializable {
* Compute an updated value for weights given the gradient, stepSize and iteration number.
* Also returns the regularization value computed using the *updated* weights.
*
- * @param weightsOlds - Column matrix of size nx1 where n is the number of features.
+ * @param weightsOld - Column matrix of size nx1 where n is the number of features.
* @param gradient - Column matrix of size nx1 where n is the number of features.
* @param stepSize - step size across iterations
* @param iter - Iteration number
@@ -34,8 +34,8 @@ abstract class Updater extends Serializable {
* @return A tuple of 2 elements. The first element is a column matrix containing updated weights,
* and the second element is the regularization value computed using updated weights.
*/
- def compute(weightsOld: DoubleMatrix, gradient: DoubleMatrix, stepSize: Double, iter: Int, regParam: Double):
- (DoubleMatrix, Double)
+ def compute(weightsOld: DoubleMatrix, gradient: DoubleMatrix, stepSize: Double, iter: Int,
+ regParam: Double): (DoubleMatrix, Double)
}
class SimpleUpdater extends Updater {
@@ -64,10 +64,10 @@ class L1Updater extends Updater {
val newWeights = weightsOld.sub(normGradient)
// Soft thresholding
val shrinkageVal = regParam * thisIterStepSize
- (0 until newWeights.length).foreach(i => {
+ (0 until newWeights.length).foreach { i =>
val wi = newWeights.get(i)
newWeights.put(i, signum(wi) * max(0.0, abs(wi) - shrinkageVal))
- })
+ }
(newWeights, newWeights.norm1 * regParam)
}
}
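
The rewritten loop in L1Updater is elementwise soft thresholding, the L1 proximal step: each weight becomes signum(w) * max(0.0, abs(w) - shrinkageVal), shrinking toward zero and clipping to exactly zero below the threshold. A self-contained sketch of the same operation on a plain array (a hypothetical helper, not part of this patch):

  import math.{abs, max, signum}

  object SoftThreshold {
    /** Shrinks each weight toward zero by `shrinkage`, zeroing any weight
      * whose magnitude falls below the threshold. */
    def apply(weights: Array[Double], shrinkage: Double): Array[Double] =
      weights.map(w => signum(w) * max(0.0, abs(w) - shrinkage))

    def main(args: Array[String]) {
      // The -0.05 entry falls under the 0.1 threshold and is zeroed out.
      println(SoftThreshold(Array(0.8, -0.05, -1.2), 0.1).mkString(", "))
    }
  }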
diff --git a/mllib/src/main/scala/spark/mllib/recommendation/ALS.scala b/mllib/src/main/scala/spark/mllib/recommendation/ALS.scala
index 7da96397a6..7281b2fcb9 100644
--- a/mllib/src/main/scala/spark/mllib/recommendation/ALS.scala
+++ b/mllib/src/main/scala/spark/mllib/recommendation/ALS.scala
@@ -35,8 +35,7 @@ import org.jblas.{DoubleMatrix, SimpleBlas, Solve}
* of the elements within this block, and the list of destination blocks that each user or
* product will need to send its feature vector to.
*/
-private[recommendation] case class OutLinkBlock(
- elementIds: Array[Int], shouldSend: Array[BitSet])
+private[recommendation] case class OutLinkBlock(elementIds: Array[Int], shouldSend: Array[BitSet])
/**
@@ -105,7 +104,7 @@ class ALS private (var numBlocks: Int, var rank: Int, var iterations: Int, var l
}
/**
- * Run ALS with the configured parmeters on an input RDD of (user, product, rating) triples.
+ * Run ALS with the configured parameters on an input RDD of (user, product, rating) triples.
* Returns a MatrixFactorizationModel with feature vectors for each user and product.
*/
def train(ratings: RDD[(Int, Int, Double)]): MatrixFactorizationModel = {
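
The instance method above is normally reached through the ALS companion object. A minimal sketch, assuming this era's ALS.train(ratings, rank, iterations, lambda) overload and a hypothetical comma-separated ratings file:

  import spark.mllib.recommendation.ALS

  // (user, product, rating) triples, matching the RDD type of train() above.
  val ratings = sc.textFile("data/ratings.csv").map { line =>
    val Array(user, product, rating) = line.split(',')
    (user.toInt, product.toInt, rating.toDouble)
  }
  val alsModel = ALS.train(ratings, 10, 20, 0.01)  // rank, iterations, lambda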
diff --git a/mllib/src/main/scala/spark/mllib/regression/Lasso.scala b/mllib/src/main/scala/spark/mllib/regression/Lasso.scala
index 1952658bb2..e8b1ed8a48 100644
--- a/mllib/src/main/scala/spark/mllib/regression/Lasso.scala
+++ b/mllib/src/main/scala/spark/mllib/regression/Lasso.scala
@@ -53,8 +53,8 @@ class LassoModel(
}
-class LassoLocalRandomSGD private (var stepSize: Double, var regParam: Double, var miniBatchFraction: Double,
- var numIters: Int)
+class LassoLocalRandomSGD private (var stepSize: Double, var regParam: Double,
+ var miniBatchFraction: Double, var numIters: Int)
extends Logging {
/**
@@ -163,7 +163,8 @@ object LassoLocalRandomSGD {
initialWeights: Array[Double])
: LassoModel =
{
- new LassoLocalRandomSGD(stepSize, regParam, miniBatchFraction, numIterations).train(input, initialWeights)
+ new LassoLocalRandomSGD(stepSize, regParam, miniBatchFraction, numIterations).train(
+ input, initialWeights)
}
/**
@@ -190,8 +191,8 @@ object LassoLocalRandomSGD {
/**
* Train a Lasso model given an RDD of (label, features) pairs. We run a fixed number
- * of iterations of gradient descent using the specified step size. We use the entire data set to update
- * the gradient in each iteration.
+ * of iterations of gradient descent using the specified step size. We use the entire data set to
+ * update the gradient in each iteration.
*
* @param input RDD of (label, array of features) pairs.
* @param stepSize Step size to be used for each iteration of Gradient Descent.
@@ -211,8 +212,8 @@ object LassoLocalRandomSGD {
/**
* Train a Lasso model given an RDD of (label, features) pairs. We run a fixed number
- * of iterations of gradient descent using a step size of 1.0. We use the entire data set to update
- * the gradient in each iteration.
+ * of iterations of gradient descent using a step size of 1.0. We use the entire data set to
+ * update the gradient in each iteration.
*
* @param input RDD of (label, array of features) pairs.
* @param numIterations Number of iterations of gradient descent to run.
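
LassoLocalRandomSGD pairs the same SGD driver with the L1Updater shown earlier. A final hedged sketch, assuming the same inferred overload shape as SVMLocalRandomSGD; note that regression labels stay Double, so no toInt mapping is applied:

  // Sketch only: placeholders throughout (200 iterations, step 1.0,
  // regParam 0.1, full mini-batch fraction).
  val regressionData = MLUtils.loadLabeledData(sc, "data/lasso-input.txt")
  val lassoModel = LassoLocalRandomSGD.train(regressionData, 200, 1.0, 0.1, 1.0)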