author     Shivaram Venkataraman <shivaram@eecs.berkeley.edu>  2013-07-17 14:20:24 -0700
committer  Shivaram Venkataraman <shivaram@eecs.berkeley.edu>  2013-07-17 14:20:24 -0700
commit     3bf989713654129ad35a80309d1b354ca5ddd06c (patch)
tree       ed78bfe242c04f688380263b0e9c79bd01dfd367 /mllib
parent     84fa20c2a135f54745ddde9abb4f5e60af8856d1 (diff)
Rename loss -> stochasticLoss and add a note to explain why we have
multiple train methods.
Diffstat (limited to 'mllib')
-rw-r--r--  mllib/src/main/scala/spark/mllib/optimization/GradientDescent.scala   9
-rw-r--r--  mllib/src/main/scala/spark/mllib/regression/LogisticRegression.scala  10
-rw-r--r--  mllib/src/main/scala/spark/mllib/regression/RidgeRegression.scala      2
3 files changed, 13 insertions, 8 deletions
diff --git a/mllib/src/main/scala/spark/mllib/optimization/GradientDescent.scala b/mllib/src/main/scala/spark/mllib/optimization/GradientDescent.scala
index 2ac0808357..77f5a7ae24 100644
--- a/mllib/src/main/scala/spark/mllib/optimization/GradientDescent.scala
+++ b/mllib/src/main/scala/spark/mllib/optimization/GradientDescent.scala
@@ -40,7 +40,8 @@ object GradientDescent {
    * one iteration of SGD. Default value 1.0.
    *
    * @return weights - Column matrix containing weights for every feature.
-   * @return lossHistory - Array containing the loss computed for every iteration.
+   * @return stochasticLossHistory - Array containing the stochastic loss computed for
+   *         every iteration.
    */
   def runMiniBatchSGD(
     data: RDD[(Double, Array[Double])],
@@ -51,7 +52,7 @@ object GradientDescent {
     initialWeights: Array[Double],
     miniBatchFraction: Double=1.0) : (DoubleMatrix, Array[Double]) = {
 
-    val lossHistory = new ArrayBuffer[Double](numIters)
+    val stochasticLossHistory = new ArrayBuffer[Double](numIters)
 
     val nexamples: Long = data.count()
     val miniBatchSize = nexamples * miniBatchFraction
@@ -69,12 +70,12 @@ object GradientDescent {
         (grad, loss)
       }.reduce((a, b) => (a._1.addi(b._1), a._2 + b._2))
 
-      lossHistory.append(lossSum / miniBatchSize + reg_val)
+      stochasticLossHistory.append(lossSum / miniBatchSize + reg_val)
       val update = updater.compute(weights, gradientSum.div(miniBatchSize), stepSize, i)
       weights = update._1
       reg_val = update._2
     }
 
-    (weights, lossHistory.toArray)
+    (weights, stochasticLossHistory.toArray)
   }
 }
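
The rename above makes the name match what the code computes: each history entry is the loss measured on the sampled mini-batch (lossSum / miniBatchSize + reg_val), a noisy estimate of the full objective, so successive entries can fluctuate even while the true loss is decreasing. The following is a minimal, self-contained Scala sketch of the same loop shape with no Spark dependency; the names (MiniBatchSGDSketch, gradientAndLoss) and the squared-error gradient are illustrative assumptions, not the mllib implementation.

import scala.collection.mutable.ArrayBuffer
import scala.util.Random

object MiniBatchSGDSketch {
  // Squared-error loss and its gradient for a linear model on one example.
  // (Hypothetical helper; mllib delegates this to a Gradient object.)
  def gradientAndLoss(w: Array[Double], x: Array[Double], y: Double): (Array[Double], Double) = {
    val diff = w.zip(x).map { case (wi, xi) => wi * xi }.sum - y
    (x.map(_ * diff), 0.5 * diff * diff)
  }

  def runMiniBatchSGD(
      data: Array[(Double, Array[Double])],
      numIters: Int,
      stepSize: Double,
      miniBatchFraction: Double): (Array[Double], Array[Double]) = {
    val rand = new Random(42)
    var weights = Array.fill(data(0)._2.length)(0.0)
    val stochasticLossHistory = new ArrayBuffer[Double](numIters)
    val miniBatchSize = math.max(1, (data.length * miniBatchFraction).toInt)

    for (i <- 1 to numIters) {
      // Sample a mini-batch and sum gradients/losses over it, like the
      // map/reduce over the sampled RDD in the diff above.
      val batch = Array.fill(miniBatchSize)(data(rand.nextInt(data.length)))
      val (gradSum, lossSum) = batch
        .map { case (y, x) => gradientAndLoss(weights, x, y) }
        .reduce((a, b) => (a._1.zip(b._1).map(t => t._1 + t._2), a._2 + b._2))
      // The recorded loss is computed on the sample only: a stochastic estimate.
      stochasticLossHistory.append(lossSum / miniBatchSize)
      weights = weights.zip(gradSum).map { case (w, g) => w - stepSize * g / miniBatchSize }
    }
    (weights, stochasticLossHistory.toArray)
  }
}

Run on a toy dataset, the returned history trends downward but bounces from iteration to iteration, which is exactly the behavior the "stochastic" prefix is meant to signal.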
diff --git a/mllib/src/main/scala/spark/mllib/regression/LogisticRegression.scala b/mllib/src/main/scala/spark/mllib/regression/LogisticRegression.scala
index dab15aa386..664baf33a3 100644
--- a/mllib/src/main/scala/spark/mllib/regression/LogisticRegression.scala
+++ b/mllib/src/main/scala/spark/mllib/regression/LogisticRegression.scala
@@ -30,7 +30,7 @@ import org.jblas.DoubleMatrix
 class LogisticRegressionModel(
   val weights: DoubleMatrix,
   val intercept: Double,
-  val losses: Array[Double]) extends RegressionModel {
+  val stochasticLosses: Array[Double]) extends RegressionModel {
 
   override def predict(testData: spark.RDD[Array[Double]]) = {
     testData.map { x =>
@@ -114,7 +114,7 @@ class LogisticRegression private (var stepSize: Double, var miniBatchFraction: D
     val initalWeightsWithIntercept = Array(1.0, initialWeights:_*)
 
-    val (weights, losses) = GradientDescent.runMiniBatchSGD(
+    val (weights, stochasticLosses) = GradientDescent.runMiniBatchSGD(
       data,
       new LogisticGradient(),
       new SimpleUpdater(),
@@ -126,17 +126,19 @@ class LogisticRegression private (var stepSize: Double, var miniBatchFraction: D
     val weightsScaled = weights.getRange(1, weights.length)
     val intercept = weights.get(0)
 
-    val model = new LogisticRegressionModel(weightsScaled, intercept, losses)
+    val model = new LogisticRegressionModel(weightsScaled, intercept, stochasticLosses)
 
     logInfo("Final model weights " + model.weights)
     logInfo("Final model intercept " + model.intercept)
-    logInfo("Last 10 losses " + model.losses.takeRight(10).mkString(", "))
+    logInfo("Last 10 stochastic losses " + model.stochasticLosses.takeRight(10).mkString(", "))
 
     model
   }
 }
 
 /**
  * Top-level methods for calling Logistic Regression.
+ * NOTE(shivaram): We use multiple train methods instead of default arguments to support
+ * Java programs.
  */
 object LogisticRegression {
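
The NOTE added here (and repeated in RidgeRegression below) exists because Scala default parameter values are not directly usable from Java: a Java caller must supply every argument, so an API aimed at Java programs exposes one explicit overload per arity instead. Below is a sketch of that pattern; the object name is hypothetical and Seq stands in for RDD to keep the example self-contained, so this is the shape of the idea rather than the actual mllib API.

// Hypothetical sketch of the overload pattern the NOTE describes; instead of
//   def train(input: ..., numIterations: Int, stepSize: Double = 1.0, miniBatchFraction: Double = 1.0)
// the object exposes one method per arity, each forwarding to the fullest one.
object TrainOverloadsSketch {
  // The "full" method every overload forwards to.
  def train(
      input: Seq[(Double, Array[Double])],
      numIterations: Int,
      stepSize: Double,
      miniBatchFraction: Double): String =
    s"trained: iters=$numIterations, step=$stepSize, fraction=$miniBatchFraction"

  // Overload for callers (e.g. Java) that want the default miniBatchFraction.
  def train(input: Seq[(Double, Array[Double])], numIterations: Int, stepSize: Double): String =
    train(input, numIterations, stepSize, 1.0)

  // Overload that also defaults stepSize.
  def train(input: Seq[(Double, Array[Double])], numIterations: Int): String =
    train(input, numIterations, 1.0, 1.0)
}

With default arguments instead, the Scala compiler emits synthetic train$default$N accessor methods that Java code cannot call idiomatically, which is the limitation the NOTE records.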
diff --git a/mllib/src/main/scala/spark/mllib/regression/RidgeRegression.scala b/mllib/src/main/scala/spark/mllib/regression/RidgeRegression.scala
index 7c7f912b43..f724edd732 100644
--- a/mllib/src/main/scala/spark/mllib/regression/RidgeRegression.scala
+++ b/mllib/src/main/scala/spark/mllib/regression/RidgeRegression.scala
@@ -164,6 +164,8 @@ class RidgeRegression private (var lambdaLow: Double, var lambdaHigh: Double)
 /**
  * Top-level methods for calling Ridge Regression.
+ * NOTE(shivaram): We use multiple train methods instead of default arguments to support
+ * Java programs.
  */
 object RidgeRegression {