author     Shivaram Venkataraman <shivaram@eecs.berkeley.edu>   2013-07-29 16:25:17 -0700
committer  Shivaram Venkataraman <shivaram@eecs.berkeley.edu>   2013-07-29 16:25:17 -0700
commit     07da72b45190f7db9daa2c6bd33577d28e19e659 (patch)
tree       c3bf7e2f57ca8fee63b082f4b8549075888ba6bf /mllib
parent     fe7298b587b89abffefab2febac4e3861ca2c1c4 (diff)
Remove duplicate loss history and clarify why.
Also some minor style fixes.
Diffstat (limited to 'mllib')
-rw-r--r--  mllib/src/main/scala/spark/mllib/classification/LogisticRegression.scala  |  5
-rw-r--r--  mllib/src/main/scala/spark/mllib/optimization/GradientDescent.scala       | 15
-rw-r--r--  mllib/src/main/scala/spark/mllib/optimization/Updater.scala               |  2
3 files changed, 9 insertions, 13 deletions
diff --git a/mllib/src/main/scala/spark/mllib/classification/LogisticRegression.scala b/mllib/src/main/scala/spark/mllib/classification/LogisticRegression.scala
index bc1c327729..bf3b05dedb 100644
--- a/mllib/src/main/scala/spark/mllib/classification/LogisticRegression.scala
+++ b/mllib/src/main/scala/spark/mllib/classification/LogisticRegression.scala
@@ -151,7 +151,6 @@ object LogisticRegressionLocalRandomSGD {
input: RDD[(Int, Array[Double])],
numIterations: Int,
stepSize: Double,
-
miniBatchFraction: Double,
initialWeights: Array[Double])
: LogisticRegressionModel =
@@ -174,7 +173,6 @@ object LogisticRegressionLocalRandomSGD {
input: RDD[(Int, Array[Double])],
numIterations: Int,
stepSize: Double,
-
miniBatchFraction: Double)
: LogisticRegressionModel =
{
@@ -195,8 +193,7 @@ object LogisticRegressionLocalRandomSGD {
def train(
input: RDD[(Int, Array[Double])],
numIterations: Int,
- stepSize: Double
- )
+ stepSize: Double)
: LogisticRegressionModel =
{
train(input, numIterations, stepSize, 1.0)
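
For context, the overload cleaned up in this hunk simply forwards to the full signature with miniBatchFraction = 1.0, i.e. full-batch gradient descent. A minimal usage sketch (hypothetical values; assumes a local SparkContext and the pre-Apache spark.* package layout visible in the diff paths):

    import spark.SparkContext
    import spark.mllib.classification.LogisticRegressionLocalRandomSGD

    // Hypothetical example data: (label, features) pairs, with 0/1 Int labels
    // to match the RDD[(Int, Array[Double])] parameter above.
    val sc = new SparkContext("local", "lr-example")
    val data = sc.parallelize(Seq(
      (1, Array(1.0, 2.0)),
      (0, Array(-1.0, -2.0))))

    // The three-argument overload reformatted above: numIterations = 100,
    // stepSize = 1.0; it delegates with miniBatchFraction = 1.0.
    val model = LogisticRegressionLocalRandomSGD.train(data, 100, 1.0)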
diff --git a/mllib/src/main/scala/spark/mllib/optimization/GradientDescent.scala b/mllib/src/main/scala/spark/mllib/optimization/GradientDescent.scala
index d4b83a1456..19cda26446 100644
--- a/mllib/src/main/scala/spark/mllib/optimization/GradientDescent.scala
+++ b/mllib/src/main/scala/spark/mllib/optimization/GradientDescent.scala
@@ -61,7 +61,7 @@ object GradientDescent {
// Initialize weights as a column vector
var weights = new DoubleMatrix(initialWeights.length, 1, initialWeights:_*)
- var reg_val = 0.0
+ var regVal = 0.0
for (i <- 1 to numIters) {
val (gradientSum, lossSum) = data.sample(false, miniBatchFraction, 42+i).map {
@@ -71,15 +71,14 @@ object GradientDescent {
(grad, loss)
}.reduce((a, b) => (a._1.addi(b._1), a._2 + b._2))
- stochasticLossHistory.append(lossSum / miniBatchSize + reg_val)
+ /**
+ * NOTE(Xinghao): lossSum is computed using the weights from the previous iteration
+ * and regVal is the regularization value computed in the previous iteration as well.
+ */
+ stochasticLossHistory.append(lossSum / miniBatchSize + regVal)
val update = updater.compute(weights, gradientSum.div(miniBatchSize), stepSize, i, regParam)
weights = update._1
- reg_val = update._2
- stochasticLossHistory.append(lossSum / miniBatchSize + reg_val)
- /*
- * NOTE(Xinghao): The loss here is sum of lossSum computed using the weights before applying updater,
- * and reg_val using weights after applying updater
- */
+ regVal = update._2
}
(weights.toArray, stochasticLossHistory.toArray)
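
The net effect of this hunk: exactly one loss value is appended per iteration, and it is recorded before updater.compute runs, so each entry pairs lossSum (computed with the previous iteration's weights) with the regVal produced alongside those same weights. A minimal sketch of the corrected loop shape, with the sampled mini-batch map/reduce hidden behind a hypothetical lossAndGradient function; the other names mirror GradientDescent.scala:

    import scala.collection.mutable.ArrayBuffer
    import org.jblas.DoubleMatrix

    def sketchLoop(
        lossAndGradient: (DoubleMatrix, Int) => (DoubleMatrix, Double),
        updater: (DoubleMatrix, DoubleMatrix, Double, Int, Double) => (DoubleMatrix, Double),
        initialWeights: Array[Double],
        numIters: Int, miniBatchSize: Double,
        stepSize: Double, regParam: Double): (Array[Double], Array[Double]) = {
      val stochasticLossHistory = new ArrayBuffer[Double]()
      var weights = new DoubleMatrix(initialWeights.length, 1, initialWeights: _*)
      var regVal = 0.0
      for (i <- 1 to numIters) {
        val (gradientSum, lossSum) = lossAndGradient(weights, i)
        // One append per iteration: lossSum was computed with the weights from
        // the previous iteration, and regVal with those same weights.
        stochasticLossHistory.append(lossSum / miniBatchSize + regVal)
        val update = updater(weights, gradientSum.div(miniBatchSize), stepSize, i, regParam)
        weights = update._1
        regVal = update._2
      }
      (weights.toArray, stochasticLossHistory.toArray)
    }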
diff --git a/mllib/src/main/scala/spark/mllib/optimization/Updater.scala b/mllib/src/main/scala/spark/mllib/optimization/Updater.scala
index 188fe7d972..bbf21e5c28 100644
--- a/mllib/src/main/scala/spark/mllib/optimization/Updater.scala
+++ b/mllib/src/main/scala/spark/mllib/optimization/Updater.scala
@@ -76,7 +76,7 @@ class SquaredL2Updater extends Updater {
val thisIterStepSize = stepSize / math.sqrt(iter)
val normGradient = gradient.mul(thisIterStepSize)
val newWeights = weightsOld.sub(normGradient).div(2.0 * thisIterStepSize * regParam + 1.0)
- (newWeights, pow(newWeights.norm2,2.0) * regParam)
+ (newWeights, pow(newWeights.norm2, 2.0) * regParam)
}
}
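
For reference, the new weights here are the closed-form minimizer of ||x - (w - eta*g)||^2 / (2*eta) + regParam * ||x||^2 with eta = thisIterStepSize, which is why dividing by (2*eta*regParam + 1) applies the L2 shrinkage; the second tuple element, regParam * ||x||^2, is the regularization value that GradientDescent folds into the next recorded loss. A standalone sketch of one step with made-up numbers (not from the commit):

    import org.jblas.DoubleMatrix

    // Hypothetical inputs for a single SquaredL2Updater-style step.
    val weightsOld = new DoubleMatrix(Array(0.5, -0.25))
    val gradient   = new DoubleMatrix(Array(0.1, 0.2))
    val (stepSize, iter, regParam) = (1.0, 4, 0.01)

    val thisIterStepSize = stepSize / math.sqrt(iter)  // decaying step: here 0.5
    val normGradient = gradient.mul(thisIterStepSize)
    // Gradient step followed by L2 shrinkage, as in the diff above.
    val newWeights = weightsOld.sub(normGradient).div(2.0 * thisIterStepSize * regParam + 1.0)
    // Regularization value reported back to the gradient-descent loop.
    val regVal = math.pow(newWeights.norm2, 2.0) * regParam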