about | summary | refs | log | tree | commit | diff
path: root/mllib
diff options
context:
space:
mode:
author: Xiangrui Meng <meng@databricks.com> 2014-03-18 15:14:13 -0700
committer: Reynold Xin <rxin@apache.org> 2014-03-18 15:14:13 -0700
commit: e108b9ab94c4310ec56ef0eda99bb904133f942d (patch)
tree: 6a52e0b38d0db002422cc68f0b78c8284da1d5e0 /mllib
parent: 79e547fe5a675a9a10b6acdc73759d67725ad7c6 (diff)
download: spark-e108b9ab94c4310ec56ef0eda99bb904133f942d.tar.gz
spark-e108b9ab94c4310ec56ef0eda99bb904133f942d.tar.bz2
spark-e108b9ab94c4310ec56ef0eda99bb904133f942d.zip
[SPARK-1260]: faster construction of features with intercept
The current implementation uses `Array(1.0, features: _*)` to construct a new array with intercept. This is not efficient for big arrays because `Array.apply` uses a for loop that iterates over the arguments. `Array.+:` is a better choice here.

Also, I don't see a reason to set initial weights to ones. So I set them to zeros.

JIRA: https://spark-project.atlassian.net/browse/SPARK-1260

Author: Xiangrui Meng <meng@databricks.com>

Closes #161 from mengxr/sgd and squashes the following commits:

b5cfc53 [Xiangrui Meng] set default weights to zeros
a1439c2 [Xiangrui Meng] faster construction of features with intercept
Diffstat (limited to 'mllib')
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala 8
1 files changed, 4 insertions, 4 deletions
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala
index f98b0b536d..b9621530ef 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala
@@ -119,7 +119,7 @@ abstract class GeneralizedLinearAlgorithm[M <: GeneralizedLinearModel]
*/
def run(input: RDD[LabeledPoint]) : M = {
val nfeatures: Int = input.first().features.length
- val initialWeights = Array.fill(nfeatures)(1.0)
+ val initialWeights = new Array[Double](nfeatures)
run(input, initialWeights)
}
@@ -134,15 +134,15 @@ abstract class GeneralizedLinearAlgorithm[M <: GeneralizedLinearModel]
throw new SparkException("Input validation failed.")
}
- // Add a extra variable consisting of all 1.0's for the intercept.
+ // Prepend an extra variable consisting of all 1.0's for the intercept.
val data = if (addIntercept) {
- input.map(labeledPoint => (labeledPoint.label, Array(1.0, labeledPoint.features:_*)))
+ input.map(labeledPoint => (labeledPoint.label, labeledPoint.features.+:(1.0)))
} else {
input.map(labeledPoint => (labeledPoint.label, labeledPoint.features))
}
val initialWeightsWithIntercept = if (addIntercept) {
- Array(1.0, initialWeights:_*)
+ initialWeights.+:(1.0)
} else {
initialWeights
}