author    Liwei Lin <lwlin7@gmail.com>    2016-09-07 10:04:00 +0100
committer Sean Owen <sowen@cloudera.com>    2016-09-07 10:04:00 +0100
commit    3ce3a282c8463408f9a2db93c1748e8df8087e07 (patch)
tree      7814535174f3ef7294cfd20e4dfeae28fecd4693 /mllib
parent    9fccde4ff80fb0fd65a9e90eb3337965e4349de4 (diff)
[SPARK-17359][SQL][MLLIB] Use ArrayBuffer.+=(A) instead of ArrayBuffer.append(A) in performance critical paths
## What changes were proposed in this pull request?

We should generally use `ArrayBuffer.+=(A)` rather than `ArrayBuffer.append(A)`, because `append(A)` would involve extra boxing / unboxing.

## How was this patch tested?

N/A

Author: Liwei Lin <lwlin7@gmail.com>

Closes #14914 from lw-lin/append_to_plus_eq_v2.
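For context, here is a minimal standalone sketch of the two call styles; it is not part of the patch, and it assumes Scala 2.11 (the version Spark targeted at the time), where `append` is declared as the varargs method `append(elems: A*)`:

```scala
import scala.collection.mutable.ArrayBuffer

object AppendVsPlusEq {
  def main(args: Array[String]): Unit = {
    val history = new ArrayBuffer[Double]()

    // append(elems: A*) is varargs in Scala 2.11/2.12: every call wraps its
    // arguments in a sequence before they reach the buffer, and primitive
    // arguments are boxed along the way.
    history.append(0.5)

    // +=(elem: A) hands the element to the buffer directly, with no
    // per-call wrapper allocation.
    history += 0.25

    println(history.mkString(", "))  // prints: 0.5, 0.25
  }
}
```

In a tight loop such as the SGD iteration touched below, avoiding that per-call allocation is what makes `+=` the cheaper choice.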
Diffstat (limited to 'mllib')
 mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala                                 | 4 ++--
 mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala                  | 2 +-
 mllib/src/main/scala/org/apache/spark/mllib/optimization/GradientDescent.scala                    | 2 +-
 mllib/src/test/scala/org/apache/spark/mllib/classification/StreamingLogisticRegressionSuite.scala | 2 +-
 mllib/src/test/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionSuite.scala       | 2 +-
 5 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala
index 8659cea4b8..6642999a21 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala
@@ -1128,7 +1128,7 @@ object Matrices {
           val data = new ArrayBuffer[(Int, Int, Double)]()
           dnMat.foreachActive { (i, j, v) =>
             if (v != 0.0) {
-              data.append((i, j + startCol, v))
+              data += Tuple3(i, j + startCol, v)
             }
           }
           startCol += nCols
@@ -1198,7 +1198,7 @@ object Matrices {
           val data = new ArrayBuffer[(Int, Int, Double)]()
           dnMat.foreachActive { (i, j, v) =>
             if (v != 0.0) {
-              data.append((i + startRow, j, v))
+              data += Tuple3(i + startRow, j, v)
             }
           }
           startRow += nRows
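A side note on the `Tuple3(...)` form in the two hunks above: the more literal rewrite `data += (i, j, v)` would not work, because `Growable` in Scala 2.11/2.12 also defines a multi-element overload `+=(elem1: A, elem2: A, elems: A*)`, so the unparenthesized form resolves to a three-argument call and fails to type-check. A minimal sketch (standalone, not from the patch):

```scala
import scala.collection.mutable.ArrayBuffer

val data = new ArrayBuffer[(Int, Int, Double)]()
data += Tuple3(1, 2, 3.0)  // unambiguous: appends one tuple
data += ((1, 2, 3.0))      // doubled parentheses also work
// data += (1, 2, 3.0)     // would not compile under 2.11/2.12: resolved as
//                         // the three-argument overload data.+=(1, 2, 3.0)
```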
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala
index 9782350587..ff1068417d 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala
@@ -257,7 +257,7 @@ class BlockMatrix @Since("1.3.0") (
       val colStart = blockColIndex.toLong * colsPerBlock
       val entryValues = new ArrayBuffer[MatrixEntry]()
       mat.foreachActive { (i, j, v) =>
-        if (v != 0.0) entryValues.append(new MatrixEntry(rowStart + i, colStart + j, v))
+        if (v != 0.0) entryValues += new MatrixEntry(rowStart + i, colStart + j, v)
       }
       entryValues
     }
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/optimization/GradientDescent.scala b/mllib/src/main/scala/org/apache/spark/mllib/optimization/GradientDescent.scala
index f372355005..123e0bb3e6 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/optimization/GradientDescent.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/optimization/GradientDescent.scala
@@ -252,7 +252,7 @@ object GradientDescent extends Logging {
        * lossSum is computed using the weights from the previous iteration
        * and regVal is the regularization value computed in the previous iteration as well.
        */
-      stochasticLossHistory.append(lossSum / miniBatchSize + regVal)
+      stochasticLossHistory += lossSum / miniBatchSize + regVal
       val update = updater.compute(
         weights, Vectors.fromBreeze(gradientSum / miniBatchSize.toDouble),
         stepSize, i, regParam)
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/classification/StreamingLogisticRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/classification/StreamingLogisticRegressionSuite.scala
index bf98bf2f5f..5f797a60f0 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/classification/StreamingLogisticRegressionSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/classification/StreamingLogisticRegressionSuite.scala
@@ -95,7 +95,7 @@ class StreamingLogisticRegressionSuite extends SparkFunSuite with TestSuiteBase
     // (we add a count to ensure the result is a DStream)
     ssc = setupStreams(input, (inputDStream: DStream[LabeledPoint]) => {
       model.trainOn(inputDStream)
-      inputDStream.foreachRDD(x => history.append(math.abs(model.latestModel().weights(0) - B)))
+      inputDStream.foreachRDD(x => history += math.abs(model.latestModel().weights(0) - B))
       inputDStream.count()
     })
     runStreams(ssc, numBatches, numBatches)
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionSuite.scala
index 34c07ed170..eaeaa3fc1e 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionSuite.scala
@@ -109,7 +109,7 @@ class StreamingLinearRegressionSuite extends SparkFunSuite with TestSuiteBase {
     // (we add a count to ensure the result is a DStream)
     ssc = setupStreams(input, (inputDStream: DStream[LabeledPoint]) => {
       model.trainOn(inputDStream)
-      inputDStream.foreachRDD(x => history.append(math.abs(model.latestModel().weights(0) - 10.0)))
+      inputDStream.foreachRDD(x => history += math.abs(model.latestModel().weights(0) - 10.0))
       inputDStream.count()
     })
     runStreams(ssc, numBatches, numBatches)