author    Liwei Lin <lwlin7@gmail.com>    2016-09-07 10:04:00 +0100
committer Sean Owen <sowen@cloudera.com>    2016-09-07 10:04:00 +0100
commit    3ce3a282c8463408f9a2db93c1748e8df8087e07 (patch)
tree      7814535174f3ef7294cfd20e4dfeae28fecd4693 /mllib
parent    9fccde4ff80fb0fd65a9e90eb3337965e4349de4 (diff)
[SPARK-17359][SQL][MLLIB] Use ArrayBuffer.+=(A) instead of ArrayBuffer.append(A) in performance critical paths
## What changes were proposed in this pull request?

We should generally use `ArrayBuffer.+=(A)` rather than `ArrayBuffer.append(A)`, because `append(A)` would involve extra boxing / unboxing.

## How was this patch tested?

N/A

Author: Liwei Lin <lwlin7@gmail.com>

Closes #14914 from lw-lin/append_to_plus_eq_v2.
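For context, here is a minimal standalone sketch of the two call styles; it is not part of the patch, and it assumes Scala 2.11 (the version Spark targeted at the time), where `append` is declared as the varargs method `append(elems: A*)`:

```scala
import scala.collection.mutable.ArrayBuffer

object AppendVsPlusEq {
  def main(args: Array[String]): Unit = {
    val history = new ArrayBuffer[Double]()

    // append(elems: A*) is varargs in Scala 2.11/2.12: every call wraps its
    // arguments in a sequence before they reach the buffer, and primitive
    // arguments are boxed along the way.
    history.append(0.5)

    // +=(elem: A) hands the element to the buffer directly, with no
    // per-call wrapper allocation.
    history += 0.25

    println(history.mkString(", "))  // prints: 0.5, 0.25
  }
}
```

In a tight loop such as the SGD iteration touched below, avoiding that per-call allocation is what makes `+=` the cheaper choice.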
Diffstat (limited to 'mllib')
 mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala                                 | 4 ++--
 mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala                  | 2 +-
 mllib/src/main/scala/org/apache/spark/mllib/optimization/GradientDescent.scala                    | 2 +-
 mllib/src/test/scala/org/apache/spark/mllib/classification/StreamingLogisticRegressionSuite.scala | 2 +-
 mllib/src/test/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionSuite.scala       | 2 +-
 5 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala
index 8659cea4b8..6642999a21 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala
@@ -1128,7 +1128,7 @@ object Matrices {
           val data = new ArrayBuffer[(Int, Int, Double)]()
           dnMat.foreachActive { (i, j, v) =>
             if (v != 0.0) {
-              data.append((i, j + startCol, v))
+              data += Tuple3(i, j + startCol, v)
             }
           }
           startCol += nCols
@@ -1198,7 +1198,7 @@ object Matrices {
           val data = new ArrayBuffer[(Int, Int, Double)]()
           dnMat.foreachActive { (i, j, v) =>
             if (v != 0.0) {
-              data.append((i + startRow, j, v))
+              data += Tuple3(i + startRow, j, v)
             }
           }
           startRow += nRows
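A side note on the `Tuple3(...)` form in the two hunks above: the more literal rewrite `data += (i, j, v)` would not work, because `Growable` in Scala 2.11/2.12 also defines a multi-element overload `+=(elem1: A, elem2: A, elems: A*)`, so the unparenthesized form resolves to a three-argument call and fails to type-check. A minimal sketch (standalone, not from the patch):

```scala
import scala.collection.mutable.ArrayBuffer

val data = new ArrayBuffer[(Int, Int, Double)]()
data += Tuple3(1, 2, 3.0)  // unambiguous: appends one tuple
data += ((1, 2, 3.0))      // doubled parentheses also work
// data += (1, 2, 3.0)     // would not compile under 2.11/2.12: resolved as
//                         // the three-argument overload data.+=(1, 2, 3.0)
```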
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala
index 9782350587..ff1068417d 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala
@@ -257,7 +257,7 @@ class BlockMatrix @Since("1.3.0") (
       val colStart = blockColIndex.toLong * colsPerBlock
       val entryValues = new ArrayBuffer[MatrixEntry]()
       mat.foreachActive { (i, j, v) =>
-        if (v != 0.0) entryValues.append(new MatrixEntry(rowStart + i, colStart + j, v))
+        if (v != 0.0) entryValues += new MatrixEntry(rowStart + i, colStart + j, v)
       }
       entryValues
     }
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/optimization/GradientDescent.scala b/mllib/src/main/scala/org/apache/spark/mllib/optimization/GradientDescent.scala
index f372355005..123e0bb3e6 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/optimization/GradientDescent.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/optimization/GradientDescent.scala
@@ -252,7 +252,7 @@ object GradientDescent extends Logging {
        * lossSum is computed using the weights from the previous iteration
        * and regVal is the regularization value computed in the previous iteration as well.
        */
-      stochasticLossHistory.append(lossSum / miniBatchSize + regVal)
+      stochasticLossHistory += lossSum / miniBatchSize + regVal
       val update = updater.compute(
         weights, Vectors.fromBreeze(gradientSum / miniBatchSize.toDouble),
         stepSize, i, regParam)
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/classification/StreamingLogisticRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/classification/StreamingLogisticRegressionSuite.scala
index bf98bf2f5f..5f797a60f0 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/classification/StreamingLogisticRegressionSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/classification/StreamingLogisticRegressionSuite.scala
@@ -95,7 +95,7 @@ class StreamingLogisticRegressionSuite extends SparkFunSuite with TestSuiteBase
     // (we add a count to ensure the result is a DStream)
     ssc = setupStreams(input, (inputDStream: DStream[LabeledPoint]) => {
       model.trainOn(inputDStream)
-      inputDStream.foreachRDD(x => history.append(math.abs(model.latestModel().weights(0) - B)))
+      inputDStream.foreachRDD(x => history += math.abs(model.latestModel().weights(0) - B))
       inputDStream.count()
     })
     runStreams(ssc, numBatches, numBatches)
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionSuite.scala
index 34c07ed170..eaeaa3fc1e 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionSuite.scala
@@ -109,7 +109,7 @@ class StreamingLinearRegressionSuite extends SparkFunSuite with TestSuiteBase {
     // (we add a count to ensure the result is a DStream)
     ssc = setupStreams(input, (inputDStream: DStream[LabeledPoint]) => {
       model.trainOn(inputDStream)
-      inputDStream.foreachRDD(x => history.append(math.abs(model.latestModel().weights(0) - 10.0)))
+      inputDStream.foreachRDD(x => history += math.abs(model.latestModel().weights(0) - 10.0))
       inputDStream.count()
     })
     runStreams(ssc, numBatches, numBatches)