diff options
author | Sean Owen <sowen@cloudera.com> | 2016-09-01 12:13:07 -0700 |
---|---|---|
committer | Josh Rosen <joshrosen@databricks.com> | 2016-09-01 12:13:07 -0700 |
commit | 3893e8c576cf1a6decc18701267ce7cd8caaf521 (patch) | |
tree | e7a7b61f13a348f52ae0a25162157b28203b58ca /mllib/src/main | |
parent | 2be5f8d7e0819de03971d0af6fa310793d2d0e65 (diff) | |
download | spark-3893e8c576cf1a6decc18701267ce7cd8caaf521.tar.gz spark-3893e8c576cf1a6decc18701267ce7cd8caaf521.tar.bz2 spark-3893e8c576cf1a6decc18701267ce7cd8caaf521.zip |
[SPARK-17331][CORE][MLLIB] Avoid allocating 0-length arrays
## What changes were proposed in this pull request?
Avoid allocating some 0-length arrays, especially in UTF8String, and use Array.empty instead of Array[T]() in Scala code.
## How was this patch tested?
Jenkins
Author: Sean Owen <sowen@cloudera.com>
Closes #14895 from srowen/SPARK-17331.
Diffstat (limited to 'mllib/src/main')
4 files changed, 7 insertions, 7 deletions
diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/MultinomialLogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/MultinomialLogisticRegression.scala
index f85ac76a8d..006f57c0ce 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/MultinomialLogisticRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/MultinomialLogisticRegression.scala
@@ -363,7 +363,7 @@ class MultinomialLogisticRegression @Since("2.1.0") (
             rawCoefficients(coefIndex)
           }
         } else {
-          Array[Double]()
+          Array.empty
         }
 
         val coefficientArray: Array[Double] = Array.tabulate(numClasses * numFeatures) { i =>
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala
index 8b04b5de6f..e8e28ba29c 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala
@@ -164,8 +164,8 @@ class OneHotEncoder @Since("1.4.0") (@Since("1.4.0") override val uid: String) e
     // data transformation
     val size = outputAttrGroup.size
     val oneValue = Array(1.0)
-    val emptyValues = Array[Double]()
-    val emptyIndices = Array[Int]()
+    val emptyValues = Array.empty[Double]
+    val emptyIndices = Array.empty[Int]
     val encode = udf { label: Double =>
       if (label < size) {
         Vectors.sparse(size, Array(label.toInt), oneValue)
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala
index 4c39cf17f4..ad882c969a 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala
@@ -842,7 +842,7 @@ object SparseMatrix {
       "The expected number of nonzeros cannot be greater than Int.MaxValue.")
     val nnz = math.ceil(expected).toInt
     if (density == 0.0) {
-      new SparseMatrix(numRows, numCols, new Array[Int](numCols + 1), Array[Int](), Array[Double]())
+      new SparseMatrix(numRows, numCols, new Array[Int](numCols + 1), Array.empty, Array.empty)
     } else if (density == 1.0) {
       val colPtrs = Array.tabulate(numCols + 1)(j => j * numRows)
       val rowIndices = Array.tabulate(size.toInt)(idx => idx % numRows)
@@ -1098,7 +1098,7 @@ object Matrices {
   @Since("1.3.0")
   def horzcat(matrices: Array[Matrix]): Matrix = {
     if (matrices.isEmpty) {
-      return new DenseMatrix(0, 0, Array[Double]())
+      return new DenseMatrix(0, 0, Array.empty)
     } else if (matrices.length == 1) {
       return matrices(0)
     }
@@ -1157,7 +1157,7 @@ object Matrices {
   @Since("1.3.0")
   def vertcat(matrices: Array[Matrix]): Matrix = {
     if (matrices.isEmpty) {
-      return new DenseMatrix(0, 0, Array[Double]())
+      return new DenseMatrix(0, 0, Array.empty[Double])
     } else if (matrices.length == 1) {
       return matrices(0)
     }
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/test/ChiSqTest.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/test/ChiSqTest.scala
index da5df9bf45..9a63b8a5d6 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/stat/test/ChiSqTest.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/stat/test/ChiSqTest.scala
@@ -146,7 +146,7 @@ private[stat] object ChiSqTest extends Logging {
    * Uniform distribution is assumed when `expected` is not passed in.
    */
   def chiSquared(observed: Vector,
-      expected: Vector = Vectors.dense(Array[Double]()),
+      expected: Vector = Vectors.dense(Array.empty[Double]),
       methodName: String = PEARSON.name): ChiSqTestResult = {
 
     // Validate input arguments |