From 3893e8c576cf1a6decc18701267ce7cd8caaf521 Mon Sep 17 00:00:00 2001 From: Sean Owen Date: Thu, 1 Sep 2016 12:13:07 -0700 Subject: [SPARK-17331][CORE][MLLIB] Avoid allocating 0-length arrays ## What changes were proposed in this pull request? Avoid allocating some 0-length arrays, esp. in UTF8String, and by using Array.empty in Scala over Array[T]() ## How was this patch tested? Jenkins Author: Sean Owen Closes #14895 from srowen/SPARK-17331. --- .../spark/ml/classification/MultinomialLogisticRegression.scala | 2 +- .../src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala | 4 ++-- mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala | 6 +++--- .../src/main/scala/org/apache/spark/mllib/stat/test/ChiSqTest.scala | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) (limited to 'mllib') diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/MultinomialLogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/MultinomialLogisticRegression.scala index f85ac76a8d..006f57c0ce 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/MultinomialLogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/MultinomialLogisticRegression.scala @@ -363,7 +363,7 @@ class MultinomialLogisticRegression @Since("2.1.0") ( rawCoefficients(coefIndex) } } else { - Array[Double]() + Array.empty } val coefficientArray: Array[Double] = Array.tabulate(numClasses * numFeatures) { i => diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala index 8b04b5de6f..e8e28ba29c 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala @@ -164,8 +164,8 @@ class OneHotEncoder @Since("1.4.0") (@Since("1.4.0") override val uid: String) e // data transformation val size = outputAttrGroup.size val oneValue = Array(1.0) - val emptyValues = Array[Double]() - val emptyIndices = Array[Int]() + val emptyValues = Array.empty[Double] + val emptyIndices = Array.empty[Int] val encode = udf { label: Double => if (label < size) { Vectors.sparse(size, Array(label.toInt), oneValue) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala index 4c39cf17f4..ad882c969a 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala @@ -842,7 +842,7 @@ object SparseMatrix { "The expected number of nonzeros cannot be greater than Int.MaxValue.") val nnz = math.ceil(expected).toInt if (density == 0.0) { - new SparseMatrix(numRows, numCols, new Array[Int](numCols + 1), Array[Int](), Array[Double]()) + new SparseMatrix(numRows, numCols, new Array[Int](numCols + 1), Array.empty, Array.empty) } else if (density == 1.0) { val colPtrs = Array.tabulate(numCols + 1)(j => j * numRows) val rowIndices = Array.tabulate(size.toInt)(idx => idx % numRows) @@ -1098,7 +1098,7 @@ object Matrices { @Since("1.3.0") def horzcat(matrices: Array[Matrix]): Matrix = { if (matrices.isEmpty) { - return new DenseMatrix(0, 0, Array[Double]()) + return new DenseMatrix(0, 0, Array.empty) } else if (matrices.length == 1) { return matrices(0) } @@ -1157,7 +1157,7 @@ object Matrices { @Since("1.3.0") def vertcat(matrices: Array[Matrix]): Matrix = { if (matrices.isEmpty) { - return new DenseMatrix(0, 0, Array[Double]()) + return new DenseMatrix(0, 0, Array.empty[Double]) } else if (matrices.length == 1) { return matrices(0) } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/test/ChiSqTest.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/test/ChiSqTest.scala index da5df9bf45..9a63b8a5d6 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/stat/test/ChiSqTest.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/stat/test/ChiSqTest.scala @@ -146,7 +146,7 @@ private[stat] object ChiSqTest extends Logging { * Uniform distribution is assumed when `expected` is not passed in. */ def chiSquared(observed: Vector, - expected: Vector = Vectors.dense(Array[Double]()), + expected: Vector = Vectors.dense(Array.empty[Double]), methodName: String = PEARSON.name): ChiSqTestResult = { // Validate input arguments -- cgit v1.2.3