author     Sean Owen <sowen@cloudera.com>          2016-09-01 12:13:07 -0700
committer  Josh Rosen <joshrosen@databricks.com>   2016-09-01 12:13:07 -0700
commit     3893e8c576cf1a6decc18701267ce7cd8caaf521 (patch)
tree       e7a7b61f13a348f52ae0a25162157b28203b58ca
parent     2be5f8d7e0819de03971d0af6fa310793d2d0e65 (diff)
[SPARK-17331][CORE][MLLIB] Avoid allocating 0-length arrays
## What changes were proposed in this pull request?

Avoid allocating some 0-length arrays, especially in UTF8String, and prefer Array.empty in Scala over Array[T]().

## How was this patch tested?

Jenkins

Author: Sean Owen <sowen@cloudera.com>

Closes #14895 from srowen/SPARK-17331.
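The gist of the change, as a minimal Scala sketch (the names below are hypothetical, not from the patch): `Array[T]()` resolves to the generic varargs `Array.apply` and builds a fresh zero-length array on every call, while `Array.empty[T]` states the intent directly; on hot paths the patch goes further and reuses a single shared empty value, as UTF8String does with its EMPTY_UTF8 constant.

```scala
// Minimal sketch of the two patterns this commit replaces; EmptyDoubles is a
// hypothetical constant, analogous in spirit to UTF8String.EMPTY_UTF8.
object EmptyArraySketch {
  // Allocate the empty value once and hand out the same reference every time.
  private val EmptyDoubles: Array[Double] = Array.empty[Double]

  // Before: Array[Double]() goes through the generic varargs Array.apply,
  // which builds a new zero-length array on every invocation.
  def before(): Array[Double] = Array[Double]()

  // After: Array.empty[Double] is the idiomatic spelling; returning a cached
  // constant avoids repeating even that small allocation on hot paths.
  def after(): Array[Double] = EmptyDoubles
}
```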
-rw-r--r--  common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java | 8
-rw-r--r--  core/src/main/scala/org/apache/spark/MapOutputTracker.scala | 4
-rw-r--r--  core/src/main/scala/org/apache/spark/rdd/ZippedWithIndexRDD.scala | 2
-rw-r--r--  docs/streaming-kafka-0-8-integration.md | 2
-rw-r--r--  mllib-local/src/main/scala/org/apache/spark/ml/linalg/Matrices.scala | 6
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/classification/MultinomialLogisticRegression.scala | 2
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala | 4
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala | 6
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/stat/test/ChiSqTest.scala | 2
-rw-r--r--  sql/catalyst/src/main/java/org/apache/spark/sql/types/DataTypes.java | 2
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala | 2
11 files changed, 20 insertions(+), 20 deletions(-)
diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
index 54a5456924..dc03d893a5 100644
--- a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
+++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
@@ -470,7 +470,7 @@ public final class UTF8String implements Comparable<UTF8String>, Externalizable,
while (e >= 0 && getByte(e) <= 0x20 && getByte(e) >= 0x00) e--;
if (s > e) {
// empty string
- return UTF8String.fromBytes(new byte[0]);
+ return EMPTY_UTF8;
} else {
return copyUTF8String(s, e);
}
@@ -482,7 +482,7 @@ public final class UTF8String implements Comparable<UTF8String>, Externalizable,
while (s < this.numBytes && getByte(s) <= 0x20 && getByte(s) >= 0x00) s++;
if (s == this.numBytes) {
// empty string
- return UTF8String.fromBytes(new byte[0]);
+ return EMPTY_UTF8;
} else {
return copyUTF8String(s, this.numBytes - 1);
}
@@ -495,7 +495,7 @@ public final class UTF8String implements Comparable<UTF8String>, Externalizable,
if (e < 0) {
// empty string
- return UTF8String.fromBytes(new byte[0]);
+ return EMPTY_UTF8;
} else {
return copyUTF8String(0, e);
}
@@ -761,7 +761,7 @@ public final class UTF8String implements Comparable<UTF8String>, Externalizable,
if (numInputs == 0) {
// Return an empty string if there is no input, or all the inputs are null.
- return fromBytes(new byte[0]);
+ return EMPTY_UTF8;
}
// Allocate a new byte array, and copy the inputs one by one into it.
diff --git a/core/src/main/scala/org/apache/spark/MapOutputTracker.scala b/core/src/main/scala/org/apache/spark/MapOutputTracker.scala
index 486d535da0..7f8f0f5131 100644
--- a/core/src/main/scala/org/apache/spark/MapOutputTracker.scala
+++ b/core/src/main/scala/org/apache/spark/MapOutputTracker.scala
@@ -383,7 +383,7 @@ private[spark] class MapOutputTrackerMaster(conf: SparkConf,
/** Register multiple map output information for the given shuffle */
def registerMapOutputs(shuffleId: Int, statuses: Array[MapStatus], changeEpoch: Boolean = false) {
- mapStatuses.put(shuffleId, Array[MapStatus]() ++ statuses)
+ mapStatuses.put(shuffleId, statuses.clone())
if (changeEpoch) {
incrementEpoch()
}
@@ -535,7 +535,7 @@ private[spark] class MapOutputTrackerMaster(conf: SparkConf,
true
case None =>
logDebug("cached status not found for : " + shuffleId)
- statuses = mapStatuses.getOrElse(shuffleId, Array[MapStatus]())
+ statuses = mapStatuses.getOrElse(shuffleId, Array.empty[MapStatus])
epochGotten = epoch
false
}
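A brief note on the MapOutputTracker change above, as a hedged sketch with generic names (not Spark's): `Array[MapStatus]() ++ statuses` allocates a throwaway empty array and then a second array for the concatenation result, whereas `clone()` makes the defensive copy in a single step.

```scala
import scala.reflect.ClassTag

object DefensiveCopySketch {
  // Old pattern: two allocations, the throwaway empty array plus the ++ result.
  def copyViaConcat[T: ClassTag](statuses: Array[T]): Array[T] =
    Array[T]() ++ statuses

  // New pattern: a single array copy with the same contents.
  def copyViaClone[T](statuses: Array[T]): Array[T] =
    statuses.clone()
}
```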
diff --git a/core/src/main/scala/org/apache/spark/rdd/ZippedWithIndexRDD.scala b/core/src/main/scala/org/apache/spark/rdd/ZippedWithIndexRDD.scala
index 32931d59ac..b5738b9a95 100644
--- a/core/src/main/scala/org/apache/spark/rdd/ZippedWithIndexRDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/ZippedWithIndexRDD.scala
@@ -43,7 +43,7 @@ class ZippedWithIndexRDD[T: ClassTag](prev: RDD[T]) extends RDD[(T, Long)](prev)
@transient private val startIndices: Array[Long] = {
val n = prev.partitions.length
if (n == 0) {
- Array[Long]()
+ Array.empty
} else if (n == 1) {
Array(0L)
} else {
diff --git a/docs/streaming-kafka-0-8-integration.md b/docs/streaming-kafka-0-8-integration.md
index d3fc9adfcf..58b17aa4ce 100644
--- a/docs/streaming-kafka-0-8-integration.md
+++ b/docs/streaming-kafka-0-8-integration.md
@@ -139,7 +139,7 @@ Next, we discuss how to use this approach in your streaming application.
<div class="codetabs">
<div data-lang="scala" markdown="1">
// Hold a reference to the current offset ranges, so it can be used downstream
- var offsetRanges = Array[OffsetRange]()
+ var offsetRanges = Array.empty[OffsetRange]
directKafkaStream.transform { rdd =>
offsetRanges = rdd.asInstanceOf[HasOffsetRanges].offsetRanges
diff --git a/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Matrices.scala b/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Matrices.scala
index f1ecc65af1..98080bb71a 100644
--- a/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Matrices.scala
+++ b/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Matrices.scala
@@ -713,7 +713,7 @@ object SparseMatrix {
"The expected number of nonzeros cannot be greater than Int.MaxValue.")
val nnz = math.ceil(expected).toInt
if (density == 0.0) {
- new SparseMatrix(numRows, numCols, new Array[Int](numCols + 1), Array[Int](), Array[Double]())
+ new SparseMatrix(numRows, numCols, new Array[Int](numCols + 1), Array.empty, Array.empty)
} else if (density == 1.0) {
val colPtrs = Array.tabulate(numCols + 1)(j => j * numRows)
val rowIndices = Array.tabulate(size.toInt)(idx => idx % numRows)
@@ -961,7 +961,7 @@ object Matrices {
@Since("2.0.0")
def horzcat(matrices: Array[Matrix]): Matrix = {
if (matrices.isEmpty) {
- return new DenseMatrix(0, 0, Array[Double]())
+ return new DenseMatrix(0, 0, Array.empty)
} else if (matrices.length == 1) {
return matrices(0)
}
@@ -1020,7 +1020,7 @@ object Matrices {
@Since("2.0.0")
def vertcat(matrices: Array[Matrix]): Matrix = {
if (matrices.isEmpty) {
- return new DenseMatrix(0, 0, Array[Double]())
+ return new DenseMatrix(0, 0, Array.empty)
} else if (matrices.length == 1) {
return matrices(0)
}
diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/MultinomialLogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/MultinomialLogisticRegression.scala
index f85ac76a8d..006f57c0ce 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/MultinomialLogisticRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/MultinomialLogisticRegression.scala
@@ -363,7 +363,7 @@ class MultinomialLogisticRegression @Since("2.1.0") (
rawCoefficients(coefIndex)
}
} else {
- Array[Double]()
+ Array.empty
}
val coefficientArray: Array[Double] = Array.tabulate(numClasses * numFeatures) { i =>
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala
index 8b04b5de6f..e8e28ba29c 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala
@@ -164,8 +164,8 @@ class OneHotEncoder @Since("1.4.0") (@Since("1.4.0") override val uid: String) e
// data transformation
val size = outputAttrGroup.size
val oneValue = Array(1.0)
- val emptyValues = Array[Double]()
- val emptyIndices = Array[Int]()
+ val emptyValues = Array.empty[Double]
+ val emptyIndices = Array.empty[Int]
val encode = udf { label: Double =>
if (label < size) {
Vectors.sparse(size, Array(label.toInt), oneValue)
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala
index 4c39cf17f4..ad882c969a 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala
@@ -842,7 +842,7 @@ object SparseMatrix {
"The expected number of nonzeros cannot be greater than Int.MaxValue.")
val nnz = math.ceil(expected).toInt
if (density == 0.0) {
- new SparseMatrix(numRows, numCols, new Array[Int](numCols + 1), Array[Int](), Array[Double]())
+ new SparseMatrix(numRows, numCols, new Array[Int](numCols + 1), Array.empty, Array.empty)
} else if (density == 1.0) {
val colPtrs = Array.tabulate(numCols + 1)(j => j * numRows)
val rowIndices = Array.tabulate(size.toInt)(idx => idx % numRows)
@@ -1098,7 +1098,7 @@ object Matrices {
@Since("1.3.0")
def horzcat(matrices: Array[Matrix]): Matrix = {
if (matrices.isEmpty) {
- return new DenseMatrix(0, 0, Array[Double]())
+ return new DenseMatrix(0, 0, Array.empty)
} else if (matrices.length == 1) {
return matrices(0)
}
@@ -1157,7 +1157,7 @@ object Matrices {
@Since("1.3.0")
def vertcat(matrices: Array[Matrix]): Matrix = {
if (matrices.isEmpty) {
- return new DenseMatrix(0, 0, Array[Double]())
+ return new DenseMatrix(0, 0, Array.empty[Double])
} else if (matrices.length == 1) {
return matrices(0)
}
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/test/ChiSqTest.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/test/ChiSqTest.scala
index da5df9bf45..9a63b8a5d6 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/stat/test/ChiSqTest.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/stat/test/ChiSqTest.scala
@@ -146,7 +146,7 @@ private[stat] object ChiSqTest extends Logging {
* Uniform distribution is assumed when `expected` is not passed in.
*/
def chiSquared(observed: Vector,
- expected: Vector = Vectors.dense(Array[Double]()),
+ expected: Vector = Vectors.dense(Array.empty[Double]),
methodName: String = PEARSON.name): ChiSqTestResult = {
// Validate input arguments
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/types/DataTypes.java b/sql/catalyst/src/main/java/org/apache/spark/sql/types/DataTypes.java
index 24adeadf95..747ab1809f 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/types/DataTypes.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/types/DataTypes.java
@@ -191,7 +191,7 @@ public class DataTypes {
* Creates a StructType with the given list of StructFields ({@code fields}).
*/
public static StructType createStructType(List<StructField> fields) {
- return createStructType(fields.toArray(new StructField[0]));
+ return createStructType(fields.toArray(new StructField[fields.size()]));
}
/**
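For the DataTypes.java hunk above, a short sketch in Scala against java.util.List (the method name is illustrative only): `toArray(new StructField[0])` hands toArray a zero-length array it must discard in favor of a second, correctly sized allocation, while passing an array of `fields.size()` lets toArray fill it in place.

```scala
import java.util.{ArrayList, List => JList}

object ToArraySketch {
  // Illustrative only: a presized array lets List#toArray fill it directly,
  // instead of discarding a zero-length array and allocating a replacement.
  def toSizedArray(fields: JList[String]): Array[String] =
    fields.toArray(new Array[String](fields.size()))

  def main(args: Array[String]): Unit = {
    val names = new ArrayList[String]()
    names.add("a"); names.add("b")
    println(toSizedArray(names).mkString(","))  // prints: a,b
  }
}
```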
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala
index cbd504603b..37153e545a 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala
@@ -322,6 +322,7 @@ object JdbcUtils extends Logging {
conn.commit()
}
committed = true
+ Iterator.empty
} catch {
case e: SQLException =>
val cause = e.getNextException
@@ -351,7 +352,6 @@ object JdbcUtils extends Logging {
}
}
}
- Array[Byte]().iterator
}
/**
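Finally, for the JdbcUtils change above, a one-line sketch of the difference: `Iterator.empty` is a reusable singleton, while `Array[Byte]().iterator` allocates a zero-length array and a fresh iterator over it merely to satisfy the surrounding method's Iterator[Byte] return type.

```scala
object EmptyIteratorSketch {
  // Old: a fresh zero-length Array[Byte] plus a fresh iterator wrapping it.
  def emptyOld: Iterator[Byte] = Array[Byte]().iterator

  // New: Iterator.empty is a shared singleton of type Iterator[Nothing],
  // which conforms to Iterator[Byte] because Iterator is covariant.
  def emptyNew: Iterator[Byte] = Iterator.empty
}
```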