From 3ce3a282c8463408f9a2db93c1748e8df8087e07 Mon Sep 17 00:00:00 2001
From: Liwei Lin
Date: Wed, 7 Sep 2016 10:04:00 +0100
Subject: [SPARK-17359][SQL][MLLIB] Use ArrayBuffer.+=(A) instead of ArrayBuffer.append(A) in performance critical paths

## What changes were proposed in this pull request?

We should generally use `ArrayBuffer.+=(A)` rather than `ArrayBuffer.append(A)`, because `append(A)` would involve extra boxing / unboxing. (A short illustrative sketch of the two call shapes follows the patch.)

## How was this patch tested?

N/A

Author: Liwei Lin

Closes #14914 from lw-lin/append_to_plus_eq_v2.
---
 core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala      | 2 +-
 core/src/main/scala/org/apache/spark/ui/WebUI.scala                  | 4 ++--
 .../org/apache/spark/util/collection/ExternalAppendOnlyMap.scala     | 4 ++--
 .../scala/org/apache/spark/util/collection/ExternalSorter.scala      | 6 +++---
 core/src/test/scala/org/apache/spark/deploy/IvyTestUtils.scala       | 2 +-
 .../src/test/scala/org/apache/spark/memory/MemoryManagerSuite.scala  | 3 +--
 .../spark/shuffle/sort/BypassMergeSortShuffleWriterSuite.scala       | 2 +-
 core/src/test/scala/org/apache/spark/util/SizeEstimatorSuite.scala   | 4 ++--
 8 files changed, 13 insertions(+), 14 deletions(-)

(limited to 'core/src')

diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala b/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala
index 2822eb5d60..d841091a31 100644
--- a/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala
+++ b/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala
@@ -467,7 +467,7 @@ private[spark] object PythonRDD extends Logging {
           val length = file.readInt()
           val obj = new Array[Byte](length)
           file.readFully(obj)
-          objs.append(obj)
+          objs += obj
         }
       } catch {
         case eof: EOFException => // No-op
diff --git a/core/src/main/scala/org/apache/spark/ui/WebUI.scala b/core/src/main/scala/org/apache/spark/ui/WebUI.scala
index 2c40e72699..38363800ec 100644
--- a/core/src/main/scala/org/apache/spark/ui/WebUI.scala
+++ b/core/src/main/scala/org/apache/spark/ui/WebUI.scala
@@ -83,8 +83,8 @@ private[spark] abstract class WebUI(
       (request: HttpServletRequest) => page.renderJson(request), securityManager, conf, basePath)
     attachHandler(renderHandler)
     attachHandler(renderJsonHandler)
-    pageToHandlers.getOrElseUpdate(page, ArrayBuffer[ServletContextHandler]())
-      .append(renderHandler)
+    val handlers = pageToHandlers.getOrElseUpdate(page, ArrayBuffer[ServletContextHandler]())
+    handlers += renderHandler
   }

   /** Attach a handler to this UI. */
diff --git a/core/src/main/scala/org/apache/spark/util/collection/ExternalAppendOnlyMap.scala b/core/src/main/scala/org/apache/spark/util/collection/ExternalAppendOnlyMap.scala
index 0943528119..948cc3b099 100644
--- a/core/src/main/scala/org/apache/spark/util/collection/ExternalAppendOnlyMap.scala
+++ b/core/src/main/scala/org/apache/spark/util/collection/ExternalAppendOnlyMap.scala
@@ -184,7 +184,7 @@ class ExternalAppendOnlyMap[K, V, C](
   override protected[this] def spill(collection: SizeTracker): Unit = {
     val inMemoryIterator = currentMap.destructiveSortedIterator(keyComparator)
     val diskMapIterator = spillMemoryIteratorToDisk(inMemoryIterator)
-    spilledMaps.append(diskMapIterator)
+    spilledMaps += diskMapIterator
   }

   /**
@@ -215,7 +215,7 @@ class ExternalAppendOnlyMap[K, V, C](
     // Flush the disk writer's contents to disk, and update relevant variables
     def flush(): Unit = {
       val segment = writer.commitAndGet()
-      batchSizes.append(segment.length)
+      batchSizes += segment.length
       _diskBytesSpilled += segment.length
       objectsWritten = 0
     }
diff --git a/core/src/main/scala/org/apache/spark/util/collection/ExternalSorter.scala b/core/src/main/scala/org/apache/spark/util/collection/ExternalSorter.scala
index 3579918fac..176f84fa2a 100644
--- a/core/src/main/scala/org/apache/spark/util/collection/ExternalSorter.scala
+++ b/core/src/main/scala/org/apache/spark/util/collection/ExternalSorter.scala
@@ -238,7 +238,7 @@ private[spark] class ExternalSorter[K, V, C](
   override protected[this] def spill(collection: WritablePartitionedPairCollection[K, C]): Unit = {
     val inMemoryIterator = collection.destructiveSortedWritablePartitionedIterator(comparator)
     val spillFile = spillMemoryIteratorToDisk(inMemoryIterator)
-    spills.append(spillFile)
+    spills += spillFile
   }

   /**
@@ -285,7 +285,7 @@ private[spark] class ExternalSorter[K, V, C](
     // The writer is committed at the end of this process.
     def flush(): Unit = {
       val segment = writer.commitAndGet()
-      batchSizes.append(segment.length)
+      batchSizes += segment.length
       _diskBytesSpilled += segment.length
       objectsWritten = 0
     }
@@ -796,7 +796,7 @@ private[spark] class ExternalSorter[K, V, C](
         logInfo(s"Task ${context.taskAttemptId} force spilling in-memory map to disk and " +
           s" it will release ${org.apache.spark.util.Utils.bytesToString(getUsed())} memory")
         val spillFile = spillMemoryIteratorToDisk(inMemoryIterator)
-        forceSpillFiles.append(spillFile)
+        forceSpillFiles += spillFile
         val spillReader = new SpillReader(spillFile)
         nextUpstream = (0 until numPartitions).iterator.flatMap { p =>
           val iterator = spillReader.readNextPartition()
diff --git a/core/src/test/scala/org/apache/spark/deploy/IvyTestUtils.scala b/core/src/test/scala/org/apache/spark/deploy/IvyTestUtils.scala
index 9ecf49b598..c9b3d657c2 100644
--- a/core/src/test/scala/org/apache/spark/deploy/IvyTestUtils.scala
+++ b/core/src/test/scala/org/apache/spark/deploy/IvyTestUtils.scala
@@ -305,7 +305,7 @@ private[deploy] object IvyTestUtils {
     val allFiles = ArrayBuffer[(String, File)](javaFile)
     if (withPython) {
       val pythonFile = createPythonFile(root)
-      allFiles.append((pythonFile.getName, pythonFile))
+      allFiles += Tuple2(pythonFile.getName, pythonFile)
     }
     if (withR) {
       val rFiles = createRFiles(root, className, artifact.groupId)
diff --git a/core/src/test/scala/org/apache/spark/memory/MemoryManagerSuite.scala b/core/src/test/scala/org/apache/spark/memory/MemoryManagerSuite.scala
index 38bf7e5e5a..eb2b3ffd15 100644
--- a/core/src/test/scala/org/apache/spark/memory/MemoryManagerSuite.scala
+++ b/core/src/test/scala/org/apache/spark/memory/MemoryManagerSuite.scala
@@ -118,8 +118,7 @@ private[memory] trait MemoryManagerSuite extends SparkFunSuite with BeforeAndAft
       if (numBytesToFree <= mm.storageMemoryUsed) {
         // We can evict enough blocks to fulfill the request for space
         mm.releaseStorageMemory(numBytesToFree, MemoryMode.ON_HEAP)
-        evictedBlocks.append(
-          (null, BlockStatus(StorageLevel.MEMORY_ONLY, numBytesToFree, 0L)))
+        evictedBlocks += Tuple2(null, BlockStatus(StorageLevel.MEMORY_ONLY, numBytesToFree, 0L))
         numBytesToFree
       } else {
         // No blocks were evicted because eviction would not free enough space.
diff --git a/core/src/test/scala/org/apache/spark/shuffle/sort/BypassMergeSortShuffleWriterSuite.scala b/core/src/test/scala/org/apache/spark/shuffle/sort/BypassMergeSortShuffleWriterSuite.scala
index ed9428820f..442941685f 100644
--- a/core/src/test/scala/org/apache/spark/shuffle/sort/BypassMergeSortShuffleWriterSuite.scala
+++ b/core/src/test/scala/org/apache/spark/shuffle/sort/BypassMergeSortShuffleWriterSuite.scala
@@ -107,7 +107,7 @@ class BypassMergeSortShuffleWriterSuite extends SparkFunSuite with BeforeAndAfte
         val blockId = new TempShuffleBlockId(UUID.randomUUID)
         val file = new File(tempDir, blockId.name)
         blockIdToFileMap.put(blockId, file)
-        temporaryFilesCreated.append(file)
+        temporaryFilesCreated += file
         (blockId, file)
       }
     })
diff --git a/core/src/test/scala/org/apache/spark/util/SizeEstimatorSuite.scala b/core/src/test/scala/org/apache/spark/util/SizeEstimatorSuite.scala
index c342b68f46..2695295d45 100644
--- a/core/src/test/scala/org/apache/spark/util/SizeEstimatorSuite.scala
+++ b/core/src/test/scala/org/apache/spark/util/SizeEstimatorSuite.scala
@@ -150,12 +150,12 @@ class SizeEstimatorSuite
     val buf = new ArrayBuffer[DummyString]()
     for (i <- 0 until 5000) {
-      buf.append(new DummyString(new Array[Char](10)))
+      buf += new DummyString(new Array[Char](10))
     }
     assertResult(340016)(SizeEstimator.estimate(buf.toArray))

     for (i <- 0 until 5000) {
-      buf.append(new DummyString(arr))
+      buf += new DummyString(arr)
     }
     assertResult(683912)(SizeEstimator.estimate(buf.toArray))
-- 
cgit v1.2.3
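As an illustration of the change description above, the following minimal sketch (not part of the patch) contrasts the two call shapes. It assumes the Scala 2.11/2.12-era `ArrayBuffer`, where `+=` takes a single element while `append` is declared with a varargs parameter (`elems: A*`), so each `append(x)` call first packs its argument into a `Seq` (boxing primitives along the way).

```scala
import scala.collection.mutable.ArrayBuffer

// Minimal sketch, assuming the Scala 2.11/2.12 ArrayBuffer signatures:
//   def +=(elem: A): this.type   // single-element add, no extra wrapping
//   def append(elems: A*): Unit  // varargs: arguments are packed into a Seq first
object AppendVsPlusEq {
  def main(args: Array[String]): Unit = {
    val batchSizes = ArrayBuffer[Long]()

    // Preferred on hot paths: the element is added directly.
    batchSizes += 42L

    // Same result, but the argument is boxed and wrapped into a varargs Seq
    // on every call -- the avoidable per-record overhead this patch removes.
    batchSizes.append(43L)

    println(batchSizes) // ArrayBuffer(42, 43)
  }
}
```

In Scala 2.13 `append` was changed to take a single element, so the distinction largely disappears there; for the Scala versions Spark used at the time (2.10/2.11), the varargs wrapping shows up on per-record paths such as the spill and flush loops touched above.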