diff options
author | Josh Rosen <joshrosen@databricks.com> | 2015-02-17 17:45:16 -0800 |
---|---|---|
committer | Josh Rosen <joshrosen@databricks.com> | 2015-02-17 17:45:16 -0800 |
commit | a51fc7ef9adb6a41c4857918217f800858fced2c (patch) | |
tree | e5e942bfa1f99a0e4b16a937037c879262796004 /core | |
parent | d46d6246d225ff3af09ebae1a09d4de2430c502d (diff) | |
download | spark-a51fc7ef9adb6a41c4857918217f800858fced2c.tar.gz spark-a51fc7ef9adb6a41c4857918217f800858fced2c.tar.bz2 spark-a51fc7ef9adb6a41c4857918217f800858fced2c.zip |
[SPARK-4454] Revert getOrElse() cleanup in DAGScheduler.getCacheLocs()
This method is performance-sensitive and this change wasn't necessary.
Diffstat (limited to 'core')
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala | 8 |
1 files changed, 5 insertions, 3 deletions
diff --git a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala index 9c355d7c3e..8b62d2405e 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala @@ -190,13 +190,15 @@ class DAGScheduler( } private def getCacheLocs(rdd: RDD[_]): Array[Seq[TaskLocation]] = cacheLocs.synchronized { - cacheLocs.getOrElseUpdate(rdd.id, { + // Note: this doesn't use `getOrElse()` because this method is called O(num tasks) times + if (!cacheLocs.contains(rdd.id)) { val blockIds = rdd.partitions.indices.map(index => RDDBlockId(rdd.id, index)).toArray[BlockId] val locs = BlockManager.blockIdsToBlockManagers(blockIds, env, blockManagerMaster) - blockIds.map { id => + cacheLocs(rdd.id) = blockIds.map { id => locs.getOrElse(id, Nil).map(bm => TaskLocation(bm.host, bm.executorId)) } - }) + } + cacheLocs(rdd.id) } private def clearCacheLocs(): Unit = cacheLocs.synchronized { |