aboutsummaryrefslogtreecommitdiff
path: root/core
diff options
context:
space:
mode:
authorJosh Rosen <joshrosen@databricks.com>2015-02-17 17:45:16 -0800
committerJosh Rosen <joshrosen@databricks.com>2015-02-17 17:45:16 -0800
commita51fc7ef9adb6a41c4857918217f800858fced2c (patch)
treee5e942bfa1f99a0e4b16a937037c879262796004 /core
parentd46d6246d225ff3af09ebae1a09d4de2430c502d (diff)
downloadspark-a51fc7ef9adb6a41c4857918217f800858fced2c.tar.gz
spark-a51fc7ef9adb6a41c4857918217f800858fced2c.tar.bz2
spark-a51fc7ef9adb6a41c4857918217f800858fced2c.zip
[SPARK-4454] Revert getOrElse() cleanup in DAGScheduler.getCacheLocs()
This method is performance-sensitive and this change wasn't necessary.
Diffstat (limited to 'core')
-rw-r--r--core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala8
1 files changed, 5 insertions, 3 deletions
diff --git a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala
index 9c355d7c3e..8b62d2405e 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala
@@ -190,13 +190,15 @@ class DAGScheduler(
}
private def getCacheLocs(rdd: RDD[_]): Array[Seq[TaskLocation]] = cacheLocs.synchronized {
- cacheLocs.getOrElseUpdate(rdd.id, {
+ // Note: this doesn't use `getOrElse()` because this method is called O(num tasks) times
+ if (!cacheLocs.contains(rdd.id)) {
val blockIds = rdd.partitions.indices.map(index => RDDBlockId(rdd.id, index)).toArray[BlockId]
val locs = BlockManager.blockIdsToBlockManagers(blockIds, env, blockManagerMaster)
- blockIds.map { id =>
+ cacheLocs(rdd.id) = blockIds.map { id =>
locs.getOrElse(id, Nil).map(bm => TaskLocation(bm.host, bm.executorId))
}
- })
+ }
+ cacheLocs(rdd.id)
}
private def clearCacheLocs(): Unit = cacheLocs.synchronized {