diff options
author | Sital Kedia <skedia@fb.com> | 2016-02-11 13:28:03 -0800 |
---|---|---|
committer | Kay Ousterhout <kayousterhout@gmail.com> | 2016-02-11 13:28:14 -0800 |
commit | 50fa6fd1b365d5db7e2b2c59624a365cef0d1696 (patch) | |
tree | 7fe425d81d70155772a198233609da1f6b5105cf /core | |
parent | 0d50a22084eea91d4efb0a3ed3fa59b8d9680795 (diff) | |
download | spark-50fa6fd1b365d5db7e2b2c59624a365cef0d1696.tar.gz spark-50fa6fd1b365d5db7e2b2c59624a365cef0d1696.tar.bz2 spark-50fa6fd1b365d5db7e2b2c59624a365cef0d1696.zip |
[SPARK-13279] Remove O(n^2) operation from scheduler.
This commit removes an unnecessary duplicate check in addPendingTask that meant
that scheduling a task set took time proportional to (# tasks)^2.
Author: Sital Kedia <skedia@fb.com>
Closes #11167 from sitalkedia/fix_stuck_driver and squashes the following commits:
3fe1af8 [Sital Kedia] [SPARK-13279] Remove unnecessary duplicate check in addPendingTask function
Diffstat (limited to 'core')
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala | 15 |
1 file changed, 9 insertions, 6 deletions
diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala index cf97877476..4b19beb43f 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala @@ -114,9 +114,14 @@ private[spark] class TaskSetManager( // treated as stacks, in which new tasks are added to the end of the // ArrayBuffer and removed from the end. This makes it faster to detect // tasks that repeatedly fail because whenever a task failed, it is put - // back at the head of the stack. They are also only cleaned up lazily; - // when a task is launched, it remains in all the pending lists except - // the one that it was launched from, but gets removed from them later. + // back at the head of the stack. These collections may contain duplicates + // for two reasons: + // (1): Tasks are only removed lazily; when a task is launched, it remains + // in all the pending lists except the one that it was launched from. + // (2): Tasks may be re-added to these lists multiple times as a result + // of failures. + // Duplicates are handled in dequeueTaskFromList, which ensures that a + // task hasn't already started running before launching it. private val pendingTasksForExecutor = new HashMap[String, ArrayBuffer[Int]] // Set of pending tasks for each host. Similar to pendingTasksForExecutor, @@ -181,9 +186,7 @@ private[spark] class TaskSetManager( private def addPendingTask(index: Int) { // Utility method that adds `index` to a list only if it's not already there def addTo(list: ArrayBuffer[Int]) { - if (!list.contains(index)) { - list += index - } + list += index } for (loc <- tasks(index).preferredLocations) { |