From d33021b3124b53e6ef3953f163b312a8ead186a2 Mon Sep 17 00:00:00 2001 From: Jin Xing Date: Mon, 6 Feb 2017 10:51:26 -0800 Subject: [SPARK-19398] Change one misleading log in TaskSetManager. ## What changes were proposed in this pull request? Log below is misleading: ``` if (successful(index)) { logInfo( s"Task ${info.id} in stage ${taskSet.id} (TID $tid) failed, " + "but another instance of the task has already succeeded, " + "so not re-queuing the task to be re-executed.") } ``` If fetch failed, the task is marked as successful in `TaskSetManager:: handleFailedTask`. Then log above will be printed. The `successful` just means task will not be scheduled any longer, not a real success. ## How was this patch tested? Existing unit tests can cover this. Author: jinxing Closes #16738 from jinxing64/SPARK-19398. --- .../scala/org/apache/spark/scheduler/TaskSetManager.scala | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala index c7ff13cebf..88251435a5 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala @@ -70,6 +70,10 @@ private[spark] class TaskSetManager( val tasks = taskSet.tasks val numTasks = tasks.length val copiesRunning = new Array[Int](numTasks) + + // For each task, tracks whether a copy of the task has succeeded. A task will also be + // marked as "succeeded" if it failed with a fetch failure, in which case it should not + // be re-run because the missing map data needs to be regenerated first. val successful = new Array[Boolean](numTasks) private val numFailures = new Array[Int](numTasks) @@ -797,10 +801,10 @@ private[spark] class TaskSetManager( sched.dagScheduler.taskEnded(tasks(index), reason, null, accumUpdates, info) if (successful(index)) { - logInfo( - s"Task ${info.id} in stage ${taskSet.id} (TID $tid) failed, " + - "but another instance of the task has already succeeded, " + - "so not re-queuing the task to be re-executed.") + logInfo(s"Task ${info.id} in stage ${taskSet.id} (TID $tid) failed, but the task will not" + + s" be re-executed (either because the task failed with a shuffle data fetch failure," + + s" so the previous stage needs to be re-run, or because a different copy of the task" + + s" has already succeeded).") } else { addPendingTask(index) } -- cgit v1.2.3