diff options
author | Xu Tingjun <xutingjun@huawei.com> | 2015-06-06 09:53:53 +0100 |
---|---|---|
committer | Sean Owen <sowen@cloudera.com> | 2015-06-06 09:53:53 +0100 |
commit | a8077e5cfc48bdb9f0641d62fe6c01cc8c4f1694 (patch) | |
tree | c76f64d3dde5224e54e90cc9d464786866e2a999 /core | |
parent | a71be0a36de94b3962c09f871845d745047a78e6 (diff) | |
download | spark-a8077e5cfc48bdb9f0641d62fe6c01cc8c4f1694.tar.gz spark-a8077e5cfc48bdb9f0641d62fe6c01cc8c4f1694.tar.bz2 spark-a8077e5cfc48bdb9f0641d62fe6c01cc8c4f1694.zip |
[SPARK-6973] remove skipped stage ID from completed set on the allJobsPage
Though totalStages = allStages - skippedStages is understandable. But consider the problem [SPARK-6973], I think totalStages = allStages is more reasonable. Like "2/1 (2 failed) (1 skipped)", this item also shows the skipped num, it also will be understandable.
Author: Xu Tingjun <xutingjun@huawei.com>
Author: Xutingjun <xutingjun@huawei.com>
Author: meiyoula <1039320815@qq.com>
Closes #5550 from XuTingjun/allJobsPage and squashes the following commits:
a742541 [Xu Tingjun] delete the loop
40ce94b [Xutingjun] remove stage id from completed set if it retries again
6459238 [meiyoula] delete space
9e23c71 [Xu Tingjun] recover numSkippedStages
b987ea7 [Xutingjun] delete skkiped stages from completed set
47525c6 [Xu Tingjun] modify total stages/tasks on the allJobsPage
Diffstat (limited to 'core')
-rw-r--r-- | core/src/main/scala/org/apache/spark/ui/jobs/JobProgressListener.scala | 7 | ||||
-rw-r--r-- | core/src/main/scala/org/apache/spark/ui/jobs/UIData.scala | 3 |
2 files changed, 8 insertions, 2 deletions
diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/JobProgressListener.scala b/core/src/main/scala/org/apache/spark/ui/jobs/JobProgressListener.scala index 1d31fce4c6..730f9806e5 100644 --- a/core/src/main/scala/org/apache/spark/ui/jobs/JobProgressListener.scala +++ b/core/src/main/scala/org/apache/spark/ui/jobs/JobProgressListener.scala @@ -282,7 +282,9 @@ class JobProgressListener(conf: SparkConf) extends SparkListener with Logging { ) { jobData.numActiveStages -= 1 if (stage.failureReason.isEmpty) { - jobData.completedStageIndices.add(stage.stageId) + if (!stage.submissionTime.isEmpty) { + jobData.completedStageIndices.add(stage.stageId) + } } else { jobData.numFailedStages += 1 } @@ -315,6 +317,9 @@ class JobProgressListener(conf: SparkConf) extends SparkListener with Logging { jobData <- jobIdToData.get(jobId) ) { jobData.numActiveStages += 1 + + // If a stage retries again, it should be removed from completedStageIndices set + jobData.completedStageIndices.remove(stage.stageId) } } diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/UIData.scala b/core/src/main/scala/org/apache/spark/ui/jobs/UIData.scala index 3d96113aa5..f008d40180 100644 --- a/core/src/main/scala/org/apache/spark/ui/jobs/UIData.scala +++ b/core/src/main/scala/org/apache/spark/ui/jobs/UIData.scala @@ -22,6 +22,7 @@ import org.apache.spark.executor.TaskMetrics import org.apache.spark.scheduler.{AccumulableInfo, TaskInfo} import org.apache.spark.util.collection.OpenHashSet +import scala.collection.mutable import scala.collection.mutable.HashMap private[spark] object UIData { @@ -63,7 +64,7 @@ private[spark] object UIData { /* Stages */ var numActiveStages: Int = 0, // This needs to be a set instead of a simple count to prevent double-counting of rerun stages: - var completedStageIndices: OpenHashSet[Int] = new OpenHashSet[Int](), + var completedStageIndices: mutable.HashSet[Int] = new mutable.HashSet[Int](), var numSkippedStages: Int = 0, var numFailedStages: Int = 0 ) |