author    Xu Tingjun <xutingjun@huawei.com>  2015-06-06 09:53:53 +0100
committer Sean Owen <sowen@cloudera.com>  2015-06-06 09:53:53 +0100
commit    a8077e5cfc48bdb9f0641d62fe6c01cc8c4f1694 (patch)
tree      c76f64d3dde5224e54e90cc9d464786866e2a999 /core
parent    a71be0a36de94b3962c09f871845d745047a78e6 (diff)
[SPARK-6973] remove skipped stage ID from completed set on the allJobsPage
Although totalStages = allStages - skippedStages is understandable, considering the problem in [SPARK-6973] I think totalStages = allStages is more reasonable. An item like "2/1 (2 failed) (1 skipped)" still shows the skipped count, so it is also easy to understand.

Author: Xu Tingjun <xutingjun@huawei.com>
Author: Xutingjun <xutingjun@huawei.com>
Author: meiyoula <1039320815@qq.com>

Closes #5550 from XuTingjun/allJobsPage and squashes the following commits:

a742541 [Xu Tingjun] delete the loop
40ce94b [Xutingjun] remove stage id from completed set if it retries again
6459238 [meiyoula] delete space
9e23c71 [Xu Tingjun] recover numSkippedStages
b987ea7 [Xutingjun] delete skipped stages from completed set
47525c6 [Xu Tingjun] modify total stages/tasks on the allJobsPage
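To illustrate the bookkeeping this commit describes, here is a minimal, self-contained Scala sketch. StageInfoLite and JobCounters are hypothetical stand-ins, not Spark's real StageInfo or JobProgressListener API; the sketch only mirrors the two rules of the patch: a stage counts as completed only if it was actually submitted, and a retried stage leaves the completed set again.

    import scala.collection.mutable

    // Hypothetical, stripped-down stand-ins for Spark's StageInfo and JobUIData.
    final case class StageInfoLite(
        stageId: Int,
        submissionTime: Option[Long],
        failureReason: Option[String])

    final class JobCounters {
      val completedStageIndices = mutable.HashSet[Int]()
      var numFailedStages = 0

      // Mirrors the first hunk below: only count a stage as completed if it did
      // not fail and was actually submitted (a skipped stage has no submission time).
      def stageCompleted(stage: StageInfoLite): Unit = {
        if (stage.failureReason.isEmpty) {
          if (stage.submissionTime.isDefined) {
            completedStageIndices += stage.stageId
          }
        } else {
          numFailedStages += 1
        }
      }

      // Mirrors the second hunk below: a stage that is submitted again (a retry)
      // must no longer be counted as completed.
      def stageSubmitted(stage: StageInfoLite): Unit = {
        completedStageIndices -= stage.stageId
      }
    }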
Diffstat (limited to 'core')
-rw-r--r--  core/src/main/scala/org/apache/spark/ui/jobs/JobProgressListener.scala | 7
-rw-r--r--  core/src/main/scala/org/apache/spark/ui/jobs/UIData.scala              | 3
2 files changed, 8 insertions, 2 deletions
diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/JobProgressListener.scala b/core/src/main/scala/org/apache/spark/ui/jobs/JobProgressListener.scala
index 1d31fce4c6..730f9806e5 100644
--- a/core/src/main/scala/org/apache/spark/ui/jobs/JobProgressListener.scala
+++ b/core/src/main/scala/org/apache/spark/ui/jobs/JobProgressListener.scala
@@ -282,7 +282,9 @@ class JobProgressListener(conf: SparkConf) extends SparkListener with Logging {
) {
jobData.numActiveStages -= 1
if (stage.failureReason.isEmpty) {
- jobData.completedStageIndices.add(stage.stageId)
+ if (!stage.submissionTime.isEmpty) {
+ jobData.completedStageIndices.add(stage.stageId)
+ }
} else {
jobData.numFailedStages += 1
}
@@ -315,6 +317,9 @@ class JobProgressListener(conf: SparkConf) extends SparkListener with Logging {
jobData <- jobIdToData.get(jobId)
) {
jobData.numActiveStages += 1
+
+ // If a stage retries again, it should be removed from completedStageIndices set
+ jobData.completedStageIndices.remove(stage.stageId)
}
}
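A hypothetical walk-through of the two code paths changed above, reusing the JobCounters sketch from the commit description (again, not the real listener):

    val counters = new JobCounters
    val ranStage     = StageInfoLite(stageId = 1, submissionTime = Some(100L), failureReason = None)
    val skippedStage = StageInfoLite(stageId = 2, submissionTime = None, failureReason = None)

    counters.stageCompleted(ranStage)      // recorded: the stage was submitted and did not fail
    counters.stageCompleted(skippedStage)  // ignored: a skipped stage was never submitted

    counters.stageSubmitted(ranStage)                    // stage 1 is retried...
    assert(!counters.completedStageIndices.contains(1))  // ...so it leaves the completed set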
diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/UIData.scala b/core/src/main/scala/org/apache/spark/ui/jobs/UIData.scala
index 3d96113aa5..f008d40180 100644
--- a/core/src/main/scala/org/apache/spark/ui/jobs/UIData.scala
+++ b/core/src/main/scala/org/apache/spark/ui/jobs/UIData.scala
@@ -22,6 +22,7 @@ import org.apache.spark.executor.TaskMetrics
import org.apache.spark.scheduler.{AccumulableInfo, TaskInfo}
import org.apache.spark.util.collection.OpenHashSet
+import scala.collection.mutable
import scala.collection.mutable.HashMap
private[spark] object UIData {
@@ -63,7 +64,7 @@ private[spark] object UIData {
/* Stages */
var numActiveStages: Int = 0,
// This needs to be a set instead of a simple count to prevent double-counting of rerun stages:
- var completedStageIndices: OpenHashSet[Int] = new OpenHashSet[Int](),
+ var completedStageIndices: mutable.HashSet[Int] = new mutable.HashSet[Int](),
var numSkippedStages: Int = 0,
var numFailedStages: Int = 0
)
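The field type changes from OpenHashSet[Int] to mutable.HashSet[Int] because the resubmission path above now removes entries; to my understanding (worth verifying against the Spark source), org.apache.spark.util.collection.OpenHashSet only supports insertion. A tiny sketch of the behaviour the standard-library set provides:

    import scala.collection.mutable

    val completedStageIndices = mutable.HashSet[Int]()
    completedStageIndices += 3   // stage 3 completes
    completedStageIndices -= 3   // stage 3 is resubmitted, so it is no longer "completed"
    assert(completedStageIndices.isEmpty)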