author    Xu Tingjun <xutingjun@huawei.com>  2015-06-06 09:53:53 +0100
committer Sean Owen <sowen@cloudera.com>  2015-06-06 09:53:53 +0100
commit    a8077e5cfc48bdb9f0641d62fe6c01cc8c4f1694 (patch)
tree      c76f64d3dde5224e54e90cc9d464786866e2a999 /core
parent    a71be0a36de94b3962c09f871845d745047a78e6 (diff)
[SPARK-6973] remove skipped stage ID from completed set on the allJobsPage
Although totalStages = allStages - skippedStages is understandable, considering the problem in [SPARK-6973] I think totalStages = allStages is more reasonable. An item like "2/1 (2 failed) (1 skipped)" still shows the skipped count, so it is also easy to understand.

Author: Xu Tingjun <xutingjun@huawei.com>
Author: Xutingjun <xutingjun@huawei.com>
Author: meiyoula <1039320815@qq.com>

Closes #5550 from XuTingjun/allJobsPage and squashes the following commits:

a742541 [Xu Tingjun] delete the loop
40ce94b [Xutingjun] remove stage id from completed set if it retries again
6459238 [meiyoula] delete space
9e23c71 [Xu Tingjun] recover numSkippedStages
b987ea7 [Xutingjun] delete skipped stages from completed set
47525c6 [Xu Tingjun] modify total stages/tasks on the allJobsPage
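To illustrate the bookkeeping this commit describes, here is a minimal, self-contained Scala sketch. StageInfoLite and JobCounters are hypothetical stand-ins, not Spark's real StageInfo or JobProgressListener API; the sketch only mirrors the two rules of the patch: a stage counts as completed only if it was actually submitted, and a retried stage leaves the completed set again.

    import scala.collection.mutable

    // Hypothetical, stripped-down stand-ins for Spark's StageInfo and JobUIData.
    final case class StageInfoLite(
        stageId: Int,
        submissionTime: Option[Long],
        failureReason: Option[String])

    final class JobCounters {
      val completedStageIndices = mutable.HashSet[Int]()
      var numFailedStages = 0

      // Mirrors the first hunk below: only count a stage as completed if it did
      // not fail and was actually submitted (a skipped stage has no submission time).
      def stageCompleted(stage: StageInfoLite): Unit = {
        if (stage.failureReason.isEmpty) {
          if (stage.submissionTime.isDefined) {
            completedStageIndices += stage.stageId
          }
        } else {
          numFailedStages += 1
        }
      }

      // Mirrors the second hunk below: a stage that is submitted again (a retry)
      // must no longer be counted as completed.
      def stageSubmitted(stage: StageInfoLite): Unit = {
        completedStageIndices -= stage.stageId
      }
    }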
Diffstat (limited to 'core')
-rw-r--r--  core/src/main/scala/org/apache/spark/ui/jobs/JobProgressListener.scala | 7
-rw-r--r--  core/src/main/scala/org/apache/spark/ui/jobs/UIData.scala              | 3
2 files changed, 8 insertions, 2 deletions
diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/JobProgressListener.scala b/core/src/main/scala/org/apache/spark/ui/jobs/JobProgressListener.scala
index 1d31fce4c6..730f9806e5 100644
--- a/core/src/main/scala/org/apache/spark/ui/jobs/JobProgressListener.scala
+++ b/core/src/main/scala/org/apache/spark/ui/jobs/JobProgressListener.scala
@@ -282,7 +282,9 @@ class JobProgressListener(conf: SparkConf) extends SparkListener with Logging {
) {
jobData.numActiveStages -= 1
if (stage.failureReason.isEmpty) {
- jobData.completedStageIndices.add(stage.stageId)
+ if (!stage.submissionTime.isEmpty) {
+ jobData.completedStageIndices.add(stage.stageId)
+ }
} else {
jobData.numFailedStages += 1
}
@@ -315,6 +317,9 @@ class JobProgressListener(conf: SparkConf) extends SparkListener with Logging {
jobData <- jobIdToData.get(jobId)
) {
jobData.numActiveStages += 1
+
+ // If a stage retries again, it should be removed from completedStageIndices set
+ jobData.completedStageIndices.remove(stage.stageId)
}
}
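A hypothetical walk-through of the two code paths changed above, reusing the JobCounters sketch from the commit description (again, not the real listener):

    val counters = new JobCounters
    val ranStage     = StageInfoLite(stageId = 1, submissionTime = Some(100L), failureReason = None)
    val skippedStage = StageInfoLite(stageId = 2, submissionTime = None, failureReason = None)

    counters.stageCompleted(ranStage)      // recorded: the stage was submitted and did not fail
    counters.stageCompleted(skippedStage)  // ignored: a skipped stage was never submitted

    counters.stageSubmitted(ranStage)                    // stage 1 is retried...
    assert(!counters.completedStageIndices.contains(1))  // ...so it leaves the completed set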
diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/UIData.scala b/core/src/main/scala/org/apache/spark/ui/jobs/UIData.scala
index 3d96113aa5..f008d40180 100644
--- a/core/src/main/scala/org/apache/spark/ui/jobs/UIData.scala
+++ b/core/src/main/scala/org/apache/spark/ui/jobs/UIData.scala
@@ -22,6 +22,7 @@ import org.apache.spark.executor.TaskMetrics
import org.apache.spark.scheduler.{AccumulableInfo, TaskInfo}
import org.apache.spark.util.collection.OpenHashSet
+import scala.collection.mutable
import scala.collection.mutable.HashMap
private[spark] object UIData {
@@ -63,7 +64,7 @@ private[spark] object UIData {
/* Stages */
var numActiveStages: Int = 0,
// This needs to be a set instead of a simple count to prevent double-counting of rerun stages:
- var completedStageIndices: OpenHashSet[Int] = new OpenHashSet[Int](),
+ var completedStageIndices: mutable.HashSet[Int] = new mutable.HashSet[Int](),
var numSkippedStages: Int = 0,
var numFailedStages: Int = 0
)
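The field type changes from OpenHashSet[Int] to mutable.HashSet[Int] because the resubmission path above now removes entries; to my understanding (worth verifying against the Spark source), org.apache.spark.util.collection.OpenHashSet only supports insertion. A tiny sketch of the behaviour the standard-library set provides:

    import scala.collection.mutable

    val completedStageIndices = mutable.HashSet[Int]()
    completedStageIndices += 3   // stage 3 completes
    completedStageIndices -= 3   // stage 3 is resubmitted, so it is no longer "completed"
    assert(completedStageIndices.isEmpty)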