diff options
author | xutingjun <xutingjun@huawei.com> | 2015-06-17 22:31:01 -0700 |
---|---|---|
committer | Andrew Or <andrew@databricks.com> | 2015-06-17 22:31:01 -0700 |
commit | e2cdb0568b14df29bbdb1ee9a13ee361c9ddad9c (patch) | |
tree | 81c590f876f54b0268e1ee8e3c3a94cad4435b60 | |
parent | 22732e1eca730929345e440ba831386ee7446b74 (diff) | |
download | spark-e2cdb0568b14df29bbdb1ee9a13ee361c9ddad9c.tar.gz spark-e2cdb0568b14df29bbdb1ee9a13ee361c9ddad9c.tar.bz2 spark-e2cdb0568b14df29bbdb1ee9a13ee361c9ddad9c.zip |
[SPARK-8392] RDDOperationGraph: getting cached nodes is slow
```def getAllNodes: Seq[RDDOperationNode] =
{ _childNodes ++ _childClusters.flatMap(_.childNodes) }```
when the ```_childClusters``` has so many nodes, the process will hang on. I think we can improve the efficiency here.
Author: xutingjun <xutingjun@huawei.com>
Closes #6839 from XuTingjun/DAGImprove and squashes the following commits:
53b03ea [xutingjun] change code to more concise and easier to read
f98728b [xutingjun] fix words: node -> nodes
f87c663 [xutingjun] put the filter inside
81f9fd2 [xutingjun] put the filter inside
-rw-r--r-- | core/src/main/scala/org/apache/spark/ui/UIUtils.scala | 2 | ||||
-rw-r--r-- | core/src/main/scala/org/apache/spark/ui/scope/RDDOperationGraph.scala | 6 |
2 files changed, 4 insertions, 4 deletions
diff --git a/core/src/main/scala/org/apache/spark/ui/UIUtils.scala b/core/src/main/scala/org/apache/spark/ui/UIUtils.scala index 65162f4fdc..7898039519 100644 --- a/core/src/main/scala/org/apache/spark/ui/UIUtils.scala +++ b/core/src/main/scala/org/apache/spark/ui/UIUtils.scala @@ -362,7 +362,7 @@ private[spark] object UIUtils extends Logging { { g.incomingEdges.map { e => <div class="incoming-edge">{e.fromId},{e.toId}</div> } } { g.outgoingEdges.map { e => <div class="outgoing-edge">{e.fromId},{e.toId}</div> } } { - g.rootCluster.getAllNodes.filter(_.cached).map { n => + g.rootCluster.getCachedNodes.map { n => <div class="cached-rdd">{n.id}</div> } } diff --git a/core/src/main/scala/org/apache/spark/ui/scope/RDDOperationGraph.scala b/core/src/main/scala/org/apache/spark/ui/scope/RDDOperationGraph.scala index d6a5085db1..ffea9817c0 100644 --- a/core/src/main/scala/org/apache/spark/ui/scope/RDDOperationGraph.scala +++ b/core/src/main/scala/org/apache/spark/ui/scope/RDDOperationGraph.scala @@ -66,9 +66,9 @@ private[ui] class RDDOperationCluster(val id: String, private var _name: String) _childClusters += childCluster } - /** Return all the nodes container in this cluster, including ones nested in other clusters. */ - def getAllNodes: Seq[RDDOperationNode] = { - _childNodes ++ _childClusters.flatMap(_.childNodes) + /** Return all the nodes which are cached. */ + def getCachedNodes: Seq[RDDOperationNode] = { + _childNodes.filter(_.cached) ++ _childClusters.flatMap(_.getCachedNodes) } } |