diff options
author | Takeshi Yamamuro <linguin.m.s@gmail.com> | 2014-12-07 19:42:02 -0800 |
---|---|---|
committer | Ankur Dave <ankurdave@gmail.com> | 2014-12-07 19:42:29 -0800 |
commit | 6b9e8b081655f71f7ff2c4238254f7aaa110723c (patch) | |
tree | cf8fb2b030c2cce983a11551c43bda41c24f08cb | |
parent | a4ae7c8b533b3998484879439c0982170c3c38a7 (diff) | |
download | spark-6b9e8b081655f71f7ff2c4238254f7aaa110723c.tar.gz spark-6b9e8b081655f71f7ff2c4238254f7aaa110723c.tar.bz2 spark-6b9e8b081655f71f7ff2c4238254f7aaa110723c.zip |
[SPARK-4620] Add unpersist in Graph and GraphImpl
Add an interface (`unpersist`) to uncache both the vertices and the edges of Graph/GraphImpl.
This interface is useful when iterative graph operations build a new graph in each iteration, and the vertices and edges of previous iterations are no longer needed in subsequent iterations.
Author: Takeshi Yamamuro <linguin.m.s@gmail.com>
This patch had conflicts when merged, resolved by
Committer: Ankur Dave <ankurdave@gmail.com>
Closes #3476 from maropu/UnpersistInGraphSpike and squashes the following commits:
77a006a [Takeshi Yamamuro] Add unpersist in Graph and GraphImpl
(cherry picked from commit 8817fc7fe8785d7b11138ca744f22f7e70f1f0a0)
Signed-off-by: Ankur Dave <ankurdave@gmail.com>
-rw-r--r-- | graphx/src/main/scala/org/apache/spark/graphx/Graph.scala | 6 | ||||
-rw-r--r-- | graphx/src/main/scala/org/apache/spark/graphx/impl/GraphImpl.scala | 6 |
2 files changed, 12 insertions, 0 deletions
```diff
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala b/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala
index 23538b7156..84b72b390c 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala
@@ -105,6 +105,12 @@ abstract class Graph[VD: ClassTag, ED: ClassTag] protected () extends Serializab
   def checkpoint(): Unit
 
   /**
+   * Uncaches both vertices and edges of this graph. This is useful in iterative algorithms that
+   * build a new graph in each iteration.
+   */
+  def unpersist(blocking: Boolean = true): Graph[VD, ED]
+
+  /**
    * Uncaches only the vertices of this graph, leaving the edges alone. This is useful in iterative
    * algorithms that modify the vertex attributes but reuse the edges. This method can be used to
    * uncache the vertex attributes of previous iterations once they are no longer needed, improving
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/impl/GraphImpl.scala b/graphx/src/main/scala/org/apache/spark/graphx/impl/GraphImpl.scala
index a617d84aea..3f4a900d5b 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/impl/GraphImpl.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/impl/GraphImpl.scala
@@ -70,6 +70,12 @@ class GraphImpl[VD: ClassTag, ED: ClassTag] protected (
     replicatedVertexView.edges.checkpoint()
   }
 
+  override def unpersist(blocking: Boolean = true): Graph[VD, ED] = {
+    unpersistVertices(blocking)
+    replicatedVertexView.edges.unpersist(blocking)
+    this
+  }
+
   override def unpersistVertices(blocking: Boolean = true): Graph[VD, ED] = {
     vertices.unpersist(blocking)
     // TODO: unpersist the replicated vertices in `replicatedVertexView` but leave the edges alone
```