aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Takeshi Yamamuro <linguin.m.s@gmail.com> 2014-12-07 19:42:02 -0800
committer: Ankur Dave <ankurdave@gmail.com> 2014-12-07 19:42:29 -0800
commit: 6b9e8b081655f71f7ff2c4238254f7aaa110723c (patch)
tree: cf8fb2b030c2cce983a11551c43bda41c24f08cb
parent: a4ae7c8b533b3998484879439c0982170c3c38a7 (diff)
download: spark-6b9e8b081655f71f7ff2c4238254f7aaa110723c.tar.gz
spark-6b9e8b081655f71f7ff2c4238254f7aaa110723c.tar.bz2
spark-6b9e8b081655f71f7ff2c4238254f7aaa110723c.zip
[SPARK-4620] Add unpersist in Graph and GraphImpl
Add an interface (`unpersist`) to uncache both the vertices and the edges of Graph/GraphImpl. This interface is useful when iterative graph operations build a new graph in each iteration, and the vertices and edges of previous iterations are no longer needed in the following iterations.

Author: Takeshi Yamamuro <linguin.m.s@gmail.com>

This patch had conflicts when merged, resolved by
Committer: Ankur Dave <ankurdave@gmail.com>

Closes #3476 from maropu/UnpersistInGraphSpike and squashes the following commits:

77a006a [Takeshi Yamamuro] Add unpersist in Graph and GraphImpl

(cherry picked from commit 8817fc7fe8785d7b11138ca744f22f7e70f1f0a0)
Signed-off-by: Ankur Dave <ankurdave@gmail.com>
-rw-r--r-- graphx/src/main/scala/org/apache/spark/graphx/Graph.scala | 6
-rw-r--r-- graphx/src/main/scala/org/apache/spark/graphx/impl/GraphImpl.scala | 6
2 files changed, 12 insertions, 0 deletions
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala b/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala
index 23538b7156..84b72b390c 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala
@@ -105,6 +105,12 @@ abstract class Graph[VD: ClassTag, ED: ClassTag] protected () extends Serializab
def checkpoint(): Unit
/**
+ * Uncaches both vertices and edges of this graph. This is useful in iterative algorithms that
+ * build a new graph in each iteration.
+ */
+ def unpersist(blocking: Boolean = true): Graph[VD, ED]
+
+ /**
* Uncaches only the vertices of this graph, leaving the edges alone. This is useful in iterative
* algorithms that modify the vertex attributes but reuse the edges. This method can be used to
* uncache the vertex attributes of previous iterations once they are no longer needed, improving
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/impl/GraphImpl.scala b/graphx/src/main/scala/org/apache/spark/graphx/impl/GraphImpl.scala
index a617d84aea..3f4a900d5b 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/impl/GraphImpl.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/impl/GraphImpl.scala
@@ -70,6 +70,12 @@ class GraphImpl[VD: ClassTag, ED: ClassTag] protected (
replicatedVertexView.edges.checkpoint()
}
+ override def unpersist(blocking: Boolean = true): Graph[VD, ED] = {
+ unpersistVertices(blocking)
+ replicatedVertexView.edges.unpersist(blocking)
+ this
+ }
+
override def unpersistVertices(blocking: Boolean = true): Graph[VD, ED] = {
vertices.unpersist(blocking)
// TODO: unpersist the replicated vertices in `replicatedVertexView` but leave the edges alone