diff options
author | GuoQiang Li <witgo@qq.com> | 2014-12-06 00:56:51 -0800 |
---|---|---|
committer | Ankur Dave <ankurdave@gmail.com> | 2014-12-06 00:56:51 -0800 |
commit | e895e0cbecbbec1b412ff21321e57826d2d0a982 (patch) | |
tree | 7f75e1a51a0707a2ebf97c2e0f32c50058a98701 /graphx/src/main/scala | |
parent | 6eb1b6f6204ea3c8083af3fb9cd990d9f3dac89d (diff) | |
download | spark-e895e0cbecbbec1b412ff21321e57826d2d0a982.tar.gz spark-e895e0cbecbbec1b412ff21321e57826d2d0a982.tar.bz2 spark-e895e0cbecbbec1b412ff21321e57826d2d0a982.zip |
[SPARK-3623][GraphX] GraphX should support the checkpoint operation
Author: GuoQiang Li <witgo@qq.com>
Closes #2631 from witgo/SPARK-3623 and squashes the following commits:
a70c500 [GuoQiang Li] Remove java related
4d1e249 [GuoQiang Li] Add comments
e682724 [GuoQiang Li] Graph should support the checkpoint operation
Diffstat (limited to 'graphx/src/main/scala')
-rw-r--r-- | graphx/src/main/scala/org/apache/spark/graphx/Graph.scala | 8 | ||||
-rw-r--r-- | graphx/src/main/scala/org/apache/spark/graphx/impl/GraphImpl.scala | 5 |
2 files changed, 13 insertions, 0 deletions
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala b/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala index 6377915435..23538b7156 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala @@ -97,6 +97,14 @@ abstract class Graph[VD: ClassTag, ED: ClassTag] protected () extends Serializab def cache(): Graph[VD, ED] /** + * Mark this Graph for checkpointing. It will be saved to a file inside the checkpoint + * directory set with SparkContext.setCheckpointDir() and all references to its parent + * RDDs will be removed. It is strongly recommended that this Graph is persisted in + * memory, otherwise saving it on a file will require recomputation. + */ + def checkpoint(): Unit + + /** * Uncaches only the vertices of this graph, leaving the edges alone. This is useful in iterative * algorithms that modify the vertex attributes but reuse the edges. This method can be used to * uncache the vertex attributes of previous iterations once they are no longer needed, improving diff --git a/graphx/src/main/scala/org/apache/spark/graphx/impl/GraphImpl.scala b/graphx/src/main/scala/org/apache/spark/graphx/impl/GraphImpl.scala index 0eae2a6738..a617d84aea 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/impl/GraphImpl.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/impl/GraphImpl.scala @@ -65,6 +65,11 @@ class GraphImpl[VD: ClassTag, ED: ClassTag] protected ( this } + override def checkpoint(): Unit = { + vertices.checkpoint() + replicatedVertexView.edges.checkpoint() + } + override def unpersistVertices(blocking: Boolean = true): Graph[VD, ED] = { vertices.unpersist(blocking) // TODO: unpersist the replicated vertices in `replicatedVertexView` but leave the edges alone |