aboutsummaryrefslogtreecommitdiff
path: root/graphx/src/main
diff options
context:
space:
mode:
authorGuoQiang Li <witgo@qq.com>2014-12-06 00:56:51 -0800
committerAnkur Dave <ankurdave@gmail.com>2014-12-06 00:56:51 -0800
commite895e0cbecbbec1b412ff21321e57826d2d0a982 (patch)
tree7f75e1a51a0707a2ebf97c2e0f32c50058a98701 /graphx/src/main
parent6eb1b6f6204ea3c8083af3fb9cd990d9f3dac89d (diff)
downloadspark-e895e0cbecbbec1b412ff21321e57826d2d0a982.tar.gz
spark-e895e0cbecbbec1b412ff21321e57826d2d0a982.tar.bz2
spark-e895e0cbecbbec1b412ff21321e57826d2d0a982.zip
[SPARK-3623][GraphX] GraphX should support the checkpoint operation
Author: GuoQiang Li <witgo@qq.com> Closes #2631 from witgo/SPARK-3623 and squashes the following commits: a70c500 [GuoQiang Li] Remove java related 4d1e249 [GuoQiang Li] Add comments e682724 [GuoQiang Li] Graph should support the checkpoint operation
Diffstat (limited to 'graphx/src/main')
-rw-r--r--graphx/src/main/scala/org/apache/spark/graphx/Graph.scala8
-rw-r--r--graphx/src/main/scala/org/apache/spark/graphx/impl/GraphImpl.scala5
2 files changed, 13 insertions, 0 deletions
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala b/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala
index 6377915435..23538b7156 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala
@@ -97,6 +97,14 @@ abstract class Graph[VD: ClassTag, ED: ClassTag] protected () extends Serializab
def cache(): Graph[VD, ED]
/**
+ * Mark this Graph for checkpointing. It will be saved to a file inside the checkpoint
+ * directory set with SparkContext.setCheckpointDir() and all references to its parent
+ * RDDs will be removed. It is strongly recommended that this Graph is persisted in
+ * memory, otherwise saving it on a file will require recomputation.
+ */
+ def checkpoint(): Unit
+
+ /**
* Uncaches only the vertices of this graph, leaving the edges alone. This is useful in iterative
* algorithms that modify the vertex attributes but reuse the edges. This method can be used to
* uncache the vertex attributes of previous iterations once they are no longer needed, improving
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/impl/GraphImpl.scala b/graphx/src/main/scala/org/apache/spark/graphx/impl/GraphImpl.scala
index 0eae2a6738..a617d84aea 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/impl/GraphImpl.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/impl/GraphImpl.scala
@@ -65,6 +65,11 @@ class GraphImpl[VD: ClassTag, ED: ClassTag] protected (
this
}
+ override def checkpoint(): Unit = {
+ vertices.checkpoint()
+ replicatedVertexView.edges.checkpoint()
+ }
+
override def unpersistVertices(blocking: Boolean = true): Graph[VD, ED] = {
vertices.unpersist(blocking)
// TODO: unpersist the replicated vertices in `replicatedVertexView` but leave the edges alone