aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAnkur Dave <ankurdave@gmail.com>2014-01-10 23:46:02 -0800
committerAnkur Dave <ankurdave@gmail.com>2014-01-10 23:46:02 -0800
commit2d7e8d8c48e07adea41f255e357aa750b9ddcdc6 (patch)
tree8b0b88d37050678d892fd81a3e69b0c55a3a7df9
parenta696be1e0114e089cfb252a55c73459a47c8c6df (diff)
downloadspark-2d7e8d8c48e07adea41f255e357aa750b9ddcdc6.tar.gz
spark-2d7e8d8c48e07adea41f255e357aa750b9ddcdc6.tar.bz2
spark-2d7e8d8c48e07adea41f255e357aa750b9ddcdc6.zip
Add GC note to GraphLab
-rw-r--r--graphx/src/main/scala/org/apache/spark/graphx/GraphLab.scala6
1 files changed, 6 insertions, 0 deletions
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/GraphLab.scala b/graphx/src/main/scala/org/apache/spark/graphx/GraphLab.scala
index 016bab5d02..2f828ad807 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/GraphLab.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/GraphLab.scala
@@ -35,6 +35,12 @@ object GraphLab extends Logging {
* @tparam ED the graph edge attribute type
* @tparam A the type accumulated during the gather phase
* @return the resulting graph after the algorithm converges
+ *
+ * @note Unlike [[Pregel]], this implementation of [[GraphLab]] does not unpersist RDDs from
+ * previous iterations. As a result, long-running iterative GraphLab programs will gradually fill
+ * the Spark cache. Although Spark will eventually evict RDDs from old iterations, garbage
+ * collection will take longer than necessary because it must examine the entire cache. This will
+ * be fixed in a future update.
*/
def apply[VD: ClassTag, ED: ClassTag, A: ClassTag]
(graph: Graph[VD, ED], numIter: Int,