aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- graphx/src/main/scala/org/apache/spark/graphx/GraphLab.scala 6
1 files changed, 6 insertions, 0 deletions
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/GraphLab.scala b/graphx/src/main/scala/org/apache/spark/graphx/GraphLab.scala
index 016bab5d02..2f828ad807 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/GraphLab.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/GraphLab.scala
@@ -35,6 +35,12 @@ object GraphLab extends Logging {
* @tparam ED the graph edge attribute type
* @tparam A the type accumulated during the gather phase
* @return the resulting graph after the algorithm converges
+ *
+ * @note Unlike [[Pregel]], this implementation of [[GraphLab]] does not unpersist RDDs from
+ * previous iterations. As a result, long-running iterative GraphLab programs will eventually fill
+ * the Spark cache. Although Spark will eventually evict RDDs from old iterations, garbage
+ * collection will take longer than necessary because it must examine the entire cache. This will
+ * be fixed in a future update.
*/
def apply[VD: ClassTag, ED: ClassTag, A: ClassTag]
(graph: Graph[VD, ED], numIter: Int,