aboutsummaryrefslogtreecommitdiff
path: root/graphx
diff options
context:
space:
mode:
author JerryLead <JerryLead@163.com> 2014-12-02 17:14:11 -0800
committer Ankur Dave <ankurdave@gmail.com> 2014-12-02 17:14:11 -0800
commit 17c162f6682520e6e2790626e37da3a074471793 (patch)
tree a309f7f10f1fe7798e823daea6a35f5b5a949990 /graphx
parent fc0a1475ef7c8b33363d88adfe8e8f28def5afc7 (diff)
download spark-17c162f6682520e6e2790626e37da3a074471793.tar.gz
spark-17c162f6682520e6e2790626e37da3a074471793.tar.bz2
spark-17c162f6682520e6e2790626e37da3a074471793.zip
[SPARK-4672][GraphX] Non-transient PartitionsRDDs will lead to StackOverflow error
The related JIRA is https://issues.apache.org/jira/browse/SPARK-4672

In a nutshell, if `val partitionsRDD` in EdgeRDDImpl and VertexRDDImpl is non-transient, the serialization chain can become very long in iterative algorithms and eventually lead to a StackOverflowError.

More details and explanation can be found in the JIRA.

Author: JerryLead <JerryLead@163.com>
Author: Lijie Xu <csxulijie@gmail.com>

Closes #3544 from JerryLead/my_graphX and squashes the following commits:

628f33c [JerryLead] set PartitionsRDD to be transient in EdgeRDDImpl and VertexRDDImpl
c0169da [JerryLead] Merge branch 'master' of https://github.com/apache/spark
52799e3 [Lijie Xu] Merge pull request #1 from apache/master
Diffstat (limited to 'graphx')
-rw-r--r-- graphx/src/main/scala/org/apache/spark/graphx/impl/EdgeRDDImpl.scala 2
-rw-r--r-- graphx/src/main/scala/org/apache/spark/graphx/impl/VertexRDDImpl.scala 2
2 files changed, 2 insertions, 2 deletions
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgeRDDImpl.scala b/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgeRDDImpl.scala
index 504559da97..897c7ee12a 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgeRDDImpl.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgeRDDImpl.scala
@@ -26,7 +26,7 @@ import org.apache.spark.storage.StorageLevel
import org.apache.spark.graphx._
class EdgeRDDImpl[ED: ClassTag, VD: ClassTag] private[graphx] (
- override val partitionsRDD: RDD[(PartitionID, EdgePartition[ED, VD])],
+ @transient override val partitionsRDD: RDD[(PartitionID, EdgePartition[ED, VD])],
val targetStorageLevel: StorageLevel = StorageLevel.MEMORY_ONLY)
extends EdgeRDD[ED](partitionsRDD.context, List(new OneToOneDependency(partitionsRDD))) {
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/impl/VertexRDDImpl.scala b/graphx/src/main/scala/org/apache/spark/graphx/impl/VertexRDDImpl.scala
index c8898b1369..9732c5b00c 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/impl/VertexRDDImpl.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/impl/VertexRDDImpl.scala
@@ -27,7 +27,7 @@ import org.apache.spark.storage.StorageLevel
import org.apache.spark.graphx._
class VertexRDDImpl[VD] private[graphx] (
- val partitionsRDD: RDD[ShippableVertexPartition[VD]],
+ @transient val partitionsRDD: RDD[ShippableVertexPartition[VD]],
val targetStorageLevel: StorageLevel = StorageLevel.MEMORY_ONLY)
(implicit override protected val vdTag: ClassTag[VD])
extends VertexRDD[VD](partitionsRDD.context, List(new OneToOneDependency(partitionsRDD))) {