From 17c162f6682520e6e2790626e37da3a074471793 Mon Sep 17 00:00:00 2001 From: JerryLead Date: Tue, 2 Dec 2014 17:14:11 -0800 Subject: [SPARK-4672][GraphX]Non-transient PartitionsRDDs will lead to StackOverflow error The related JIRA is https://issues.apache.org/jira/browse/SPARK-4672 In a nutshell, if `val partitionsRDD` in EdgeRDDImpl and VertexRDDImpl are non-transient, the serialization chain can become very long in iterative algorithms and finally lead to the StackOverflow error. More details and explanation can be found in the JIRA. Author: JerryLead Author: Lijie Xu Closes #3544 from JerryLead/my_graphX and squashes the following commits: 628f33c [JerryLead] set PartitionsRDD to be transient in EdgeRDDImpl and VertexRDDImpl c0169da [JerryLead] Merge branch 'master' of https://github.com/apache/spark 52799e3 [Lijie Xu] Merge pull request #1 from apache/master --- graphx/src/main/scala/org/apache/spark/graphx/impl/EdgeRDDImpl.scala | 2 +- graphx/src/main/scala/org/apache/spark/graphx/impl/VertexRDDImpl.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'graphx') diff --git a/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgeRDDImpl.scala b/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgeRDDImpl.scala index 504559da97..897c7ee12a 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgeRDDImpl.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgeRDDImpl.scala @@ -26,7 +26,7 @@ import org.apache.spark.storage.StorageLevel import org.apache.spark.graphx._ class EdgeRDDImpl[ED: ClassTag, VD: ClassTag] private[graphx] ( - override val partitionsRDD: RDD[(PartitionID, EdgePartition[ED, VD])], + @transient override val partitionsRDD: RDD[(PartitionID, EdgePartition[ED, VD])], val targetStorageLevel: StorageLevel = StorageLevel.MEMORY_ONLY) extends EdgeRDD[ED](partitionsRDD.context, List(new OneToOneDependency(partitionsRDD))) { diff --git a/graphx/src/main/scala/org/apache/spark/graphx/impl/VertexRDDImpl.scala b/graphx/src/main/scala/org/apache/spark/graphx/impl/VertexRDDImpl.scala index c8898b1369..9732c5b00c 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/impl/VertexRDDImpl.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/impl/VertexRDDImpl.scala @@ -27,7 +27,7 @@ import org.apache.spark.storage.StorageLevel import org.apache.spark.graphx._ class VertexRDDImpl[VD] private[graphx] ( - val partitionsRDD: RDD[ShippableVertexPartition[VD]], + @transient val partitionsRDD: RDD[ShippableVertexPartition[VD]], val targetStorageLevel: StorageLevel = StorageLevel.MEMORY_ONLY) (implicit override protected val vdTag: ClassTag[VD]) extends VertexRDD[VD](partitionsRDD.context, List(new OneToOneDependency(partitionsRDD))) { -- cgit v1.2.3