diff options
author | JerryLead <JerryLead@163.com> | 2014-12-02 17:14:11 -0800 |
---|---|---|
committer | Ankur Dave <ankurdave@gmail.com> | 2014-12-02 17:14:11 -0800 |
commit | 17c162f6682520e6e2790626e37da3a074471793 (patch) | |
tree | a309f7f10f1fe7798e823daea6a35f5b5a949990 /graphx | |
parent | fc0a1475ef7c8b33363d88adfe8e8f28def5afc7 (diff) | |
download | spark-17c162f6682520e6e2790626e37da3a074471793.tar.gz spark-17c162f6682520e6e2790626e37da3a074471793.tar.bz2 spark-17c162f6682520e6e2790626e37da3a074471793.zip |
[SPARK-4672][GraphX] Non-transient PartitionsRDDs will lead to StackOverflowError
The related JIRA is https://issues.apache.org/jira/browse/SPARK-4672
In a nutshell, if the `val partitionsRDD` fields in EdgeRDDImpl and VertexRDDImpl are non-transient, the serialization chain can become very long in iterative algorithms and eventually lead to a StackOverflowError. More details and explanation can be found in the JIRA.
Author: JerryLead <JerryLead@163.com>
Author: Lijie Xu <csxulijie@gmail.com>
Closes #3544 from JerryLead/my_graphX and squashes the following commits:
628f33c [JerryLead] set PartitionsRDD to be transient in EdgeRDDImpl and VertexRDDImpl
c0169da [JerryLead] Merge branch 'master' of https://github.com/apache/spark
52799e3 [Lijie Xu] Merge pull request #1 from apache/master
Diffstat (limited to 'graphx')
-rw-r--r-- | graphx/src/main/scala/org/apache/spark/graphx/impl/EdgeRDDImpl.scala | 2 | ||||
-rw-r--r-- | graphx/src/main/scala/org/apache/spark/graphx/impl/VertexRDDImpl.scala | 2 |
2 files changed, 2 insertions, 2 deletions
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgeRDDImpl.scala b/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgeRDDImpl.scala index 504559da97..897c7ee12a 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgeRDDImpl.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgeRDDImpl.scala @@ -26,7 +26,7 @@ import org.apache.spark.storage.StorageLevel import org.apache.spark.graphx._ class EdgeRDDImpl[ED: ClassTag, VD: ClassTag] private[graphx] ( - override val partitionsRDD: RDD[(PartitionID, EdgePartition[ED, VD])], + @transient override val partitionsRDD: RDD[(PartitionID, EdgePartition[ED, VD])], val targetStorageLevel: StorageLevel = StorageLevel.MEMORY_ONLY) extends EdgeRDD[ED](partitionsRDD.context, List(new OneToOneDependency(partitionsRDD))) { diff --git a/graphx/src/main/scala/org/apache/spark/graphx/impl/VertexRDDImpl.scala b/graphx/src/main/scala/org/apache/spark/graphx/impl/VertexRDDImpl.scala index c8898b1369..9732c5b00c 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/impl/VertexRDDImpl.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/impl/VertexRDDImpl.scala @@ -27,7 +27,7 @@ import org.apache.spark.storage.StorageLevel import org.apache.spark.graphx._ class VertexRDDImpl[VD] private[graphx] ( - val partitionsRDD: RDD[ShippableVertexPartition[VD]], + @transient val partitionsRDD: RDD[ShippableVertexPartition[VD]], val targetStorageLevel: StorageLevel = StorageLevel.MEMORY_ONLY) (implicit override protected val vdTag: ClassTag[VD]) extends VertexRDD[VD](partitionsRDD.context, List(new OneToOneDependency(partitionsRDD))) { |