diff options
author | Josh Rosen <joshrosen@eecs.berkeley.edu> | 2012-10-06 23:59:55 -0700 |
---|---|---|
committer | Josh Rosen <joshrosen@eecs.berkeley.edu> | 2012-10-07 00:05:37 -0700 |
commit | 4f72066a9ab02308f733bf248b1ca003abcc0874 (patch) | |
tree | 3b81db66ca8b7d83814cbb02d61e608eab974682 /core | |
parent | 3f2571fe9858c020544be2248ec1fa641f66ca3f (diff) | |
download | spark-4f72066a9ab02308f733bf248b1ca003abcc0874.tar.gz spark-4f72066a9ab02308f733bf248b1ca003abcc0874.tar.bz2 spark-4f72066a9ab02308f733bf248b1ca003abcc0874.zip |
Document the Dependency classes.
Diffstat (limited to 'core')
-rw-r--r-- | core/src/main/scala/spark/Dependency.scala | 25 | ||||
-rw-r--r-- | core/src/main/scala/spark/rdd/UnionRDD.scala | 2 |
2 files changed, 26 insertions, 1 deletions
diff --git a/core/src/main/scala/spark/Dependency.scala b/core/src/main/scala/spark/Dependency.scala index 979da9151f..f2b7aa33ec 100644 --- a/core/src/main/scala/spark/Dependency.scala +++ b/core/src/main/scala/spark/Dependency.scala @@ -1,11 +1,26 @@ package spark +/** + * Base class for dependencies. + */ abstract class Dependency[T](val rdd: RDD[T]) extends Serializable +/** + * Base class for dependencies where each partition of the parent RDD is used by at most one + * partition of the child RDD. Narrow dependencies allow for pipelined execution. + */ abstract class NarrowDependency[T](rdd: RDD[T]) extends Dependency(rdd) { + /** + * Get the parent partitions for a child partition. + * @param outputPartition a partition of the child RDD + * @return the partitions of the parent RDD that the child partition depends upon + */ def getParents(outputPartition: Int): Seq[Int] } +/** + * Represents a dependency on the output of a shuffle stage. + */ class ShuffleDependency[K, V, C]( val shuffleId: Int, @transient rdd: RDD[(K, V)], @@ -13,10 +28,20 @@ class ShuffleDependency[K, V, C]( val partitioner: Partitioner) extends Dependency(rdd) +/** + * Represents a one-to-one dependency between partitions of the parent and child RDDs. + */ class OneToOneDependency[T](rdd: RDD[T]) extends NarrowDependency[T](rdd) { override def getParents(partitionId: Int) = List(partitionId) } +/** + * Represents a one-to-one dependency between ranges of partitions in the parent and child RDDs. + * @param rdd the parent RDD + * @param inStart the start of the range in the parent RDD + * @param outStart the start of the range in the child RDD + * @param length the length of the range + */ class RangeDependency[T](rdd: RDD[T], inStart: Int, outStart: Int, length: Int) extends NarrowDependency[T](rdd) { diff --git a/core/src/main/scala/spark/rdd/UnionRDD.scala b/core/src/main/scala/spark/rdd/UnionRDD.scala index 4ba2848491..f0b9225f7c 100644 --- a/core/src/main/scala/spark/rdd/UnionRDD.scala +++ b/core/src/main/scala/spark/rdd/UnionRDD.scala @@ -43,7 +43,7 @@ class UnionRDD[T: ClassManifest]( override val dependencies = { val deps = new ArrayBuffer[Dependency[_]] var pos = 0 - for ((rdd, index) <- rdds.zipWithIndex) { + for (rdd <- rdds) { deps += new RangeDependency(rdd, 0, pos, rdd.splits.size) pos += rdd.splits.size } |