aboutsummaryrefslogtreecommitdiff
path: root/core/src/main
diff options
context:
space:
mode:
authorJosh Rosen <joshrosen@eecs.berkeley.edu>2012-10-06 23:59:55 -0700
committerJosh Rosen <joshrosen@eecs.berkeley.edu>2012-10-07 00:05:37 -0700
commit4f72066a9ab02308f733bf248b1ca003abcc0874 (patch)
tree3b81db66ca8b7d83814cbb02d61e608eab974682 /core/src/main
parent3f2571fe9858c020544be2248ec1fa641f66ca3f (diff)
downloadspark-4f72066a9ab02308f733bf248b1ca003abcc0874.tar.gz
spark-4f72066a9ab02308f733bf248b1ca003abcc0874.tar.bz2
spark-4f72066a9ab02308f733bf248b1ca003abcc0874.zip
Document the Dependency classes.
Diffstat (limited to 'core/src/main')
-rw-r--r--core/src/main/scala/spark/Dependency.scala25
-rw-r--r--core/src/main/scala/spark/rdd/UnionRDD.scala2
2 files changed, 26 insertions, 1 deletions
diff --git a/core/src/main/scala/spark/Dependency.scala b/core/src/main/scala/spark/Dependency.scala
index 979da9151f..f2b7aa33ec 100644
--- a/core/src/main/scala/spark/Dependency.scala
+++ b/core/src/main/scala/spark/Dependency.scala
@@ -1,11 +1,26 @@
package spark
+/**
+ * Base class for dependencies.
+ */
abstract class Dependency[T](val rdd: RDD[T]) extends Serializable
+/**
+ * Base class for dependencies where each partition of the parent RDD is used by at most one
+ * partition of the child RDD. Narrow dependencies allow for pipelined execution.
+ */
abstract class NarrowDependency[T](rdd: RDD[T]) extends Dependency(rdd) {
+ /**
+ * Get the parent partitions for a child partition.
+ * @param outputPartition a partition of the child RDD
+ * @return the partitions of the parent RDD that the child partition depends upon
+ */
def getParents(outputPartition: Int): Seq[Int]
}
+/**
+ * Represents a dependency on the output of a shuffle stage.
+ */
class ShuffleDependency[K, V, C](
val shuffleId: Int,
@transient rdd: RDD[(K, V)],
@@ -13,10 +28,20 @@ class ShuffleDependency[K, V, C](
val partitioner: Partitioner)
extends Dependency(rdd)
+/**
+ * Represents a one-to-one dependency between partitions of the parent and child RDDs.
+ */
class OneToOneDependency[T](rdd: RDD[T]) extends NarrowDependency[T](rdd) {
override def getParents(partitionId: Int) = List(partitionId)
}
+/**
+ * Represents a one-to-one dependency between ranges of partitions in the parent and child RDDs.
+ * @param rdd the parent RDD
+ * @param inStart the start of the range in the parent RDD
+ * @param outStart the start of the range in the child RDD
+ * @param length the length of the range
+ */
class RangeDependency[T](rdd: RDD[T], inStart: Int, outStart: Int, length: Int)
extends NarrowDependency[T](rdd) {
diff --git a/core/src/main/scala/spark/rdd/UnionRDD.scala b/core/src/main/scala/spark/rdd/UnionRDD.scala
index 4ba2848491..f0b9225f7c 100644
--- a/core/src/main/scala/spark/rdd/UnionRDD.scala
+++ b/core/src/main/scala/spark/rdd/UnionRDD.scala
@@ -43,7 +43,7 @@ class UnionRDD[T: ClassManifest](
override val dependencies = {
val deps = new ArrayBuffer[Dependency[_]]
var pos = 0
- for ((rdd, index) <- rdds.zipWithIndex) {
+ for (rdd <- rdds) {
deps += new RangeDependency(rdd, 0, pos, rdd.splits.size)
pos += rdd.splits.size
}