| author | Andy Konwinski <andyk@berkeley.edu> | 2012-10-08 21:13:29 -0700 |
|---|---|---|
| committer | Andy Konwinski <andyk@berkeley.edu> | 2012-10-08 21:13:29 -0700 |
| commit | bd688940a187d51c3ce5edd7ffc255a536799327 (patch) | |
| tree | c72c3f9b550ffaad9b3986f7f33b860fb34d7833 /core | |
| parent | 1231eb12e675fec47bc2d3139041b1c178a08c37 (diff) | |
| download | spark-bd688940a187d51c3ce5edd7ffc255a536799327.tar.gz, spark-bd688940a187d51c3ce5edd7ffc255a536799327.tar.bz2, spark-bd688940a187d51c3ce5edd7ffc255a536799327.zip | |
A start on scaladoc for the public APIs.
Diffstat (limited to 'core')
-rw-r--r-- | core/src/main/scala/spark/RDD.scala | 35 |
1 file changed, 29 insertions, 6 deletions
```diff
diff --git a/core/src/main/scala/spark/RDD.scala b/core/src/main/scala/spark/RDD.scala
index f32ff475da..42d5b821f8 100644
--- a/core/src/main/scala/spark/RDD.scala
+++ b/core/src/main/scala/spark/RDD.scala
@@ -165,12 +165,22 @@ abstract class RDD[T: ClassManifest](@transient sc: SparkContext) extends Serial
   // Transformations (return a new RDD)
 
   def map[U: ClassManifest](f: T => U): RDD[U] = new MappedRDD(this, sc.clean(f))
-
+
+  /**
+   * Return a new RDD by first applying a function to all elements of this
+   * RDD, and then flattening the results.
+   */
   def flatMap[U: ClassManifest](f: T => TraversableOnce[U]): RDD[U] =
     new FlatMappedRDD(this, sc.clean(f))
-
+
+  /**
+   * Return a new RDD containing only the elements that satisfy a predicate.
+   */
   def filter(f: T => Boolean): RDD[T] = new FilteredRDD(this, sc.clean(f))
 
+  /**
+   * Return a new RDD containing the distinct elements in this RDD.
+   */
   def distinct(numSplits: Int = splits.size): RDD[T] =
     map(x => (x, null)).reduceByKey((x, y) => x, numSplits).map(_._1)
 
@@ -247,12 +257,18 @@ abstract class RDD[T: ClassManifest](@transient sc: SparkContext) extends Serial
     new MapPartitionsWithSplitRDD(this, sc.clean(f))
 
   // Actions (launch a job to return a value to the user program)
-
+
+  /**
+   * Applies a function f to all elements of this RDD.
+   */
   def foreach(f: T => Unit) {
     val cleanF = sc.clean(f)
     sc.runJob(this, (iter: Iterator[T]) => iter.foreach(cleanF))
   }
 
+  /**
+   * Return an array that contains all of the elements in this RDD.
+   */
   def collect(): Array[T] = {
     val results = sc.runJob(this, (iter: Iterator[T]) => iter.toArray)
     Array.concat(results: _*)
@@ -308,7 +324,10 @@ abstract class RDD[T: ClassManifest](@transient sc: SparkContext) extends Serial
         (iter: Iterator[T]) => iter.aggregate(zeroValue)(cleanSeqOp, cleanCombOp))
     return results.fold(zeroValue)(cleanCombOp)
   }
-
+
+  /**
+   * Return the number of elements in the RDD.
+   */
   def count(): Long = {
     sc.runJob(this, (iter: Iterator[T]) => {
       var result = 0L
@@ -337,8 +356,9 @@ abstract class RDD[T: ClassManifest](@transient sc: SparkContext) extends Serial
   }
 
   /**
-   * Count elements equal to each value, returning a map of (value, count) pairs. The final combine
-   * step happens locally on the master, equivalent to running a single reduce task.
+   * Return the count of each unique value in this RDD as a map of
+   * (value, count) pairs. The final combine step happens locally on the
+   * master, equivalent to running a single reduce task.
    *
    * TODO: This should perhaps be distributed by default.
    */
@@ -404,6 +424,9 @@ abstract class RDD[T: ClassManifest](@transient sc: SparkContext) extends Serial
     return buf.toArray
   }
 
+  /*
+   * Return the first element in this RDD.
+   */
   def first(): T = take(1) match {
     case Array(t) => t
     case _ => throw new UnsupportedOperationException("empty collection")
```
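As a usage sketch (not part of the commit): the hypothetical driver program below exercises the methods whose scaladoc this change adds. It assumes the 2012-era `spark` package and the `SparkContext(master, jobName)` constructor of that period, with a local master; the object name and sample data are illustrative only.

```scala
import spark.SparkContext

// Hypothetical driver program (not from this commit) exercising the RDD
// methods documented above. Assumes Spark's 2012-era API, where
// SparkContext took a master URL and a job name.
object RDDDocExample {
  def main(args: Array[String]) {
    val sc = new SparkContext("local", "RDDDocExample")
    val nums = sc.parallelize(Seq(1, 2, 2, 3, 4))

    // Transformations: each returns a new RDD and runs no job yet.
    val doubled  = nums.map(_ * 2)                // MappedRDD
    val expanded = nums.flatMap(x => Seq(x, -x))  // flattens the per-element Seqs
    val evens    = nums.filter(_ % 2 == 0)        // keeps elements satisfying the predicate
    val uniques  = nums.distinct()                // implemented via reduceByKey, per the diff

    // Actions: each launches a job and returns a value to the driver.
    println(doubled.collect().mkString(", "))     // 2, 4, 4, 6, 8
    println(uniques.count())                      // 4
    println(nums.countByValue())                  // (value, count) pairs, e.g. 2 -> 2
    println(expanded.first())                     // 1 (throws if the RDD is empty)
    evens.foreach(x => println(x))                // side effect runs on the workers

    sc.stop()
  }
}
```

The split in the example mirrors the section comments in RDD.scala itself: transformations such as `map` and `filter` only construct new RDD objects, while actions such as `collect` and `count` call `sc.runJob` to actually compute a result.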