Improve docs for EdgeRDD, EdgeTriplet, and GraphLab

author: Ankur Dave <ankurdave@gmail.com> 2014-01-10 12:37:03 -0800
committer: Ankur Dave <ankurdave@gmail.com> 2014-01-10 12:37:03 -0800
commit: 37611e57f6ca1768529e4038198de56f85f90665 (patch)
tree: dc7b45a90a8544c7165f152603969888ebc67748
parent: eee9bc0958cd3b5efa312d4690f93ff259ca4b39 (diff)
download: spark-37611e57f6ca1768529e4038198de56f85f90665.tar.gz
spark-37611e57f6ca1768529e4038198de56f85f90665.tar.bz2
spark-37611e57f6ca1768529e4038198de56f85f90665.zip
3 files changed, 24 insertions, 35 deletions
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/EdgeRDD.scala b/graphx/src/main/scala/org/apache/spark/graphx/EdgeRDD.scala
index 61228e9628..05d3dbe337 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/EdgeRDD.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/EdgeRDD.scala
@@ -9,7 +9,7 @@ import org.apache.spark.storage.StorageLevel
 
 /**
  * `EdgeRDD[ED]` extends `RDD[Edge[ED]]` by storing the edges in columnar format on each partition
- * for performance. It is constructed using [[org.apache.spark.graphx.impl.EdgePartitionBuilder]].
+ * for performance. It is constructed using [[impl.EdgePartitionBuilder]].
  */
 class EdgeRDD[@specialized ED: ClassTag](
     val partitionsRDD: RDD[(PartitionID, EdgePartition[ED])])
@@ -20,9 +20,9 @@ class EdgeRDD[@specialized ED: ClassTag](
   override protected def getPartitions: Array[Partition] = partitionsRDD.partitions
 
   /**
-   * If partitionsRDD already has a partitioner, use it. Otherwise assume that the PartitionIDs in
-   * partitionsRDD correspond to the actual partitions and create a new partitioner that allows
-   * co-partitioning with partitionsRDD.
+   * If `partitionsRDD` already has a partitioner, use it. Otherwise assume that the
+   * [[PartitionID]]s in `partitionsRDD` correspond to the actual partitions and create a new
+   * partitioner that allows co-partitioning with `partitionsRDD`.
    */
   override val partitioner =
     partitionsRDD.partitioner.orElse(Some(Partitioner.defaultPartitioner(partitionsRDD)))
@@ -33,9 +33,6 @@ class EdgeRDD[@specialized ED: ClassTag](
 
   override def collect(): Array[Edge[ED]] = this.map(_.copy()).collect()
 
-  /**
-   * Caching a VertexRDD causes the index and values to be cached separately.
-   */
   override def persist(newLevel: StorageLevel): EdgeRDD[ED] = {
     partitionsRDD.persist(newLevel)
     this
@@ -76,5 +73,4 @@ class EdgeRDD[@specialized ED: ClassTag](
   def collectVertexIDs(): RDD[VertexID] = {
     partitionsRDD.flatMap { case (_, p) => Array.concat(p.srcIds, p.dstIds) }
   }
-
 }
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/EdgeTriplet.scala b/graphx/src/main/scala/org/apache/spark/graphx/EdgeTriplet.scala
index 5e2528925f..057d63a0ac 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/EdgeTriplet.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/EdgeTriplet.scala
@@ -3,15 +3,10 @@ package org.apache.spark.graphx
 import org.apache.spark.graphx.impl.VertexPartition
 
 /**
- * An edge triplet represents two vertices and edge along with their
- * attributes.
+ * An edge triplet represents an edge along with the vertex attributes of its neighboring vertices.
  *
  * @tparam VD the type of the vertex attribute.
  * @tparam ED the type of the edge attribute
- *
- * @todo specialize edge triplet for basic types, though when I last
- * tried specializing I got a warning about inherenting from a type
- * that is not a trait.
  */
 class EdgeTriplet[VD, ED] extends Edge[ED] {
   /**
@@ -37,8 +32,8 @@ class EdgeTriplet[VD, ED] extends Edge[ED] {
   /**
    * Given one vertex in the edge return the other vertex.
    *
-   * @param vid the id one of the two vertices on the edge.
-   * @return the attribute for the other vertex on the edge.
+   * @param vid the id one of the two vertices on the edge
+   * @return the attribute for the other vertex on the edge
    */
   def otherVertexAttr(vid: VertexID): VD =
     if (srcId == vid) dstAttr else { assert(dstId == vid); srcAttr }
@@ -47,7 +42,7 @@ class EdgeTriplet[VD, ED] extends Edge[ED] {
    * Get the vertex object for the given vertex in the edge.
    *
    * @param vid the id of one of the two vertices on the edge
-   * @return the attr for the vertex with that id.
+   * @return the attr for the vertex with that id
    */
   def vertexAttr(vid: VertexID): VD =
     if (srcId == vid) srcAttr else { assert(dstId == vid); dstAttr }
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/GraphLab.scala b/graphx/src/main/scala/org/apache/spark/graphx/GraphLab.scala
index 7efc69c64e..016bab5d02 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/GraphLab.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/GraphLab.scala
@@ -7,35 +7,33 @@ import scala.collection.JavaConversions._
 import org.apache.spark.rdd.RDD
 
 /**
- * This object implements the GraphLab gather-apply-scatter api.
+ * Implements the GraphLab gather-apply-scatter API.
  */
 object GraphLab extends Logging {
 
   /**
-   * Execute the GraphLab Gather-Apply-Scatter API
+   * Executes the GraphLab Gather-Apply-Scatter API.
    *
-   * @todo finish documenting GraphLab Gather-Apply-Scatter API
-   *
-   * @param graph The graph on which to execute the GraphLab API
-   * @param gatherFunc The gather function is executed on each edge triplet
-   *                   adjacent to a vertex and returns an accumulator which
+   * @param graph the graph on which to execute the GraphLab API
+   * @param gatherFunc executed on each edge triplet
+   *                   adjacent to a vertex. Returns an accumulator which
    *                   is then merged using the merge function.
-   * @param mergeFunc An accumulative associative operation on the result of
+   * @param mergeFunc an accumulative associative operation on the result of
    *                  the gather type.
-   * @param applyFunc Takes a vertex and the final result of the merge operations
+   * @param applyFunc takes a vertex and the final result of the merge operations
    *                  on the adjacent edges and returns a new vertex value.
-   * @param scatterFunc Executed after the apply function the scatter function takes
+   * @param scatterFunc executed after the apply function. Takes
    *                    a triplet and signals whether the neighboring vertex program
    *                    must be recomputed.
-   * @param startVertices predicate to determine which vertices to start the computation on.
-   *                      these will be the active vertices in the first iteration.
-   * @param numIter The maximum number of iterations to run.
-   * @param gatherDirection The direction of edges to consider during the gather phase
-   * @param scatterDirection The direction of edges to consider during the scatter phase
+   * @param startVertices a predicate to determine which vertices to start the computation on.
+   *                      These will be the active vertices in the first iteration.
+   * @param numIter the maximum number of iterations to run
+   * @param gatherDirection the direction of edges to consider during the gather phase
+   * @param scatterDirection the direction of edges to consider during the scatter phase
    *
-   * @tparam VD The graph vertex attribute type
-   * @tparam ED The graph edge attribute type
-   * @tparam A The type accumulated during the gather phase
+   * @tparam VD the graph vertex attribute type
+   * @tparam ED the graph edge attribute type
+   * @tparam A the type accumulated during the gather phase
    * @return the resulting graph after the algorithm converges
    */
   def apply[VD: ClassTag, ED: ClassTag, A: ClassTag]
author	Ankur Dave <ankurdave@gmail.com>	2014-01-10 12:37:03 -0800
committer	Ankur Dave <ankurdave@gmail.com>	2014-01-10 12:37:03 -0800
commit	37611e57f6ca1768529e4038198de56f85f90665 (patch)
tree	dc7b45a90a8544c7165f152603969888ebc67748
parent	eee9bc0958cd3b5efa312d4690f93ff259ca4b39 (diff)
download	spark-37611e57f6ca1768529e4038198de56f85f90665.tar.gz spark-37611e57f6ca1768529e4038198de56f85f90665.tar.bz2 spark-37611e57f6ca1768529e4038198de56f85f90665.zip