aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAnkur Dave <ankurdave@gmail.com>2014-01-10 12:37:03 -0800
committerAnkur Dave <ankurdave@gmail.com>2014-01-10 12:37:03 -0800
commit37611e57f6ca1768529e4038198de56f85f90665 (patch)
treedc7b45a90a8544c7165f152603969888ebc67748
parenteee9bc0958cd3b5efa312d4690f93ff259ca4b39 (diff)
downloadspark-37611e57f6ca1768529e4038198de56f85f90665.tar.gz
spark-37611e57f6ca1768529e4038198de56f85f90665.tar.bz2
spark-37611e57f6ca1768529e4038198de56f85f90665.zip
Improve docs for EdgeRDD, EdgeTriplet, and GraphLab
-rw-r--r--graphx/src/main/scala/org/apache/spark/graphx/EdgeRDD.scala12
-rw-r--r--graphx/src/main/scala/org/apache/spark/graphx/EdgeTriplet.scala13
-rw-r--r--graphx/src/main/scala/org/apache/spark/graphx/GraphLab.scala34
3 files changed, 24 insertions, 35 deletions
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/EdgeRDD.scala b/graphx/src/main/scala/org/apache/spark/graphx/EdgeRDD.scala
index 61228e9628..05d3dbe337 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/EdgeRDD.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/EdgeRDD.scala
@@ -9,7 +9,7 @@ import org.apache.spark.storage.StorageLevel
/**
* `EdgeRDD[ED]` extends `RDD[Edge[ED]]` by storing the edges in columnar format on each partition
- * for performance. It is constructed using [[org.apache.spark.graphx.impl.EdgePartitionBuilder]].
+ * for performance. It is constructed using [[impl.EdgePartitionBuilder]].
*/
class EdgeRDD[@specialized ED: ClassTag](
val partitionsRDD: RDD[(PartitionID, EdgePartition[ED])])
@@ -20,9 +20,9 @@ class EdgeRDD[@specialized ED: ClassTag](
override protected def getPartitions: Array[Partition] = partitionsRDD.partitions
/**
- * If partitionsRDD already has a partitioner, use it. Otherwise assume that the PartitionIDs in
- * partitionsRDD correspond to the actual partitions and create a new partitioner that allows
- * co-partitioning with partitionsRDD.
+ * If `partitionsRDD` already has a partitioner, use it. Otherwise assume that the
+ * [[PartitionID]]s in `partitionsRDD` correspond to the actual partitions and create a new
+ * partitioner that allows co-partitioning with `partitionsRDD`.
*/
override val partitioner =
partitionsRDD.partitioner.orElse(Some(Partitioner.defaultPartitioner(partitionsRDD)))
@@ -33,9 +33,6 @@ class EdgeRDD[@specialized ED: ClassTag](
override def collect(): Array[Edge[ED]] = this.map(_.copy()).collect()
- /**
- * Caching a VertexRDD causes the index and values to be cached separately.
- */
override def persist(newLevel: StorageLevel): EdgeRDD[ED] = {
partitionsRDD.persist(newLevel)
this
@@ -76,5 +73,4 @@ class EdgeRDD[@specialized ED: ClassTag](
def collectVertexIDs(): RDD[VertexID] = {
partitionsRDD.flatMap { case (_, p) => Array.concat(p.srcIds, p.dstIds) }
}
-
}
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/EdgeTriplet.scala b/graphx/src/main/scala/org/apache/spark/graphx/EdgeTriplet.scala
index 5e2528925f..057d63a0ac 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/EdgeTriplet.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/EdgeTriplet.scala
@@ -3,15 +3,10 @@ package org.apache.spark.graphx
import org.apache.spark.graphx.impl.VertexPartition
/**
- * An edge triplet represents two vertices and edge along with their
- * attributes.
+ * An edge triplet represents an edge along with the vertex attributes of its neighboring vertices.
*
* @tparam VD the type of the vertex attribute.
* @tparam ED the type of the edge attribute
- *
- * @todo specialize edge triplet for basic types, though when I last
- * tried specializing I got a warning about inherenting from a type
- * that is not a trait.
*/
class EdgeTriplet[VD, ED] extends Edge[ED] {
/**
@@ -37,8 +32,8 @@ class EdgeTriplet[VD, ED] extends Edge[ED] {
/**
* Given one vertex in the edge return the other vertex.
*
- * @param vid the id one of the two vertices on the edge.
- * @return the attribute for the other vertex on the edge.
+ * @param vid the id one of the two vertices on the edge
+ * @return the attribute for the other vertex on the edge
*/
def otherVertexAttr(vid: VertexID): VD =
if (srcId == vid) dstAttr else { assert(dstId == vid); srcAttr }
@@ -47,7 +42,7 @@ class EdgeTriplet[VD, ED] extends Edge[ED] {
* Get the vertex object for the given vertex in the edge.
*
* @param vid the id of one of the two vertices on the edge
- * @return the attr for the vertex with that id.
+ * @return the attr for the vertex with that id
*/
def vertexAttr(vid: VertexID): VD =
if (srcId == vid) srcAttr else { assert(dstId == vid); dstAttr }
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/GraphLab.scala b/graphx/src/main/scala/org/apache/spark/graphx/GraphLab.scala
index 7efc69c64e..016bab5d02 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/GraphLab.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/GraphLab.scala
@@ -7,35 +7,33 @@ import scala.collection.JavaConversions._
import org.apache.spark.rdd.RDD
/**
- * This object implements the GraphLab gather-apply-scatter api.
+ * Implements the GraphLab gather-apply-scatter API.
*/
object GraphLab extends Logging {
/**
- * Execute the GraphLab Gather-Apply-Scatter API
+ * Executes the GraphLab Gather-Apply-Scatter API.
*
- * @todo finish documenting GraphLab Gather-Apply-Scatter API
- *
- * @param graph The graph on which to execute the GraphLab API
- * @param gatherFunc The gather function is executed on each edge triplet
- * adjacent to a vertex and returns an accumulator which
+ * @param graph the graph on which to execute the GraphLab API
+ * @param gatherFunc executed on each edge triplet
+ * adjacent to a vertex. Returns an accumulator which
* is then merged using the merge function.
- * @param mergeFunc An accumulative associative operation on the result of
+ * @param mergeFunc an accumulative associative operation on the result of
* the gather type.
- * @param applyFunc Takes a vertex and the final result of the merge operations
+ * @param applyFunc takes a vertex and the final result of the merge operations
* on the adjacent edges and returns a new vertex value.
- * @param scatterFunc Executed after the apply function the scatter function takes
+ * @param scatterFunc executed after the apply function. Takes
* a triplet and signals whether the neighboring vertex program
* must be recomputed.
- * @param startVertices predicate to determine which vertices to start the computation on.
- * these will be the active vertices in the first iteration.
- * @param numIter The maximum number of iterations to run.
- * @param gatherDirection The direction of edges to consider during the gather phase
- * @param scatterDirection The direction of edges to consider during the scatter phase
+ * @param startVertices a predicate to determine which vertices to start the computation on.
+ * These will be the active vertices in the first iteration.
+ * @param numIter the maximum number of iterations to run
+ * @param gatherDirection the direction of edges to consider during the gather phase
+ * @param scatterDirection the direction of edges to consider during the scatter phase
*
- * @tparam VD The graph vertex attribute type
- * @tparam ED The graph edge attribute type
- * @tparam A The type accumulated during the gather phase
+ * @tparam VD the graph vertex attribute type
+ * @tparam ED the graph edge attribute type
+ * @tparam A the type accumulated during the gather phase
* @return the resulting graph after the algorithm converges
*/
def apply[VD: ClassTag, ED: ClassTag, A: ClassTag]