Finish cleaning up Graph docs

author: Ankur Dave <ankurdave@gmail.com> 2014-01-10 12:17:04 -0800
committer: Ankur Dave <ankurdave@gmail.com> 2014-01-10 12:17:04 -0800
commit: 5fcd2a61b4f10d039336e2ca3716e99c0d96ded2 (patch)
tree: 217723eb3d24f88b2ba624160a0a4189b6bd821e
parent: 4c114a7556c30b186f27a3f7faa9dd7a20b223e1 (diff)
download: spark-5fcd2a61b4f10d039336e2ca3716e99c0d96ded2.tar.gz
spark-5fcd2a61b4f10d039336e2ca3716e99c0d96ded2.tar.bz2
spark-5fcd2a61b4f10d039336e2ca3716e99c0d96ded2.zip
1 files changed, 82 insertions, 98 deletions
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala b/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala
index aeeaddb320..a188de5537 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala
@@ -15,8 +15,8 @@ import org.apache.spark.storage.StorageLevel
  * RDDs, the graph is a functional data-structure in which mutating
  * operations return new graphs.
  *
- * @note [[org.apache.spark.graphx.GraphOps]] contains additional convenience operations.
- * [[org.apache.spark.graphx.algorithms.Algorithms]] contains graph algorithms; to access these,
+ * @note [[GraphOps]] contains additional convenience operations.
+ * [[algorithms.Algorithms]] contains graph algorithms; to access these,
  * import `org.apache.spark.graphx.algorithms._`.
  *
  * @tparam VD the vertex attribute type
@@ -25,32 +25,31 @@ import org.apache.spark.storage.StorageLevel
 abstract class Graph[VD: ClassTag, ED: ClassTag] {
 
   /**
-   * Get the vertices and their data.
+   * An RDD containing the vertices and their associated attributes.
    *
    * @note vertex ids are unique.
-   * @return An RDD containing the vertices in this graph
+   * @return an RDD containing the vertices in this graph
    */
   val vertices: VertexRDD[VD]
 
   /**
-   * Get the Edges and their data as an RDD.  The entries in the RDD
-   * contain just the source id and target id along with the edge
-   * data.
+   * An RDD containing the edges and their associated attributes.  The entries in the RDD contain
+   * just the source id and target id along with the edge data.
    *
-   * @return An RDD containing the edges in this graph
+   * @return an RDD containing the edges in this graph
    *
-   * @see [[org.apache.spark.graphx.Edge]] for the edge type.
-   * @see `triplets` to get an RDD which contains all the edges
+   * @see [[Edge]] for the edge type.
+   * @see [[triplets]] to get an RDD which contains all the edges
    * along with their vertex data.
    *
    */
   val edges: EdgeRDD[ED]
 
   /**
-   * Get the edges with the vertex data associated with the adjacent
-   * pair of vertices.
+   * An RDD containing the edge triplets, which are edges along with the vertex data associated with
+   * the adjacent vertices.
    *
-   * @return An RDD containing edge triplets.
+   * @return an RDD containing edge triplets
    *
    * @example This operation might be used to evaluate a graph
    * coloring where we would like to check that both vertices are a
@@ -68,7 +67,7 @@ abstract class Graph[VD: ClassTag, ED: ClassTag] {
   val triplets: RDD[EdgeTriplet[VD, ED]]
 
   /**
-   * Cache the vertices and edges associated with this graph.
+   * Caches the vertices and edges associated with this graph at the specified storage level.
    *
    * @param newLevel the level at which to cache the graph.
 
@@ -78,14 +77,14 @@ abstract class Graph[VD: ClassTag, ED: ClassTag] {
   def persist(newLevel: StorageLevel = StorageLevel.MEMORY_ONLY): Graph[VD, ED]
 
   /**
-   * Return a graph that is cached when first created. This is used to
+   * Caches the vertices and edges associated with this graph. This is used to
    * pin a graph in memory enabling multiple queries to reuse the same
    * construction process.
    */
   def cache(): Graph[VD, ED]
 
   /**
-   * Uncache only the vertices of this graph, leaving the edges alone. This is useful in iterative
+   * Uncaches only the vertices of this graph, leaving the edges alone. This is useful in iterative
    * algorithms that modify the vertex attributes but reuse the edges. This method can be used to
    * uncache the vertex attributes of previous iterations once they are no longer needed, improving
    * GC performance.
@@ -93,23 +92,22 @@ abstract class Graph[VD: ClassTag, ED: ClassTag] {
   def unpersistVertices(blocking: Boolean = true): Graph[VD, ED]
 
   /**
-   * Repartition the edges in the graph according to `partitionStrategy`.
+   * Repartitions the edges in the graph according to `partitionStrategy`.
    */
   def partitionBy(partitionStrategy: PartitionStrategy): Graph[VD, ED]
 
   /**
-   * Compute statistics describing the graph representation.
+   * Computes statistics describing the graph representation.
    */
   def statistics: Map[String, Any]
 
   /**
-   * Construct a new graph where each vertex value has been
-   * transformed by the map function.
+   * Transforms each vertex attribute in the graph using the map function.
    *
    * @note The new graph has the same structure.  As a consequence the underlying index structures
    * can be reused.
    *
-   * @param map the function from a vertex object to a new vertex value.
+   * @param map the function from a vertex object to a new vertex value
    *
    * @tparam VD2 the new vertex data type
    *
@@ -125,10 +123,9 @@ abstract class Graph[VD: ClassTag, ED: ClassTag] {
   def mapVertices[VD2: ClassTag](map: (VertexID, VD) => VD2): Graph[VD2, ED]
 
   /**
-   * Construct a new graph where the value of each edge is
-   * transformed by the map operation.  This function is not passed
-   * the vertex value for the vertices adjacent to the edge.  If
-   * vertex values are desired, use the `mapTriplets` method.
+   * Transforms each edge attribute in the graph using the map function.  The map function is not
+   * passed the vertex value for the vertices adjacent to the edge.  If vertex values are desired,
+   * use [[mapTriplets]].
    *
    * @note This graph is not changed and that the new graph has the
    * same structure.  As a consequence the underlying index structures
@@ -147,19 +144,19 @@ abstract class Graph[VD: ClassTag, ED: ClassTag] {
   }
 
   /**
-   * Construct a new graph transforming the value of each edge using the user defined iterator
-   * transform.  The iterator transform is given an iterator over edges within a logical partition
-   * as well as the partition's ID, and it should return a new iterator over the new values of each
-   * edge. The new iterator's elements must correspond one-to-one with the old iterator's
-   * elements. If adjacent vertex values are desired, use the `mapTriplets` method.
+   * Transforms each edge attribute using the map function, passing it a whole partition at a
+   * time. The map function is given an iterator over edges within a logical partition as well as
+   * the partition's ID, and it should return a new iterator over the new values of each edge. The
+   * new iterator's elements must correspond one-to-one with the old iterator's elements. If
+   * adjacent vertex values are desired, use [[mapTriplets]].
    *
    * @note This does not change the structure of the
    * graph or modify the values of this graph.  As a consequence
    * the underlying index structures can be reused.
    *
-   * @param map the function which takes a partition id and an iterator
-   * over all the edges in the partition and must return an iterator over
-   * the new values for each edge in the order of the input iterator.
+   * @param map a function that takes a partition id and an iterator
+   * over all the edges in the partition, and must return an iterator over
+   * the new values for each edge in the order of the input iterator
    *
    * @tparam ED2 the new edge data type
    *
@@ -168,11 +165,8 @@ abstract class Graph[VD: ClassTag, ED: ClassTag] {
       map: (PartitionID, Iterator[Edge[ED]]) => Iterator[ED2]): Graph[VD, ED2]
 
   /**
-   * Construct a new graph where the value of each edge is
-   * transformed by the map operation.  This function passes vertex
-   * values for the adjacent vertices to the map function.  If
-   * adjacent vertex values are not required, consider using the
-   * `mapEdges` method instead.
+   * Transforms each edge attribute using the map function, passing it the adjacent vertex attributes
+   * as well. If adjacent vertex values are not required, consider using [[mapEdges]] instead.
    *
    * @note This does not change the structure of the
    * graph or modify the values of this graph.  As a consequence
@@ -196,21 +190,17 @@ abstract class Graph[VD: ClassTag, ED: ClassTag] {
   }
 
   /**
-   * Construct a new graph transforming the value of each edge using
-   * the user defined iterator transform.  The iterator transform is
-   * given an iterator over edge triplets within a logical partition
-   * and should yield a new iterator over the new values of each edge
-   * in the order in which they are provided to the iterator transform
-   * If adjacent vertex values are not required, consider using the
-   * mapEdges function instead.
-   *
-   * @note This that this does not change the structure of the
+   * Transforms each edge attribute a partition at a time using the map function, passing it the
+   * adjacent vertex attributes as well. The map function is given an iterator over edge triplets
+   * within a logical partition and should yield a new iterator over the new values of each edge in
+   * the order in which they are provided.  If adjacent vertex values are not required, consider
+   * using [[mapEdges]] instead.
+   *
+   * @note This does not change the structure of the
    * graph or modify the values of this graph.  As a consequence
    * the underlying index structures can be reused.
    *
-   * @param map the function which takes a partition id and an iterator
-   * over all the edges in the partition and must return an iterator over
-   * the new values for each edge in the order of the input iterator.
+   * @param map the iterator transform
    *
    * @tparam ED2 the new edge data type
    *
@@ -220,86 +210,82 @@ abstract class Graph[VD: ClassTag, ED: ClassTag] {
     Graph[VD, ED2]
 
   /**
-   * Construct a new graph with all the edges reversed.  If this graph
-   * contains an edge from a to b then the returned graph contains an
-   * edge from b to a.
+   * Reverses all edges in the graph.  If this graph contains an edge from a to b then the returned
+   * graph contains an edge from b to a.
    */
   def reverse: Graph[VD, ED]
 
   /**
-   * This function takes a vertex and edge predicate and constructs
-   * the subgraph that consists of vertices and edges that satisfy the
-   * predict.  The resulting graph contains the vertices and edges
-   * that satisfy:
+   * Restricts the graph to only the vertices and edges satisfying the predicates. The resulting
+   * subgraph satisifies
    *
    * {{{
    * V' = {v : for all v in V where vpred(v)}
    * E' = {(u,v): for all (u,v) in E where epred((u,v)) && vpred(u) && vpred(v)}
    * }}}
    *
-   * @param epred the edge predicate which takes a triplet and
+   * @param epred the edge predicate, which takes a triplet and
    * evaluates to true if the edge is to remain in the subgraph.  Note
-   * that only edges in which both vertices satisfy the vertex
+   * that only edges where both vertices satisfy the vertex
    * predicate are considered.
    *
-   * @param vpred the vertex predicate which takes a vertex object and
+   * @param vpred the vertex predicate, which takes a vertex object and
    * evaluates to true if the vertex is to be included in the subgraph
    *
    * @return the subgraph containing only the vertices and edges that
-   * satisfy the predicates.
+   * satisfy the predicates
    */
   def subgraph(epred: EdgeTriplet[VD,ED] => Boolean = (x => true),
     vpred: (VertexID, VD) => Boolean = ((v,d) => true) ): Graph[VD, ED]
 
   /**
-   * Subgraph of this graph with only vertices and edges from the other graph.
+   * Restricts the graph to only the vertices and edges that are also in `other`, but keeps the
+   * attributes from this graph.
    * @param other the graph to project this graph onto
-   * @return a graph with vertices and edges that exists in both the current graph and other,
-   * with vertex and edge data from the current graph.
+   * @return a graph with vertices and edges that exist in both the current graph and `other`,
+   * with vertex and edge data from the current graph
    */
   def mask[VD2: ClassTag, ED2: ClassTag](other: Graph[VD2, ED2]): Graph[VD, ED]
 
   /**
-   * This function merges multiple edges between two vertices into a single Edge. For correct
-   * results, the graph must have been partitioned using partitionBy.
+   * Merges multiple edges between two vertices into a single edge. For correct results, the graph
+   * must have been partitioned using [[partitionBy]].
    *
    * @tparam ED2 the type of the resulting edge data after grouping.
    *
-   * @param f the user supplied commutative associative function to merge edge attributes for
+   * @param f the user-supplied commutative associative function to merge edge attributes for
    * duplicate edges.
    *
-   * @return Graph[VD,ED2] The resulting graph with a single Edge for each source, dest vertex pair.
+   * @return The resulting graph with a single edge for each (source, dest) vertex pair.
    */
   def groupEdges(merge: (ED, ED) => ED): Graph[VD,ED]
 
   /**
-   * The mapReduceTriplets function is used to compute statistics
-   * about the neighboring edges and vertices of each vertex.  The
-   * user supplied `mapFunc` function is invoked on each edge of the
-   * graph generating 0 or more "messages" to be "sent" to either
-   * vertex in the edge.  The `reduceFunc` is then used to combine the
-   * output of the map phase destined to each vertex.
+   * Computes statistics about the neighboring edges and vertices of each vertex.  The user supplied
+   * `mapFunc` function is invoked on each edge of the graph, generating 0 or more "messages" to be
+   * "sent" to either vertex in the edge.  The `reduceFunc` is then used to combine the output of
+   * the map phase destined to each vertex.
    *
    * @tparam A the type of "message" to be sent to each vertex
    *
    * @param mapFunc the user defined map function which returns 0 or
-   * more messages to neighboring vertices.
+   * more messages to neighboring vertices
    *
    * @param reduceFunc the user defined reduce function which should
    * be commutative and assosciative and is used to combine the output
-   * of the map phase.
+   * of the map phase
    *
-   * @param activeSet optionally, a set of "active" vertices and a direction of edges to consider
+   * @param activeSetOpt optionally, a set of "active" vertices and a direction of edges to consider
    * when running `mapFunc`. For example, if the direction is Out, `mapFunc` will only be run on
-   * edges originating from vertices in the active set. `activeSet` must have the same index as the
-   * graph's vertices.
+   * edges originating from vertices in the active set. The active set must have the same index as
+   * the graph's vertices.
    *
-   * @example We can use this function to compute the inDegree of each
+   * @example We can use this function to compute the in-degree of each
    * vertex
    * {{{
    * val rawGraph: Graph[(),()] = Graph.textFile("twittergraph")
    * val inDeg: RDD[(VertexID, Int)] =
-   *   mapReduceTriplets[Int](et => Array((et.dst.id, 1)), _ + _)
+   *   mapReduceTriplets[Int](et => Iterator((et.dst.id, 1)), _ + _)
    * }}}
    *
    * @note By expressing computation at the edge level we achieve
@@ -316,10 +302,9 @@ abstract class Graph[VD: ClassTag, ED: ClassTag] {
     : VertexRDD[A]
 
   /**
-   * Join the vertices with an RDD and then apply a function from the
-   * the vertex and RDD entry to a new vertex value and type.  The
-   * input table should contain at most one entry for each vertex.  If
-   * no entry is provided the map function is invoked passing none.
+   * Joins the vertices with entries in the `table` RDD and merges the results using `mapFunc`.  The
+   * input table should contain at most one entry for each vertex.  If no entry in `table` is
+   * provided for a particular vertex in the graph, the map function receives `None`.
    *
    * @tparam U the type of entry in the table of updates
    * @tparam VD2 the new vertex value type
@@ -331,12 +316,11 @@ abstract class Graph[VD: ClassTag, ED: ClassTag] {
    * values.  The map function is invoked for all vertices, even those
    * that do not have a corresponding entry in the table.
    *
-   * @example This function is used to update the vertices with new
-   * values based on external data.  For example we could add the out
-   * degree to each vertex record
+   * @example This function is used to update the vertices with new values based on external data.
+   * For example we could add the out-degree to each vertex record:
    *
    * {{{
-   * val rawGraph: Graph[(),()] = Graph.textFile("webgraph")
+   * val rawGraph: Graph[_, _] = Graph.textFile("webgraph")
    * val outDeg: RDD[(VertexID, Int)] = rawGraph.outDegrees()
    * val graph = rawGraph.outerJoinVertices(outDeg) {
    *   (vid, data, optDeg) => optDeg.getOrElse(0)
@@ -364,13 +348,13 @@ object Graph {
   /**
    * Construct a graph from a collection of edges encoded as vertex id pairs.
    *
-   * @param rawEdges a collection of edges in (src,dst) form.
+   * @param rawEdges a collection of edges in (src, dst) form
    * @param uniqueEdges if multiple identical edges are found they are combined and the edge
    * attribute is set to the sum.  Otherwise duplicate edges are treated as separate. To enable
-   * uniqueEdges, a [[PartitionStrategy]] must be provided.
+   * `uniqueEdges`, a [[PartitionStrategy]] must be provided.
    *
    * @return a graph with edge attributes containing either the count of duplicate edges or 1
-   * (if `uniqueEdges=None`) and vertex attributes containing the total degree of each vertex.
+   * (if `uniqueEdges` is `None`) and vertex attributes containing the total degree of each vertex.
    */
   def fromEdgeTuples[VD: ClassTag](
       rawEdges: RDD[(VertexID, VertexID)],
@@ -400,10 +384,10 @@ object Graph {
   }
 
   /**
-   * Construct a graph from a collection attributed vertices and
-   * edges.  Duplicate vertices are picked arbitrarily and
+   * Construct a graph from a collection of vertices and
+   * edges with attributes.  Duplicate vertices are picked arbitrarily and
    * vertices found in the edge collection but not in the input
-   * vertices are the default attribute.
+   * vertices are assigned the default attribute.
    *
    * @tparam VD the vertex attribute type
    * @tparam ED the edge attribute type
@@ -412,7 +396,7 @@ object Graph {
    * @param defaultVertexAttr the default vertex attribute to use for
    * vertices that are mentioned in edges but not in vertices
    * @param partitionStrategy the partition strategy to use when
-   * partitioning the edges.
+   * partitioning the edges
    */
   def apply[VD: ClassTag, ED: ClassTag](
       vertices: RDD[(VertexID, VD)],
@@ -422,10 +406,10 @@ object Graph {
   }
 
   /**
-   * The implicit graphToGraphOPs function extracts the GraphOps member from a graph.
+   * Implicitly extracts the [[GraphOps]] member from a graph.
    *
    * To improve modularity the Graph type only contains a small set of basic operations.  All the
-   * convenience operations are defined in the GraphOps class which may be shared across multiple
+   * convenience operations are defined in the [[GraphOps]] class which may be shared across multiple
    * graph implementations.
    */
   implicit def graphToGraphOps[VD: ClassTag, ED: ClassTag](g: Graph[VD, ED]) = g.ops
author	Ankur Dave <ankurdave@gmail.com>	2014-01-10 12:17:04 -0800
committer	Ankur Dave <ankurdave@gmail.com>	2014-01-10 12:17:04 -0800
commit	5fcd2a61b4f10d039336e2ca3716e99c0d96ded2 (patch)
tree	217723eb3d24f88b2ba624160a0a4189b6bd821e
parent	4c114a7556c30b186f27a3f7faa9dd7a20b223e1 (diff)
download	spark-5fcd2a61b4f10d039336e2ca3716e99c0d96ded2.tar.gz spark-5fcd2a61b4f10d039336e2ca3716e99c0d96ded2.tar.bz2 spark-5fcd2a61b4f10d039336e2ca3716e99c0d96ded2.zip