aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAnkur Dave <ankurdave@gmail.com>2014-01-10 12:17:04 -0800
committerAnkur Dave <ankurdave@gmail.com>2014-01-10 12:17:04 -0800
commit5fcd2a61b4f10d039336e2ca3716e99c0d96ded2 (patch)
tree217723eb3d24f88b2ba624160a0a4189b6bd821e
parent4c114a7556c30b186f27a3f7faa9dd7a20b223e1 (diff)
downloadspark-5fcd2a61b4f10d039336e2ca3716e99c0d96ded2.tar.gz
spark-5fcd2a61b4f10d039336e2ca3716e99c0d96ded2.tar.bz2
spark-5fcd2a61b4f10d039336e2ca3716e99c0d96ded2.zip
Finish cleaning up Graph docs
-rw-r--r--graphx/src/main/scala/org/apache/spark/graphx/Graph.scala180
1 files changed, 82 insertions, 98 deletions
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala b/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala
index aeeaddb320..a188de5537 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala
@@ -15,8 +15,8 @@ import org.apache.spark.storage.StorageLevel
* RDDs, the graph is a functional data-structure in which mutating
* operations return new graphs.
*
- * @note [[org.apache.spark.graphx.GraphOps]] contains additional convenience operations.
- * [[org.apache.spark.graphx.algorithms.Algorithms]] contains graph algorithms; to access these,
+ * @note [[GraphOps]] contains additional convenience operations.
+ * [[algorithms.Algorithms]] contains graph algorithms; to access these,
* import `org.apache.spark.graphx.algorithms._`.
*
* @tparam VD the vertex attribute type
@@ -25,32 +25,31 @@ import org.apache.spark.storage.StorageLevel
abstract class Graph[VD: ClassTag, ED: ClassTag] {
/**
- * Get the vertices and their data.
+ * An RDD containing the vertices and their associated attributes.
*
* @note vertex ids are unique.
- * @return An RDD containing the vertices in this graph
+ * @return an RDD containing the vertices in this graph
*/
val vertices: VertexRDD[VD]
/**
- * Get the Edges and their data as an RDD. The entries in the RDD
- * contain just the source id and target id along with the edge
- * data.
+ * An RDD containing the edges and their associated attributes. The entries in the RDD contain
+ * just the source id and target id along with the edge data.
*
- * @return An RDD containing the edges in this graph
+ * @return an RDD containing the edges in this graph
*
- * @see [[org.apache.spark.graphx.Edge]] for the edge type.
- * @see `triplets` to get an RDD which contains all the edges
+ * @see [[Edge]] for the edge type.
+ * @see [[triplets]] to get an RDD which contains all the edges
* along with their vertex data.
*
*/
val edges: EdgeRDD[ED]
/**
- * Get the edges with the vertex data associated with the adjacent
- * pair of vertices.
+ * An RDD containing the edge triplets, which are edges along with the vertex data associated with
+ * the adjacent vertices.
*
- * @return An RDD containing edge triplets.
+ * @return an RDD containing edge triplets
*
* @example This operation might be used to evaluate a graph
* coloring where we would like to check that both vertices are a
@@ -68,7 +67,7 @@ abstract class Graph[VD: ClassTag, ED: ClassTag] {
val triplets: RDD[EdgeTriplet[VD, ED]]
/**
- * Cache the vertices and edges associated with this graph.
+ * Caches the vertices and edges associated with this graph at the specified storage level.
*
* @param newLevel the level at which to cache the graph.
@@ -78,14 +77,14 @@ abstract class Graph[VD: ClassTag, ED: ClassTag] {
def persist(newLevel: StorageLevel = StorageLevel.MEMORY_ONLY): Graph[VD, ED]
/**
- * Return a graph that is cached when first created. This is used to
+ * Caches the vertices and edges associated with this graph. This is used to
* pin a graph in memory enabling multiple queries to reuse the same
* construction process.
*/
def cache(): Graph[VD, ED]
/**
- * Uncache only the vertices of this graph, leaving the edges alone. This is useful in iterative
+ * Uncaches only the vertices of this graph, leaving the edges alone. This is useful in iterative
* algorithms that modify the vertex attributes but reuse the edges. This method can be used to
* uncache the vertex attributes of previous iterations once they are no longer needed, improving
* GC performance.
@@ -93,23 +92,22 @@ abstract class Graph[VD: ClassTag, ED: ClassTag] {
def unpersistVertices(blocking: Boolean = true): Graph[VD, ED]
/**
- * Repartition the edges in the graph according to `partitionStrategy`.
+ * Repartitions the edges in the graph according to `partitionStrategy`.
*/
def partitionBy(partitionStrategy: PartitionStrategy): Graph[VD, ED]
/**
- * Compute statistics describing the graph representation.
+ * Computes statistics describing the graph representation.
*/
def statistics: Map[String, Any]
/**
- * Construct a new graph where each vertex value has been
- * transformed by the map function.
+ * Transforms each vertex attribute in the graph using the map function.
*
* @note The new graph has the same structure. As a consequence the underlying index structures
* can be reused.
*
- * @param map the function from a vertex object to a new vertex value.
+ * @param map the function from a vertex object to a new vertex value
*
* @tparam VD2 the new vertex data type
*
@@ -125,10 +123,9 @@ abstract class Graph[VD: ClassTag, ED: ClassTag] {
def mapVertices[VD2: ClassTag](map: (VertexID, VD) => VD2): Graph[VD2, ED]
/**
- * Construct a new graph where the value of each edge is
- * transformed by the map operation. This function is not passed
- * the vertex value for the vertices adjacent to the edge. If
- * vertex values are desired, use the `mapTriplets` method.
+ * Transforms each edge attribute in the graph using the map function. The map function is not
+ * passed the vertex value for the vertices adjacent to the edge. If vertex values are desired,
+ * use [[mapTriplets]].
*
* @note This graph is not changed and that the new graph has the
* same structure. As a consequence the underlying index structures
@@ -147,19 +144,19 @@ abstract class Graph[VD: ClassTag, ED: ClassTag] {
}
/**
- * Construct a new graph transforming the value of each edge using the user defined iterator
- * transform. The iterator transform is given an iterator over edges within a logical partition
- * as well as the partition's ID, and it should return a new iterator over the new values of each
- * edge. The new iterator's elements must correspond one-to-one with the old iterator's
- * elements. If adjacent vertex values are desired, use the `mapTriplets` method.
+ * Transforms each edge attribute using the map function, passing it a whole partition at a
+ * time. The map function is given an iterator over edges within a logical partition as well as
+ * the partition's ID, and it should return a new iterator over the new values of each edge. The
+ * new iterator's elements must correspond one-to-one with the old iterator's elements. If
+ * adjacent vertex values are desired, use [[mapTriplets]].
*
* @note This does not change the structure of the
* graph or modify the values of this graph. As a consequence
* the underlying index structures can be reused.
*
- * @param map the function which takes a partition id and an iterator
- * over all the edges in the partition and must return an iterator over
- * the new values for each edge in the order of the input iterator.
+ * @param map a function that takes a partition id and an iterator
+ * over all the edges in the partition, and must return an iterator over
+ * the new values for each edge in the order of the input iterator
*
* @tparam ED2 the new edge data type
*
@@ -168,11 +165,8 @@ abstract class Graph[VD: ClassTag, ED: ClassTag] {
map: (PartitionID, Iterator[Edge[ED]]) => Iterator[ED2]): Graph[VD, ED2]
/**
- * Construct a new graph where the value of each edge is
- * transformed by the map operation. This function passes vertex
- * values for the adjacent vertices to the map function. If
- * adjacent vertex values are not required, consider using the
- * `mapEdges` method instead.
+ * Transforms each edge attribute using the map function, passing it the adjacent vertex attributes
+ * as well. If adjacent vertex values are not required, consider using [[mapEdges]] instead.
*
* @note This does not change the structure of the
* graph or modify the values of this graph. As a consequence
@@ -196,21 +190,17 @@ abstract class Graph[VD: ClassTag, ED: ClassTag] {
}
/**
- * Construct a new graph transforming the value of each edge using
- * the user defined iterator transform. The iterator transform is
- * given an iterator over edge triplets within a logical partition
- * and should yield a new iterator over the new values of each edge
- * in the order in which they are provided to the iterator transform
- * If adjacent vertex values are not required, consider using the
- * mapEdges function instead.
- *
- * @note This that this does not change the structure of the
+ * Transforms each edge attribute a partition at a time using the map function, passing it the
+ * adjacent vertex attributes as well. The map function is given an iterator over edge triplets
+ * within a logical partition and should yield a new iterator over the new values of each edge in
+ * the order in which they are provided. If adjacent vertex values are not required, consider
+ * using [[mapEdges]] instead.
+ *
+ * @note This does not change the structure of the
* graph or modify the values of this graph. As a consequence
* the underlying index structures can be reused.
*
- * @param map the function which takes a partition id and an iterator
- * over all the edges in the partition and must return an iterator over
- * the new values for each edge in the order of the input iterator.
+ * @param map the iterator transform
*
* @tparam ED2 the new edge data type
*
@@ -220,86 +210,82 @@ abstract class Graph[VD: ClassTag, ED: ClassTag] {
Graph[VD, ED2]
/**
- * Construct a new graph with all the edges reversed. If this graph
- * contains an edge from a to b then the returned graph contains an
- * edge from b to a.
+ * Reverses all edges in the graph. If this graph contains an edge from a to b then the returned
+ * graph contains an edge from b to a.
*/
def reverse: Graph[VD, ED]
/**
- * This function takes a vertex and edge predicate and constructs
- * the subgraph that consists of vertices and edges that satisfy the
- * predict. The resulting graph contains the vertices and edges
- * that satisfy:
+ * Restricts the graph to only the vertices and edges satisfying the predicates. The resulting
+ * subgraph satisifies
*
* {{{
* V' = {v : for all v in V where vpred(v)}
* E' = {(u,v): for all (u,v) in E where epred((u,v)) && vpred(u) && vpred(v)}
* }}}
*
- * @param epred the edge predicate which takes a triplet and
+ * @param epred the edge predicate, which takes a triplet and
* evaluates to true if the edge is to remain in the subgraph. Note
- * that only edges in which both vertices satisfy the vertex
+ * that only edges where both vertices satisfy the vertex
* predicate are considered.
*
- * @param vpred the vertex predicate which takes a vertex object and
+ * @param vpred the vertex predicate, which takes a vertex object and
* evaluates to true if the vertex is to be included in the subgraph
*
* @return the subgraph containing only the vertices and edges that
- * satisfy the predicates.
+ * satisfy the predicates
*/
def subgraph(epred: EdgeTriplet[VD,ED] => Boolean = (x => true),
vpred: (VertexID, VD) => Boolean = ((v,d) => true) ): Graph[VD, ED]
/**
- * Subgraph of this graph with only vertices and edges from the other graph.
+ * Restricts the graph to only the vertices and edges that are also in `other`, but keeps the
+ * attributes from this graph.
* @param other the graph to project this graph onto
- * @return a graph with vertices and edges that exists in both the current graph and other,
- * with vertex and edge data from the current graph.
+ * @return a graph with vertices and edges that exist in both the current graph and `other`,
+ * with vertex and edge data from the current graph
*/
def mask[VD2: ClassTag, ED2: ClassTag](other: Graph[VD2, ED2]): Graph[VD, ED]
/**
- * This function merges multiple edges between two vertices into a single Edge. For correct
- * results, the graph must have been partitioned using partitionBy.
+ * Merges multiple edges between two vertices into a single edge. For correct results, the graph
+ * must have been partitioned using [[partitionBy]].
*
* @tparam ED2 the type of the resulting edge data after grouping.
*
- * @param f the user supplied commutative associative function to merge edge attributes for
+ * @param f the user-supplied commutative associative function to merge edge attributes for
* duplicate edges.
*
- * @return Graph[VD,ED2] The resulting graph with a single Edge for each source, dest vertex pair.
+ * @return The resulting graph with a single edge for each (source, dest) vertex pair.
*/
def groupEdges(merge: (ED, ED) => ED): Graph[VD,ED]
/**
- * The mapReduceTriplets function is used to compute statistics
- * about the neighboring edges and vertices of each vertex. The
- * user supplied `mapFunc` function is invoked on each edge of the
- * graph generating 0 or more "messages" to be "sent" to either
- * vertex in the edge. The `reduceFunc` is then used to combine the
- * output of the map phase destined to each vertex.
+ * Computes statistics about the neighboring edges and vertices of each vertex. The user supplied
+ * `mapFunc` function is invoked on each edge of the graph, generating 0 or more "messages" to be
+ * "sent" to either vertex in the edge. The `reduceFunc` is then used to combine the output of
+ * the map phase destined to each vertex.
*
* @tparam A the type of "message" to be sent to each vertex
*
* @param mapFunc the user defined map function which returns 0 or
- * more messages to neighboring vertices.
+ * more messages to neighboring vertices
*
* @param reduceFunc the user defined reduce function which should
* be commutative and assosciative and is used to combine the output
- * of the map phase.
+ * of the map phase
*
- * @param activeSet optionally, a set of "active" vertices and a direction of edges to consider
+ * @param activeSetOpt optionally, a set of "active" vertices and a direction of edges to consider
* when running `mapFunc`. For example, if the direction is Out, `mapFunc` will only be run on
- * edges originating from vertices in the active set. `activeSet` must have the same index as the
- * graph's vertices.
+ * edges originating from vertices in the active set. The active set must have the same index as
+ * the graph's vertices.
*
- * @example We can use this function to compute the inDegree of each
+ * @example We can use this function to compute the in-degree of each
* vertex
* {{{
* val rawGraph: Graph[(),()] = Graph.textFile("twittergraph")
* val inDeg: RDD[(VertexID, Int)] =
- * mapReduceTriplets[Int](et => Array((et.dst.id, 1)), _ + _)
+ * mapReduceTriplets[Int](et => Iterator((et.dst.id, 1)), _ + _)
* }}}
*
* @note By expressing computation at the edge level we achieve
@@ -316,10 +302,9 @@ abstract class Graph[VD: ClassTag, ED: ClassTag] {
: VertexRDD[A]
/**
- * Join the vertices with an RDD and then apply a function from the
- * the vertex and RDD entry to a new vertex value and type. The
- * input table should contain at most one entry for each vertex. If
- * no entry is provided the map function is invoked passing none.
+ * Joins the vertices with entries in the `table` RDD and merges the results using `mapFunc`. The
+ * input table should contain at most one entry for each vertex. If no entry in `table` is
+ * provided for a particular vertex in the graph, the map function receives `None`.
*
* @tparam U the type of entry in the table of updates
* @tparam VD2 the new vertex value type
@@ -331,12 +316,11 @@ abstract class Graph[VD: ClassTag, ED: ClassTag] {
* values. The map function is invoked for all vertices, even those
* that do not have a corresponding entry in the table.
*
- * @example This function is used to update the vertices with new
- * values based on external data. For example we could add the out
- * degree to each vertex record
+ * @example This function is used to update the vertices with new values based on external data.
+ * For example we could add the out-degree to each vertex record:
*
* {{{
- * val rawGraph: Graph[(),()] = Graph.textFile("webgraph")
+ * val rawGraph: Graph[_, _] = Graph.textFile("webgraph")
* val outDeg: RDD[(VertexID, Int)] = rawGraph.outDegrees()
* val graph = rawGraph.outerJoinVertices(outDeg) {
* (vid, data, optDeg) => optDeg.getOrElse(0)
@@ -364,13 +348,13 @@ object Graph {
/**
* Construct a graph from a collection of edges encoded as vertex id pairs.
*
- * @param rawEdges a collection of edges in (src,dst) form.
+ * @param rawEdges a collection of edges in (src, dst) form
* @param uniqueEdges if multiple identical edges are found they are combined and the edge
* attribute is set to the sum. Otherwise duplicate edges are treated as separate. To enable
- * uniqueEdges, a [[PartitionStrategy]] must be provided.
+ * `uniqueEdges`, a [[PartitionStrategy]] must be provided.
*
* @return a graph with edge attributes containing either the count of duplicate edges or 1
- * (if `uniqueEdges=None`) and vertex attributes containing the total degree of each vertex.
+ * (if `uniqueEdges` is `None`) and vertex attributes containing the total degree of each vertex.
*/
def fromEdgeTuples[VD: ClassTag](
rawEdges: RDD[(VertexID, VertexID)],
@@ -400,10 +384,10 @@ object Graph {
}
/**
- * Construct a graph from a collection attributed vertices and
- * edges. Duplicate vertices are picked arbitrarily and
+ * Construct a graph from a collection of vertices and
+ * edges with attributes. Duplicate vertices are picked arbitrarily and
* vertices found in the edge collection but not in the input
- * vertices are the default attribute.
+ * vertices are assigned the default attribute.
*
* @tparam VD the vertex attribute type
* @tparam ED the edge attribute type
@@ -412,7 +396,7 @@ object Graph {
* @param defaultVertexAttr the default vertex attribute to use for
* vertices that are mentioned in edges but not in vertices
* @param partitionStrategy the partition strategy to use when
- * partitioning the edges.
+ * partitioning the edges
*/
def apply[VD: ClassTag, ED: ClassTag](
vertices: RDD[(VertexID, VD)],
@@ -422,10 +406,10 @@ object Graph {
}
/**
- * The implicit graphToGraphOPs function extracts the GraphOps member from a graph.
+ * Implicitly extracts the [[GraphOps]] member from a graph.
*
* To improve modularity the Graph type only contains a small set of basic operations. All the
- * convenience operations are defined in the GraphOps class which may be shared across multiple
+ * convenience operations are defined in the [[GraphOps]] class which may be shared across multiple
* graph implementations.
*/
implicit def graphToGraphOps[VD: ClassTag, ED: ClassTag](g: Graph[VD, ED]) = g.ops