diff options
author | Ankur Dave <ankurdave@gmail.com> | 2014-01-12 21:47:16 -0800 |
---|---|---|
committer | Ankur Dave <ankurdave@gmail.com> | 2014-01-12 21:47:16 -0800 |
commit | d691e9f47ed9b43b422712047183142d01c5e8c2 (patch) | |
tree | 41dcc5954b14c08dc63714e7e3c62ff57a86f61d | |
parent | 20c509b805dbfd0ebb11d2d7bd53a4379249a86f (diff) | |
download | spark-d691e9f47ed9b43b422712047183142d01c5e8c2.tar.gz spark-d691e9f47ed9b43b422712047183142d01c5e8c2.tar.bz2 spark-d691e9f47ed9b43b422712047183142d01c5e8c2.zip |
Move algorithms to GraphOps
5 files changed, 54 insertions, 87 deletions
diff --git a/docs/graphx-programming-guide.md b/docs/graphx-programming-guide.md index 0e228d8f28..572afc101b 100644 --- a/docs/graphx-programming-guide.md +++ b/docs/graphx-programming-guide.md @@ -667,9 +667,7 @@ things to worry about.) # Graph Algorithms <a name="graph_algorithms"></a> -GraphX includes a set of graph algorithms in to simplify analytics. The algorithms are contained in the `org.apache.spark.graphx.lib` package and can be accessed directly as methods on `Graph` via an implicit conversion to [`Algorithms`][Algorithms]. This section describes the algorithms and how they are used. - -[Algorithms]: api/graphx/index.html#org.apache.spark.graphx.lib.Algorithms +GraphX includes a set of graph algorithms in to simplify analytics. The algorithms are contained in the `org.apache.spark.graphx.lib` package and can be accessed directly as methods on `Graph` via [`GraphOps`][GraphOps]. This section describes the algorithms and how they are used. ## PageRank <a name="pagerank"></a> @@ -681,8 +679,6 @@ GraphX comes with static and dynamic implementations of PageRank as methods on t [PageRank]: api/graphx/index.html#org.apache.spark.graphx.lib.PageRank$ {% highlight scala %} -// Load the implicit conversion to Algorithms -import org.apache.spark.graphx.lib._ // Load the datasets into a graph val users = sc.textFile("graphx/data/users.txt").map { line => val fields = line.split("\\s+") @@ -710,8 +706,7 @@ The connected components algorithm labels each connected component of the graph [ConnectedComponents]: api/graphx/index.html#org.apache.spark.graphx.lib.ConnectedComponents$ {% highlight scala %} -// Load the implicit conversion and graph as in the PageRank example -import org.apache.spark.graphx.lib._ +// Load the graph as in the PageRank example val users = ... val followers = ... val graph = Graph(users, followers) @@ -733,8 +728,7 @@ A vertex is part of a triangle when it has two adjacent vertices with an edge be [Graph.partitionBy]: api/graphx/index.html#org.apache.spark.graphx.Graph@partitionBy(PartitionStrategy):Graph[VD,ED] {% highlight scala %} -// Load the implicit conversion and graph as in the PageRank example -import org.apache.spark.graphx.lib._ +// Load the graph as in the PageRank example val users = ... // Load the edges in canonical order and partition the graph for triangle count val graph = GraphLoader.edgeListFile(sc, "graphx/data/followers.txt", true).partitionBy(RandomVertexCut) diff --git a/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala b/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala index 56513cac20..7d4f0de3d6 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala @@ -15,9 +15,7 @@ import org.apache.spark.storage.StorageLevel * RDDs, the graph is a functional data-structure in which mutating * operations return new graphs. * - * @note [[GraphOps]] contains additional convenience operations. - * [[lib.Algorithms]] contains graph algorithms; to access these, - * import `org.apache.spark.graphx.lib._`. + * @note [[GraphOps]] contains additional convenience operations and graph algorithms. * * @tparam VD the vertex attribute type * @tparam ED the edge attribute type diff --git a/graphx/src/main/scala/org/apache/spark/graphx/GraphOps.scala b/graphx/src/main/scala/org/apache/spark/graphx/GraphOps.scala index 4fdff29f5a..2b3b95e2ca 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/GraphOps.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/GraphOps.scala @@ -2,9 +2,10 @@ package org.apache.spark.graphx import scala.reflect.ClassTag -import org.apache.spark.rdd.RDD import org.apache.spark.SparkContext._ import org.apache.spark.SparkException +import org.apache.spark.graphx.lib._ +import org.apache.spark.rdd.RDD /** * Contains additional functionality for [[Graph]]. All operations are expressed in terms of the @@ -298,4 +299,52 @@ class GraphOps[VD: ClassTag, ED: ClassTag](graph: Graph[VD, ED]) { Pregel(graph, initialMsg, maxIterations, activeDirection)(vprog, sendMsg, mergeMsg) } + /** + * Run a dynamic version of PageRank returning a graph with vertex attributes containing the + * PageRank and edge attributes containing the normalized edge weight. + * + * @see [[org.apache.spark.graphx.lib.PageRank]], method `runUntilConvergence`. + */ + def pageRank(tol: Double, resetProb: Double = 0.15): Graph[Double, Double] = { + PageRank.runUntilConvergence(graph, tol, resetProb) + } + + /** + * Run PageRank for a fixed number of iterations returning a graph with vertex attributes + * containing the PageRank and edge attributes the normalized edge weight. + * + * @see [[org.apache.spark.graphx.lib.PageRank]], method `run`. + */ + def staticPageRank(numIter: Int, resetProb: Double = 0.15): Graph[Double, Double] = { + PageRank.run(graph, numIter, resetProb) + } + + /** + * Compute the connected component membership of each vertex and return a graph with the vertex + * value containing the lowest vertex id in the connected component containing that vertex. + * + * @see [[org.apache.spark.graphx.lib.ConnectedComponents]] + */ + def connectedComponents(): Graph[VertexID, ED] = { + ConnectedComponents.run(graph) + } + + /** + * Compute the number of triangles passing through each vertex. + * + * @see [[org.apache.spark.graphx.lib.TriangleCount]] + */ + def triangleCount(): Graph[Int, ED] = { + TriangleCount.run(graph) + } + + /** + * Compute the strongly connected component (SCC) of each vertex and return a graph with the + * vertex value containing the lowest vertex id in the SCC containing that vertex. + * + * @see [[org.apache.spark.graphx.lib.StronglyConnectedComponents]] + */ + def stronglyConnectedComponents(numIter: Int): Graph[VertexID, ED] = { + StronglyConnectedComponents.run(graph, numIter) + } } // end of GraphOps diff --git a/graphx/src/main/scala/org/apache/spark/graphx/lib/Algorithms.scala b/graphx/src/main/scala/org/apache/spark/graphx/lib/Algorithms.scala deleted file mode 100644 index cbcd9c24a0..0000000000 --- a/graphx/src/main/scala/org/apache/spark/graphx/lib/Algorithms.scala +++ /dev/null @@ -1,66 +0,0 @@ -package org.apache.spark.graphx.lib - -import scala.reflect.ClassTag - -import org.apache.spark.graphx._ - -/** - * Provides graph algorithms directly on [[org.apache.spark.graphx.Graph]] via an implicit - * conversion. - * @example - * {{{ - * import org.apache.spark.graph.lib._ - * val graph: Graph[_, _] = loadGraph() - * graph.connectedComponents() - * }}} - */ -class Algorithms[VD: ClassTag, ED: ClassTag](self: Graph[VD, ED]) { - /** - * Run a dynamic version of PageRank returning a graph with vertex attributes containing the - * PageRank and edge attributes containing the normalized edge weight. - * - * @see [[org.apache.spark.graphx.lib.PageRank]], method `runUntilConvergence`. - */ - def pageRank(tol: Double, resetProb: Double = 0.15): Graph[Double, Double] = { - PageRank.runUntilConvergence(self, tol, resetProb) - } - - /** - * Run PageRank for a fixed number of iterations returning a graph with vertex attributes - * containing the PageRank and edge attributes the normalized edge weight. - * - * @see [[org.apache.spark.graphx.lib.PageRank]], method `run`. - */ - def staticPageRank(numIter: Int, resetProb: Double = 0.15): Graph[Double, Double] = { - PageRank.run(self, numIter, resetProb) - } - - /** - * Compute the connected component membership of each vertex and return a graph with the vertex - * value containing the lowest vertex id in the connected component containing that vertex. - * - * @see [[org.apache.spark.graphx.lib.ConnectedComponents]] - */ - def connectedComponents(): Graph[VertexID, ED] = { - ConnectedComponents.run(self) - } - - /** - * Compute the number of triangles passing through each vertex. - * - * @see [[org.apache.spark.graphx.lib.TriangleCount]] - */ - def triangleCount(): Graph[Int, ED] = { - TriangleCount.run(self) - } - - /** - * Compute the strongly connected component (SCC) of each vertex and return a graph with the - * vertex value containing the lowest vertex id in the SCC containing that vertex. - * - * @see [[org.apache.spark.graphx.lib.StronglyConnectedComponents]] - */ - def stronglyConnectedComponents(numIter: Int): Graph[VertexID, ED] = { - StronglyConnectedComponents.run(self, numIter) - } -} diff --git a/graphx/src/main/scala/org/apache/spark/graphx/lib/package.scala b/graphx/src/main/scala/org/apache/spark/graphx/lib/package.scala deleted file mode 100644 index f6f2626c9d..0000000000 --- a/graphx/src/main/scala/org/apache/spark/graphx/lib/package.scala +++ /dev/null @@ -1,8 +0,0 @@ -package org.apache.spark.graphx - -import scala.reflect.ClassTag - -package object lib { - implicit def graphToAlgorithms[VD: ClassTag, ED: ClassTag]( - graph: Graph[VD, ED]): Algorithms[VD, ED] = new Algorithms(graph) -} |