Move algorithms to GraphOps

author: Ankur Dave <ankurdave@gmail.com> 2014-01-12 21:47:16 -0800
committer: Ankur Dave <ankurdave@gmail.com> 2014-01-12 21:47:16 -0800
commit: d691e9f47ed9b43b422712047183142d01c5e8c2 (patch)
tree: 41dcc5954b14c08dc63714e7e3c62ff57a86f61d
parent: 20c509b805dbfd0ebb11d2d7bd53a4379249a86f (diff)
download: spark-d691e9f47ed9b43b422712047183142d01c5e8c2.tar.gz
spark-d691e9f47ed9b43b422712047183142d01c5e8c2.tar.bz2
spark-d691e9f47ed9b43b422712047183142d01c5e8c2.zip
5 files changed, 54 insertions, 87 deletions
diff --git a/docs/graphx-programming-guide.md b/docs/graphx-programming-guide.md
index 0e228d8f28..572afc101b 100644
--- a/docs/graphx-programming-guide.md
+++ b/docs/graphx-programming-guide.md
@@ -667,9 +667,7 @@ things to worry about.)
 # Graph Algorithms
 <a name="graph_algorithms"></a>
 
-GraphX includes a set of graph algorithms in to simplify analytics. The algorithms are contained in the `org.apache.spark.graphx.lib` package and can be accessed directly as methods on `Graph` via an implicit conversion to [`Algorithms`][Algorithms]. This section describes the algorithms and how they are used.
-
-[Algorithms]: api/graphx/index.html#org.apache.spark.graphx.lib.Algorithms
+GraphX includes a set of graph algorithms in to simplify analytics. The algorithms are contained in the `org.apache.spark.graphx.lib` package and can be accessed directly as methods on `Graph` via [`GraphOps`][GraphOps]. This section describes the algorithms and how they are used.
 
 ## PageRank
 <a name="pagerank"></a>
@@ -681,8 +679,6 @@ GraphX comes with static and dynamic implementations of PageRank as methods on t
 [PageRank]: api/graphx/index.html#org.apache.spark.graphx.lib.PageRank$
 
 {% highlight scala %}
-// Load the implicit conversion to Algorithms
-import org.apache.spark.graphx.lib._
 // Load the datasets into a graph
 val users = sc.textFile("graphx/data/users.txt").map { line =>
   val fields = line.split("\\s+")
@@ -710,8 +706,7 @@ The connected components algorithm labels each connected component of the graph
 [ConnectedComponents]: api/graphx/index.html#org.apache.spark.graphx.lib.ConnectedComponents$
 
 {% highlight scala %}
-// Load the implicit conversion and graph as in the PageRank example
-import org.apache.spark.graphx.lib._
+// Load the graph as in the PageRank example
 val users = ...
 val followers = ...
 val graph = Graph(users, followers)
@@ -733,8 +728,7 @@ A vertex is part of a triangle when it has two adjacent vertices with an edge be
 [Graph.partitionBy]: api/graphx/index.html#org.apache.spark.graphx.Graph@partitionBy(PartitionStrategy):Graph[VD,ED]
 
 {% highlight scala %}
-// Load the implicit conversion and graph as in the PageRank example
-import org.apache.spark.graphx.lib._
+// Load the graph as in the PageRank example
 val users = ...
 // Load the edges in canonical order and partition the graph for triangle count
 val graph = GraphLoader.edgeListFile(sc, "graphx/data/followers.txt", true).partitionBy(RandomVertexCut)
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala b/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala
index 56513cac20..7d4f0de3d6 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala
@@ -15,9 +15,7 @@ import org.apache.spark.storage.StorageLevel
  * RDDs, the graph is a functional data-structure in which mutating
  * operations return new graphs.
  *
- * @note [[GraphOps]] contains additional convenience operations.
- * [[lib.Algorithms]] contains graph algorithms; to access these,
- * import `org.apache.spark.graphx.lib._`.
+ * @note [[GraphOps]] contains additional convenience operations and graph algorithms.
  *
  * @tparam VD the vertex attribute type
  * @tparam ED the edge attribute type
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/GraphOps.scala b/graphx/src/main/scala/org/apache/spark/graphx/GraphOps.scala
index 4fdff29f5a..2b3b95e2ca 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/GraphOps.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/GraphOps.scala
@@ -2,9 +2,10 @@ package org.apache.spark.graphx
 
 import scala.reflect.ClassTag
 
-import org.apache.spark.rdd.RDD
 import org.apache.spark.SparkContext._
 import org.apache.spark.SparkException
+import org.apache.spark.graphx.lib._
+import org.apache.spark.rdd.RDD
 
 /**
  * Contains additional functionality for [[Graph]]. All operations are expressed in terms of the
@@ -298,4 +299,52 @@ class GraphOps[VD: ClassTag, ED: ClassTag](graph: Graph[VD, ED]) {
     Pregel(graph, initialMsg, maxIterations, activeDirection)(vprog, sendMsg, mergeMsg)
   }
 
+  /**
+   * Run a dynamic version of PageRank returning a graph with vertex attributes containing the
+   * PageRank and edge attributes containing the normalized edge weight.
+   *
+   * @see [[org.apache.spark.graphx.lib.PageRank]], method `runUntilConvergence`.
+   */
+  def pageRank(tol: Double, resetProb: Double = 0.15): Graph[Double, Double] = {
+    PageRank.runUntilConvergence(graph, tol, resetProb)
+  }
+
+  /**
+   * Run PageRank for a fixed number of iterations returning a graph with vertex attributes
+   * containing the PageRank and edge attributes the normalized edge weight.
+   *
+   * @see [[org.apache.spark.graphx.lib.PageRank]], method `run`.
+   */
+  def staticPageRank(numIter: Int, resetProb: Double = 0.15): Graph[Double, Double] = {
+    PageRank.run(graph, numIter, resetProb)
+  }
+
+  /**
+   * Compute the connected component membership of each vertex and return a graph with the vertex
+   * value containing the lowest vertex id in the connected component containing that vertex.
+   *
+   * @see [[org.apache.spark.graphx.lib.ConnectedComponents]]
+   */
+  def connectedComponents(): Graph[VertexID, ED] = {
+    ConnectedComponents.run(graph)
+  }
+
+  /**
+   * Compute the number of triangles passing through each vertex.
+   *
+   * @see [[org.apache.spark.graphx.lib.TriangleCount]]
+   */
+  def triangleCount(): Graph[Int, ED] = {
+    TriangleCount.run(graph)
+  }
+
+  /**
+   * Compute the strongly connected component (SCC) of each vertex and return a graph with the
+   * vertex value containing the lowest vertex id in the SCC containing that vertex.
+   *
+   * @see [[org.apache.spark.graphx.lib.StronglyConnectedComponents]]
+   */
+  def stronglyConnectedComponents(numIter: Int): Graph[VertexID, ED] = {
+    StronglyConnectedComponents.run(graph, numIter)
+  }
 } // end of GraphOps
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/lib/Algorithms.scala b/graphx/src/main/scala/org/apache/spark/graphx/lib/Algorithms.scala
deleted file mode 100644
index cbcd9c24a0..0000000000
--- a/graphx/src/main/scala/org/apache/spark/graphx/lib/Algorithms.scala
+++ /dev/null
@@ -1,66 +0,0 @@
-package org.apache.spark.graphx.lib
-
-import scala.reflect.ClassTag
-
-import org.apache.spark.graphx._
-
-/**
- * Provides graph algorithms directly on [[org.apache.spark.graphx.Graph]] via an implicit
- * conversion.
- * @example
- * {{{
- * import org.apache.spark.graph.lib._
- * val graph: Graph[_, _] = loadGraph()
- * graph.connectedComponents()
- * }}}
- */
-class Algorithms[VD: ClassTag, ED: ClassTag](self: Graph[VD, ED]) {
-  /**
-   * Run a dynamic version of PageRank returning a graph with vertex attributes containing the
-   * PageRank and edge attributes containing the normalized edge weight.
-   *
-   * @see [[org.apache.spark.graphx.lib.PageRank]], method `runUntilConvergence`.
-   */
-  def pageRank(tol: Double, resetProb: Double = 0.15): Graph[Double, Double] = {
-    PageRank.runUntilConvergence(self, tol, resetProb)
-  }
-
-  /**
-   * Run PageRank for a fixed number of iterations returning a graph with vertex attributes
-   * containing the PageRank and edge attributes the normalized edge weight.
-   *
-   * @see [[org.apache.spark.graphx.lib.PageRank]], method `run`.
-   */
-  def staticPageRank(numIter: Int, resetProb: Double = 0.15): Graph[Double, Double] = {
-    PageRank.run(self, numIter, resetProb)
-  }
-
-  /**
-   * Compute the connected component membership of each vertex and return a graph with the vertex
-   * value containing the lowest vertex id in the connected component containing that vertex.
-   *
-   * @see [[org.apache.spark.graphx.lib.ConnectedComponents]]
-   */
-  def connectedComponents(): Graph[VertexID, ED] = {
-    ConnectedComponents.run(self)
-  }
-
-  /**
-   * Compute the number of triangles passing through each vertex.
-   *
-   * @see [[org.apache.spark.graphx.lib.TriangleCount]]
-   */
-  def triangleCount(): Graph[Int, ED] = {
-    TriangleCount.run(self)
-  }
-
-  /**
-   * Compute the strongly connected component (SCC) of each vertex and return a graph with the
-   * vertex value containing the lowest vertex id in the SCC containing that vertex.
-   *
-   * @see [[org.apache.spark.graphx.lib.StronglyConnectedComponents]]
-   */
-  def stronglyConnectedComponents(numIter: Int): Graph[VertexID, ED] = {
-    StronglyConnectedComponents.run(self, numIter)
-  }
-}
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/lib/package.scala b/graphx/src/main/scala/org/apache/spark/graphx/lib/package.scala
deleted file mode 100644
index f6f2626c9d..0000000000
--- a/graphx/src/main/scala/org/apache/spark/graphx/lib/package.scala
+++ /dev/null
@@ -1,8 +0,0 @@
-package org.apache.spark.graphx
-
-import scala.reflect.ClassTag
-
-package object lib {
-  implicit def graphToAlgorithms[VD: ClassTag, ED: ClassTag](
-      graph: Graph[VD, ED]): Algorithms[VD, ED] = new Algorithms(graph)
-}
author	Ankur Dave <ankurdave@gmail.com>	2014-01-12 21:47:16 -0800
committer	Ankur Dave <ankurdave@gmail.com>	2014-01-12 21:47:16 -0800
commit	d691e9f47ed9b43b422712047183142d01c5e8c2 (patch)
tree	41dcc5954b14c08dc63714e7e3c62ff57a86f61d
parent	20c509b805dbfd0ebb11d2d7bd53a4379249a86f (diff)
download	spark-d691e9f47ed9b43b422712047183142d01c5e8c2.tar.gz spark-d691e9f47ed9b43b422712047183142d01c5e8c2.tar.bz2 spark-d691e9f47ed9b43b422712047183142d01c5e8c2.zip