aboutsummaryrefslogtreecommitdiff
path: root/graphx
diff options
context:
space:
mode:
authorReynold Xin <rxin@apache.org>2014-01-13 18:51:04 -0800
committerReynold Xin <rxin@apache.org>2014-01-13 18:51:04 -0800
commit0b18bfba1aba60c2a1f576f10d9ab2fa316ebfa0 (patch)
treeabab357d595dd85493b8366410983fb7459be4e3 /graphx
parent9317286b72ec8bb065b0422c344c267cc49189e3 (diff)
downloadspark-0b18bfba1aba60c2a1f576f10d9ab2fa316ebfa0.tar.gz
spark-0b18bfba1aba60c2a1f576f10d9ab2fa316ebfa0.tar.bz2
spark-0b18bfba1aba60c2a1f576f10d9ab2fa316ebfa0.zip
Updated doc for PageRank.
Diffstat (limited to 'graphx')
-rw-r--r--graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala86
1 files changed, 39 insertions, 47 deletions
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala b/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala
index b2056699aa..08256dccb2 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala
@@ -5,7 +5,42 @@ import scala.reflect.ClassTag
import org.apache.spark.Logging
import org.apache.spark.graphx._
-/** PageRank algorithm implementation. */
+/**
+ * PageRank algorithm implementation. There are two implementations of PageRank implemented.
+ *
+ * The first implementation uses the [[Pregel]] interface and runs PageRank for a fixed number
+ * of iterations:
+ * {{{
+ * var PR = Array.fill(n)( 1.0 )
+ * val oldPR = Array.fill(n)( 1.0 )
+ * for( iter <- 0 until numIter ) {
+ * swap(oldPR, PR)
+ * for( i <- 0 until n ) {
+ * PR[i] = alpha + (1 - alpha) * inNbrs[i].map(j => oldPR[j] / outDeg[j]).sum
+ * }
+ * }
+ * }}}
+ *
+ * The second implementation uses the standalone [[Graph]] interface and runs PageRank until
+ * convergence:
+ *
+ * {{{
+ * var PR = Array.fill(n)( 1.0 )
+ * val oldPR = Array.fill(n)( 0.0 )
+ * while( max(abs(PR - oldPr)) > tol ) {
+ * swap(oldPR, PR)
+ * for( i <- 0 until n if abs(PR[i] - oldPR[i]) > tol ) {
+ * PR[i] = alpha + (1 - \alpha) * inNbrs[i].map(j => oldPR[j] / outDeg[j]).sum
+ * }
+ * }
+ * }}}
+ *
+ * `alpha` is the random reset probability (typically 0.15), `inNbrs[i]` is the set of
+ * neighbors whick link to `i` and `outDeg[j]` is the out degree of vertex `j`.
+ *
+ * Note that this is not the "normalized" PageRank and as a consequence pages that have no
+ * inlinks will have a PageRank of alpha.
+ */
object PageRank extends Logging {
/**
@@ -13,26 +48,6 @@ object PageRank extends Logging {
* with vertex attributes containing the PageRank and edge
* attributes the normalized edge weight.
*
- * The following PageRank fixed point is computed for each vertex.
- *
- * {{{
- * var PR = Array.fill(n)( 1.0 )
- * val oldPR = Array.fill(n)( 1.0 )
- * for( iter <- 0 until numIter ) {
- * swap(oldPR, PR)
- * for( i <- 0 until n ) {
- * PR[i] = alpha + (1 - alpha) * inNbrs[i].map(j => oldPR[j] / outDeg[j]).sum
- * }
- * }
- * }}}
- *
- * where `alpha` is the random reset probability (typically 0.15),
- * `inNbrs[i]` is the set of neighbors whick link to `i` and
- * `outDeg[j]` is the out degree of vertex `j`.
- *
- * Note that this is not the "normalized" PageRank and as a consequence pages that have no
- * inlinks will have a PageRank of alpha.
- *
* @tparam VD the original vertex attribute (not used)
* @tparam ED the original edge attribute (not used)
*
@@ -47,16 +62,11 @@ object PageRank extends Logging {
def run[VD: ClassTag, ED: ClassTag](
graph: Graph[VD, ED], numIter: Int, resetProb: Double = 0.15): Graph[Double, Double] =
{
-
- /**
- * Initialize the pagerankGraph with each edge attribute having
- * weight 1/outDegree and each vertex with attribute 1.0.
- */
+ // Initialize the pagerankGraph with each edge attribute having
+ // weight 1/outDegree and each vertex with attribute 1.0.
val pagerankGraph: Graph[Double, Double] = graph
// Associate the degree with each vertex
- .outerJoinVertices(graph.outDegrees){
- (vid, vdata, deg) => deg.getOrElse(0)
- }
+ .outerJoinVertices(graph.outDegrees) { (vid, vdata, deg) => deg.getOrElse(0) }
// Set the weight on the edges based on the degree
.mapTriplets( e => 1.0 / e.srcAttr )
// Set the vertex attributes to the initial pagerank values
@@ -85,23 +95,6 @@ object PageRank extends Logging {
* Run a dynamic version of PageRank returning a graph with vertex attributes containing the
* PageRank and edge attributes containing the normalized edge weight.
*
- * {{{
- * var PR = Array.fill(n)( 1.0 )
- * val oldPR = Array.fill(n)( 0.0 )
- * while( max(abs(PR - oldPr)) > tol ) {
- * swap(oldPR, PR)
- * for( i <- 0 until n if abs(PR[i] - oldPR[i]) > tol ) {
- * PR[i] = alpha + (1 - \alpha) * inNbrs[i].map(j => oldPR[j] / outDeg[j]).sum
- * }
- * }
- * }}}
- *
- * where `alpha` is the random reset probability (typically 0.15), `inNbrs[i]` is the set of
- * neighbors whick link to `i` and `outDeg[j]` is the out degree of vertex `j`.
- *
- * Note that this is not the "normalized" PageRank and as a consequence pages that have no
- * inlinks will have a PageRank of alpha.
- *
* @tparam VD the original vertex attribute (not used)
* @tparam ED the original edge attribute (not used)
*
@@ -157,5 +150,4 @@ object PageRank extends Logging {
vertexProgram, sendMessage, messageCombiner)
.mapVertices((vid, attr) => attr._1)
} // end of deltaPageRank
-
}