diff options
author | Reynold Xin <rxin@apache.org> | 2014-01-13 18:31:49 -0800 |
---|---|---|
committer | Reynold Xin <rxin@apache.org> | 2014-01-13 18:31:49 -0800 |
commit | ae06d2c22ffb8af3c27c29bc55aadfb73b56e9ff (patch) | |
tree | 5bf0819ce56b4c18e5fdff0a64fe980704e5c031 | |
parent | 87f335db78221fc250bd64f39a334293db490379 (diff) | |
download | spark-ae06d2c22ffb8af3c27c29bc55aadfb73b56e9ff.tar.gz spark-ae06d2c22ffb8af3c27c29bc55aadfb73b56e9ff.tar.bz2 spark-ae06d2c22ffb8af3c27c29bc55aadfb73b56e9ff.zip |
Updated GraphGenerator.
-rw-r--r-- | graphx/src/main/scala/org/apache/spark/graphx/util/GraphGenerators.scala | 60 |
1 files changed, 30 insertions, 30 deletions
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/util/GraphGenerators.scala b/graphx/src/main/scala/org/apache/spark/graphx/util/GraphGenerators.scala index e0fd9b972c..dbea233c34 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/util/GraphGenerators.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/util/GraphGenerators.scala @@ -15,6 +15,7 @@ import org.apache.spark.graphx.Graph import org.apache.spark.graphx.Edge import org.apache.spark.graphx.impl.GraphImpl +/** A collection of graph generating functions. */ object GraphGenerators { val RMATa = 0.45 @@ -24,6 +25,9 @@ object GraphGenerators { // Right now it just generates a bunch of edges where // the edge data is the weight (default 1) + /** + * Generate a graph whose vertex out degree is log normal. + */ def logNormalGraph(sc: SparkContext, numVertices: Int): Graph[Int, Int] = { // based on Pregel settings val mu = 4 @@ -32,13 +36,12 @@ object GraphGenerators { val vertices: RDD[(VertexID, Int)] = sc.parallelize(0 until numVertices).map{ src => (src, sampleLogNormal(mu, sigma, numVertices)) } - val edges = vertices.flatMap{ - v => generateRandomEdges(v._1.toInt, v._2, numVertices) + val edges = vertices.flatMap { v => + generateRandomEdges(v._1.toInt, v._2, numVertices) } Graph(vertices, edges, 0) } - def generateRandomEdges(src: Int, numEdges: Int, maxVertexID: Int): Array[Edge[Int]] = { val rand = new Random() var dsts: Set[Int] = Set() @@ -48,10 +51,9 @@ object GraphGenerators { dsts += nextDst } } - dsts.map {dst => Edge[Int](src, dst, 1) }.toArray + dsts.map(dst => Edge[Int](src, dst, 1)).toArray } - /** * Randomly samples from a log normal distribution whose corresponding normal distribution has the * the given mean and standard deviation. It uses the formula `X = exp(m+s*Z)` where `m`, `s` are @@ -60,9 +62,9 @@ object GraphGenerators { * * @param mu the mean of the normal distribution * @param sigma the standard deviation of the normal distribution - * @param macVal exclusive upper bound on the value of the sample + * @param maxVal exclusive upper bound on the value of the sample */ - def sampleLogNormal(mu: Double, sigma: Double, maxVal: Int): Int = { + private def sampleLogNormal(mu: Double, sigma: Double, maxVal: Int): Int = { val rand = new Random() val m = math.exp(mu+(sigma*sigma)/2.0) val s = math.sqrt((math.exp(sigma*sigma) - 1) * math.exp(2*mu + sigma*sigma)) @@ -76,27 +78,29 @@ object GraphGenerators { math.round(X.toFloat) } - - + /** + * A random graph generator using the R-MAT model, proposed in + * "R-MAT: A Recursive Model for Graph Mining" by Chakrabarti et al. + * + * See [[http://www.cs.cmu.edu/~christos/PUBLICATIONS/siam04.pdf]]. + */ def rmatGraph(sc: SparkContext, requestedNumVertices: Int, numEdges: Int): Graph[Int, Int] = { // let N = requestedNumVertices // the number of vertices is 2^n where n=ceil(log2[N]) // This ensures that the 4 quadrants are the same size at all recursion levels - val numVertices = math.round(math.pow(2.0, math.ceil(math.log(requestedNumVertices)/math.log(2.0)))).toInt + val numVertices = math.round( + math.pow(2.0, math.ceil(math.log(requestedNumVertices) / math.log(2.0)))).toInt var edges: Set[Edge[Int]] = Set() while (edges.size < numEdges) { if (edges.size % 100 == 0) { println(edges.size + " edges") } edges += addEdge(numVertices) - } - val graph = outDegreeFromEdges(sc.parallelize(edges.toList)) - graph - + outDegreeFromEdges(sc.parallelize(edges.toList)) } - def outDegreeFromEdges[ED: ClassTag](edges: RDD[Edge[ED]]): Graph[Int, ED] = { + private def outDegreeFromEdges[ED: ClassTag](edges: RDD[Edge[ED]]): Graph[Int, ED] = { val vertices = edges.flatMap { edge => List((edge.srcId, 1)) } .reduceByKey(_ + _) .map{ case (vid, degree) => (vid, degree) } @@ -107,7 +111,7 @@ object GraphGenerators { * @param numVertices Specifies the total number of vertices in the graph (used to get * the dimensions of the adjacency matrix */ - def addEdge(numVertices: Int): Edge[Int] = { + private def addEdge(numVertices: Int): Edge[Int] = { //val (src, dst) = chooseCell(numVertices/2.0, numVertices/2.0, numVertices/2.0) val v = math.round(numVertices.toFloat/2.0).toInt @@ -115,7 +119,6 @@ object GraphGenerators { Edge[Int](src, dst, 1) } - /** * This method recursively subdivides the the adjacency matrix into quadrants * until it picks a single cell. The naming conventions in this paper match @@ -149,10 +152,10 @@ object GraphGenerators { * }}} */ @tailrec - def chooseCell(x: Int, y: Int, t: Int): (Int, Int) = { - if (t <= 1) - (x,y) - else { + private def chooseCell(x: Int, y: Int, t: Int): (Int, Int) = { + if (t <= 1) { + (x, y) + } else { val newT = math.round(t.toFloat/2.0).toInt pickQuadrant(RMATa, RMATb, RMATc, RMATd) match { case 0 => chooseCell(x, y, newT) @@ -164,22 +167,21 @@ object GraphGenerators { } // TODO(crankshaw) turn result into an enum (or case class for pattern matching} - def pickQuadrant(a: Double, b: Double, c: Double, d: Double): Int = { - if (a+b+c+d != 1.0) { - throw new IllegalArgumentException("R-MAT probability parameters sum to " + (a+b+c+d) + ", should sum to 1.0") + private def pickQuadrant(a: Double, b: Double, c: Double, d: Double): Int = { + if (a + b + c + d != 1.0) { + throw new IllegalArgumentException( + "R-MAT probability parameters sum to " + (a+b+c+d) + ", should sum to 1.0") } val rand = new Random() val result = rand.nextDouble() result match { case x if x < a => 0 // 0 corresponds to quadrant a - case x if (x >= a && x < a+b) => 1 // 1 corresponds to b - case x if (x >= a+b && x < a+b+c) => 2 // 2 corresponds to c + case x if (x >= a && x < a + b) => 1 // 1 corresponds to b + case x if (x >= a + b && x < a + b + c) => 2 // 2 corresponds to c case _ => 3 // 3 corresponds to d } } - - /** * Create `rows` by `cols` grid graph with each vertex connected to its * row+1 and col+1 neighbors. Vertex ids are assigned in row major @@ -220,6 +222,4 @@ object GraphGenerators { Graph.fromEdgeTuples(edges, 1) } // end of starGraph - - } // end of Graph Generators |