aboutsummaryrefslogtreecommitdiff
path: root/graphx/src
diff options
context:
space:
mode:
authorJoseph E. Gonzalez <joseph.e.gonzalez@gmail.com>2014-06-03 14:14:48 -0700
committerAnkur Dave <ankurdave@gmail.com>2014-06-03 14:14:48 -0700
commit894ecde04faa7e2054a40825a58b2e9cdaa93c70 (patch)
treecce968cd69ec52aaeb37a8930809d3757a3f975f /graphx/src
parentaa41a522d821c989c65fa3f7f2a4d372e39bb958 (diff)
downloadspark-894ecde04faa7e2054a40825a58b2e9cdaa93c70.tar.gz
spark-894ecde04faa7e2054a40825a58b2e9cdaa93c70.tar.bz2
spark-894ecde04faa7e2054a40825a58b2e9cdaa93c70.zip
Synthetic GraphX Benchmark
This PR accomplishes two things: 1. It introduces a Synthetic Benchmark application that generates an arbitrarily large log-normal graph and executes either PageRank or connected components on the graph. This can be used to profile GraphX system on arbitrary clusters without access to large graph datasets 2. This PR improves the implementation of the log-normal graph generator. Author: Joseph E. Gonzalez <joseph.e.gonzalez@gmail.com> Author: Ankur Dave <ankurdave@gmail.com> Closes #720 from jegonzal/graphx_synth_benchmark and squashes the following commits: e40812a [Ankur Dave] Exclude all of GraphX from compatibility checks vs. 1.0.0 bccccad [Ankur Dave] Fix long lines 374678a [Ankur Dave] Bugfix and style changes 1bdf39a [Joseph E. Gonzalez] updating options d943972 [Joseph E. Gonzalez] moving the benchmark application into the examples folder. f4f839a [Joseph E. Gonzalez] Creating a synthetic benchmark script.
Diffstat (limited to 'graphx/src')
-rw-r--r--graphx/src/main/scala/org/apache/spark/graphx/PartitionStrategy.scala9
-rw-r--r--graphx/src/main/scala/org/apache/spark/graphx/util/GraphGenerators.scala41
2 files changed, 41 insertions, 9 deletions
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/PartitionStrategy.scala b/graphx/src/main/scala/org/apache/spark/graphx/PartitionStrategy.scala
index 1526ccef06..ef412cfd4e 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/PartitionStrategy.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/PartitionStrategy.scala
@@ -119,4 +119,13 @@ object PartitionStrategy {
math.abs((lower, higher).hashCode()) % numParts
}
}
+
+ /** Returns the PartitionStrategy with the specified name. */
+ def fromString(s: String): PartitionStrategy = s match {
+ case "RandomVertexCut" => RandomVertexCut
+ case "EdgePartition1D" => EdgePartition1D
+ case "EdgePartition2D" => EdgePartition2D
+ case "CanonicalRandomVertexCut" => CanonicalRandomVertexCut
+ case _ => throw new IllegalArgumentException("Invalid PartitionStrategy: " + s)
+ }
}
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/util/GraphGenerators.scala b/graphx/src/main/scala/org/apache/spark/graphx/util/GraphGenerators.scala
index a3c8de3f90..635514f09e 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/util/GraphGenerators.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/util/GraphGenerators.scala
@@ -38,19 +38,42 @@ object GraphGenerators {
val RMATa = 0.45
val RMATb = 0.15
val RMATd = 0.25
+
/**
* Generate a graph whose vertex out degree is log normal.
+ *
+ * The default values for mu and sigma are taken from the Pregel paper:
+ *
+ * Grzegorz Malewicz, Matthew H. Austern, Aart J.C Bik, James C. Dehnert,
+ * Ilan Horn, Naty Leiser, and Grzegorz Czajkowski. 2010.
+ * Pregel: a system for large-scale graph processing. SIGMOD '10.
+ *
+ * @param sc
+ * @param numVertices
+ * @param mu
+ * @param sigma
+ * @return
*/
- def logNormalGraph(sc: SparkContext, numVertices: Int): Graph[Int, Int] = {
- // based on Pregel settings
- val mu = 4
- val sigma = 1.3
-
- val vertices: RDD[(VertexId, Int)] = sc.parallelize(0 until numVertices).map{
- src => (src, sampleLogNormal(mu, sigma, numVertices))
+ def logNormalGraph(sc: SparkContext, numVertices: Int, numEParts: Int,
+ mu: Double = 4.0, sigma: Double = 1.3): Graph[Long, Int] = {
+ val vertices = sc.parallelize(0 until numVertices, numEParts).map { src =>
+ // Initialize the random number generator with the source vertex id
+ val rand = new Random(src)
+ val degree = math.min(numVertices.toLong, math.exp(rand.nextGaussian() * sigma + mu).toLong)
+ (src.toLong, degree)
}
- val edges = vertices.flatMap { v =>
- generateRandomEdges(v._1.toInt, v._2, numVertices)
+ val edges = vertices.flatMap { case (src, degree) =>
+ new Iterator[Edge[Int]] {
+ // Initialize the random number generator with the source vertex id
+ val rand = new Random(src)
+ var i = 0
+ override def hasNext(): Boolean = { i < degree }
+ override def next(): Edge[Int] = {
+ val nextEdge = Edge[Int](src, rand.nextInt(numVertices), i)
+ i += 1
+ nextEdge
+ }
+ }
}
Graph(vertices, edges, 0)
}