aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKenji Kikushima <kikushima.kenji@lab.ntt.co.jp>2015-01-21 12:34:00 -0800
committerAnkur Dave <ankurdave@gmail.com>2015-01-21 12:38:09 -0800
commit37db20c9414d26ebd423e9500825bedc037b20f5 (patch)
tree563c3398c4c6a3ab174c2d090bc5363d87b4497c
parente90f6b5c6e27c60219aa0129b50985ebade048a0 (diff)
downloadspark-37db20c9414d26ebd423e9500825bedc037b20f5.tar.gz
spark-37db20c9414d26ebd423e9500825bedc037b20f5.tar.bz2
spark-37db20c9414d26ebd423e9500825bedc037b20f5.zip
[SPARK-5064][GraphX] Add numEdges upperbound validation for R-MAT graph generator to prevent infinite loop
I looked into GraphGenerators#chooseCell, and found that chooseCell can't generate more edges than pow(2, (2 * (log2(numVertices)-1))) to make a Power-law graph. (Ex. numVertices:4 upperbound:4, numVertices:8 upperbound:16, numVertices:16 upperbound:64) If we request more edges over the upperbound, rmatGraph fall into infinite loop. So, how about adding an argument validation? Author: Kenji Kikushima <kikushima.kenji@lab.ntt.co.jp> Closes #3950 from kj-ki/SPARK-5064 and squashes the following commits: 4ee18c7 [Ankur Dave] Reword error message and add unit test d760bc7 [Kenji Kikushima] Add numEdges upperbound validation for R-MAT graph generator to prevent infinite loop. (cherry picked from commit 3ee3ab592eee831d759c940eb68231817ad6d083) Signed-off-by: Ankur Dave <ankurdave@gmail.com>
-rw-r--r--graphx/src/main/scala/org/apache/spark/graphx/util/GraphGenerators.scala6
-rw-r--r--graphx/src/test/scala/org/apache/spark/graphx/util/GraphGeneratorsSuite.scala10
2 files changed, 16 insertions, 0 deletions
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/util/GraphGenerators.scala b/graphx/src/main/scala/org/apache/spark/graphx/util/GraphGenerators.scala
index 8a13c74221..2d6a825b61 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/util/GraphGenerators.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/util/GraphGenerators.scala
@@ -133,6 +133,12 @@ object GraphGenerators {
// This ensures that the 4 quadrants are the same size at all recursion levels
val numVertices = math.round(
math.pow(2.0, math.ceil(math.log(requestedNumVertices) / math.log(2.0)))).toInt
+ val numEdgesUpperBound =
+ math.pow(2.0, 2 * ((math.log(numVertices) / math.log(2.0)) - 1)).toInt
+ if (numEdgesUpperBound < numEdges) {
+ throw new IllegalArgumentException(
+ s"numEdges must be <= $numEdgesUpperBound but was $numEdges")
+ }
var edges: Set[Edge[Int]] = Set()
while (edges.size < numEdges) {
if (edges.size % 100 == 0) {
diff --git a/graphx/src/test/scala/org/apache/spark/graphx/util/GraphGeneratorsSuite.scala b/graphx/src/test/scala/org/apache/spark/graphx/util/GraphGeneratorsSuite.scala
index 3abefbe52f..8d9c8ddccb 100644
--- a/graphx/src/test/scala/org/apache/spark/graphx/util/GraphGeneratorsSuite.scala
+++ b/graphx/src/test/scala/org/apache/spark/graphx/util/GraphGeneratorsSuite.scala
@@ -110,4 +110,14 @@ class GraphGeneratorsSuite extends FunSuite with LocalSparkContext {
}
}
+ test("SPARK-5064 GraphGenerators.rmatGraph numEdges upper bound") {
+ withSpark { sc =>
+ val g1 = GraphGenerators.rmatGraph(sc, 4, 4)
+ assert(g1.edges.count() === 4)
+ intercept[IllegalArgumentException] {
+ val g2 = GraphGenerators.rmatGraph(sc, 4, 8)
+ }
+ }
+ }
+
}