aboutsummaryrefslogtreecommitdiff
path: root/graphx/src/test
diff options
context:
space:
mode:
author RJ Nowling <rnowling@gmail.com> 2014-09-03 14:15:22 -0700
committer Ankur Dave <ankurdave@gmail.com> 2014-09-03 14:16:06 -0700
commit e5d376801d57dffb0791980a1786a0a9b45bc491 (patch)
tree afc3ab3a0fdbfd3e076d0ace02e6a2bd4f5ce0ff /graphx/src/test
parent 6481d27425f6d42ead36663c9a4ef7ee13b3a8c9 (diff)
download spark-e5d376801d57dffb0791980a1786a0a9b45bc491.tar.gz
spark-e5d376801d57dffb0791980a1786a0a9b45bc491.tar.bz2
spark-e5d376801d57dffb0791980a1786a0a9b45bc491.zip
[SPARK-3263][GraphX] Fix changes made to GraphGenerators.logNormalGraph in PR #720
PR #720 made multiple changes to GraphGenerators.logNormalGraph, including:

* Replacing the calls to the functions for generating random vertices and edges with in-line implementations that use different equations. Based on reading the Pregel paper, I believe the in-line implementations are incorrect.
* Hard-coding the RNG seeds, so that the method now generates the same graph for a given number of vertices, edges, mu, and sigma -- the user is not able to override the seed or specify that the seed should be randomly generated.
* Making a backwards-incompatible change to the logNormalGraph signature by introducing a new required parameter.
* Failing to update the scala docs and programming guide for the API changes.
* Adding a Synthetic Benchmark in the examples.

This PR:

* Removes the in-line implementations and calls the original vertex / edge generation functions again
* Adds an optional seed parameter for deterministic behavior (when desired)
* Keeps the number of partitions parameter that was added
* Keeps compatibility with the synthetic benchmark example
* Maintains a backwards-compatible API

Author: RJ Nowling <rnowling@gmail.com>
Author: Ankur Dave <ankurdave@gmail.com>

Closes #2168 from rnowling/graphgenrand and squashes the following commits:

f1cd79f [Ankur Dave] Style fixes
e11918e [RJ Nowling] Fix bad comparisons in unit tests
785ac70 [RJ Nowling] Fix style error
c70868d [RJ Nowling] Fix logNormalGraph scala doc for seed
41fd1f8 [RJ Nowling] Fix logNormalGraph scala doc for seed
799f002 [RJ Nowling] Added test for different seeds for sampleLogNormal
43949ad [RJ Nowling] Added test for different seeds for generateRandomEdges
2faf75f [RJ Nowling] Added unit test for logNormalGraph
82f22397 [RJ Nowling] Add unit test for sampleLogNormal
b99cba9 [RJ Nowling] Make sampleLogNormal private to Spark (vs private) for unit testing
6803da1 [RJ Nowling] Add GraphGeneratorsSuite with test for generateRandomEdges
1c8fc44 [RJ Nowling] Connected components part of SynthBenchmark was failing to call count on RDD before printing
dfbb6dd [RJ Nowling] Fix parameter name in SynthBenchmark docs
b5eeb80 [RJ Nowling] Add optional seed parameter to SynthBenchmark and set default to randomly generate a seed
1ff8d30 [RJ Nowling] Fix bug in generateRandomEdges where numVertices instead of numEdges was used to control number of edges to generate
98bb73c [RJ Nowling] Add documentation for logNormalGraph parameters
d40141a [RJ Nowling] Fix style error
684804d [RJ Nowling] revert PR #720 which introduce errors in logNormalGraph and messed up seeding of RNGs. Add user-defined optional seed for deterministic behavior
c183136 [RJ Nowling] Fix to deterministic GraphGenerators.logNormalGraph that allows generating graphs randomly using optional seed.
015010c [RJ Nowling] Fixed GraphGenerator logNormalGraph API to make backward-incompatible change in commit 894ecde04
Diffstat (limited to 'graphx/src/test')
-rw-r--r-- graphx/src/test/scala/org/apache/spark/graphx/util/GraphGeneratorsSuite.scala 110
1 files changed, 110 insertions, 0 deletions
diff --git a/graphx/src/test/scala/org/apache/spark/graphx/util/GraphGeneratorsSuite.scala b/graphx/src/test/scala/org/apache/spark/graphx/util/GraphGeneratorsSuite.scala
new file mode 100644
index 0000000000..b346d4db2e
--- /dev/null
+++ b/graphx/src/test/scala/org/apache/spark/graphx/util/GraphGeneratorsSuite.scala
@@ -0,0 +1,110 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.graphx.util
+
+import org.scalatest.FunSuite
+
+import org.apache.spark.graphx.LocalSparkContext
+
+/**
+ * Unit tests for the random-graph helpers in `GraphGenerators`: random edge generation,
+ * log-normal sampling and log-normal graph construction, each without and with a seed.
+ */
+class GraphGeneratorsSuite extends FunSuite with LocalSparkContext {
+
+  test("GraphGenerators.generateRandomEdges") {
+    val src = 5
+    val numEdges10 = 10
+    val numEdges20 = 20
+    val maxVertexId = 100
+
+    // Unseeded call: check edge count, source vertex, edge attribute and destination range.
+    val edges10 = GraphGenerators.generateRandomEdges(src, numEdges10, maxVertexId)
+    assert(edges10.length == numEdges10)
+
+    assert(edges10.forall(e => e.srcId == src))
+
+    assert(edges10.forall(e => e.attr == 1))
+
+    // NOTE(review): the upper bound is inclusive; confirm whether maxVertexId itself is valid.
+    assert(edges10.forall(e => e.dstId >= 0 && e.dstId <= maxVertexId))
+
+    val edges20 = GraphGenerators.generateRandomEdges(src, numEdges20, maxVertexId)
+    assert(edges20.length == numEdges20)
+
+    // Seeded edges as (srcId, dstId, attr) triples. Comparing whole sequences checks the
+    // lengths as well, whereas `zip` silently truncates to the shorter array.
+    def seededTriples(seed: Int) =
+      GraphGenerators.generateRandomEdges(src, numEdges10, maxVertexId, seed = seed)
+        .map(e => (e.srcId, e.dstId, e.attr)).toSeq
+
+    // The same seed must reproduce the edges exactly; a different seed must not.
+    val round1 = seededTriples(12345)
+    val round2 = seededTriples(12345)
+    assert(round1.length == numEdges10)
+    assert(round1 == round2)
+
+    val round3 = seededTriples(3467)
+    assert(round1 != round3)
+  }
+
+  test("GraphGenerators.sampleLogNormal") {
+    val mu = 4.0
+    val sigma = 1.3
+    val maxVal = 100
+
+    val unseeded = GraphGenerators.sampleLogNormal(mu, sigma, maxVal)
+    assert(unseeded < maxVal)
+
+    // The same seed must give the same sample; a different seed must give another one.
+    val sample1 = GraphGenerators.sampleLogNormal(mu, sigma, maxVal, 12345)
+    val sample2 = GraphGenerators.sampleLogNormal(mu, sigma, maxVal, 12345)
+    assert(sample1 == sample2)
+
+    val sample3 = GraphGenerators.sampleLogNormal(mu, sigma, maxVal, 789)
+    assert(sample1 != sample3)
+  }
+
+  test("GraphGenerators.logNormalGraph") {
+    withSpark { sc =>
+      val mu = 4.0
+      val sigma = 1.3
+      val numVertices100 = 100
+
+      val graph = GraphGenerators.logNormalGraph(sc, numVertices100, mu = mu, sigma = sigma)
+      assert(graph.vertices.count() == numVertices100)
+
+      // Edges of a seeded graph as (srcId, dstId, attr) triples, so that whole sequences
+      // (including their lengths) can be compared instead of a truncating `zip`.
+      def seededTriples(seed: Int) =
+        GraphGenerators.logNormalGraph(sc, numVertices100, mu = mu, sigma = sigma, seed = seed)
+          .edges.collect().map(e => (e.srcId, e.dstId, e.attr)).toSeq
+
+      val round1 = seededTriples(12345)
+      val round2 = seededTriples(12345)
+
+      // Guard against the comparisons below passing vacuously on an edgeless graph.
+      assert(round1.nonEmpty)
+      assert(round1 == round2)
+
+      val round3 = seededTriples(567)
+      assert(round1 != round3)
+    }
+  }
+
+}