aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Xiangrui Meng <meng@databricks.com> 2014-02-26 23:22:30 -0800
committer: Reynold Xin <rxin@apache.org> 2014-02-26 23:22:30 -0800
commit: 5a3ad107c01dbc09cd144376ca808125ee821093 (patch)
tree: b178e8444a595c6a71b5fa7c41f37ba9440f1a81
parent: 71f69d66ce50991e99408791ade25a670598d32a (diff)
download: spark-5a3ad107c01dbc09cd144376ca808125ee821093.tar.gz
spark-5a3ad107c01dbc09cd144376ca808125ee821093.tar.bz2
spark-5a3ad107c01dbc09cd144376ca808125ee821093.zip
SPARK-1129: use a predefined seed when seed is zero in XORShiftRandom
If the seed is zero, XORShift generates all zeros, which would produce unexpected results.

JIRA: https://spark-project.atlassian.net/browse/SPARK-1129

Author: Xiangrui Meng <meng@databricks.com>

Closes #645 from mengxr/xor and squashes the following commits:

1b086ab [Xiangrui Meng] use MurmurHash3 to set seed in XORShiftRandom
45c6f16 [Xiangrui Meng] minor style change
51f4050 [Xiangrui Meng] use a predefined seed when seed is zero in XORShiftRandom
-rw-r--r-- core/src/main/scala/org/apache/spark/util/random/XORShiftRandom.scala | 15
-rw-r--r-- core/src/test/scala/org/apache/spark/util/random/XORShiftRandomSuite.scala | 4
2 files changed, 16 insertions, 3 deletions
diff --git a/core/src/main/scala/org/apache/spark/util/random/XORShiftRandom.scala b/core/src/main/scala/org/apache/spark/util/random/XORShiftRandom.scala
index ca611b67ed..8a4cdea2fa 100644
--- a/core/src/main/scala/org/apache/spark/util/random/XORShiftRandom.scala
+++ b/core/src/main/scala/org/apache/spark/util/random/XORShiftRandom.scala
@@ -17,8 +17,11 @@
package org.apache.spark.util.random
+import java.nio.ByteBuffer
import java.util.{Random => JavaRandom}
+import scala.util.hashing.MurmurHash3
+
import org.apache.spark.util.Utils.timeIt
/**
@@ -36,8 +39,8 @@ private[spark] class XORShiftRandom(init: Long) extends JavaRandom(init) {
def this() = this(System.nanoTime)
- private var seed = init
-
+ private var seed = XORShiftRandom.hashSeed(init)
+
// we need to just override next - this will be called by nextInt, nextDouble,
// nextGaussian, nextLong, etc.
override protected def next(bits: Int): Int = {
@@ -49,13 +52,19 @@ private[spark] class XORShiftRandom(init: Long) extends JavaRandom(init) {
}
override def setSeed(s: Long) {
- seed = s
+ seed = XORShiftRandom.hashSeed(s)
}
}
/** Contains benchmark method and main method to run benchmark of the RNG */
private[spark] object XORShiftRandom {
+ /** Hash seeds to have 0/1 bits throughout. */
+ private def hashSeed(seed: Long): Long = {
+ val bytes = ByteBuffer.allocate(java.lang.Long.SIZE).putLong(seed).array()
+ MurmurHash3.bytesHash(bytes)
+ }
+
/**
* Main method for running benchmark
* @param args takes one argument - the number of random numbers to generate
diff --git a/core/src/test/scala/org/apache/spark/util/random/XORShiftRandomSuite.scala b/core/src/test/scala/org/apache/spark/util/random/XORShiftRandomSuite.scala
index c51d12bfe0..757476efdb 100644
--- a/core/src/test/scala/org/apache/spark/util/random/XORShiftRandomSuite.scala
+++ b/core/src/test/scala/org/apache/spark/util/random/XORShiftRandomSuite.scala
@@ -72,4 +72,8 @@ class XORShiftRandomSuite extends FunSuite with ShouldMatchers {
}
+ test ("XORShift with zero seed") {
+ val random = new XORShiftRandom(0L)
+ assert(random.nextInt() != 0)
+ }
}