about summary refs log tree commit diff
path: root/python/pyspark/rddsampler.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/pyspark/rddsampler.py')
-rw-r--r-- python/pyspark/rddsampler.py 11
1 files changed, 5 insertions, 6 deletions
diff --git a/python/pyspark/rddsampler.py b/python/pyspark/rddsampler.py
index 55e247da0e..a6e81067cf 100644
--- a/python/pyspark/rddsampler.py
+++ b/python/pyspark/rddsampler.py
@@ -40,14 +40,13 @@ class RDDSamplerBase(object):
def initRandomGenerator(self, split):
if self._use_numpy:
import numpy
- self._random = numpy.random.RandomState(self._seed)
+ self._random = numpy.random.RandomState(self._seed ^ split)
else:
- self._random = random.Random(self._seed)
+ self._random = random.Random(self._seed ^ split)
- for _ in range(0, split):
- # discard the next few values in the sequence to have a
- # different seed for the different splits
- self._random.randint(0, sys.maxint)
+ # mixing because the initial seeds are close to each other
+ for _ in xrange(10):
+ self._random.randint(0, 1)
self._split = split
self._rand_initialized = True