aboutsummaryrefslogtreecommitdiff
path: root/python/pyspark/sql/dataframe.py
diff options
context:
space:
mode:
author: Imran Rashid <irashid@cloudera.com> 2015-11-06 20:06:24 +0000
committer: Sean Owen <sowen@cloudera.com> 2015-11-06 20:06:24 +0000
commit: 49f1a820372d1cba41f3f00d07eb5728f2ed6705 (patch)
tree: 535797cc3662bfd7d8247b2d01f6fd00b2e1b2a9 /python/pyspark/sql/dataframe.py
parent: 62bb290773c9f9fa53cbe6d4eedc6e153761a763 (diff)
download: spark-49f1a820372d1cba41f3f00d07eb5728f2ed6705.tar.gz
spark-49f1a820372d1cba41f3f00d07eb5728f2ed6705.tar.bz2
spark-49f1a820372d1cba41f3f00d07eb5728f2ed6705.zip
[SPARK-10116][CORE] XORShiftRandom.hashSeed is random in high bits
https://issues.apache.org/jira/browse/SPARK-10116

This is really trivial, just happened to notice it -- if `XORShiftRandom.hashSeed` is really supposed to have random bits throughout (as the comment implies), it needs to do something for the conversion to `long`.

mengxr mkolod

Author: Imran Rashid <irashid@cloudera.com>

Closes #8314 from squito/SPARK-10116.
Diffstat (limited to 'python/pyspark/sql/dataframe.py')
-rw-r--r-- python/pyspark/sql/dataframe.py | 6
1 files changed, 3 insertions, 3 deletions
diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index 3baff81477..765a4511b6 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -436,7 +436,7 @@ class DataFrame(object):
"""Returns a sampled subset of this :class:`DataFrame`.
>>> df.sample(False, 0.5, 42).count()
- 1
+ 2
"""
assert fraction >= 0.0, "Negative fraction value: %s" % fraction
seed = seed if seed is not None else random.randint(0, sys.maxsize)
@@ -463,8 +463,8 @@ class DataFrame(object):
+---+-----+
|key|count|
+---+-----+
- | 0| 3|
- | 1| 8|
+ | 0| 5|
+ | 1| 9|
+---+-----+
"""