aboutsummaryrefslogtreecommitdiff
path: root/python/pyspark/sql/tests.py
diff options
context:
space:
mode:
author: Yin Huai <yhuai@databricks.com> 2015-08-06 17:03:14 -0700
committer: Reynold Xin <rxin@databricks.com> 2015-08-06 17:03:14 -0700
commit: baf4587a569b49e39020c04c2785041bdd00789b (patch)
tree: 183268b4d81c6e773d1c305cfff3bcada319cc06 /python/pyspark/sql/tests.py
parent: 681e3024b6c2fcb54b42180d94d3ba3eed52a2d4 (diff)
download: spark-baf4587a569b49e39020c04c2785041bdd00789b.tar.gz
spark-baf4587a569b49e39020c04c2785041bdd00789b.tar.bz2
spark-baf4587a569b49e39020c04c2785041bdd00789b.zip
[SPARK-9691] [SQL] PySpark SQL rand function treats seed 0 as no seed
https://issues.apache.org/jira/browse/SPARK-9691 jkbradley rxin Author: Yin Huai <yhuai@databricks.com> Closes #7999 from yhuai/pythonRand and squashes the following commits: 4187e0c [Yin Huai] Regression test. a985ef9 [Yin Huai] Use "if seed is not None" instead of "if seed", because "if seed" evaluates to false when seed is 0.
Diffstat (limited to 'python/pyspark/sql/tests.py')
-rw-r--r-- python/pyspark/sql/tests.py 10
1 files changed, 10 insertions, 0 deletions
diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py
index ebd3ea8db6..1e3444dd9e 100644
--- a/python/pyspark/sql/tests.py
+++ b/python/pyspark/sql/tests.py
@@ -629,6 +629,16 @@ class SQLTests(ReusedPySparkTestCase):
for row in rndn:
assert row[1] >= -4.0 and row[1] <= 4.0, "got: %s" % row[1]
+ # If the specified seed is 0, we should use it.
+ # https://issues.apache.org/jira/browse/SPARK-9691
+ rnd1 = df.select('key', functions.rand(0)).collect()
+ rnd2 = df.select('key', functions.rand(0)).collect()
+ self.assertEqual(sorted(rnd1), sorted(rnd2))
+
+ rndn1 = df.select('key', functions.randn(0)).collect()
+ rndn2 = df.select('key', functions.randn(0)).collect()
+ self.assertEqual(sorted(rndn1), sorted(rndn2))
+
def test_between_function(self):
df = self.sc.parallelize([
Row(a=1, b=2, c=3),