diff options
author | Davies Liu <davies@databricks.com> | 2015-08-14 13:55:29 -0700 |
---|---|---|
committer | Reynold Xin <rxin@databricks.com> | 2015-08-14 13:55:29 -0700 |
commit | 11ed2b180ec86523a94679a8b8132fadb911ccd5 (patch) | |
tree | 308f29be0ed48ffbff823fdacd0f37410af1a869 /python/pyspark/sql/tests.py | |
parent | 9407baa2a7c26f527f2d043715d313d75bd765bb (diff) | |
download | spark-11ed2b180ec86523a94679a8b8132fadb911ccd5.tar.gz spark-11ed2b180ec86523a94679a8b8132fadb911ccd5.tar.bz2 spark-11ed2b180ec86523a94679a8b8132fadb911ccd5.zip |
[SPARK-9978] [PYSPARK] [SQL] fix Window.orderBy and doc of ntile()
Author: Davies Liu <davies@databricks.com>
Closes #8213 from davies/fix_window.
Diffstat (limited to 'python/pyspark/sql/tests.py')
-rw-r--r-- | python/pyspark/sql/tests.py | 23 |
1 file changed, 23 insertions, 0 deletions
```diff
diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py
index 38c83c427a..9b748101b5 100644
--- a/python/pyspark/sql/tests.py
+++ b/python/pyspark/sql/tests.py
@@ -1124,5 +1124,28 @@ class HiveContextSQLTests(ReusedPySparkTestCase):
         for r, ex in zip(rs, expected):
             self.assertEqual(tuple(r), ex[:len(r)])
 
+    def test_window_functions_without_partitionBy(self):
+        df = self.sqlCtx.createDataFrame([(1, "1"), (2, "2"), (1, "2"), (1, "2")], ["key", "value"])
+        w = Window.orderBy("key", df.value)
+        from pyspark.sql import functions as F
+        sel = df.select(df.value, df.key,
+                        F.max("key").over(w.rowsBetween(0, 1)),
+                        F.min("key").over(w.rowsBetween(0, 1)),
+                        F.count("key").over(w.rowsBetween(float('-inf'), float('inf'))),
+                        F.rowNumber().over(w),
+                        F.rank().over(w),
+                        F.denseRank().over(w),
+                        F.ntile(2).over(w))
+        rs = sorted(sel.collect())
+        expected = [
+            ("1", 1, 1, 1, 4, 1, 1, 1, 1),
+            ("2", 1, 1, 1, 4, 2, 2, 2, 1),
+            ("2", 1, 2, 1, 4, 3, 2, 2, 2),
+            ("2", 2, 2, 2, 4, 4, 4, 3, 2)
+        ]
+        for r, ex in zip(rs, expected):
+            self.assertEqual(tuple(r), ex[:len(r)])
+
+
 if __name__ == "__main__":
     unittest.main()
```