[SPARK-7322, SPARK-7836, SPARK-7822][SQL] DataFrame window function related updates

1. ntile should take an integer as parameter.
2. Added Python API (based on #6364)
3. Update documentation of various DataFrame Python functions.

Author: Davies Liu <davies@databricks.com>
Author: Reynold Xin <rxin@databricks.com>

Closes #6374 from rxin/window-final and squashes the following commits:

69004c7 [Reynold Xin] Style fix.
288cea9 [Reynold Xin] Update documentation.
7cb8985 [Reynold Xin] Merge pull request #6364 from davies/window
66092b4 [Davies Liu] update docs
ed73cb4 [Reynold Xin] [SPARK-7322][SQL] Improve DataFrame window function documentation.
ef55132 [Davies Liu] Merge branch 'master' of github.com:apache/spark into window4
8936ade [Davies Liu] fix maxint in python 3
2649358 [Davies Liu] update docs
778e2c0 [Davies Liu] SPARK-7836 and SPARK-7822: Python API of window functions

(cherry picked from commit efe3bfdf496aa6206ace2697e31dd4c0c3c824fb)
Signed-off-by: Yin Huai <yhuai@databricks.com>
author: Davies Liu <davies@databricks.com> 2015-05-23 08:30:05 -0700
committer: Yin Huai <yhuai@databricks.com> 2015-05-23 08:30:18 -0700
commit: d1515381cb957f40daf026144ce3ac014660df23 (patch)
tree: 3d4ec2c78cdc8629653ce99d31a432eefbdddc81 /sql/hive
parent: ea9db50bc3ade82fb9966df34961a17b255b86d7 (diff)
download: spark-d1515381cb957f40daf026144ce3ac014660df23.tar.gz
spark-d1515381cb957f40daf026144ce3ac014660df23.tar.bz2
spark-d1515381cb957f40daf026144ce3ac014660df23.zip
1 files changed, 8 insertions, 12 deletions
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDataFrameWindowSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDataFrameWindowSuite.scala
index 6cea6776c8..efb3f2545d 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDataFrameWindowSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDataFrameWindowSuite.scala
@@ -31,8 +31,8 @@ class HiveDataFrameWindowSuite extends QueryTest {
 
     checkAnswer(
       df.select(
-        lead("key").over(w),
-        lead("value").over(w)),
+        lead("key", 1).over(w),
+        lead("value", 1).over(w)),
       Row(1, "1") :: Row(2, "2") :: Row(null, null) :: Row(null, null) :: Nil)
   }
 
@@ -42,8 +42,8 @@ class HiveDataFrameWindowSuite extends QueryTest {
 
     checkAnswer(
       df.select(
-        lead("key").over(w),
-        lead("value").over(w)),
+        lead("key", 1).over(w),
+        lead("value", 1).over(w)),
       Row(1, "1") :: Row(2, "2") :: Row(null, null) :: Row(null, null) :: Nil)
   }
 
@@ -53,7 +53,7 @@ class HiveDataFrameWindowSuite extends QueryTest {
 
     checkAnswer(
       df.select(
-        lead("value").over(Window.partitionBy($"key").orderBy($"value"))),
+        lead("value", 1).over(Window.partitionBy($"key").orderBy($"value"))),
       sql(
         """SELECT
           | lead(value) OVER (PARTITION BY key ORDER BY value)
@@ -66,9 +66,7 @@ class HiveDataFrameWindowSuite extends QueryTest {
 
     checkAnswer(
       df.select(
-        lag("value").over(
-          Window.partitionBy($"key")
-          .orderBy($"value"))),
+        lag("value", 1).over(Window.partitionBy($"key").orderBy($"value"))),
       sql(
         """SELECT
           | lag(value) OVER (PARTITION BY key ORDER BY value)
@@ -112,8 +110,7 @@ class HiveDataFrameWindowSuite extends QueryTest {
         mean("key").over(Window.partitionBy("value").orderBy("key")),
         count("key").over(Window.partitionBy("value").orderBy("key")),
         sum("key").over(Window.partitionBy("value").orderBy("key")),
-        ntile("key").over(Window.partitionBy("value").orderBy("key")),
-        ntile($"key").over(Window.partitionBy("value").orderBy("key")),
+        ntile(2).over(Window.partitionBy("value").orderBy("key")),
         rowNumber().over(Window.partitionBy("value").orderBy("key")),
         denseRank().over(Window.partitionBy("value").orderBy("key")),
         rank().over(Window.partitionBy("value").orderBy("key")),
@@ -127,8 +124,7 @@ class HiveDataFrameWindowSuite extends QueryTest {
            |avg(key) over (partition by value order by key),
            |count(key) over (partition by value order by key),
            |sum(key) over (partition by value order by key),
-           |ntile(key) over (partition by value order by key),
-           |ntile(key) over (partition by value order by key),
+           |ntile(2) over (partition by value order by key),
            |row_number() over (partition by value order by key),
            |dense_rank() over (partition by value order by key),
            |rank() over (partition by value order by key),
author	Davies Liu <davies@databricks.com>	2015-05-23 08:30:05 -0700
committer	Yin Huai <yhuai@databricks.com>	2015-05-23 08:30:18 -0700
commit	d1515381cb957f40daf026144ce3ac014660df23 (patch)
tree	3d4ec2c78cdc8629653ce99d31a432eefbdddc81 /sql/hive
parent	ea9db50bc3ade82fb9966df34961a17b255b86d7 (diff)
download	spark-d1515381cb957f40daf026144ce3ac014660df23.tar.gz spark-d1515381cb957f40daf026144ce3ac014660df23.tar.bz2 spark-d1515381cb957f40daf026144ce3ac014660df23.zip