about | summary | refs | log | tree | commit | diff
path: root/sql/hive
diff options
context:
space:
mode:
author Davies Liu <davies@databricks.com> 2015-05-23 08:30:05 -0700
committer Yin Huai <yhuai@databricks.com> 2015-05-23 08:30:05 -0700
commit efe3bfdf496aa6206ace2697e31dd4c0c3c824fb (patch)
tree a6c0adbff3ff029c0e87ceff4180f6b3c99ea5ff /sql/hive
parent ad0badba1450295982738934da2cc121cde18213 (diff)
download spark-efe3bfdf496aa6206ace2697e31dd4c0c3c824fb.tar.gz
spark-efe3bfdf496aa6206ace2697e31dd4c0c3c824fb.tar.bz2
spark-efe3bfdf496aa6206ace2697e31dd4c0c3c824fb.zip
[SPARK-7322, SPARK-7836, SPARK-7822][SQL] DataFrame window function related updates
1. ntile should take an integer as parameter.
2. Added Python API (based on #6364)
3. Update documentation of various DataFrame Python functions.

Author: Davies Liu <davies@databricks.com>
Author: Reynold Xin <rxin@databricks.com>

Closes #6374 from rxin/window-final and squashes the following commits:

69004c7 [Reynold Xin] Style fix.
288cea9 [Reynold Xin] Update documentaiton.
7cb8985 [Reynold Xin] Merge pull request #6364 from davies/window
66092b4 [Davies Liu] update docs
ed73cb4 [Reynold Xin] [SPARK-7322][SQL] Improve DataFrame window function documentation.
ef55132 [Davies Liu] Merge branch 'master' of github.com:apache/spark into window4
8936ade [Davies Liu] fix maxint in python 3
2649358 [Davies Liu] update docs
778e2c0 [Davies Liu] SPARK-7836 and SPARK-7822: Python API of window functions
Diffstat (limited to 'sql/hive')
-rw-r--r-- sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDataFrameWindowSuite.scala | 20
1 file changed, 8 insertions, 12 deletions
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDataFrameWindowSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDataFrameWindowSuite.scala
index 6cea6776c8..efb3f2545d 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDataFrameWindowSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDataFrameWindowSuite.scala
@@ -31,8 +31,8 @@ class HiveDataFrameWindowSuite extends QueryTest {
checkAnswer(
df.select(
- lead("key").over(w),
- lead("value").over(w)),
+ lead("key", 1).over(w),
+ lead("value", 1).over(w)),
Row(1, "1") :: Row(2, "2") :: Row(null, null) :: Row(null, null) :: Nil)
}
@@ -42,8 +42,8 @@ class HiveDataFrameWindowSuite extends QueryTest {
checkAnswer(
df.select(
- lead("key").over(w),
- lead("value").over(w)),
+ lead("key", 1).over(w),
+ lead("value", 1).over(w)),
Row(1, "1") :: Row(2, "2") :: Row(null, null) :: Row(null, null) :: Nil)
}
@@ -53,7 +53,7 @@ class HiveDataFrameWindowSuite extends QueryTest {
checkAnswer(
df.select(
- lead("value").over(Window.partitionBy($"key").orderBy($"value"))),
+ lead("value", 1).over(Window.partitionBy($"key").orderBy($"value"))),
sql(
"""SELECT
| lead(value) OVER (PARTITION BY key ORDER BY value)
@@ -66,9 +66,7 @@ class HiveDataFrameWindowSuite extends QueryTest {
checkAnswer(
df.select(
- lag("value").over(
- Window.partitionBy($"key")
- .orderBy($"value"))),
+ lag("value", 1).over(Window.partitionBy($"key").orderBy($"value"))),
sql(
"""SELECT
| lag(value) OVER (PARTITION BY key ORDER BY value)
@@ -112,8 +110,7 @@ class HiveDataFrameWindowSuite extends QueryTest {
mean("key").over(Window.partitionBy("value").orderBy("key")),
count("key").over(Window.partitionBy("value").orderBy("key")),
sum("key").over(Window.partitionBy("value").orderBy("key")),
- ntile("key").over(Window.partitionBy("value").orderBy("key")),
- ntile($"key").over(Window.partitionBy("value").orderBy("key")),
+ ntile(2).over(Window.partitionBy("value").orderBy("key")),
rowNumber().over(Window.partitionBy("value").orderBy("key")),
denseRank().over(Window.partitionBy("value").orderBy("key")),
rank().over(Window.partitionBy("value").orderBy("key")),
@@ -127,8 +124,7 @@ class HiveDataFrameWindowSuite extends QueryTest {
|avg(key) over (partition by value order by key),
|count(key) over (partition by value order by key),
|sum(key) over (partition by value order by key),
- |ntile(key) over (partition by value order by key),
- |ntile(key) over (partition by value order by key),
+ |ntile(2) over (partition by value order by key),
|row_number() over (partition by value order by key),
|dense_rank() over (partition by value order by key),
|rank() over (partition by value order by key),