[SPARK-11395][SPARKR] Support over and window specification in SparkR.

This PR: 1. Implements the WindowSpec S4 class. 2. Implements window.partitionBy() and window.orderBy() as utility functions that create WindowSpec objects. 3. Implements over() for the Column class. Author: Sun Rui <rui.sun@intel.com> Author: Sun Rui <sunrui2016@gmail.com> Closes #10094 from sun-rui/SPARK-11395.
author: Sun Rui <rui.sun@intel.com> 2016-05-05 18:49:43 -0700
committer: Shivaram Venkataraman <shivaram@cs.berkeley.edu> 2016-05-05 18:49:43 -0700
commit: 157a49aa410dc1870cd171148d317084c5a90d23 (patch)
tree: c4d4d81b171c24308c70c8289351c0e7c497ff98 /R/pkg/inst/tests/testthat/test_sparkSQL.R
parent: 7f5922aa4a810a0b9cc783956a8b7aa3dad86a0a (diff)
download: spark-157a49aa410dc1870cd171148d317084c5a90d23.tar.gz
spark-157a49aa410dc1870cd171148d317084c5a90d23.tar.bz2
spark-157a49aa410dc1870cd171148d317084c5a90d23.zip
1 file changed, 36 insertions, 0 deletions
diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R
index 3b6a27c3b8..0f67bc2e33 100644
--- a/R/pkg/inst/tests/testthat/test_sparkSQL.R
+++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R
@@ -2118,6 +2118,42 @@ test_that("repartition by columns on DataFrame", {
   expect_equal(nrow(df1), 2)
 })
 
+test_that("Window functions on a DataFrame", {  # over() with four equivalent WindowSpecs
+  ssc <- callJMethod(sc, "sc")  # call method "sc" on the existing sc handle
+  hiveCtx <- tryCatch({  # try to create a TestHiveContext; skip the test on error
+    newJObject("org.apache.spark.sql.hive.test.TestHiveContext", ssc)
+  },
+  error = function(err) {  # NOTE(review): message below says "build"; likely meant "built"
+    skip("Hive is not build with SparkSQL, skipped")
+  })
+
+  df <- createDataFrame(hiveCtx,  # four rows, columns "key" and "value"
+                        list(list(1L, "1"), list(2L, "2"), list(1L, "1"), list(2L, "2")),
+                        schema = c("key", "value"))
+  ws <- orderBy(window.partitionBy("key"), "value")  # partition first, column names
+  result <- collect(select(df, over(lead("key", 1), ws), over(lead("value", 1), ws)))
+  names(result) <- c("key", "value")  # rename the two windowed columns to compare
+  expected <- data.frame(key = c(1L, NA, 2L, NA),  # NA in every second row
+                       value = c("1", NA, "2", NA),
+                       stringsAsFactors = FALSE)
+  expect_equal(result, expected)
+
+  ws <- orderBy(window.partitionBy(df$key), df$value)  # partition first, Column objects
+  result <- collect(select(df, over(lead("key", 1), ws), over(lead("value", 1), ws)))
+  names(result) <- c("key", "value")
+  expect_equal(result, expected)  # same expectation as the first spec
+
+  ws <- partitionBy(window.orderBy("value"), "key")  # order first, column names
+  result <- collect(select(df, over(lead("key", 1), ws), over(lead("value", 1), ws)))
+  names(result) <- c("key", "value")
+  expect_equal(result, expected)  # same expectation as the first spec
+
+  ws <- partitionBy(window.orderBy(df$value), df$key)  # order first, Column objects
+  result <- collect(select(df, over(lead("key", 1), ws), over(lead("value", 1), ws)))
+  names(result) <- c("key", "value")
+  expect_equal(result, expected)  # same expectation as the first spec
+})
+
 unlink(parquetPath)
 unlink(jsonPath)
 unlink(jsonPathNa)
author	Sun Rui <rui.sun@intel.com>	2016-05-05 18:49:43 -0700
committer	Shivaram Venkataraman <shivaram@cs.berkeley.edu>	2016-05-05 18:49:43 -0700
commit	157a49aa410dc1870cd171148d317084c5a90d23 (patch)
tree	c4d4d81b171c24308c70c8289351c0e7c497ff98 /R/pkg/inst/tests/testthat/test_sparkSQL.R
parent	7f5922aa4a810a0b9cc783956a8b7aa3dad86a0a (diff)
download	spark-157a49aa410dc1870cd171148d317084c5a90d23.tar.gz spark-157a49aa410dc1870cd171148d317084c5a90d23.tar.bz2 spark-157a49aa410dc1870cd171148d317084c5a90d23.zip