path: root/R/pkg/inst
author     Sun Rui <rui.sun@intel.com>                          2016-05-05 18:49:43 -0700
committer  Shivaram Venkataraman <shivaram@cs.berkeley.edu>     2016-05-05 18:49:43 -0700
commit     157a49aa410dc1870cd171148d317084c5a90d23 (patch)
tree       c4d4d81b171c24308c70c8289351c0e7c497ff98 /R/pkg/inst
parent     7f5922aa4a810a0b9cc783956a8b7aa3dad86a0a (diff)
[SPARK-11395][SPARKR] Support over and window specification in SparkR.
This PR:
1. Implements the WindowSpec S4 class.
2. Implements Window.partitionBy() and Window.orderBy() as utility functions to create WindowSpec objects.
3. Implements over() for the Column class.

Author: Sun Rui <rui.sun@intel.com>
Author: Sun Rui <sunrui2016@gmail.com>

Closes #10094 from sun-rui/SPARK-11395.
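A minimal usage sketch of the API this change adds (not part of the commit; it assumes a running SparkR session with a SQLContext available as sqlContext, and uses the same functions exercised in the test below):

df <- createDataFrame(sqlContext,
                      list(list(1L, "a"), list(1L, "b"), list(2L, "c")),
                      schema = c("key", "value"))

# Build a window specification: partition rows by "key", then order the rows
# within each partition by "value".
ws <- orderBy(window.partitionBy("key"), "value")

# over() applies a window function (here lead()) over that specification;
# the last row of each partition yields NA because it has no following row.
head(collect(select(df, df$key, over(lead("value", 1), ws))))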
Diffstat (limited to 'R/pkg/inst')
-rw-r--r--  R/pkg/inst/tests/testthat/test_sparkSQL.R | 36
1 file changed, 36 insertions(+), 0 deletions(-)
diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R
index 3b6a27c3b8..0f67bc2e33 100644
--- a/R/pkg/inst/tests/testthat/test_sparkSQL.R
+++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R
@@ -2118,6 +2118,42 @@ test_that("repartition by columns on DataFrame", {
expect_equal(nrow(df1), 2)
})
+test_that("Window functions on a DataFrame", {
+ ssc <- callJMethod(sc, "sc")
+ hiveCtx <- tryCatch({
+ newJObject("org.apache.spark.sql.hive.test.TestHiveContext", ssc)
+ },
+ error = function(err) {
+ skip("Hive is not build with SparkSQL, skipped")
+ })
+
+ df <- createDataFrame(hiveCtx,
+ list(list(1L, "1"), list(2L, "2"), list(1L, "1"), list(2L, "2")),
+ schema = c("key", "value"))
+ ws <- orderBy(window.partitionBy("key"), "value")
+ result <- collect(select(df, over(lead("key", 1), ws), over(lead("value", 1), ws)))
+ names(result) <- c("key", "value")
+ expected <- data.frame(key = c(1L, NA, 2L, NA),
+ value = c("1", NA, "2", NA),
+ stringsAsFactors = FALSE)
+ expect_equal(result, expected)
+
+ ws <- orderBy(window.partitionBy(df$key), df$value)
+ result <- collect(select(df, over(lead("key", 1), ws), over(lead("value", 1), ws)))
+ names(result) <- c("key", "value")
+ expect_equal(result, expected)
+
+ ws <- partitionBy(window.orderBy("value"), "key")
+ result <- collect(select(df, over(lead("key", 1), ws), over(lead("value", 1), ws)))
+ names(result) <- c("key", "value")
+ expect_equal(result, expected)
+
+ ws <- partitionBy(window.orderBy(df$value), df$key)
+ result <- collect(select(df, over(lead("key", 1), ws), over(lead("value", 1), ws)))
+ names(result) <- c("key", "value")
+ expect_equal(result, expected)
+})
+
unlink(parquetPath)
unlink(jsonPath)
unlink(jsonPathNa)