diff options
author | Sun Rui <rui.sun@intel.com> | 2016-05-05 18:49:43 -0700 |
---|---|---|
committer | Shivaram Venkataraman <shivaram@cs.berkeley.edu> | 2016-05-05 18:49:43 -0700 |
commit | 157a49aa410dc1870cd171148d317084c5a90d23 (patch) | |
tree | c4d4d81b171c24308c70c8289351c0e7c497ff98 /R/pkg/inst/tests/testthat/test_sparkSQL.R | |
parent | 7f5922aa4a810a0b9cc783956a8b7aa3dad86a0a (diff) | |
download | spark-157a49aa410dc1870cd171148d317084c5a90d23.tar.gz spark-157a49aa410dc1870cd171148d317084c5a90d23.tar.bz2 spark-157a49aa410dc1870cd171148d317084c5a90d23.zip |
[SPARK-11395][SPARKR] Support over and window specification in SparkR.
This PR:
1. Implement WindowSpec S4 class.
2. Implement Window.partitionBy() and Window.orderBy() as utility functions to create WindowSpec objects.
3. Implement over() of Column class.
Author: Sun Rui <rui.sun@intel.com>
Author: Sun Rui <sunrui2016@gmail.com>
Closes #10094 from sun-rui/SPARK-11395.
Diffstat (limited to 'R/pkg/inst/tests/testthat/test_sparkSQL.R')
-rw-r--r-- | R/pkg/inst/tests/testthat/test_sparkSQL.R | 36 |
1 files changed, 36 insertions, 0 deletions
diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R
index 3b6a27c3b8..0f67bc2e33 100644
--- a/R/pkg/inst/tests/testthat/test_sparkSQL.R
+++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R
@@ -2118,6 +2118,42 @@ test_that("repartition by columns on DataFrame", {
   expect_equal(nrow(df1), 2)
 })
 
+test_that("Window functions on a DataFrame", {
+  ssc <- callJMethod(sc, "sc")
+  hiveCtx <- tryCatch({
+    newJObject("org.apache.spark.sql.hive.test.TestHiveContext", ssc)
+  },
+  error = function(err) {
+    skip("Hive is not build with SparkSQL, skipped")
+  })
+
+  df <- createDataFrame(hiveCtx,
+                        list(list(1L, "1"), list(2L, "2"), list(1L, "1"), list(2L, "2")),
+                        schema = c("key", "value"))
+  ws <- orderBy(window.partitionBy("key"), "value")
+  result <- collect(select(df, over(lead("key", 1), ws), over(lead("value", 1), ws)))
+  names(result) <- c("key", "value")
+  expected <- data.frame(key = c(1L, NA, 2L, NA),
+                         value = c("1", NA, "2", NA),
+                         stringsAsFactors = FALSE)
+  expect_equal(result, expected)
+
+  ws <- orderBy(window.partitionBy(df$key), df$value)
+  result <- collect(select(df, over(lead("key", 1), ws), over(lead("value", 1), ws)))
+  names(result) <- c("key", "value")
+  expect_equal(result, expected)
+
+  ws <- partitionBy(window.orderBy("value"), "key")
+  result <- collect(select(df, over(lead("key", 1), ws), over(lead("value", 1), ws)))
+  names(result) <- c("key", "value")
+  expect_equal(result, expected)
+
+  ws <- partitionBy(window.orderBy(df$value), df$key)
+  result <- collect(select(df, over(lead("key", 1), ws), over(lead("value", 1), ws)))
+  names(result) <- c("key", "value")
+  expect_equal(result, expected)
+})
+
 unlink(parquetPath)
 unlink(jsonPath)
 unlink(jsonPathNa)