diff options
author | Dongjoon Hyun <dongjoon@apache.org> | 2016-06-20 21:09:39 -0700 |
---|---|---|
committer | Shivaram Venkataraman <shivaram@cs.berkeley.edu> | 2016-06-20 21:09:39 -0700 |
commit | 217db56ba11fcdf9e3a81946667d1d99ad7344ee (patch) | |
tree | c126e953440cf3ac4e902ea259d4a9ec1b60bd6e /R/pkg/inst/tests | |
parent | a46553cbacf0e4012df89fe55385dec5beaa680a (diff) | |
download | spark-217db56ba11fcdf9e3a81946667d1d99ad7344ee.tar.gz spark-217db56ba11fcdf9e3a81946667d1d99ad7344ee.tar.bz2 spark-217db56ba11fcdf9e3a81946667d1d99ad7344ee.zip |
[SPARK-15294][R] Add `pivot` to SparkR
## What changes were proposed in this pull request?
This PR adds the `pivot` function to SparkR for API parity. Since this PR is based on https://github.com/apache/spark/pull/13295 , mhnatiuk should be credited for the work he did.
## How was this patch tested?
Passes the Jenkins tests (including a new test case).
Author: Dongjoon Hyun <dongjoon@apache.org>
Closes #13786 from dongjoon-hyun/SPARK-15294.
Diffstat (limited to 'R/pkg/inst/tests')
-rw-r--r-- | R/pkg/inst/tests/testthat/test_sparkSQL.R | 25 |
1 files changed, 25 insertions, 0 deletions
diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R index d53c40d423..7c192fb5a0 100644 --- a/R/pkg/inst/tests/testthat/test_sparkSQL.R +++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R @@ -1398,6 +1398,31 @@ test_that("group by, agg functions", { unlink(jsonPath3) }) +test_that("pivot GroupedData column", { + df <- createDataFrame(data.frame( + earnings = c(10000, 10000, 11000, 15000, 12000, 20000, 21000, 22000), + course = c("R", "Python", "R", "Python", "R", "Python", "R", "Python"), + year = c(2013, 2013, 2014, 2014, 2015, 2015, 2016, 2016) + )) + sum1 <- collect(sum(pivot(groupBy(df, "year"), "course"), "earnings")) + sum2 <- collect(sum(pivot(groupBy(df, "year"), "course", c("Python", "R")), "earnings")) + sum3 <- collect(sum(pivot(groupBy(df, "year"), "course", list("Python", "R")), "earnings")) + sum4 <- collect(sum(pivot(groupBy(df, "year"), "course", "R"), "earnings")) + + correct_answer <- data.frame( + year = c(2013, 2014, 2015, 2016), + Python = c(10000, 15000, 20000, 22000), + R = c(10000, 11000, 12000, 21000) + ) + expect_equal(sum1, correct_answer) + expect_equal(sum2, correct_answer) + expect_equal(sum3, correct_answer) + expect_equal(sum4, correct_answer[, c("year", "R")]) + + expect_error(collect(sum(pivot(groupBy(df, "year"), "course", c("R", "R")), "earnings"))) + expect_error(collect(sum(pivot(groupBy(df, "year"), "course", list("R", "R")), "earnings"))) +}) + test_that("arrange() and orderBy() on a DataFrame", { df <- read.json(jsonPath) sorted <- arrange(df, df$age) |