aboutsummaryrefslogtreecommitdiff
path: root/R/pkg/inst/tests
diff options
context:
space:
mode:
authorClark Fitzgerald <clarkfitzg@gmail.com>2016-09-06 23:40:37 -0700
committerShivaram Venkataraman <shivaram@cs.berkeley.edu>2016-09-06 23:40:37 -0700
commit9fccde4ff80fb0fd65a9e90eb3337965e4349de4 (patch)
treeb77ec279d579d7c5e6603b8022926c6d79865362 /R/pkg/inst/tests
parenteb1ab88a86ce35f3d6ba03b3a798099fbcf6b3fc (diff)
downloadspark-9fccde4ff80fb0fd65a9e90eb3337965e4349de4.tar.gz
spark-9fccde4ff80fb0fd65a9e90eb3337965e4349de4.tar.bz2
spark-9fccde4ff80fb0fd65a9e90eb3337965e4349de4.zip
[SPARK-16785] R dapply doesn't return array or raw columns
## What changes were proposed in this pull request?

Fixed bug in `dapplyCollect` by changing the `compute` function of `worker.R` to explicitly handle raw (binary) vectors.

cc shivaram

## How was this patch tested?

Unit tests

Author: Clark Fitzgerald <clarkfitzg@gmail.com>

Closes #14783 from clarkfitzg/SPARK-16785.
Diffstat (limited to 'R/pkg/inst/tests')
-rw-r--r--R/pkg/inst/tests/testthat/test_sparkSQL.R21
-rw-r--r--R/pkg/inst/tests/testthat/test_utils.R24
2 files changed, 45 insertions, 0 deletions
diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R
index aac3f62204..a9bd325895 100644
--- a/R/pkg/inst/tests/testthat/test_sparkSQL.R
+++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R
@@ -2270,6 +2270,27 @@ test_that("dapply() and dapplyCollect() on a DataFrame", {
expect_identical(expected, result)
})
+test_that("dapplyCollect() on DataFrame with a binary column", {
+
+ df <- data.frame(key = 1:3)
+ df$bytes <- lapply(df$key, serialize, connection = NULL)
+
+ df_spark <- createDataFrame(df)
+
+ result1 <- collect(df_spark)
+ expect_identical(df, result1)
+
+ result2 <- dapplyCollect(df_spark, function(x) x)
+ expect_identical(df, result2)
+
+ # A data.frame with a single column of bytes
+ scb <- subset(df, select = "bytes")
+ scb_spark <- createDataFrame(scb)
+ result <- dapplyCollect(scb_spark, function(x) x)
+ expect_identical(scb, result)
+
+})
+
test_that("repartition by columns on DataFrame", {
df <- createDataFrame(
list(list(1L, 1, "1", 0.1), list(1L, 2, "2", 0.2), list(3L, 3, "3", 0.3)),
diff --git a/R/pkg/inst/tests/testthat/test_utils.R b/R/pkg/inst/tests/testthat/test_utils.R
index 83e94a1432..77f25292f3 100644
--- a/R/pkg/inst/tests/testthat/test_utils.R
+++ b/R/pkg/inst/tests/testthat/test_utils.R
@@ -183,4 +183,28 @@ test_that("overrideEnvs", {
expect_equal(config[["config_only"]], "ok")
})
+test_that("rbindRaws", {
+
+ # Mixed Column types
+ r <- serialize(1:5, connection = NULL)
+ r1 <- serialize(1, connection = NULL)
+ r2 <- serialize(letters, connection = NULL)
+ r3 <- serialize(1:10, connection = NULL)
+ inputData <- list(list(1L, r1, "a", r), list(2L, r2, "b", r),
+ list(3L, r3, "c", r))
+ expected <- data.frame(V1 = 1:3)
+ expected$V2 <- list(r1, r2, r3)
+ expected$V3 <- c("a", "b", "c")
+ expected$V4 <- list(r, r, r)
+ result <- rbindRaws(inputData)
+ expect_equal(expected, result)
+
+ # Single binary column
+ input <- list(list(r1), list(r2), list(r3))
+ expected <- subset(expected, select = "V2")
+ result <- setNames(rbindRaws(input), "V2")
+ expect_equal(expected, result)
+
+})
+
sparkR.session.stop()