aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author zero323 <zero323@users.noreply.github.com> 2017-04-19 21:19:46 -0700
committer Felix Cheung <felixcheung@apache.org> 2017-04-19 21:19:46 -0700
commit 46c5749768fefd976097c7d5612ec184a4cfe1b9 (patch)
tree 5e8ce2ee22d848cae449df228c50dfab61e85d34
parent bdc60569196e9ae4e9086c3e514a406a9e8b23a6 (diff)
download spark-46c5749768fefd976097c7d5612ec184a4cfe1b9.tar.gz
spark-46c5749768fefd976097c7d5612ec184a4cfe1b9.tar.bz2
spark-46c5749768fefd976097c7d5612ec184a4cfe1b9.zip
[SPARK-20375][R] R wrappers for array and map
## What changes were proposed in this pull request?

Adds wrappers for `o.a.s.sql.functions.array` and `o.a.s.sql.functions.map`.

## How was this patch tested?

Unit tests, `check-cran.sh`.

Author: zero323 <zero323@users.noreply.github.com>

Closes #17674 from zero323/SPARK-20375.
-rw-r--r-- R/pkg/NAMESPACE 2
-rw-r--r-- R/pkg/R/functions.R 53
-rw-r--r-- R/pkg/R/generics.R 8
-rw-r--r-- R/pkg/inst/tests/testthat/test_sparkSQL.R 17
4 files changed, 80 insertions, 0 deletions
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index ca45c6f9b0..b6b559adf0 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -213,6 +213,8 @@ exportMethods("%in%",
"count",
"countDistinct",
"crc32",
+ "create_array",
+ "create_map",
"hash",
"cume_dist",
"date_add",
diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index c311921fb3..f854df11e5 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -3652,3 +3652,56 @@ setMethod("posexplode",
jc <- callJStatic("org.apache.spark.sql.functions", "posexplode", x@jc)
column(jc)
})
+
+#' create_array
+#'
+#' Creates a new array column. The input columns must all have the same data type.
+#'
+#' @param x Column to compute on.
+#' @param ... additional Column(s). Every argument must be a Column; any other
+#'            value raises an error before \code{array} is invoked.
+#'
+#' @family normal_funcs
+#' @rdname create_array
+#' @name create_array
+#' @aliases create_array,Column-method
+#' @export
+#' @examples \dontrun{create_array(df$x, df$y, df$z)}
+#' @note create_array since 2.3.0
+setMethod("create_array",
+          signature(x = "Column"),
+          function(x, ...) {
+            jcols <- lapply(list(x, ...), function(arg) {
+              # inherits() also accepts subclasses of Column, which an exact
+              # class(arg) == "Column" comparison would reject.
+              stopifnot(inherits(arg, "Column"))
+              arg@jc
+            })
+            jc <- callJStatic("org.apache.spark.sql.functions", "array", jcols)
+            column(jc)
+          })
+
+#' create_map
+#'
+#' Creates a new map column. The input columns must be grouped as key-value pairs,
+#' e.g. (key1, value1, key2, value2, ...).
+#' The key columns must all have the same data type, and can't be null.
+#' The value columns must all have the same data type.
+#'
+#' @param x Column to compute on; the first key of the (key, value) sequence.
+#' @param ... additional Column(s), continuing the key-value sequence started by
+#'            \code{x}. Every argument must be a Column; any other value raises
+#'            an error before \code{map} is invoked.
+#'
+#' @family normal_funcs
+#' @rdname create_map
+#' @name create_map
+#' @aliases create_map,Column-method
+#' @export
+#' @examples \dontrun{create_map(lit("x"), lit(1.0), lit("y"), lit(-1.0))}
+#' @note create_map since 2.3.0
+setMethod("create_map",
+          signature(x = "Column"),
+          function(x, ...) {
+            jcols <- lapply(list(x, ...), function(arg) {
+              # inherits() also accepts subclasses of Column, which an exact
+              # class(arg) == "Column" comparison would reject.
+              stopifnot(inherits(arg, "Column"))
+              arg@jc
+            })
+            jc <- callJStatic("org.apache.spark.sql.functions", "map", jcols)
+            column(jc)
+          })
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index 945676c7f1..da46823f52 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -942,6 +942,14 @@ setGeneric("countDistinct", function(x, ...) { standardGeneric("countDistinct")
#' @export
setGeneric("crc32", function(x) { standardGeneric("crc32") })
+# S4 generic for create_array(); methods receive x plus any further arguments via `...`.
+#' @rdname create_array
+#' @export
+setGeneric("create_array", function(x, ...) { standardGeneric("create_array") })
+
+# S4 generic for create_map(); methods receive x plus any further arguments via `...`.
+#' @rdname create_map
+#' @export
+setGeneric("create_map", function(x, ...) { standardGeneric("create_map") })
+
#' @rdname hash
#' @export
setGeneric("hash", function(x, ...) { standardGeneric("hash") })
diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R
index 6a6c9a809a..9e87a47106 100644
--- a/R/pkg/inst/tests/testthat/test_sparkSQL.R
+++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R
@@ -1461,6 +1461,23 @@ test_that("column functions", {
expect_equal(length(arr$arrcol[[1]]), 2)
expect_equal(arr$arrcol[[1]][[1]]$name, "Bob")
expect_equal(arr$arrcol[[1]][[2]]$name, "Alice")
+
+ # Test create_array() and create_map()
+ df <- as.DataFrame(data.frame(
+ x = c(1.0, 2.0), y = c(-1.0, 3.0), z = c(-2.0, 5.0)
+ ))
+
+ arrs <- collect(select(df, create_array(df$x, df$y, df$z)))
+ expect_equal(arrs[, 1], list(list(1, -1, -2), list(2, 3, 5)))
+
+ maps <- collect(select(
+ df, create_map(lit("x"), df$x, lit("y"), df$y, lit("z"), df$z)))
+
+ expect_equal(
+ maps[, 1],
+ lapply(
+ list(list(x = 1, y = -1, z = -2), list(x = 2, y = 3, z = 5)),
+ as.environment))
})
test_that("column binary mathfunctions", {