author     Sun Rui <rui.sun@intel.com>                        2015-11-18 08:41:45 -0800
committer  Shivaram Venkataraman <shivaram@cs.berkeley.edu>   2015-11-18 08:41:45 -0800
commit     224723e6a8b198ef45d6c5ca5d2f9c61188ada8f (patch)
tree       d5aa29ba83426848d73d9c0e68ef7762c2c252fa /R/pkg
parent     a97d6f3a5861e9f2bbe36957e3b39f835f3e214c (diff)
[SPARK-11773][SPARKR] Implement collection functions in SparkR.
Author: Sun Rui <rui.sun@intel.com>

Closes #9764 from sun-rui/SPARK-11773.
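A minimal usage sketch of the functions this patch adds or relocates, assuming a SparkR session with a SQLContext named sqlContext in scope (as in the package tests); the sample data and the expected results in the comments are illustrative only and not part of the patch:

    df <- createDataFrame(sqlContext, list(list(list(3L, 1L, 2L))))
    # array_contains: does the array column hold the value 1?
    collect(select(df, array_contains(df[[1]], 1L)))    # expected: TRUE
    # sort_array: ascending by default, descending with asc = FALSE
    collect(select(df, sort_array(df[[1]])))            # expected: list(1, 2, 3)
    collect(select(df, sort_array(df[[1]], FALSE)))     # expected: list(3, 2, 1)
    # size and explode are unchanged by this patch, only moved into the collection-functions section
    collect(select(df, size(df[[1]])))                  # expected: 3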
Diffstat (limited to 'R/pkg')
-rw-r--r--  R/pkg/NAMESPACE                     2
-rw-r--r--  R/pkg/R/DataFrame.R                 2
-rw-r--r--  R/pkg/R/functions.R               109
-rw-r--r--  R/pkg/R/generics.R                 10
-rw-r--r--  R/pkg/R/utils.R                     2
-rw-r--r--  R/pkg/inst/tests/test_sparkSQL.R   10
6 files changed, 100 insertions, 35 deletions
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index 2ee7d6f94f..260c9edce6 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -98,6 +98,7 @@ exportMethods("%in%",
"add_months",
"alias",
"approxCountDistinct",
+ "array_contains",
"asc",
"ascii",
"asin",
@@ -215,6 +216,7 @@ exportMethods("%in%",
"sinh",
"size",
"skewness",
+ "sort_array",
"soundex",
"stddev",
"stddev_pop",
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index fd105ba5bc..34177e3cdd 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -2198,4 +2198,4 @@ setMethod("coltypes",
rTypes[naIndices] <- types[naIndices]
rTypes
- })
\ No newline at end of file
+ })
diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index 3d0255a62f..ff0f438045 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -373,22 +373,6 @@ setMethod("exp",
column(jc)
})
-#' explode
-#'
-#' Creates a new row for each element in the given array or map column.
-#'
-#' @rdname explode
-#' @name explode
-#' @family collection_funcs
-#' @export
-#' @examples \dontrun{explode(df$c)}
-setMethod("explode",
- signature(x = "Column"),
- function(x) {
- jc <- callJStatic("org.apache.spark.sql.functions", "explode", x@jc)
- column(jc)
- })
-
#' expm1
#'
#' Computes the exponential of the given value minus one.
@@ -980,22 +964,6 @@ setMethod("sinh",
column(jc)
})
-#' size
-#'
-#' Returns length of array or map.
-#'
-#' @rdname size
-#' @name size
-#' @family collection_funcs
-#' @export
-#' @examples \dontrun{size(df$c)}
-setMethod("size",
- signature(x = "Column"),
- function(x) {
- jc <- callJStatic("org.apache.spark.sql.functions", "size", x@jc)
- column(jc)
- })
-
#' skewness
#'
#' Aggregate function: returns the skewness of the values in a group.
@@ -2365,3 +2333,80 @@ setMethod("rowNumber",
jc <- callJStatic("org.apache.spark.sql.functions", "rowNumber")
column(jc)
})
+
+###################### Collection functions ######################
+
+#' array_contains
+#'
+#' Returns true if the array contains the value.
+#'
+#' @param x A Column
+#' @param value A value to be checked if contained in the column
+#' @rdname array_contains
+#' @name array_contains
+#' @family collection_funcs
+#' @export
+#' @examples \dontrun{array_contains(df$c, 1)}
+setMethod("array_contains",
+ signature(x = "Column", value = "ANY"),
+ function(x, value) {
+ jc <- callJStatic("org.apache.spark.sql.functions", "array_contains", x@jc, value)
+ column(jc)
+ })
+
+#' explode
+#'
+#' Creates a new row for each element in the given array or map column.
+#'
+#' @rdname explode
+#' @name explode
+#' @family collection_funcs
+#' @export
+#' @examples \dontrun{explode(df$c)}
+setMethod("explode",
+ signature(x = "Column"),
+ function(x) {
+ jc <- callJStatic("org.apache.spark.sql.functions", "explode", x@jc)
+ column(jc)
+ })
+
+#' size
+#'
+#' Returns length of array or map.
+#'
+#' @rdname size
+#' @name size
+#' @family collection_funcs
+#' @export
+#' @examples \dontrun{size(df$c)}
+setMethod("size",
+ signature(x = "Column"),
+ function(x) {
+ jc <- callJStatic("org.apache.spark.sql.functions", "size", x@jc)
+ column(jc)
+ })
+
+#' sort_array
+#'
+#' Sorts the input array for the given column in ascending or descending order,
+#' according to the natural ordering of the array elements.
+#'
+#' @param x A Column to sort
+#' @param asc A logical flag indicating the sorting order:
+#' TRUE (the default) sorts in ascending order,
+#' FALSE sorts in descending order.
+#' @rdname sort_array
+#' @name sort_array
+#' @family collection_funcs
+#' @export
+#' @examples
+#' \dontrun{
+#' sort_array(df$c)
+#' sort_array(df$c, FALSE)
+#' }
+setMethod("sort_array",
+ signature(x = "Column"),
+ function(x, asc = TRUE) {
+ jc <- callJStatic("org.apache.spark.sql.functions", "sort_array", x@jc, asc)
+ column(jc)
+ })
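All four wrappers above follow the delegation pattern used throughout functions.R: the R method calls the JVM-side static function of the same name on org.apache.spark.sql.functions through callJStatic, passing the column's Java reference (x@jc) plus any extra arguments, and wraps the returned Java Column reference with column(). A minimal sketch of that pattern, using a hypothetical function name wrapped_fn purely for illustration:

    setMethod("wrapped_fn",                # hypothetical name, not part of the patch
              signature(x = "Column"),
              function(x) {
                # delegate to the JVM static function of the same name on the Java column reference
                jc <- callJStatic("org.apache.spark.sql.functions", "wrapped_fn", x@jc)
                column(jc)                 # wrap the returned Java reference as an R Column
              })

Note that sort_array dispatches only on x; asc is an ordinary default argument (asc = TRUE), which is why sort_array(df$c) and sort_array(df$c, FALSE) reach the same method.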
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index afdeffc2ab..0dcd054382 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -644,6 +644,10 @@ setGeneric("add_months", function(y, x) { standardGeneric("add_months") })
#' @export
setGeneric("approxCountDistinct", function(x, ...) { standardGeneric("approxCountDistinct") })
+#' @rdname array_contains
+#' @export
+setGeneric("array_contains", function(x, value) { standardGeneric("array_contains") })
+
#' @rdname ascii
#' @export
setGeneric("ascii", function(x) { standardGeneric("ascii") })
@@ -961,6 +965,10 @@ setGeneric("size", function(x) { standardGeneric("size") })
#' @export
setGeneric("skewness", function(x) { standardGeneric("skewness") })
+#' @rdname sort_array
+#' @export
+setGeneric("sort_array", function(x, asc = TRUE) { standardGeneric("sort_array") })
+
#' @rdname soundex
#' @export
setGeneric("soundex", function(x) { standardGeneric("soundex") })
@@ -1076,4 +1084,4 @@ setGeneric("with")
#' @rdname coltypes
#' @export
-setGeneric("coltypes", function(x) { standardGeneric("coltypes") }) \ No newline at end of file
+setGeneric("coltypes", function(x) { standardGeneric("coltypes") })
diff --git a/R/pkg/R/utils.R b/R/pkg/R/utils.R
index db3b2c4bbd..45c77a86c9 100644
--- a/R/pkg/R/utils.R
+++ b/R/pkg/R/utils.R
@@ -635,4 +635,4 @@ assignNewEnv <- function(data) {
assign(x = cols[i], value = data[, cols[i]], envir = env)
}
env
-}
\ No newline at end of file
+}
diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R
index 87ab33f638..d9a94faff7 100644
--- a/R/pkg/inst/tests/test_sparkSQL.R
+++ b/R/pkg/inst/tests/test_sparkSQL.R
@@ -878,6 +878,16 @@ test_that("column functions", {
df4 <- createDataFrame(sqlContext, list(list(a = "010101")))
expect_equal(collect(select(df4, conv(df4$a, 2, 16)))[1, 1], "15")
+
+ # Test array_contains() and sort_array()
+ df <- createDataFrame(sqlContext, list(list(list(1L, 2L, 3L)), list(list(6L, 5L, 4L))))
+ result <- collect(select(df, array_contains(df[[1]], 1L)))[[1]]
+ expect_equal(result, c(TRUE, FALSE))
+
+ result <- collect(select(df, sort_array(df[[1]], FALSE)))[[1]]
+ expect_equal(result, list(list(3L, 2L, 1L), list(6L, 5L, 4L)))
+ result <- collect(select(df, sort_array(df[[1]])))[[1]]
+ expect_equal(result, list(list(1L, 2L, 3L), list(4L, 5L, 6L)))
})
#
test_that("column binary mathfunctions", {