author     Sun Rui <rui.sun@intel.com>                        2015-11-18 08:41:45 -0800
committer  Shivaram Venkataraman <shivaram@cs.berkeley.edu>   2015-11-18 08:41:45 -0800
commit     224723e6a8b198ef45d6c5ca5d2f9c61188ada8f (patch)
tree       d5aa29ba83426848d73d9c0e68ef7762c2c252fa /R/pkg
parent     a97d6f3a5861e9f2bbe36957e3b39f835f3e214c (diff)
[SPARK-11773][SPARKR] Implement collection functions in SparkR.
Author: Sun Rui <rui.sun@intel.com>

Closes #9764 from sun-rui/SPARK-11773.
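A minimal usage sketch of the functions this patch adds or relocates, assuming a SparkR session with a SQLContext named sqlContext in scope (as in the package tests); the sample data and the expected results in the comments are illustrative only and not part of the patch:

    df <- createDataFrame(sqlContext, list(list(list(3L, 1L, 2L))))
    # array_contains: does the array column hold the value 1?
    collect(select(df, array_contains(df[[1]], 1L)))    # expected: TRUE
    # sort_array: ascending by default, descending with asc = FALSE
    collect(select(df, sort_array(df[[1]])))            # expected: list(1, 2, 3)
    collect(select(df, sort_array(df[[1]], FALSE)))     # expected: list(3, 2, 1)
    # size and explode are unchanged by this patch, only moved into the collection-functions section
    collect(select(df, size(df[[1]])))                  # expected: 3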
Diffstat (limited to 'R/pkg')
-rw-r--r--  R/pkg/NAMESPACE                     2
-rw-r--r--  R/pkg/R/DataFrame.R                 2
-rw-r--r--  R/pkg/R/functions.R               109
-rw-r--r--  R/pkg/R/generics.R                 10
-rw-r--r--  R/pkg/R/utils.R                     2
-rw-r--r--  R/pkg/inst/tests/test_sparkSQL.R   10
6 files changed, 100 insertions, 35 deletions
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index 2ee7d6f94f..260c9edce6 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -98,6 +98,7 @@ exportMethods("%in%",
"add_months",
"alias",
"approxCountDistinct",
+ "array_contains",
"asc",
"ascii",
"asin",
@@ -215,6 +216,7 @@ exportMethods("%in%",
"sinh",
"size",
"skewness",
+ "sort_array",
"soundex",
"stddev",
"stddev_pop",
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index fd105ba5bc..34177e3cdd 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -2198,4 +2198,4 @@ setMethod("coltypes",
rTypes[naIndices] <- types[naIndices]
rTypes
- })
\ No newline at end of file
+ })
diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index 3d0255a62f..ff0f438045 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -373,22 +373,6 @@ setMethod("exp",
column(jc)
})
-#' explode
-#'
-#' Creates a new row for each element in the given array or map column.
-#'
-#' @rdname explode
-#' @name explode
-#' @family collection_funcs
-#' @export
-#' @examples \dontrun{explode(df$c)}
-setMethod("explode",
- signature(x = "Column"),
- function(x) {
- jc <- callJStatic("org.apache.spark.sql.functions", "explode", x@jc)
- column(jc)
- })
-
#' expm1
#'
#' Computes the exponential of the given value minus one.
@@ -980,22 +964,6 @@ setMethod("sinh",
column(jc)
})
-#' size
-#'
-#' Returns length of array or map.
-#'
-#' @rdname size
-#' @name size
-#' @family collection_funcs
-#' @export
-#' @examples \dontrun{size(df$c)}
-setMethod("size",
- signature(x = "Column"),
- function(x) {
- jc <- callJStatic("org.apache.spark.sql.functions", "size", x@jc)
- column(jc)
- })
-
#' skewness
#'
#' Aggregate function: returns the skewness of the values in a group.
@@ -2365,3 +2333,80 @@ setMethod("rowNumber",
jc <- callJStatic("org.apache.spark.sql.functions", "rowNumber")
column(jc)
})
+
+###################### Collection functions ######################
+
+#' array_contains
+#'
+#' Returns true if the array contains the value.
+#'
+#' @param x A Column
+#' @param value A value to be checked if contained in the column
+#' @rdname array_contains
+#' @name array_contains
+#' @family collection_funcs
+#' @export
+#' @examples \dontrun{array_contains(df$c, 1)}
+setMethod("array_contains",
+ signature(x = "Column", value = "ANY"),
+ function(x, value) {
+ jc <- callJStatic("org.apache.spark.sql.functions", "array_contains", x@jc, value)
+ column(jc)
+ })
+
+#' explode
+#'
+#' Creates a new row for each element in the given array or map column.
+#'
+#' @rdname explode
+#' @name explode
+#' @family collection_funcs
+#' @export
+#' @examples \dontrun{explode(df$c)}
+setMethod("explode",
+ signature(x = "Column"),
+ function(x) {
+ jc <- callJStatic("org.apache.spark.sql.functions", "explode", x@jc)
+ column(jc)
+ })
+
+#' size
+#'
+#' Returns length of array or map.
+#'
+#' @rdname size
+#' @name size
+#' @family collection_funcs
+#' @export
+#' @examples \dontrun{size(df$c)}
+setMethod("size",
+ signature(x = "Column"),
+ function(x) {
+ jc <- callJStatic("org.apache.spark.sql.functions", "size", x@jc)
+ column(jc)
+ })
+
+#' sort_array
+#'
+#' Sorts the input array for the given column in ascending or descending order,
+#' according to the natural ordering of the array elements.
+#'
+#' @param x A Column to sort
+#' @param asc A logical flag indicating the sorting order:
+#' TRUE (the default) sorts in ascending order,
+#' FALSE sorts in descending order.
+#' @rdname sort_array
+#' @name sort_array
+#' @family collection_funcs
+#' @export
+#' @examples
+#' \dontrun{
+#' sort_array(df$c)
+#' sort_array(df$c, FALSE)
+#' }
+setMethod("sort_array",
+ signature(x = "Column"),
+ function(x, asc = TRUE) {
+ jc <- callJStatic("org.apache.spark.sql.functions", "sort_array", x@jc, asc)
+ column(jc)
+ })
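All four wrappers above follow the delegation pattern used throughout functions.R: the R method calls the JVM-side static function of the same name on org.apache.spark.sql.functions through callJStatic, passing the column's Java reference (x@jc) plus any extra arguments, and wraps the returned Java Column reference with column(). A minimal sketch of that pattern, using a hypothetical function name wrapped_fn purely for illustration:

    setMethod("wrapped_fn",                # hypothetical name, not part of the patch
              signature(x = "Column"),
              function(x) {
                # delegate to the JVM static function of the same name on the Java column reference
                jc <- callJStatic("org.apache.spark.sql.functions", "wrapped_fn", x@jc)
                column(jc)                 # wrap the returned Java reference as an R Column
              })

Note that sort_array dispatches only on x; asc is an ordinary default argument (asc = TRUE), which is why sort_array(df$c) and sort_array(df$c, FALSE) reach the same method.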
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index afdeffc2ab..0dcd054382 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -644,6 +644,10 @@ setGeneric("add_months", function(y, x) { standardGeneric("add_months") })
#' @export
setGeneric("approxCountDistinct", function(x, ...) { standardGeneric("approxCountDistinct") })
+#' @rdname array_contains
+#' @export
+setGeneric("array_contains", function(x, value) { standardGeneric("array_contains") })
+
#' @rdname ascii
#' @export
setGeneric("ascii", function(x) { standardGeneric("ascii") })
@@ -961,6 +965,10 @@ setGeneric("size", function(x) { standardGeneric("size") })
#' @export
setGeneric("skewness", function(x) { standardGeneric("skewness") })
+#' @rdname sort_array
+#' @export
+setGeneric("sort_array", function(x, asc = TRUE) { standardGeneric("sort_array") })
+
#' @rdname soundex
#' @export
setGeneric("soundex", function(x) { standardGeneric("soundex") })
@@ -1076,4 +1084,4 @@ setGeneric("with")
#' @rdname coltypes
#' @export
-setGeneric("coltypes", function(x) { standardGeneric("coltypes") }) \ No newline at end of file
+setGeneric("coltypes", function(x) { standardGeneric("coltypes") })
diff --git a/R/pkg/R/utils.R b/R/pkg/R/utils.R
index db3b2c4bbd..45c77a86c9 100644
--- a/R/pkg/R/utils.R
+++ b/R/pkg/R/utils.R
@@ -635,4 +635,4 @@ assignNewEnv <- function(data) {
assign(x = cols[i], value = data[, cols[i]], envir = env)
}
env
-}
\ No newline at end of file
+}
diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R
index 87ab33f638..d9a94faff7 100644
--- a/R/pkg/inst/tests/test_sparkSQL.R
+++ b/R/pkg/inst/tests/test_sparkSQL.R
@@ -878,6 +878,16 @@ test_that("column functions", {
df4 <- createDataFrame(sqlContext, list(list(a = "010101")))
expect_equal(collect(select(df4, conv(df4$a, 2, 16)))[1, 1], "15")
+
+ # Test array_contains() and sort_array()
+ df <- createDataFrame(sqlContext, list(list(list(1L, 2L, 3L)), list(list(6L, 5L, 4L))))
+ result <- collect(select(df, array_contains(df[[1]], 1L)))[[1]]
+ expect_equal(result, c(TRUE, FALSE))
+
+ result <- collect(select(df, sort_array(df[[1]], FALSE)))[[1]]
+ expect_equal(result, list(list(3L, 2L, 1L), list(6L, 5L, 4L)))
+ result <- collect(select(df, sort_array(df[[1]])))[[1]]
+ expect_equal(result, list(list(1L, 2L, 3L), list(4L, 5L, 6L)))
})
#
test_that("column binary mathfunctions", {