aboutsummaryrefslogtreecommitdiff
path: root/R
diff options
context:
space:
mode:
authorYanbo Liang <ybliang8@gmail.com>2016-01-09 12:29:51 +0530
committerShivaram Venkataraman <shivaram@cs.berkeley.edu>2016-01-09 12:29:51 +0530
commit3d77cffec093bed4d330969f1a996f3358b9a772 (patch)
tree9962e9e4e985548c53be4c31a4b9759508a46657 /R
parent95cd5d95ce8aec8b2462204c791ba927326305ba (diff)
downloadspark-3d77cffec093bed4d330969f1a996f3358b9a772.tar.gz
spark-3d77cffec093bed4d330969f1a996f3358b9a772.tar.bz2
spark-3d77cffec093bed4d330969f1a996f3358b9a772.zip
[SPARK-12645][SPARKR] SparkR support hash function
Add ```hash``` function for SparkR ```DataFrame```. Author: Yanbo Liang <ybliang8@gmail.com> Closes #10597 from yanboliang/spark-12645.
Diffstat (limited to 'R')
-rw-r--r--R/pkg/NAMESPACE1
-rw-r--r--R/pkg/R/functions.R20
-rw-r--r--R/pkg/R/generics.R4
-rw-r--r--R/pkg/inst/tests/testthat/test_sparkSQL.R2
4 files changed, 26 insertions, 1 deletions
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index beacc39500..34be7f0ebd 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -130,6 +130,7 @@ exportMethods("%in%",
"count",
"countDistinct",
"crc32",
+ "hash",
"cume_dist",
"date_add",
"date_format",
diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index df36bc869a..9bb7876b38 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -340,6 +340,26 @@ setMethod("crc32",
column(jc)
})
+#' hash
+#'
+#' Calculates the hash code of given columns, and returns the result as a int column.
+#'
+#' @rdname hash
+#' @name hash
+#' @family misc_funcs
+#' @export
+#' @examples \dontrun{hash(df$c)}
+setMethod("hash",
+ signature(x = "Column"),
+ function(x, ...) {
+ jcols <- lapply(list(x, ...), function (x) {
+ stopifnot(class(x) == "Column")
+ x@jc
+ })
+ jc <- callJStatic("org.apache.spark.sql.functions", "hash", jcols)
+ column(jc)
+ })
+
#' dayofmonth
#'
#' Extracts the day of the month as an integer from a given date/timestamp/string.
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index ba68617097..5ba68e3a4f 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -736,6 +736,10 @@ setGeneric("countDistinct", function(x, ...) { standardGeneric("countDistinct")
#' @export
setGeneric("crc32", function(x) { standardGeneric("crc32") })
+#' @rdname hash
+#' @export
+setGeneric("hash", function(x, ...) { standardGeneric("hash") })
+
#' @rdname cume_dist
#' @export
setGeneric("cume_dist", function(x) { standardGeneric("cume_dist") })
diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R
index eaf60beda3..97625b94a0 100644
--- a/R/pkg/inst/tests/testthat/test_sparkSQL.R
+++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R
@@ -922,7 +922,7 @@ test_that("column functions", {
c <- column("a")
c1 <- abs(c) + acos(c) + approxCountDistinct(c) + ascii(c) + asin(c) + atan(c)
c2 <- avg(c) + base64(c) + bin(c) + bitwiseNOT(c) + cbrt(c) + ceil(c) + cos(c)
- c3 <- cosh(c) + count(c) + crc32(c) + exp(c)
+ c3 <- cosh(c) + count(c) + crc32(c) + hash(c) + exp(c)
c4 <- explode(c) + expm1(c) + factorial(c) + first(c) + floor(c) + hex(c)
c5 <- hour(c) + initcap(c) + last(c) + last_day(c) + length(c)
c6 <- log(c) + (c) + log1p(c) + log2(c) + lower(c) + ltrim(c) + max(c) + md5(c)