diff options
author | Yanbo Liang <ybliang8@gmail.com> | 2015-12-05 22:51:05 -0800 |
---|---|---|
committer | Shivaram Venkataraman <shivaram@cs.berkeley.edu> | 2015-12-05 22:51:05 -0800 |
commit | b6e8e63a0dbe471187a146c96fdaddc6b8a8e55e (patch) | |
tree | 6e456e71fff038d2db3491169747a4e5f9cb7539 /R/pkg | |
parent | 6979edf4e1a93caafa8d286692097dd377d7616d (diff) | |
download | spark-b6e8e63a0dbe471187a146c96fdaddc6b8a8e55e.tar.gz spark-b6e8e63a0dbe471187a146c96fdaddc6b8a8e55e.tar.bz2 spark-b6e8e63a0dbe471187a146c96fdaddc6b8a8e55e.zip |
[SPARK-12044][SPARKR] Fix usage of isnan, isNaN
1, Add ```isNaN``` to ```Column``` for SparkR. ```Column``` should has three related variable functions: ```isNaN, isNull, isNotNull```.
2, Replace ```DataFrame.isNaN``` with ```DataFrame.isnan``` at SparkR side. Because ```DataFrame.isNaN``` has been deprecated and will be removed at Spark 2.0.
<del>3, Add ```isnull``` to ```DataFrame``` for SparkR. ```DataFrame``` should has two related functions: ```isnan, isnull```.<del>
cc shivaram sun-rui felixcheung
Author: Yanbo Liang <ybliang8@gmail.com>
Closes #10037 from yanboliang/spark-12044.
Diffstat (limited to 'R/pkg')
-rw-r--r-- | R/pkg/R/column.R | 2 | ||||
-rw-r--r-- | R/pkg/R/functions.R | 26 | ||||
-rw-r--r-- | R/pkg/R/generics.R | 8 | ||||
-rw-r--r-- | R/pkg/inst/tests/test_sparkSQL.R | 6 |
4 files changed, 31 insertions, 11 deletions
diff --git a/R/pkg/R/column.R b/R/pkg/R/column.R index 20de3907b7..7bb8ef2595 100644 --- a/R/pkg/R/column.R +++ b/R/pkg/R/column.R @@ -56,7 +56,7 @@ operators <- list( "&" = "and", "|" = "or", #, "!" = "unary_$bang" "^" = "pow" ) -column_functions1 <- c("asc", "desc", "isNull", "isNotNull") +column_functions1 <- c("asc", "desc", "isNaN", "isNull", "isNotNull") column_functions2 <- c("like", "rlike", "startsWith", "endsWith", "getField", "getItem", "contains") createOperator <- function(op) { diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index 25231451df..09e4e04335 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -537,19 +537,31 @@ setMethod("initcap", column(jc) }) -#' isNaN +#' is.nan #' -#' Return true iff the column is NaN. +#' Return true if the column is NaN, alias for \link{isnan} #' -#' @rdname isNaN -#' @name isNaN +#' @rdname is.nan +#' @name is.nan #' @family normal_funcs #' @export -#' @examples \dontrun{isNaN(df$c)} -setMethod("isNaN", +#' @examples +#' \dontrun{ +#' is.nan(df$c) +#' isnan(df$c) +#' } +setMethod("is.nan", + signature(x = "Column"), + function(x) { + isnan(x) + }) + +#' @rdname is.nan +#' @name isnan +setMethod("isnan", signature(x = "Column"), function(x) { - jc <- callJStatic("org.apache.spark.sql.functions", "isNaN", x@jc) + jc <- callJStatic("org.apache.spark.sql.functions", "isnan", x@jc) column(jc) }) diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index 29dd11f41f..c383e6e78b 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -627,6 +627,10 @@ setGeneric("getItem", function(x, ...) { standardGeneric("getItem") }) #' @rdname column #' @export +setGeneric("isNaN", function(x) { standardGeneric("isNaN") }) + +#' @rdname column +#' @export setGeneric("isNull", function(x) { standardGeneric("isNull") }) #' @rdname column @@ -808,9 +812,9 @@ setGeneric("initcap", function(x) { standardGeneric("initcap") }) #' @export setGeneric("instr", function(y, x) { standardGeneric("instr") }) -#' @rdname isNaN +#' @rdname is.nan #' @export -setGeneric("isNaN", function(x) { standardGeneric("isNaN") }) +setGeneric("isnan", function(x) { standardGeneric("isnan") }) #' @rdname kurtosis #' @export diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R index a5a234a02d..6ef03ae976 100644 --- a/R/pkg/inst/tests/test_sparkSQL.R +++ b/R/pkg/inst/tests/test_sparkSQL.R @@ -883,7 +883,7 @@ test_that("column functions", { c2 <- avg(c) + base64(c) + bin(c) + bitwiseNOT(c) + cbrt(c) + ceil(c) + cos(c) c3 <- cosh(c) + count(c) + crc32(c) + exp(c) c4 <- explode(c) + expm1(c) + factorial(c) + first(c) + floor(c) + hex(c) - c5 <- hour(c) + initcap(c) + isNaN(c) + last(c) + last_day(c) + length(c) + c5 <- hour(c) + initcap(c) + last(c) + last_day(c) + length(c) c6 <- log(c) + (c) + log1p(c) + log2(c) + lower(c) + ltrim(c) + max(c) + md5(c) c7 <- mean(c) + min(c) + month(c) + negate(c) + quarter(c) c8 <- reverse(c) + rint(c) + round(c) + rtrim(c) + sha1(c) @@ -894,6 +894,10 @@ test_that("column functions", { c13 <- lead("col", 1) + lead(c, 1) + lag("col", 1) + lag(c, 1) c14 <- cume_dist() + ntile(1) + corr(c, c1) c15 <- dense_rank() + percent_rank() + rank() + row_number() + c16 <- is.nan(c) + isnan(c) + isNaN(c) + + # Test if base::is.nan() is exposed + expect_equal(is.nan(c("a", "b")), c(FALSE, FALSE)) # Test if base::rank() is exposed expect_equal(class(rank())[[1]], "Column") |