diff options
author | wm624@hotmail.com <wm624@hotmail.com> | 2016-06-07 09:13:18 -0700 |
---|---|---|
committer | Shivaram Venkataraman <shivaram@cs.berkeley.edu> | 2016-06-07 09:13:18 -0700 |
commit | 3ec4461c46e2959f4c640df0292cfcacfe0f727f (patch) | |
tree | be48510afb0ff0254c164a3a7c1e87bd5d0293bc /R | |
parent | 1e2c9311871968426e019164b129652fd6d0037f (diff) | |
download | spark-3ec4461c46e2959f4c640df0292cfcacfe0f727f.tar.gz spark-3ec4461c46e2959f4c640df0292cfcacfe0f727f.tar.bz2 spark-3ec4461c46e2959f4c640df0292cfcacfe0f727f.zip |
[SPARK-15684][SPARKR] Not mask startsWith and endsWith in R
## What changes were proposed in this pull request?
In R 3.3.0, startsWith and endsWith are added. In this PR, I make the two work in SparkR.
1. Remove signature in generic.R
2. Add setMethod in column.R
3. Add unit tests
## How was this patch tested?
Manually test it through SparkR shell for both column data and string data, which are added into the unit test file.
Author: wm624@hotmail.com <wm624@hotmail.com>
Closes #13476 from wangmiao1981/start.
Diffstat (limited to 'R')
-rw-r--r-- | R/pkg/R/column.R | 36 | ||||
-rw-r--r-- | R/pkg/R/generics.R | 4 | ||||
-rw-r--r-- | R/pkg/inst/tests/testthat/test_sparkSQL.R | 7 |
3 files changed, 44 insertions, 3 deletions
diff --git a/R/pkg/R/column.R b/R/pkg/R/column.R index a3e09372bb..873e8b1665 100644 --- a/R/pkg/R/column.R +++ b/R/pkg/R/column.R @@ -57,7 +57,7 @@ operators <- list( "^" = "pow" ) column_functions1 <- c("asc", "desc", "isNaN", "isNull", "isNotNull") -column_functions2 <- c("like", "rlike", "startsWith", "endsWith", "getField", "getItem", "contains") +column_functions2 <- c("like", "rlike", "getField", "getItem", "contains") createOperator <- function(op) { setMethod(op, @@ -151,6 +151,40 @@ setMethod("substr", signature(x = "Column"), column(jc) }) +#' startsWith +#' +#' Determines if entries of x start with string (entries of) prefix respectively, +#' where strings are recycled to common lengths. +#' +#' @rdname startsWith +#' @name startsWith +#' @family colum_func +#' +#' @param x vector of character string whose “starts” are considered +#' @param prefix character vector (often of length one) +setMethod("startsWith", signature(x = "Column"), + function(x, prefix) { + jc <- callJMethod(x@jc, "startsWith", as.vector(prefix)) + column(jc) + }) + +#' endsWith +#' +#' Determines if entries of x end with string (entries of) suffix respectively, +#' where strings are recycled to common lengths. +#' +#' @rdname endsWith +#' @name endsWith +#' @family colum_func +#' +#' @param x vector of character string whose “ends” are considered +#' @param suffix character vector (often of length one) +setMethod("endsWith", signature(x = "Column"), + function(x, suffix) { + jc <- callJMethod(x@jc, "endsWith", as.vector(suffix)) + column(jc) + }) + #' between #' #' Test if the column is between the lower bound and upper bound, inclusive. diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index ed76ad6b73..f0cde56b13 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -695,7 +695,7 @@ setGeneric("desc", function(x) { standardGeneric("desc") }) #' @rdname column #' @export -setGeneric("endsWith", function(x, ...) { standardGeneric("endsWith") }) +setGeneric("endsWith", function(x, suffix) { standardGeneric("endsWith") }) #' @rdname column #' @export @@ -727,7 +727,7 @@ setGeneric("rlike", function(x, ...) { standardGeneric("rlike") }) #' @rdname column #' @export -setGeneric("startsWith", function(x, ...) { standardGeneric("startsWith") }) +setGeneric("startsWith", function(x, prefix) { standardGeneric("startsWith") }) #' @rdname column #' @export diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R index 94fa363d7e..375cb6f588 100644 --- a/R/pkg/inst/tests/testthat/test_sparkSQL.R +++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R @@ -1136,7 +1136,14 @@ test_that("string operators", { df <- read.json(jsonPath) expect_equal(count(where(df, like(df$name, "A%"))), 1) expect_equal(count(where(df, startsWith(df$name, "A"))), 1) + expect_true(first(select(df, startsWith(df$name, "M")))[[1]]) + expect_false(first(select(df, startsWith(df$name, "m")))[[1]]) + expect_true(first(select(df, endsWith(df$name, "el")))[[1]]) expect_equal(first(select(df, substr(df$name, 1, 2)))[[1]], "Mi") + if (as.numeric(R.version$major) >= 3 && as.numeric(R.version$minor) >= 3) { + expect_true(startsWith("Hello World", "Hello")) + expect_false(endsWith("Hello World", "a")) + } expect_equal(collect(select(df, cast(df$age, "string")))[[2, 1]], "30") expect_equal(collect(select(df, concat(df$name, lit(":"), df$age)))[[2, 1]], "Andy:30") expect_equal(collect(select(df, concat_ws(":", df$name)))[[2, 1]], "Andy") |