From 3ec4461c46e2959f4c640df0292cfcacfe0f727f Mon Sep 17 00:00:00 2001 From: "wm624@hotmail.com" Date: Tue, 7 Jun 2016 09:13:18 -0700 Subject: [SPARK-15684][SPARKR] Not mask startsWith and endsWith in R ## What changes were proposed in this pull request? R 3.3.0 added startsWith and endsWith to base R. This PR makes both work in SparkR on Column arguments without masking the base R versions. 1. Change the generic signatures in generics.R from (x, ...) to (x, prefix) / (x, suffix) 2. Add explicit setMethod definitions for Column in column.R 3. Add unit tests ## How was this patch tested? Tested manually in the SparkR shell with both Column data and plain string data; the same cases are added to the unit test file. Author: wm624@hotmail.com Closes #13476 from wangmiao1981/start. --- R/pkg/R/column.R | 36 ++++++++++++++++++++++++++++++- R/pkg/R/generics.R | 4 ++-- R/pkg/inst/tests/testthat/test_sparkSQL.R | 7 ++++++ 3 files changed, 44 insertions(+), 3 deletions(-) diff --git a/R/pkg/R/column.R b/R/pkg/R/column.R index a3e09372bb..873e8b1665 100644 --- a/R/pkg/R/column.R +++ b/R/pkg/R/column.R @@ -57,7 +57,7 @@ operators <- list( "^" = "pow" ) column_functions1 <- c("asc", "desc", "isNaN", "isNull", "isNotNull") -column_functions2 <- c("like", "rlike", "startsWith", "endsWith", "getField", "getItem", "contains") +column_functions2 <- c("like", "rlike", "getField", "getItem", "contains") createOperator <- function(op) { setMethod(op, @@ -151,6 +151,40 @@ setMethod("substr", signature(x = "Column"), column(jc) }) +#' startsWith +#' +#' Determines if entries of x start with string (entries of) prefix respectively, +#' where strings are recycled to common lengths. 
+#' +#' @rdname startsWith +#' @name startsWith +#' @family colum_func +#' +#' @param x vector of character string whose “starts” are considered +#' @param prefix character vector (often of length one) +setMethod("startsWith", signature(x = "Column"), + function(x, prefix) { + jc <- callJMethod(x@jc, "startsWith", as.vector(prefix)) + column(jc) + }) + +#' endsWith +#' +#' Determines if entries of x end with string (entries of) suffix respectively, +#' where strings are recycled to common lengths. +#' +#' @rdname endsWith +#' @name endsWith +#' @family colum_func +#' +#' @param x vector of character string whose “ends” are considered +#' @param suffix character vector (often of length one) +setMethod("endsWith", signature(x = "Column"), + function(x, suffix) { + jc <- callJMethod(x@jc, "endsWith", as.vector(suffix)) + column(jc) + }) + #' between #' #' Test if the column is between the lower bound and upper bound, inclusive. diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index ed76ad6b73..f0cde56b13 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -695,7 +695,7 @@ setGeneric("desc", function(x) { standardGeneric("desc") }) #' @rdname column #' @export -setGeneric("endsWith", function(x, ...) { standardGeneric("endsWith") }) +setGeneric("endsWith", function(x, suffix) { standardGeneric("endsWith") }) #' @rdname column #' @export @@ -727,7 +727,7 @@ setGeneric("rlike", function(x, ...) { standardGeneric("rlike") }) #' @rdname column #' @export -setGeneric("startsWith", function(x, ...) 
{ standardGeneric("startsWith") }) +setGeneric("startsWith", function(x, prefix) { standardGeneric("startsWith") }) #' @rdname column #' @export diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R index 94fa363d7e..375cb6f588 100644 --- a/R/pkg/inst/tests/testthat/test_sparkSQL.R +++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R @@ -1136,7 +1136,14 @@ test_that("string operators", { df <- read.json(jsonPath) expect_equal(count(where(df, like(df$name, "A%"))), 1) expect_equal(count(where(df, startsWith(df$name, "A"))), 1) + expect_true(first(select(df, startsWith(df$name, "M")))[[1]]) + expect_false(first(select(df, startsWith(df$name, "m")))[[1]]) + expect_true(first(select(df, endsWith(df$name, "el")))[[1]]) expect_equal(first(select(df, substr(df$name, 1, 2)))[[1]], "Mi") + if (as.numeric(R.version$major) >= 3 && as.numeric(R.version$minor) >= 3) { + expect_true(startsWith("Hello World", "Hello")) + expect_false(endsWith("Hello World", "a")) + } expect_equal(collect(select(df, cast(df$age, "string")))[[2, 1]], "30") expect_equal(collect(select(df, concat(df$name, lit(":"), df$age)))[[2, 1]], "Andy:30") expect_equal(collect(select(df, concat_ws(":", df$name)))[[2, 1]], "Andy") -- cgit v1.2.3