aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorwm624@hotmail.com <wm624@hotmail.com>2016-06-07 09:13:18 -0700
committerShivaram Venkataraman <shivaram@cs.berkeley.edu>2016-06-07 09:13:18 -0700
commit3ec4461c46e2959f4c640df0292cfcacfe0f727f (patch)
treebe48510afb0ff0254c164a3a7c1e87bd5d0293bc
parent1e2c9311871968426e019164b129652fd6d0037f (diff)
downloadspark-3ec4461c46e2959f4c640df0292cfcacfe0f727f.tar.gz
spark-3ec4461c46e2959f4c640df0292cfcacfe0f727f.tar.bz2
spark-3ec4461c46e2959f4c640df0292cfcacfe0f727f.zip
[SPARK-15684][SPARKR] Not mask startsWith and endsWith in R
## What changes were proposed in this pull request? In R 3.3.0, startsWith and endsWith are added. In this PR, I make the two work in SparkR. 1. Remove signature in generic.R 2. Add setMethod in column.R 3. Add unit tests ## How was this patch tested? Manually test it through SparkR shell for both column data and string data, which are added into the unit test file. Author: wm624@hotmail.com <wm624@hotmail.com> Closes #13476 from wangmiao1981/start.
-rw-r--r--R/pkg/R/column.R36
-rw-r--r--R/pkg/R/generics.R4
-rw-r--r--R/pkg/inst/tests/testthat/test_sparkSQL.R7
3 files changed, 44 insertions, 3 deletions
diff --git a/R/pkg/R/column.R b/R/pkg/R/column.R
index a3e09372bb..873e8b1665 100644
--- a/R/pkg/R/column.R
+++ b/R/pkg/R/column.R
@@ -57,7 +57,7 @@ operators <- list(
"^" = "pow"
)
column_functions1 <- c("asc", "desc", "isNaN", "isNull", "isNotNull")
-column_functions2 <- c("like", "rlike", "startsWith", "endsWith", "getField", "getItem", "contains")
+column_functions2 <- c("like", "rlike", "getField", "getItem", "contains")
createOperator <- function(op) {
setMethod(op,
@@ -151,6 +151,40 @@ setMethod("substr", signature(x = "Column"),
column(jc)
})
+#' startsWith
+#'
+#' Determines if entries of x start with string (entries of) prefix respectively,
+#' where strings are recycled to common lengths.
+#'
+#' @rdname startsWith
+#' @name startsWith
+#' @family colum_func
+#'
+#' @param x vector of character string whose “starts” are considered
+#' @param prefix character vector (often of length one)
+setMethod("startsWith", signature(x = "Column"),
+ function(x, prefix) {
+ jc <- callJMethod(x@jc, "startsWith", as.vector(prefix))
+ column(jc)
+ })
+
+#' endsWith
+#'
+#' Determines if entries of x end with string (entries of) suffix respectively,
+#' where strings are recycled to common lengths.
+#'
+#' @rdname endsWith
+#' @name endsWith
+#' @family colum_func
+#'
+#' @param x vector of character string whose “ends” are considered
+#' @param suffix character vector (often of length one)
+setMethod("endsWith", signature(x = "Column"),
+ function(x, suffix) {
+ jc <- callJMethod(x@jc, "endsWith", as.vector(suffix))
+ column(jc)
+ })
+
#' between
#'
#' Test if the column is between the lower bound and upper bound, inclusive.
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index ed76ad6b73..f0cde56b13 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -695,7 +695,7 @@ setGeneric("desc", function(x) { standardGeneric("desc") })
#' @rdname column
#' @export
-setGeneric("endsWith", function(x, ...) { standardGeneric("endsWith") })
+setGeneric("endsWith", function(x, suffix) { standardGeneric("endsWith") })
#' @rdname column
#' @export
@@ -727,7 +727,7 @@ setGeneric("rlike", function(x, ...) { standardGeneric("rlike") })
#' @rdname column
#' @export
-setGeneric("startsWith", function(x, ...) { standardGeneric("startsWith") })
+setGeneric("startsWith", function(x, prefix) { standardGeneric("startsWith") })
#' @rdname column
#' @export
diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R
index 94fa363d7e..375cb6f588 100644
--- a/R/pkg/inst/tests/testthat/test_sparkSQL.R
+++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R
@@ -1136,7 +1136,14 @@ test_that("string operators", {
df <- read.json(jsonPath)
expect_equal(count(where(df, like(df$name, "A%"))), 1)
expect_equal(count(where(df, startsWith(df$name, "A"))), 1)
+ expect_true(first(select(df, startsWith(df$name, "M")))[[1]])
+ expect_false(first(select(df, startsWith(df$name, "m")))[[1]])
+ expect_true(first(select(df, endsWith(df$name, "el")))[[1]])
expect_equal(first(select(df, substr(df$name, 1, 2)))[[1]], "Mi")
+ if (as.numeric(R.version$major) >= 3 && as.numeric(R.version$minor) >= 3) {
+ expect_true(startsWith("Hello World", "Hello"))
+ expect_false(endsWith("Hello World", "a"))
+ }
expect_equal(collect(select(df, cast(df$age, "string")))[[2, 1]], "30")
expect_equal(collect(select(df, concat(df$name, lit(":"), df$age)))[[2, 1]], "Andy:30")
expect_equal(collect(select(df, concat_ws(":", df$name)))[[2, 1]], "Andy")