From 3ec4461c46e2959f4c640df0292cfcacfe0f727f Mon Sep 17 00:00:00 2001
From: "wm624@hotmail.com" <wm624@hotmail.com>
Date: Tue, 7 Jun 2016 09:13:18 -0700
Subject: [SPARK-15684][SPARKR] Not mask startsWith and endsWith in R

## What changes were proposed in this pull request?

In R 3.3.0, startsWith and endsWith are added. In this PR, I make the two work in SparkR.
1. Remove signature in generic.R
2. Add setMethod in column.R
3. Add unit tests

## How was this patch tested?
Manually test it through SparkR shell for both column data and string data, which are added into the unit test file.

Author: wm624@hotmail.com <wm624@hotmail.com>

Closes #13476 from wangmiao1981/start.
---
 R/pkg/R/column.R                          | 36 ++++++++++++++++++++++++++++++-
 R/pkg/R/generics.R                        |  4 ++--
 R/pkg/inst/tests/testthat/test_sparkSQL.R |  7 ++++++
 3 files changed, 44 insertions(+), 3 deletions(-)

diff --git a/R/pkg/R/column.R b/R/pkg/R/column.R
index a3e09372bb..873e8b1665 100644
--- a/R/pkg/R/column.R
+++ b/R/pkg/R/column.R
@@ -57,7 +57,7 @@ operators <- list(
   "^" = "pow"
 )
 column_functions1 <- c("asc", "desc", "isNaN", "isNull", "isNotNull")
-column_functions2 <- c("like", "rlike", "startsWith", "endsWith", "getField", "getItem", "contains")
+column_functions2 <- c("like", "rlike", "getField", "getItem", "contains")
 
 createOperator <- function(op) {
   setMethod(op,
@@ -151,6 +151,40 @@ setMethod("substr", signature(x = "Column"),
             column(jc)
           })
 
+#' startsWith
+#'
+#' Determines if entries of x start with string (entries of) prefix respectively,
+#' where strings are recycled to common lengths.
+#'
+#' @rdname startsWith
+#' @name startsWith
+#' @family colum_func
+#'
+#' @param x vector of character string whose “starts” are considered
+#' @param prefix character vector (often of length one)
+setMethod("startsWith", signature(x = "Column"),
+          function(x, prefix) {
+            jc <- callJMethod(x@jc, "startsWith", as.vector(prefix))
+            column(jc)
+          })
+
+#' endsWith
+#'
+#' Determines if entries of x end with string (entries of) suffix respectively,
+#' where strings are recycled to common lengths.
+#'
+#' @rdname endsWith
+#' @name endsWith
+#' @family colum_func
+#'
+#' @param x vector of character string whose “ends” are considered
+#' @param suffix character vector (often of length one)
+setMethod("endsWith", signature(x = "Column"),
+          function(x, suffix) {
+            jc <- callJMethod(x@jc, "endsWith", as.vector(suffix))
+            column(jc)
+          })
+
 #' between
 #'
 #' Test if the column is between the lower bound and upper bound, inclusive.
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index ed76ad6b73..f0cde56b13 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -695,7 +695,7 @@ setGeneric("desc", function(x) { standardGeneric("desc") })
 
 #' @rdname column
 #' @export
-setGeneric("endsWith", function(x, ...) { standardGeneric("endsWith") })
+setGeneric("endsWith", function(x, suffix) { standardGeneric("endsWith") })
 
 #' @rdname column
 #' @export
@@ -727,7 +727,7 @@ setGeneric("rlike", function(x, ...) { standardGeneric("rlike") })
 
 #' @rdname column
 #' @export
-setGeneric("startsWith", function(x, ...) { standardGeneric("startsWith") })
+setGeneric("startsWith", function(x, prefix) { standardGeneric("startsWith") })
 
 #' @rdname column
 #' @export
diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R
index 94fa363d7e..375cb6f588 100644
--- a/R/pkg/inst/tests/testthat/test_sparkSQL.R
+++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R
@@ -1136,7 +1136,14 @@ test_that("string operators", {
   df <- read.json(jsonPath)
   expect_equal(count(where(df, like(df$name, "A%"))), 1)
   expect_equal(count(where(df, startsWith(df$name, "A"))), 1)
+  expect_true(first(select(df, startsWith(df$name, "M")))[[1]])
+  expect_false(first(select(df, startsWith(df$name, "m")))[[1]])
+  expect_true(first(select(df, endsWith(df$name, "el")))[[1]])
   expect_equal(first(select(df, substr(df$name, 1, 2)))[[1]], "Mi")
+  if (as.numeric(R.version$major) >= 3 && as.numeric(R.version$minor) >= 3) {
+    expect_true(startsWith("Hello World", "Hello"))
+    expect_false(endsWith("Hello World", "a"))
+  }
   expect_equal(collect(select(df, cast(df$age, "string")))[[2, 1]], "30")
   expect_equal(collect(select(df, concat(df$name, lit(":"), df$age)))[[2, 1]], "Andy:30")
   expect_equal(collect(select(df, concat_ws(":", df$name)))[[2, 1]], "Andy")
-- 
cgit v1.2.3