diff options
author | anabranch <wac.chambers@gmail.com> | 2017-02-07 15:50:30 +0100 |
---|---|---|
committer | Herman van Hovell <hvanhovell@databricks.com> | 2017-02-07 15:50:30 +0100 |
commit | 7a7ce272fe9a703f58b0180a9d2001ecb5c4b8db (patch) | |
tree | 4a2533e6d7dcc2e38d62db64f87634a15f3e2439 /R | |
parent | 6ed285c68fee451c45db7b01ca8ec1dea2efd479 (diff) | |
download | spark-7a7ce272fe9a703f58b0180a9d2001ecb5c4b8db.tar.gz spark-7a7ce272fe9a703f58b0180a9d2001ecb5c4b8db.tar.bz2 spark-7a7ce272fe9a703f58b0180a9d2001ecb5c4b8db.zip |
[SPARK-16609] Add to_date/to_timestamp with format functions
## What changes were proposed in this pull request?
This pull request adds two new user-facing functions:
- `to_date` which accepts an expression and a format and returns a date.
- `to_timestamp` which accepts an expression and a format and returns a timestamp.
For example, given a date string in the format `yyyy-dd-MM`, such as `2016-21-05`:
### Date Function
*Previously*
```
to_date(unix_timestamp(lit("2016-21-05"), "yyyy-dd-MM").cast("timestamp"))
```
*Current*
```
to_date(lit("2016-21-05"), "yyyy-dd-MM")
```
### Timestamp Function
*Previously*
```
unix_timestamp(lit("2016-21-05"), "yyyy-dd-MM").cast("timestamp")
```
*Current*
```
to_timestamp(lit("2016-21-05"), "yyyy-dd-MM")
```
### Tasks
- [X] Add `to_date` to Scala Functions
- [x] Add `to_date` to Python Functions
- [x] Add `to_date` to SQL Functions
- [X] Add `to_timestamp` to Scala Functions
- [x] Add `to_timestamp` to Python Functions
- [x] Add `to_timestamp` to SQL Functions
- [x] Add function to R
## How was this patch tested?
- [x] Add Functions to `DateFunctionsSuite`
- Test new `ParseToTimestamp` Expression (*not necessary*)
- Test new `ParseToDate` Expression (*not necessary*)
- [x] Add test for R
- [x] Add test for Python in test.py
Please review http://spark.apache.org/contributing.html before opening a pull request.
Author: anabranch <wac.chambers@gmail.com>
Author: Bill Chambers <bill@databricks.com>
Author: anabranch <bill@databricks.com>
Closes #16138 from anabranch/SPARK-16609.
Diffstat (limited to 'R')
-rw-r--r-- | R/pkg/NAMESPACE | 1 | ||||
-rw-r--r-- | R/pkg/R/functions.R | 80 | ||||
-rw-r--r-- | R/pkg/R/generics.R | 6 | ||||
-rw-r--r-- | R/pkg/inst/tests/testthat/test_sparkSQL.R | 1 |
4 files changed, 80 insertions, 8 deletions
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index 7c16ba3072..625c797f8a 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -325,6 +325,7 @@ exportMethods("%in%", "toDegrees", "toRadians", "to_date", + "to_timestamp", "to_utc_timestamp", "translate", "trim", diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index 6ffa0f5481..032cfecfc0 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -1730,24 +1730,90 @@ setMethod("toRadians", #' to_date #' -#' Converts the column into DateType. +#' Converts the column into a DateType. You may optionally specify a format +#' according to the rules in: +#' \url{http://docs.oracle.com/javase/tutorial/i18n/format/simpleDateFormat.html}. +#' If the string cannot be parsed according to the specified format (or default), +#' the value of the column will be null. +#' The default format is 'yyyy-MM-dd'. #' -#' @param x Column to compute on. +#' @param x Column to parse. +#' @param format string to use to parse x Column to DateType. (optional) #' #' @rdname to_date #' @name to_date #' @family datetime_funcs -#' @aliases to_date,Column-method +#' @aliases to_date,Column,missing-method #' @export -#' @examples \dontrun{to_date(df$c)} -#' @note to_date since 1.5.0 +#' @examples +#' \dontrun{ +#' to_date(df$c) +#' to_date(df$c, 'yyyy-MM-dd') +#' } +#' @note to_date(Column) since 1.5.0 setMethod("to_date", - signature(x = "Column"), - function(x) { + signature(x = "Column", format = "missing"), + function(x, format) { jc <- callJStatic("org.apache.spark.sql.functions", "to_date", x@jc) column(jc) }) +#' @rdname to_date +#' @name to_date +#' @family datetime_funcs +#' @aliases to_date,Column,character-method +#' @export +#' @note to_date(Column, character) since 2.2.0 +setMethod("to_date", + signature(x = "Column", format = "character"), + function(x, format) { + jc <- callJStatic("org.apache.spark.sql.functions", "to_date", x@jc, format) + column(jc) + }) + +#' to_timestamp +#' +#' Converts the column into a 
TimestampType. You may optionally specify a format +#' according to the rules in: +#' \url{http://docs.oracle.com/javase/tutorial/i18n/format/simpleDateFormat.html}. +#' If the string cannot be parsed according to the specified format (or default), +#' the value of the column will be null. +#' The default format is 'yyyy-MM-dd HH:mm:ss'. +#' +#' @param x Column to parse. +#' @param format string to use to parse x Column to DateType. (optional) +#' +#' @rdname to_timestamp +#' @name to_timestamp +#' @family datetime_funcs +#' @aliases to_timestamp,Column,missing-method +#' @export +#' @examples +#' \dontrun{ +#' to_timestamp(df$c) +#' to_timestamp(df$c, 'yyyy-MM-dd') +#' } +#' @note to_timestamp(Column) since 2.2.0 +setMethod("to_timestamp", + signature(x = "Column", format = "missing"), + function(x, format) { + jc <- callJStatic("org.apache.spark.sql.functions", "to_timestamp", x@jc) + column(jc) + }) + +#' @rdname to_timestamp +#' @name to_timestamp +#' @family datetime_funcs +#' @aliases to_timestamp,Column,character-method +#' @export +#' @note to_timestamp(Column, character) since 2.2.0 +setMethod("to_timestamp", + signature(x = "Column", format = "character"), + function(x, format) { + jc <- callJStatic("org.apache.spark.sql.functions", "to_timestamp", x@jc, format) + column(jc) + }) + #' trim #' #' Trim the spaces from both ends for the specified string column. 
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index 0307bac349..d78b1a10d6 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -1256,7 +1256,11 @@ setGeneric("toRadians", function(x) { standardGeneric("toRadians") }) #' @rdname to_date #' @export -setGeneric("to_date", function(x) { standardGeneric("to_date") }) +setGeneric("to_date", function(x, format) { standardGeneric("to_date") }) + +#' @rdname to_timestamp +#' @export +setGeneric("to_timestamp", function(x, format) { standardGeneric("to_timestamp") }) #' @rdname to_utc_timestamp #' @export diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R index 418f128ce8..233a20c3d3 100644 --- a/R/pkg/inst/tests/testthat/test_sparkSQL.R +++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R @@ -1237,6 +1237,7 @@ test_that("column functions", { c17 <- cov(c, c1) + cov("c", "c1") + covar_samp(c, c1) + covar_samp("c", "c1") c18 <- covar_pop(c, c1) + covar_pop("c", "c1") c19 <- spark_partition_id() + c20 <- to_timestamp(c) + to_timestamp(c, "yyyy") + to_date(c, "yyyy") # Test if base::is.nan() is exposed expect_equal(is.nan(c("a", "b")), c(FALSE, FALSE)) |