aboutsummaryrefslogtreecommitdiff
path: root/sql/core
diff options
context:
space:
mode:
authoranabranch <wac.chambers@gmail.com>2017-02-07 15:50:30 +0100
committerHerman van Hovell <hvanhovell@databricks.com>2017-02-07 15:50:30 +0100
commit7a7ce272fe9a703f58b0180a9d2001ecb5c4b8db (patch)
tree4a2533e6d7dcc2e38d62db64f87634a15f3e2439 /sql/core
parent6ed285c68fee451c45db7b01ca8ec1dea2efd479 (diff)
downloadspark-7a7ce272fe9a703f58b0180a9d2001ecb5c4b8db.tar.gz
spark-7a7ce272fe9a703f58b0180a9d2001ecb5c4b8db.tar.bz2
spark-7a7ce272fe9a703f58b0180a9d2001ecb5c4b8db.zip
[SPARK-16609] Add to_date/to_timestamp with format functions
## What changes were proposed in this pull request? This pull request adds two new user facing functions: - `to_date` which accepts an expression and a format and returns a date. - `to_timestamp` which accepts an expression and a format and returns a timestamp. For example, Given a date in format: `2016-21-05`. (YYYY-dd-MM) ### Date Function *Previously* ``` to_date(unix_timestamp(lit("2016-21-05"), "yyyy-dd-MM").cast("timestamp")) ``` *Current* ``` to_date(lit("2016-21-05"), "yyyy-dd-MM") ``` ### Timestamp Function *Previously* ``` unix_timestamp(lit("2016-21-05"), "yyyy-dd-MM").cast("timestamp") ``` *Current* ``` to_timestamp(lit("2016-21-05"), "yyyy-dd-MM") ``` ### Tasks - [X] Add `to_date` to Scala Functions - [x] Add `to_date` to Python Functions - [x] Add `to_date` to SQL Functions - [X] Add `to_timestamp` to Scala Functions - [x] Add `to_timestamp` to Python Functions - [x] Add `to_timestamp` to SQL Functions - [x] Add function to R ## How was this patch tested? - [x] Add Functions to `DateFunctionsSuite` - Test new `ParseToTimestamp` Expression (*not necessary*) - Test new `ParseToDate` Expression (*not necessary*) - [x] Add test for R - [x] Add test for Python in test.py Please review http://spark.apache.org/contributing.html before opening a pull request. Author: anabranch <wac.chambers@gmail.com> Author: Bill Chambers <bill@databricks.com> Author: anabranch <bill@databricks.com> Closes #16138 from anabranch/SPARK-16609.
Diffstat (limited to 'sql/core')
-rw-r--r--sql/core/src/main/scala/org/apache/spark/sql/functions.scala33
-rw-r--r--sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala68
2 files changed, 94 insertions, 7 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
index 5e27484c11..24ed906d33 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
@@ -2665,6 +2665,27 @@ object functions {
def unix_timestamp(s: Column, p: String): Column = withExpr {UnixTimestamp(s.expr, Literal(p)) }
/**
+ * Convert time string to a Unix timestamp (in seconds).
+ * Uses the pattern "yyyy-MM-dd HH:mm:ss" and will return null on failure.
+ * @group datetime_funcs
+ * @since 2.2.0
+ */
+ def to_timestamp(s: Column): Column = withExpr {
+ new ParseToTimestamp(s.expr, Literal("yyyy-MM-dd HH:mm:ss"))
+ }
+
+ /**
+ * Convert time string to a Unix timestamp (in seconds) with a specified format
+ * (see [http://docs.oracle.com/javase/tutorial/i18n/format/simpleDateFormat.html])
+ * to Unix timestamp (in seconds), return null if fail.
+ * @group datetime_funcs
+ * @since 2.2.0
+ */
+ def to_timestamp(s: Column, fmt: String): Column = withExpr {
+ new ParseToTimestamp(s.expr, Literal(fmt))
+ }
+
+ /**
* Converts the column into DateType.
*
* @group datetime_funcs
@@ -2673,6 +2694,18 @@ object functions {
def to_date(e: Column): Column = withExpr { ToDate(e.expr) }
/**
+ * Converts the column into a DateType with a specified format
+ * (see [http://docs.oracle.com/javase/tutorial/i18n/format/simpleDateFormat.html])
+ * return null if fail.
+ *
+ * @group datetime_funcs
+ * @since 2.2.0
+ */
+ def to_date(e: Column, fmt: String): Column = withExpr {
+ new ParseToDate(e.expr, Literal(fmt))
+ }
+
+ /**
* Returns date truncated to the unit specified by the format.
*
* @param format: 'year', 'yyyy', 'yy' for truncate by year,
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala
index e05b2252ee..618db43446 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala
@@ -354,31 +354,58 @@ class DateFunctionsSuite extends QueryTest with SharedSQLContext {
test("function to_date") {
val d1 = Date.valueOf("2015-07-22")
val d2 = Date.valueOf("2015-07-01")
+ val d3 = Date.valueOf("2014-12-31")
val t1 = Timestamp.valueOf("2015-07-22 10:00:00")
val t2 = Timestamp.valueOf("2014-12-31 23:59:59")
+ val t3 = Timestamp.valueOf("2014-12-31 23:59:59")
val s1 = "2015-07-22 10:00:00"
val s2 = "2014-12-31"
- val df = Seq((d1, t1, s1), (d2, t2, s2)).toDF("d", "t", "s")
+ val s3 = "2014-31-12"
+ val df = Seq((d1, t1, s1), (d2, t2, s2), (d3, t3, s3)).toDF("d", "t", "s")
checkAnswer(
df.select(to_date(col("t"))),
- Seq(Row(Date.valueOf("2015-07-22")), Row(Date.valueOf("2014-12-31"))))
+ Seq(Row(Date.valueOf("2015-07-22")), Row(Date.valueOf("2014-12-31")),
+ Row(Date.valueOf("2014-12-31"))))
checkAnswer(
df.select(to_date(col("d"))),
- Seq(Row(Date.valueOf("2015-07-22")), Row(Date.valueOf("2015-07-01"))))
+ Seq(Row(Date.valueOf("2015-07-22")), Row(Date.valueOf("2015-07-01")),
+ Row(Date.valueOf("2014-12-31"))))
checkAnswer(
df.select(to_date(col("s"))),
- Seq(Row(Date.valueOf("2015-07-22")), Row(Date.valueOf("2014-12-31"))))
+ Seq(Row(Date.valueOf("2015-07-22")), Row(Date.valueOf("2014-12-31")), Row(null)))
checkAnswer(
df.selectExpr("to_date(t)"),
- Seq(Row(Date.valueOf("2015-07-22")), Row(Date.valueOf("2014-12-31"))))
+ Seq(Row(Date.valueOf("2015-07-22")), Row(Date.valueOf("2014-12-31")),
+ Row(Date.valueOf("2014-12-31"))))
checkAnswer(
df.selectExpr("to_date(d)"),
- Seq(Row(Date.valueOf("2015-07-22")), Row(Date.valueOf("2015-07-01"))))
+ Seq(Row(Date.valueOf("2015-07-22")), Row(Date.valueOf("2015-07-01")),
+ Row(Date.valueOf("2014-12-31"))))
checkAnswer(
df.selectExpr("to_date(s)"),
- Seq(Row(Date.valueOf("2015-07-22")), Row(Date.valueOf("2014-12-31"))))
+ Seq(Row(Date.valueOf("2015-07-22")), Row(Date.valueOf("2014-12-31")), Row(null)))
+
+ // Now with format
+ checkAnswer(
+ df.select(to_date(col("t"), "yyyy-MM-dd")),
+ Seq(Row(Date.valueOf("2015-07-22")), Row(Date.valueOf("2014-12-31")),
+ Row(Date.valueOf("2014-12-31"))))
+ checkAnswer(
+ df.select(to_date(col("d"), "yyyy-MM-dd")),
+ Seq(Row(Date.valueOf("2015-07-22")), Row(Date.valueOf("2015-07-01")),
+ Row(Date.valueOf("2014-12-31"))))
+ checkAnswer(
+ df.select(to_date(col("s"), "yyyy-MM-dd")),
+ Seq(Row(Date.valueOf("2015-07-22")), Row(Date.valueOf("2014-12-31")),
+ Row(Date.valueOf("2016-07-12"))))
+
+ // now switch format
+ checkAnswer(
+ df.select(to_date(col("s"), "yyyy-dd-MM")),
+ Seq(Row(Date.valueOf("2016-10-07")), Row(Date.valueOf("2016-07-12")),
+ Row(Date.valueOf("2014-12-31"))))
}
test("function trunc") {
@@ -475,6 +502,33 @@ class DateFunctionsSuite extends QueryTest with SharedSQLContext {
Row(ts1.getTime / 1000L), Row(ts2.getTime / 1000L)))
}
+
+ test("to_timestamp") {
+ val date1 = Date.valueOf("2015-07-24")
+ val date2 = Date.valueOf("2015-07-25")
+ val ts_date1 = Timestamp.valueOf("2015-07-24 00:00:00")
+ val ts_date2 = Timestamp.valueOf("2015-07-25 00:00:00")
+ val ts1 = Timestamp.valueOf("2015-07-24 10:00:00")
+ val ts2 = Timestamp.valueOf("2015-07-25 02:02:02")
+ val s1 = "2015/07/24 10:00:00.5"
+ val s2 = "2015/07/25 02:02:02.6"
+ val ss1 = "2015-07-24 10:00:00"
+ val ss2 = "2015-07-25 02:02:02"
+ val fmt = "yyyy/MM/dd HH:mm:ss.S"
+ val df = Seq((date1, ts1, s1, ss1), (date2, ts2, s2, ss2)).toDF("d", "ts", "s", "ss")
+
+ checkAnswer(df.select(to_timestamp(col("ss"))),
+ df.select(unix_timestamp(col("ss")).cast("timestamp")))
+ checkAnswer(df.select(to_timestamp(col("ss"))), Seq(
+ Row(ts1), Row(ts2)))
+ checkAnswer(df.select(to_timestamp(col("s"), fmt)), Seq(
+ Row(ts1), Row(ts2)))
+ checkAnswer(df.select(to_timestamp(col("ts"), fmt)), Seq(
+ Row(ts1), Row(ts2)))
+ checkAnswer(df.select(to_timestamp(col("d"), "yyyy-MM-dd")), Seq(
+ Row(ts_date1), Row(ts_date2)))
+ }
+
test("datediff") {
val df = Seq(
(Date.valueOf("2015-07-24"), Timestamp.valueOf("2015-07-24 01:00:00"),