about summary refs log tree commit diff
path: root/sql/core
diff options
context:
space:
mode:
author: Davies Liu <davies@databricks.com> 2015-08-01 21:46:46 -0700
committer: Davies Liu <davies.liu@gmail.com> 2015-08-01 21:46:46 -0700
commit: c1b0cbd762d78bedca0ab564cf9ca0970b7b99d2 (patch)
tree: cbdb4059022abbf2cc502d1fd920f3c29f6e2618 /sql/core
parent: 00cd92f32f17ca57d47aa2dcc716eb707aaee799 (diff)
download: spark-c1b0cbd762d78bedca0ab564cf9ca0970b7b99d2.tar.gz
          spark-c1b0cbd762d78bedca0ab564cf9ca0970b7b99d2.tar.bz2
          spark-c1b0cbd762d78bedca0ab564cf9ca0970b7b99d2.zip
[SPARK-8185] [SPARK-8188] [SPARK-8191] [SQL] function datediff, to_utc_timestamp, from_utc_timestamp
This PR is based on #7643 , thanks to adrian-wang Author: Davies Liu <davies@databricks.com> Author: Daoyuan Wang <daoyuan.wang@intel.com> Closes #7847 from davies/datediff and squashes the following commits: 74333d7 [Davies Liu] fix bug 22d8a8c [Davies Liu] optimize 85cdd21 [Davies Liu] remove unnecessary tests 241d90c [Davies Liu] Merge branch 'master' of github.com:apache/spark into datediff e9dc0f5 [Davies Liu] fix datediff/to_utc_timestamp/from_utc_timestamp c360447 [Daoyuan Wang] function datediff, to_utc_timestamp, from_utc_timestamp (commits merged)
Diffstat (limited to 'sql/core')
-rw-r--r-- sql/core/src/main/scala/org/apache/spark/sql/functions.scala | 22
-rw-r--r-- sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala | 47
-rw-r--r-- sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala | 52
3 files changed, 72 insertions, 49 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
index 818aa109f3..197cd3de61 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
@@ -2020,6 +2020,13 @@ object functions {
def date_sub(start: Column, days: Int): Column = DateSub(start.expr, Literal(days))
/**
+ * Returns the number of days from `start` to `end`.
+ * @group datetime_funcs
+ * @since 1.5.0
+ */
+ def datediff(end: Column, start: Column): Column = DateDiff(end.expr, start.expr)
+
+ /**
* Extracts the year as an integer from a given date/timestamp/string.
* @group datetime_funcs
* @since 1.5.0
@@ -2238,6 +2245,21 @@ object functions {
*/
def trunc(date: Column, format: String): Column = TruncDate(date.expr, Literal(format))
+ /**
+ * Assumes given timestamp is UTC and converts to given timezone.
+ * @group datetime_funcs
+ * @since 1.5.0
+ */
+ def from_utc_timestamp(ts: Column, tz: String): Column =
+ FromUTCTimestamp(ts.expr, Literal(tz).expr)
+
+ /**
+ * Assumes given timestamp is in given timezone and converts to UTC.
+ * @group datetime_funcs
+ * @since 1.5.0
+ */
+ def to_utc_timestamp(ts: Column, tz: String): Column = ToUTCTimestamp(ts.expr, Literal(tz).expr)
+
//////////////////////////////////////////////////////////////////////////////////////////////
// Collection functions
//////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala
index 8c596fad74..0850f5cf77 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala
@@ -445,4 +445,51 @@ class DateFunctionsSuite extends QueryTest {
Row(ts1.getTime / 1000L), Row(ts2.getTime / 1000L)))
}
+ test("datediff") {
+ val df = Seq(
+ (Date.valueOf("2015-07-24"), Timestamp.valueOf("2015-07-24 01:00:00"),
+ "2015-07-23", "2015-07-23 03:00:00"),
+ (Date.valueOf("2015-07-25"), Timestamp.valueOf("2015-07-25 02:00:00"),
+ "2015-07-24", "2015-07-24 04:00:00")
+ ).toDF("a", "b", "c", "d")
+ checkAnswer(df.select(datediff(col("a"), col("b"))), Seq(Row(0), Row(0)))
+ checkAnswer(df.select(datediff(col("a"), col("c"))), Seq(Row(1), Row(1)))
+ checkAnswer(df.select(datediff(col("d"), col("b"))), Seq(Row(-1), Row(-1)))
+ checkAnswer(df.selectExpr("datediff(a, d)"), Seq(Row(1), Row(1)))
+ }
+
+ test("from_utc_timestamp") {
+ val df = Seq(
+ (Timestamp.valueOf("2015-07-24 00:00:00"), "2015-07-24 00:00:00"),
+ (Timestamp.valueOf("2015-07-25 00:00:00"), "2015-07-25 00:00:00")
+ ).toDF("a", "b")
+ checkAnswer(
+ df.select(from_utc_timestamp(col("a"), "PST")),
+ Seq(
+ Row(Timestamp.valueOf("2015-07-23 17:00:00")),
+ Row(Timestamp.valueOf("2015-07-24 17:00:00"))))
+ checkAnswer(
+ df.select(from_utc_timestamp(col("b"), "PST")),
+ Seq(
+ Row(Timestamp.valueOf("2015-07-23 17:00:00")),
+ Row(Timestamp.valueOf("2015-07-24 17:00:00"))))
+ }
+
+ test("to_utc_timestamp") {
+ val df = Seq(
+ (Timestamp.valueOf("2015-07-24 00:00:00"), "2015-07-24 00:00:00"),
+ (Timestamp.valueOf("2015-07-25 00:00:00"), "2015-07-25 00:00:00")
+ ).toDF("a", "b")
+ checkAnswer(
+ df.select(to_utc_timestamp(col("a"), "PST")),
+ Seq(
+ Row(Timestamp.valueOf("2015-07-24 07:00:00")),
+ Row(Timestamp.valueOf("2015-07-25 07:00:00"))))
+ checkAnswer(
+ df.select(to_utc_timestamp(col("b"), "PST")),
+ Seq(
+ Row(Timestamp.valueOf("2015-07-24 07:00:00")),
+ Row(Timestamp.valueOf("2015-07-25 07:00:00"))))
+ }
+
}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala
index 1c1be0c3cc..ab5da6ee79 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala
@@ -176,58 +176,12 @@ class StringFunctionsSuite extends QueryTest {
test("string substring_index function") {
val df = Seq(("www.apache.org", ".", "zz")).toDF("a", "b", "c")
checkAnswer(
- df.select(substring_index($"a", ".", 3)),
- Row("www.apache.org"))
- checkAnswer(
df.select(substring_index($"a", ".", 2)),
Row("www.apache"))
checkAnswer(
- df.select(substring_index($"a", ".", 1)),
- Row("www"))
- checkAnswer(
- df.select(substring_index($"a", ".", 0)),
- Row(""))
- checkAnswer(
- df.select(substring_index(lit("www.apache.org"), ".", -1)),
- Row("org"))
- checkAnswer(
- df.select(substring_index(lit("www.apache.org"), ".", -2)),
- Row("apache.org"))
- checkAnswer(
- df.select(substring_index(lit("www.apache.org"), ".", -3)),
- Row("www.apache.org"))
- // str is empty string
- checkAnswer(
- df.select(substring_index(lit(""), ".", 1)),
- Row(""))
- // empty string delim
- checkAnswer(
- df.select(substring_index(lit("www.apache.org"), "", 1)),
- Row(""))
- // delim does not exist in str
- checkAnswer(
- df.select(substring_index(lit("www.apache.org"), "#", 1)),
- Row("www.apache.org"))
- // delim is 2 chars
- checkAnswer(
- df.select(substring_index(lit("www||apache||org"), "||", 2)),
- Row("www||apache"))
- checkAnswer(
- df.select(substring_index(lit("www||apache||org"), "||", -2)),
- Row("apache||org"))
- // null
- checkAnswer(
- df.select(substring_index(lit(null), "||", 2)),
- Row(null))
- checkAnswer(
- df.select(substring_index(lit("www.apache.org"), null, 2)),
- Row(null))
- // non ascii chars
- // scalastyle:off
- checkAnswer(
- df.selectExpr("""substring_index("大千世界大千世界", "千", 2)"""),
- Row("大千世界大"))
- // scalastyle:on
+ df.selectExpr("substring_index(a, '.', 2)"),
+ Row("www.apache")
+ )
}
test("string locate function") {