aboutsummaryrefslogtreecommitdiff
path: root/sql/core
diff options
context:
space:
mode:
authorTarek Auel <tarek.auel@googlemail.com>2015-07-18 22:48:05 -0700
committerReynold Xin <rxin@databricks.com>2015-07-18 22:48:05 -0700
commit83b682beec884da76708769414108f4316e620f2 (patch)
tree1ee1d505a787434e962981a8cacc8ce69df48026 /sql/core
parent6cb6096c016178b9ce5c97592abe529ddb18cef2 (diff)
downloadspark-83b682beec884da76708769414108f4316e620f2.tar.gz
spark-83b682beec884da76708769414108f4316e620f2.tar.bz2
spark-83b682beec884da76708769414108f4316e620f2.zip
[SPARK-8199][SPARK-8184][SPARK-8183][SPARK-8182][SPARK-8181][SPARK-8180][SPARK-8179][SPARK-8177][SPARK-8178][SPARK-9115][SQL] date functions
Jira: https://issues.apache.org/jira/browse/SPARK-8199 https://issues.apache.org/jira/browse/SPARK-8184 https://issues.apache.org/jira/browse/SPARK-8183 https://issues.apache.org/jira/browse/SPARK-8182 https://issues.apache.org/jira/browse/SPARK-8181 https://issues.apache.org/jira/browse/SPARK-8180 https://issues.apache.org/jira/browse/SPARK-8179 https://issues.apache.org/jira/browse/SPARK-8177 https://issues.apache.org/jira/browse/SPARK-8179 https://issues.apache.org/jira/browse/SPARK-9115 Regarding `day`and `dayofmonth` are both necessary? ~~I am going to add `Quarter` to this PR as well.~~ Done. ~~As soon as the Scala coding is reviewed and discussed, I'll add the python api.~~ Done Author: Tarek Auel <tarek.auel@googlemail.com> Author: Tarek Auel <tarek.auel@gmail.com> Closes #6981 from tarekauel/SPARK-8199 and squashes the following commits: f7b4c8c [Tarek Auel] [SPARK-8199] fixed bug in tests bb567b6 [Tarek Auel] [SPARK-8199] fixed test 3e095ba [Tarek Auel] [SPARK-8199] style and timezone fix 256c357 [Tarek Auel] [SPARK-8199] code cleanup 5983dcc [Tarek Auel] [SPARK-8199] whitespace fix 6e0c78f [Tarek Auel] [SPARK-8199] removed setTimeZone in tests, according to cloud-fans comment in #7488 4afc09c [Tarek Auel] [SPARK-8199] concise leap year handling ea6c110 [Tarek Auel] [SPARK-8199] fix after merging master 70238e0 [Tarek Auel] Merge branch 'master' into SPARK-8199 3c6ae2e [Tarek Auel] [SPARK-8199] removed binary search fb98ba0 [Tarek Auel] [SPARK-8199] python docstring fix cdfae27 [Tarek Auel] [SPARK-8199] cleanup & python docstring fix 746b80a [Tarek Auel] [SPARK-8199] build fix 0ad6db8 [Tarek Auel] [SPARK-8199] minor fix 523542d [Tarek Auel] [SPARK-8199] address comments 2259299 [Tarek Auel] [SPARK-8199] day_of_month alias d01b977 [Tarek Auel] [SPARK-8199] python underscore 56c4a92 [Tarek Auel] [SPARK-8199] update python docu e223bc0 [Tarek Auel] [SPARK-8199] refactoring d6aa14e [Tarek Auel] [SPARK-8199] fixed Hive compatibility b382267 [Tarek Auel] [SPARK-8199] fixed bug in day calculation; removed set TimeZone in HiveCompatibilitySuite for test purposes; removed Hive tests for second and minute, because we can cast '2015-03-18' to a timestamp and extract a minute/second from it 1b2e540 [Tarek Auel] [SPARK-8119] style fix 0852655 [Tarek Auel] [SPARK-8119] changed from ExpectsInputTypes to implicit casts ec87c69 [Tarek Auel] [SPARK-8119] bug fixing and refactoring 1358cdc [Tarek Auel] Merge remote-tracking branch 'origin/master' into SPARK-8199 740af0e [Tarek Auel] implement date function using a calculation based on days 4fb66da [Tarek Auel] WIP: date functions on calculation only 1a436c9 [Tarek Auel] wip f775f39 [Tarek Auel] fixed return type ad17e96 [Tarek Auel] improved implementation c42b444 [Tarek Auel] Removed merge conflict file ccb723c [Tarek Auel] [SPARK-8199] style and fixed merge issues 10e4ad1 [Tarek Auel] Merge branch 'master' into date-functions-fast 7d9f0eb [Tarek Auel] [SPARK-8199] git renaming issue f3e7a9f [Tarek Auel] [SPARK-8199] revert change in DataFrameFunctionsSuite 6f5d95c [Tarek Auel] [SPARK-8199] fixed year interval d9f8ac3 [Tarek Auel] [SPARK-8199] implement fast track 7bc9d93 [Tarek Auel] Merge branch 'master' into SPARK-8199 5a105d9 [Tarek Auel] [SPARK-8199] rebase after #6985 got merged eb6760d [Tarek Auel] Merge branch 'master' into SPARK-8199 f120415 [Tarek Auel] improved runtime a8edebd [Tarek Auel] use Calendar instead of SimpleDateFormat 5fe74e1 [Tarek Auel] fixed python style 3bfac90 [Tarek Auel] fixed style 356df78 [Tarek Auel] rely on cast mechanism of Spark. Simplified implementation 02efc5d [Tarek Auel] removed doubled code a5ea120 [Tarek Auel] added python api; changed test to be more meaningful b680db6 [Tarek Auel] added codegeneration to all functions c739788 [Tarek Auel] added support for quarter SPARK-8178 849fb41 [Tarek Auel] fixed stupid test 638596f [Tarek Auel] improved codegen 4d8049b [Tarek Auel] fixed tests and added type check 5ebb235 [Tarek Auel] resolved naming conflict d0e2f99 [Tarek Auel] date functions
Diffstat (limited to 'sql/core')
-rw-r--r--sql/core/src/main/scala/org/apache/spark/sql/functions.scala176
-rw-r--r--sql/core/src/test/scala/org/apache/spark/sql/DateExpressionsSuite.scala170
2 files changed, 346 insertions, 0 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
index c180407389..cadb25d597 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
@@ -1748,6 +1748,182 @@ object functions {
*/
def length(columnName: String): Column = length(Column(columnName))
+ //////////////////////////////////////////////////////////////////////////////////////////////
+ // DateTime functions
+ //////////////////////////////////////////////////////////////////////////////////////////////
+
+ /**
+ * Converts a date/timestamp/string to a value of string in the format specified by the date
+ * format given by the second argument.
+ *
+ * A pattern could be for instance `dd.MM.yyyy` and could return a string like '18.03.1993'. All
+ * pattern letters of [[java.text.SimpleDateFormat]] can be used.
+ *
+ * NOTE: Use when ever possible specialized functions like [[year]]. These benefit from a
+ * specialized implementation.
+ *
+ * @group datetime_funcs
+ * @since 1.5.0
+ */
+ def date_format(dateExpr: Column, format: String): Column =
+ DateFormatClass(dateExpr.expr, Literal(format))
+
+ /**
+ * Converts a date/timestamp/string to a value of string in the format specified by the date
+ * format given by the second argument.
+ *
+ * A pattern could be for instance `dd.MM.yyyy` and could return a string like '18.03.1993'. All
+ * pattern letters of [[java.text.SimpleDateFormat]] can be used.
+ *
+ * NOTE: Use when ever possible specialized functions like [[year]]. These benefit from a
+ * specialized implementation.
+ *
+ * @group datetime_funcs
+ * @since 1.5.0
+ */
+ def date_format(dateColumnName: String, format: String): Column =
+ date_format(Column(dateColumnName), format)
+
+ /**
+ * Extracts the year as an integer from a given date/timestamp/string.
+ * @group datetime_funcs
+ * @since 1.5.0
+ */
+ def year(e: Column): Column = Year(e.expr)
+
+ /**
+ * Extracts the year as an integer from a given date/timestamp/string.
+ * @group datetime_funcs
+ * @since 1.5.0
+ */
+ def year(columnName: String): Column = year(Column(columnName))
+
+ /**
+ * Extracts the quarter as an integer from a given date/timestamp/string.
+ * @group datetime_funcs
+ * @since 1.5.0
+ */
+ def quarter(e: Column): Column = Quarter(e.expr)
+
+ /**
+ * Extracts the quarter as an integer from a given date/timestamp/string.
+ * @group datetime_funcs
+ * @since 1.5.0
+ */
+ def quarter(columnName: String): Column = quarter(Column(columnName))
+
+ /**
+ * Extracts the month as an integer from a given date/timestamp/string.
+ * @group datetime_funcs
+ * @since 1.5.0
+ */
+ def month(e: Column): Column = Month(e.expr)
+
+ /**
+ * Extracts the month as an integer from a given date/timestamp/string.
+ * @group datetime_funcs
+ * @since 1.5.0
+ */
+ def month(columnName: String): Column = month(Column(columnName))
+
+ /**
+ * Extracts the day of the month as an integer from a given date/timestamp/string.
+ * @group datetime_funcs
+ * @since 1.5.0
+ */
+ def day(e: Column): Column = Day(e.expr)
+
+ /**
+ * Extracts the day of the month as an integer from a given date/timestamp/string.
+ * @group datetime_funcs
+ * @since 1.5.0
+ */
+ def day(columnName: String): Column = day(Column(columnName))
+
+ /**
+ * Extracts the day of the month as an integer from a given date/timestamp/string.
+ * @group datetime_funcs
+ * @since 1.5.0
+ */
+ def day_of_month(e: Column): Column = Day(e.expr)
+
+ /**
+ * Extracts the day of the month as an integer from a given date/timestamp/string.
+ * @group datetime_funcs
+ * @since 1.5.0
+ */
+ def day_of_month(columnName: String): Column = day_of_month(Column(columnName))
+
+ /**
+ * Extracts the day of the year as an integer from a given date/timestamp/string.
+ * @group datetime_funcs
+ * @since 1.5.0
+ */
+ def day_in_year(e: Column): Column = DayInYear(e.expr)
+
+ /**
+ * Extracts the day of the year as an integer from a given date/timestamp/string.
+ * @group datetime_funcs
+ * @since 1.5.0
+ */
+ def day_in_year(columnName: String): Column = day_in_year(Column(columnName))
+
+ /**
+ * Extracts the hours as an integer from a given date/timestamp/string.
+ * @group datetime_funcs
+ * @since 1.5.0
+ */
+ def hour(e: Column): Column = Hour(e.expr)
+
+ /**
+ * Extracts the hours as an integer from a given date/timestamp/string.
+ * @group datetime_funcs
+ * @since 1.5.0
+ */
+ def hour(columnName: String): Column = hour(Column(columnName))
+
+ /**
+ * Extracts the minutes as an integer from a given date/timestamp/string.
+ * @group datetime_funcs
+ * @since 1.5.0
+ */
+ def minute(e: Column): Column = Minute(e.expr)
+
+ /**
+ * Extracts the minutes as an integer from a given date/timestamp/string.
+ * @group datetime_funcs
+ * @since 1.5.0
+ */
+ def minute(columnName: String): Column = minute(Column(columnName))
+
+ /**
+ * Extracts the seconds as an integer from a given date/timestamp/string.
+ * @group datetime_funcs
+ * @since 1.5.0
+ */
+ def second(e: Column): Column = Second(e.expr)
+
+ /**
+ * Extracts the seconds as an integer from a given date/timestamp/string.
+ * @group datetime_funcs
+ * @since 1.5.0
+ */
+ def second(columnName: String): Column = second(Column(columnName))
+
+ /**
+ * Extracts the week number as an integer from a given date/timestamp/string.
+ * @group datetime_funcs
+ * @since 1.5.0
+ */
+ def week_of_year(e: Column): Column = WeekOfYear(e.expr)
+
+ /**
+ * Extracts the week number as an integer from a given date/timestamp/string.
+ * @group datetime_funcs
+ * @since 1.5.0
+ */
+ def week_of_year(columnName: String): Column = week_of_year(Column(columnName))
+
/**
* Formats the number X to a format like '#,###,###.##', rounded to d decimal places,
* and returns the result as a string.
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DateExpressionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DateExpressionsSuite.scala
new file mode 100644
index 0000000000..d24e3ee1dd
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DateExpressionsSuite.scala
@@ -0,0 +1,170 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql
+
+import java.sql.{Timestamp, Date}
+import java.text.SimpleDateFormat
+
+import org.apache.spark.sql.functions._
+
+class DateExpressionsSuite extends QueryTest {
+ private lazy val ctx = org.apache.spark.sql.test.TestSQLContext
+
+ import ctx.implicits._
+
+ val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
+ val sdfDate = new SimpleDateFormat("yyyy-MM-dd")
+ val d = new Date(sdf.parse("2015-04-08 13:10:15").getTime)
+ val ts = new Timestamp(sdf.parse("2013-04-08 13:10:15").getTime)
+
+
+ test("date format") {
+ val df = Seq((d, sdf.format(d), ts)).toDF("a", "b", "c")
+
+ checkAnswer(
+ df.select(date_format("a", "y"), date_format("b", "y"), date_format("c", "y")),
+ Row("2015", "2015", "2013"))
+
+ checkAnswer(
+ df.selectExpr("date_format(a, 'y')", "date_format(b, 'y')", "date_format(c, 'y')"),
+ Row("2015", "2015", "2013"))
+ }
+
+ test("year") {
+ val df = Seq((d, sdfDate.format(d), ts)).toDF("a", "b", "c")
+
+ checkAnswer(
+ df.select(year("a"), year("b"), year("c")),
+ Row(2015, 2015, 2013))
+
+ checkAnswer(
+ df.selectExpr("year(a)", "year(b)", "year(c)"),
+ Row(2015, 2015, 2013))
+ }
+
+ test("quarter") {
+ val ts = new Timestamp(sdf.parse("2013-11-08 13:10:15").getTime)
+
+ val df = Seq((d, sdfDate.format(d), ts)).toDF("a", "b", "c")
+
+ checkAnswer(
+ df.select(quarter("a"), quarter("b"), quarter("c")),
+ Row(2, 2, 4))
+
+ checkAnswer(
+ df.selectExpr("quarter(a)", "quarter(b)", "quarter(c)"),
+ Row(2, 2, 4))
+ }
+
+ test("month") {
+ val df = Seq((d, sdfDate.format(d), ts)).toDF("a", "b", "c")
+
+ checkAnswer(
+ df.select(month("a"), month("b"), month("c")),
+ Row(4, 4, 4))
+
+ checkAnswer(
+ df.selectExpr("month(a)", "month(b)", "month(c)"),
+ Row(4, 4, 4))
+ }
+
+ test("day") {
+ val df = Seq((d, sdfDate.format(d), ts)).toDF("a", "b", "c")
+
+ checkAnswer(
+ df.select(day("a"), day("b"), day("c")),
+ Row(8, 8, 8))
+
+ checkAnswer(
+ df.selectExpr("day(a)", "day(b)", "day(c)"),
+ Row(8, 8, 8))
+ }
+
+ test("day of month") {
+ val df = Seq((d, sdfDate.format(d), ts)).toDF("a", "b", "c")
+
+ checkAnswer(
+ df.select(day_of_month("a"), day_of_month("b"), day_of_month("c")),
+ Row(8, 8, 8))
+
+ checkAnswer(
+ df.selectExpr("day_of_month(a)", "day_of_month(b)", "day_of_month(c)"),
+ Row(8, 8, 8))
+ }
+
+ test("day in year") {
+ val df = Seq((d, sdfDate.format(d), ts)).toDF("a", "b", "c")
+
+ checkAnswer(
+ df.select(day_in_year("a"), day_in_year("b"), day_in_year("c")),
+ Row(98, 98, 98))
+
+ checkAnswer(
+ df.selectExpr("day_in_year(a)", "day_in_year(b)", "day_in_year(c)"),
+ Row(98, 98, 98))
+ }
+
+ test("hour") {
+ val df = Seq((d, sdf.format(d), ts)).toDF("a", "b", "c")
+
+ checkAnswer(
+ df.select(hour("a"), hour("b"), hour("c")),
+ Row(0, 13, 13))
+
+ checkAnswer(
+ df.selectExpr("hour(a)", "hour(b)", "hour(c)"),
+ Row(0, 13, 13))
+ }
+
+ test("minute") {
+ val df = Seq((d, sdf.format(d), ts)).toDF("a", "b", "c")
+
+ checkAnswer(
+ df.select(minute("a"), minute("b"), minute("c")),
+ Row(0, 10, 10))
+
+ checkAnswer(
+ df.selectExpr("minute(a)", "minute(b)", "minute(c)"),
+ Row(0, 10, 10))
+ }
+
+ test("second") {
+ val df = Seq((d, sdf.format(d), ts)).toDF("a", "b", "c")
+
+ checkAnswer(
+ df.select(second("a"), second("b"), second("c")),
+ Row(0, 15, 15))
+
+ checkAnswer(
+ df.selectExpr("second(a)", "second(b)", "second(c)"),
+ Row(0, 15, 15))
+ }
+
+ test("week of year") {
+ val df = Seq((d, sdfDate.format(d), ts)).toDF("a", "b", "c")
+
+ checkAnswer(
+ df.select(week_of_year("a"), week_of_year("b"), week_of_year("c")),
+ Row(15, 15, 15))
+
+ checkAnswer(
+ df.selectExpr("week_of_year(a)", "week_of_year(b)", "week_of_year(c)"),
+ Row(15, 15, 15))
+ }
+
+}