diff options
author | Tarek Auel <tarek.auel@googlemail.com> | 2015-07-18 22:48:05 -0700 |
---|---|---|
committer | Reynold Xin <rxin@databricks.com> | 2015-07-18 22:48:05 -0700 |
commit | 83b682beec884da76708769414108f4316e620f2 (patch) | |
tree | 1ee1d505a787434e962981a8cacc8ce69df48026 /sql/core | |
parent | 6cb6096c016178b9ce5c97592abe529ddb18cef2 (diff) | |
download | spark-83b682beec884da76708769414108f4316e620f2.tar.gz spark-83b682beec884da76708769414108f4316e620f2.tar.bz2 spark-83b682beec884da76708769414108f4316e620f2.zip |
[SPARK-8199][SPARK-8184][SPARK-8183][SPARK-8182][SPARK-8181][SPARK-8180][SPARK-8179][SPARK-8177][SPARK-8178][SPARK-9115][SQL] date functions
Jira:
https://issues.apache.org/jira/browse/SPARK-8199
https://issues.apache.org/jira/browse/SPARK-8184
https://issues.apache.org/jira/browse/SPARK-8183
https://issues.apache.org/jira/browse/SPARK-8182
https://issues.apache.org/jira/browse/SPARK-8181
https://issues.apache.org/jira/browse/SPARK-8180
https://issues.apache.org/jira/browse/SPARK-8179
https://issues.apache.org/jira/browse/SPARK-8177
https://issues.apache.org/jira/browse/SPARK-8179
https://issues.apache.org/jira/browse/SPARK-9115
Regarding `day`and `dayofmonth` are both necessary?
~~I am going to add `Quarter` to this PR as well.~~ Done.
~~As soon as the Scala coding is reviewed and discussed, I'll add the python api.~~ Done
Author: Tarek Auel <tarek.auel@googlemail.com>
Author: Tarek Auel <tarek.auel@gmail.com>
Closes #6981 from tarekauel/SPARK-8199 and squashes the following commits:
f7b4c8c [Tarek Auel] [SPARK-8199] fixed bug in tests
bb567b6 [Tarek Auel] [SPARK-8199] fixed test
3e095ba [Tarek Auel] [SPARK-8199] style and timezone fix
256c357 [Tarek Auel] [SPARK-8199] code cleanup
5983dcc [Tarek Auel] [SPARK-8199] whitespace fix
6e0c78f [Tarek Auel] [SPARK-8199] removed setTimeZone in tests, according to cloud-fans comment in #7488
4afc09c [Tarek Auel] [SPARK-8199] concise leap year handling
ea6c110 [Tarek Auel] [SPARK-8199] fix after merging master
70238e0 [Tarek Auel] Merge branch 'master' into SPARK-8199
3c6ae2e [Tarek Auel] [SPARK-8199] removed binary search
fb98ba0 [Tarek Auel] [SPARK-8199] python docstring fix
cdfae27 [Tarek Auel] [SPARK-8199] cleanup & python docstring fix
746b80a [Tarek Auel] [SPARK-8199] build fix
0ad6db8 [Tarek Auel] [SPARK-8199] minor fix
523542d [Tarek Auel] [SPARK-8199] address comments
2259299 [Tarek Auel] [SPARK-8199] day_of_month alias
d01b977 [Tarek Auel] [SPARK-8199] python underscore
56c4a92 [Tarek Auel] [SPARK-8199] update python docu
e223bc0 [Tarek Auel] [SPARK-8199] refactoring
d6aa14e [Tarek Auel] [SPARK-8199] fixed Hive compatibility
b382267 [Tarek Auel] [SPARK-8199] fixed bug in day calculation; removed set TimeZone in HiveCompatibilitySuite for test purposes; removed Hive tests for second and minute, because we can cast '2015-03-18' to a timestamp and extract a minute/second from it
1b2e540 [Tarek Auel] [SPARK-8119] style fix
0852655 [Tarek Auel] [SPARK-8119] changed from ExpectsInputTypes to implicit casts
ec87c69 [Tarek Auel] [SPARK-8119] bug fixing and refactoring
1358cdc [Tarek Auel] Merge remote-tracking branch 'origin/master' into SPARK-8199
740af0e [Tarek Auel] implement date function using a calculation based on days
4fb66da [Tarek Auel] WIP: date functions on calculation only
1a436c9 [Tarek Auel] wip
f775f39 [Tarek Auel] fixed return type
ad17e96 [Tarek Auel] improved implementation
c42b444 [Tarek Auel] Removed merge conflict file
ccb723c [Tarek Auel] [SPARK-8199] style and fixed merge issues
10e4ad1 [Tarek Auel] Merge branch 'master' into date-functions-fast
7d9f0eb [Tarek Auel] [SPARK-8199] git renaming issue
f3e7a9f [Tarek Auel] [SPARK-8199] revert change in DataFrameFunctionsSuite
6f5d95c [Tarek Auel] [SPARK-8199] fixed year interval
d9f8ac3 [Tarek Auel] [SPARK-8199] implement fast track
7bc9d93 [Tarek Auel] Merge branch 'master' into SPARK-8199
5a105d9 [Tarek Auel] [SPARK-8199] rebase after #6985 got merged
eb6760d [Tarek Auel] Merge branch 'master' into SPARK-8199
f120415 [Tarek Auel] improved runtime
a8edebd [Tarek Auel] use Calendar instead of SimpleDateFormat
5fe74e1 [Tarek Auel] fixed python style
3bfac90 [Tarek Auel] fixed style
356df78 [Tarek Auel] rely on cast mechanism of Spark. Simplified implementation
02efc5d [Tarek Auel] removed doubled code
a5ea120 [Tarek Auel] added python api; changed test to be more meaningful
b680db6 [Tarek Auel] added codegeneration to all functions
c739788 [Tarek Auel] added support for quarter SPARK-8178
849fb41 [Tarek Auel] fixed stupid test
638596f [Tarek Auel] improved codegen
4d8049b [Tarek Auel] fixed tests and added type check
5ebb235 [Tarek Auel] resolved naming conflict
d0e2f99 [Tarek Auel] date functions
Diffstat (limited to 'sql/core')
-rw-r--r-- | sql/core/src/main/scala/org/apache/spark/sql/functions.scala | 176 | ||||
-rw-r--r-- | sql/core/src/test/scala/org/apache/spark/sql/DateExpressionsSuite.scala | 170 |
2 files changed, 346 insertions, 0 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index c180407389..cadb25d597 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -1748,6 +1748,182 @@ object functions { */ def length(columnName: String): Column = length(Column(columnName)) + ////////////////////////////////////////////////////////////////////////////////////////////// + // DateTime functions + ////////////////////////////////////////////////////////////////////////////////////////////// + + /** + * Converts a date/timestamp/string to a value of string in the format specified by the date + * format given by the second argument. + * + * A pattern could be for instance `dd.MM.yyyy` and could return a string like '18.03.1993'. All + * pattern letters of [[java.text.SimpleDateFormat]] can be used. + * + * NOTE: Use when ever possible specialized functions like [[year]]. These benefit from a + * specialized implementation. + * + * @group datetime_funcs + * @since 1.5.0 + */ + def date_format(dateExpr: Column, format: String): Column = + DateFormatClass(dateExpr.expr, Literal(format)) + + /** + * Converts a date/timestamp/string to a value of string in the format specified by the date + * format given by the second argument. + * + * A pattern could be for instance `dd.MM.yyyy` and could return a string like '18.03.1993'. All + * pattern letters of [[java.text.SimpleDateFormat]] can be used. + * + * NOTE: Use when ever possible specialized functions like [[year]]. These benefit from a + * specialized implementation. + * + * @group datetime_funcs + * @since 1.5.0 + */ + def date_format(dateColumnName: String, format: String): Column = + date_format(Column(dateColumnName), format) + + /** + * Extracts the year as an integer from a given date/timestamp/string. + * @group datetime_funcs + * @since 1.5.0 + */ + def year(e: Column): Column = Year(e.expr) + + /** + * Extracts the year as an integer from a given date/timestamp/string. + * @group datetime_funcs + * @since 1.5.0 + */ + def year(columnName: String): Column = year(Column(columnName)) + + /** + * Extracts the quarter as an integer from a given date/timestamp/string. + * @group datetime_funcs + * @since 1.5.0 + */ + def quarter(e: Column): Column = Quarter(e.expr) + + /** + * Extracts the quarter as an integer from a given date/timestamp/string. + * @group datetime_funcs + * @since 1.5.0 + */ + def quarter(columnName: String): Column = quarter(Column(columnName)) + + /** + * Extracts the month as an integer from a given date/timestamp/string. + * @group datetime_funcs + * @since 1.5.0 + */ + def month(e: Column): Column = Month(e.expr) + + /** + * Extracts the month as an integer from a given date/timestamp/string. + * @group datetime_funcs + * @since 1.5.0 + */ + def month(columnName: String): Column = month(Column(columnName)) + + /** + * Extracts the day of the month as an integer from a given date/timestamp/string. + * @group datetime_funcs + * @since 1.5.0 + */ + def day(e: Column): Column = Day(e.expr) + + /** + * Extracts the day of the month as an integer from a given date/timestamp/string. + * @group datetime_funcs + * @since 1.5.0 + */ + def day(columnName: String): Column = day(Column(columnName)) + + /** + * Extracts the day of the month as an integer from a given date/timestamp/string. + * @group datetime_funcs + * @since 1.5.0 + */ + def day_of_month(e: Column): Column = Day(e.expr) + + /** + * Extracts the day of the month as an integer from a given date/timestamp/string. + * @group datetime_funcs + * @since 1.5.0 + */ + def day_of_month(columnName: String): Column = day_of_month(Column(columnName)) + + /** + * Extracts the day of the year as an integer from a given date/timestamp/string. + * @group datetime_funcs + * @since 1.5.0 + */ + def day_in_year(e: Column): Column = DayInYear(e.expr) + + /** + * Extracts the day of the year as an integer from a given date/timestamp/string. + * @group datetime_funcs + * @since 1.5.0 + */ + def day_in_year(columnName: String): Column = day_in_year(Column(columnName)) + + /** + * Extracts the hours as an integer from a given date/timestamp/string. + * @group datetime_funcs + * @since 1.5.0 + */ + def hour(e: Column): Column = Hour(e.expr) + + /** + * Extracts the hours as an integer from a given date/timestamp/string. + * @group datetime_funcs + * @since 1.5.0 + */ + def hour(columnName: String): Column = hour(Column(columnName)) + + /** + * Extracts the minutes as an integer from a given date/timestamp/string. + * @group datetime_funcs + * @since 1.5.0 + */ + def minute(e: Column): Column = Minute(e.expr) + + /** + * Extracts the minutes as an integer from a given date/timestamp/string. + * @group datetime_funcs + * @since 1.5.0 + */ + def minute(columnName: String): Column = minute(Column(columnName)) + + /** + * Extracts the seconds as an integer from a given date/timestamp/string. + * @group datetime_funcs + * @since 1.5.0 + */ + def second(e: Column): Column = Second(e.expr) + + /** + * Extracts the seconds as an integer from a given date/timestamp/string. + * @group datetime_funcs + * @since 1.5.0 + */ + def second(columnName: String): Column = second(Column(columnName)) + + /** + * Extracts the week number as an integer from a given date/timestamp/string. + * @group datetime_funcs + * @since 1.5.0 + */ + def week_of_year(e: Column): Column = WeekOfYear(e.expr) + + /** + * Extracts the week number as an integer from a given date/timestamp/string. + * @group datetime_funcs + * @since 1.5.0 + */ + def week_of_year(columnName: String): Column = week_of_year(Column(columnName)) + /** * Formats the number X to a format like '#,###,###.##', rounded to d decimal places, * and returns the result as a string. diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DateExpressionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DateExpressionsSuite.scala new file mode 100644 index 0000000000..d24e3ee1dd --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/DateExpressionsSuite.scala @@ -0,0 +1,170 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql + +import java.sql.{Timestamp, Date} +import java.text.SimpleDateFormat + +import org.apache.spark.sql.functions._ + +class DateExpressionsSuite extends QueryTest { + private lazy val ctx = org.apache.spark.sql.test.TestSQLContext + + import ctx.implicits._ + + val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss") + val sdfDate = new SimpleDateFormat("yyyy-MM-dd") + val d = new Date(sdf.parse("2015-04-08 13:10:15").getTime) + val ts = new Timestamp(sdf.parse("2013-04-08 13:10:15").getTime) + + + test("date format") { + val df = Seq((d, sdf.format(d), ts)).toDF("a", "b", "c") + + checkAnswer( + df.select(date_format("a", "y"), date_format("b", "y"), date_format("c", "y")), + Row("2015", "2015", "2013")) + + checkAnswer( + df.selectExpr("date_format(a, 'y')", "date_format(b, 'y')", "date_format(c, 'y')"), + Row("2015", "2015", "2013")) + } + + test("year") { + val df = Seq((d, sdfDate.format(d), ts)).toDF("a", "b", "c") + + checkAnswer( + df.select(year("a"), year("b"), year("c")), + Row(2015, 2015, 2013)) + + checkAnswer( + df.selectExpr("year(a)", "year(b)", "year(c)"), + Row(2015, 2015, 2013)) + } + + test("quarter") { + val ts = new Timestamp(sdf.parse("2013-11-08 13:10:15").getTime) + + val df = Seq((d, sdfDate.format(d), ts)).toDF("a", "b", "c") + + checkAnswer( + df.select(quarter("a"), quarter("b"), quarter("c")), + Row(2, 2, 4)) + + checkAnswer( + df.selectExpr("quarter(a)", "quarter(b)", "quarter(c)"), + Row(2, 2, 4)) + } + + test("month") { + val df = Seq((d, sdfDate.format(d), ts)).toDF("a", "b", "c") + + checkAnswer( + df.select(month("a"), month("b"), month("c")), + Row(4, 4, 4)) + + checkAnswer( + df.selectExpr("month(a)", "month(b)", "month(c)"), + Row(4, 4, 4)) + } + + test("day") { + val df = Seq((d, sdfDate.format(d), ts)).toDF("a", "b", "c") + + checkAnswer( + df.select(day("a"), day("b"), day("c")), + Row(8, 8, 8)) + + checkAnswer( + df.selectExpr("day(a)", "day(b)", "day(c)"), + Row(8, 8, 8)) + } + + test("day of month") { + val df = Seq((d, sdfDate.format(d), ts)).toDF("a", "b", "c") + + checkAnswer( + df.select(day_of_month("a"), day_of_month("b"), day_of_month("c")), + Row(8, 8, 8)) + + checkAnswer( + df.selectExpr("day_of_month(a)", "day_of_month(b)", "day_of_month(c)"), + Row(8, 8, 8)) + } + + test("day in year") { + val df = Seq((d, sdfDate.format(d), ts)).toDF("a", "b", "c") + + checkAnswer( + df.select(day_in_year("a"), day_in_year("b"), day_in_year("c")), + Row(98, 98, 98)) + + checkAnswer( + df.selectExpr("day_in_year(a)", "day_in_year(b)", "day_in_year(c)"), + Row(98, 98, 98)) + } + + test("hour") { + val df = Seq((d, sdf.format(d), ts)).toDF("a", "b", "c") + + checkAnswer( + df.select(hour("a"), hour("b"), hour("c")), + Row(0, 13, 13)) + + checkAnswer( + df.selectExpr("hour(a)", "hour(b)", "hour(c)"), + Row(0, 13, 13)) + } + + test("minute") { + val df = Seq((d, sdf.format(d), ts)).toDF("a", "b", "c") + + checkAnswer( + df.select(minute("a"), minute("b"), minute("c")), + Row(0, 10, 10)) + + checkAnswer( + df.selectExpr("minute(a)", "minute(b)", "minute(c)"), + Row(0, 10, 10)) + } + + test("second") { + val df = Seq((d, sdf.format(d), ts)).toDF("a", "b", "c") + + checkAnswer( + df.select(second("a"), second("b"), second("c")), + Row(0, 15, 15)) + + checkAnswer( + df.selectExpr("second(a)", "second(b)", "second(c)"), + Row(0, 15, 15)) + } + + test("week of year") { + val df = Seq((d, sdfDate.format(d), ts)).toDF("a", "b", "c") + + checkAnswer( + df.select(week_of_year("a"), week_of_year("b"), week_of_year("c")), + Row(15, 15, 15)) + + checkAnswer( + df.selectExpr("week_of_year(a)", "week_of_year(b)", "week_of_year(c)"), + Row(15, 15, 15)) + } + +} |