aboutsummaryrefslogtreecommitdiff
path: root/python/pyspark
diff options
context:
space:
mode:
authorTarek Auel <tarek.auel@googlemail.com>2015-07-18 22:48:05 -0700
committerReynold Xin <rxin@databricks.com>2015-07-18 22:48:05 -0700
commit83b682beec884da76708769414108f4316e620f2 (patch)
tree1ee1d505a787434e962981a8cacc8ce69df48026 /python/pyspark
parent6cb6096c016178b9ce5c97592abe529ddb18cef2 (diff)
downloadspark-83b682beec884da76708769414108f4316e620f2.tar.gz
spark-83b682beec884da76708769414108f4316e620f2.tar.bz2
spark-83b682beec884da76708769414108f4316e620f2.zip
[SPARK-8199][SPARK-8184][SPARK-8183][SPARK-8182][SPARK-8181][SPARK-8180][SPARK-8179][SPARK-8177][SPARK-8178][SPARK-9115][SQL] date functions
Jira: https://issues.apache.org/jira/browse/SPARK-8199 https://issues.apache.org/jira/browse/SPARK-8184 https://issues.apache.org/jira/browse/SPARK-8183 https://issues.apache.org/jira/browse/SPARK-8182 https://issues.apache.org/jira/browse/SPARK-8181 https://issues.apache.org/jira/browse/SPARK-8180 https://issues.apache.org/jira/browse/SPARK-8179 https://issues.apache.org/jira/browse/SPARK-8177 https://issues.apache.org/jira/browse/SPARK-8179 https://issues.apache.org/jira/browse/SPARK-9115 Regarding `day`and `dayofmonth` are both necessary? ~~I am going to add `Quarter` to this PR as well.~~ Done. ~~As soon as the Scala coding is reviewed and discussed, I'll add the python api.~~ Done Author: Tarek Auel <tarek.auel@googlemail.com> Author: Tarek Auel <tarek.auel@gmail.com> Closes #6981 from tarekauel/SPARK-8199 and squashes the following commits: f7b4c8c [Tarek Auel] [SPARK-8199] fixed bug in tests bb567b6 [Tarek Auel] [SPARK-8199] fixed test 3e095ba [Tarek Auel] [SPARK-8199] style and timezone fix 256c357 [Tarek Auel] [SPARK-8199] code cleanup 5983dcc [Tarek Auel] [SPARK-8199] whitespace fix 6e0c78f [Tarek Auel] [SPARK-8199] removed setTimeZone in tests, according to cloud-fans comment in #7488 4afc09c [Tarek Auel] [SPARK-8199] concise leap year handling ea6c110 [Tarek Auel] [SPARK-8199] fix after merging master 70238e0 [Tarek Auel] Merge branch 'master' into SPARK-8199 3c6ae2e [Tarek Auel] [SPARK-8199] removed binary search fb98ba0 [Tarek Auel] [SPARK-8199] python docstring fix cdfae27 [Tarek Auel] [SPARK-8199] cleanup & python docstring fix 746b80a [Tarek Auel] [SPARK-8199] build fix 0ad6db8 [Tarek Auel] [SPARK-8199] minor fix 523542d [Tarek Auel] [SPARK-8199] address comments 2259299 [Tarek Auel] [SPARK-8199] day_of_month alias d01b977 [Tarek Auel] [SPARK-8199] python underscore 56c4a92 [Tarek Auel] [SPARK-8199] update python docu e223bc0 [Tarek Auel] [SPARK-8199] refactoring d6aa14e [Tarek Auel] [SPARK-8199] fixed Hive compatibility b382267 [Tarek Auel] 
[SPARK-8199] fixed bug in day calculation; removed set TimeZone in HiveCompatibilitySuite for test purposes; removed Hive tests for second and minute, because we can cast '2015-03-18' to a timestamp and extract a minute/second from it 1b2e540 [Tarek Auel] [SPARK-8119] style fix 0852655 [Tarek Auel] [SPARK-8119] changed from ExpectsInputTypes to implicit casts ec87c69 [Tarek Auel] [SPARK-8119] bug fixing and refactoring 1358cdc [Tarek Auel] Merge remote-tracking branch 'origin/master' into SPARK-8199 740af0e [Tarek Auel] implement date function using a calculation based on days 4fb66da [Tarek Auel] WIP: date functions on calculation only 1a436c9 [Tarek Auel] wip f775f39 [Tarek Auel] fixed return type ad17e96 [Tarek Auel] improved implementation c42b444 [Tarek Auel] Removed merge conflict file ccb723c [Tarek Auel] [SPARK-8199] style and fixed merge issues 10e4ad1 [Tarek Auel] Merge branch 'master' into date-functions-fast 7d9f0eb [Tarek Auel] [SPARK-8199] git renaming issue f3e7a9f [Tarek Auel] [SPARK-8199] revert change in DataFrameFunctionsSuite 6f5d95c [Tarek Auel] [SPARK-8199] fixed year interval d9f8ac3 [Tarek Auel] [SPARK-8199] implement fast track 7bc9d93 [Tarek Auel] Merge branch 'master' into SPARK-8199 5a105d9 [Tarek Auel] [SPARK-8199] rebase after #6985 got merged eb6760d [Tarek Auel] Merge branch 'master' into SPARK-8199 f120415 [Tarek Auel] improved runtime a8edebd [Tarek Auel] use Calendar instead of SimpleDateFormat 5fe74e1 [Tarek Auel] fixed python style 3bfac90 [Tarek Auel] fixed style 356df78 [Tarek Auel] rely on cast mechanism of Spark. 
Simplified implementation 02efc5d [Tarek Auel] removed doubled code a5ea120 [Tarek Auel] added python api; changed test to be more meaningful b680db6 [Tarek Auel] added codegeneration to all functions c739788 [Tarek Auel] added support for quarter SPARK-8178 849fb41 [Tarek Auel] fixed stupid test 638596f [Tarek Auel] improved codegen 4d8049b [Tarek Auel] fixed tests and added type check 5ebb235 [Tarek Auel] resolved naming conflict d0e2f99 [Tarek Auel] date functions
Diffstat (limited to 'python/pyspark')
-rw-r--r--python/pyspark/sql/functions.py150
1 files changed, 150 insertions, 0 deletions
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index e0816b3e65..0aca378892 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -652,6 +652,156 @@ def ntile(n):
return Column(sc._jvm.functions.ntile(int(n)))
@ignore_unicode_prefix
@since(1.5)
def date_format(dateCol, format):
    """
    Formats a date/timestamp/string column as a string, using the date format
    supplied as the second argument.

    The format follows the conventions of `java.text.SimpleDateFormat`; for
    example, the pattern `dd.MM.yyyy` would produce a string such as
    '18.03.1993'.

    NOTE: Whenever possible, prefer the specialized extraction functions such
    as `year`, which benefit from a dedicated implementation.

    >>> df = sqlContext.createDataFrame([('2015-04-08',)], ['a'])
    >>> df.select(date_format('a', 'MM/dd/yyy').alias('date')).collect()
    [Row(date=u'04/08/2015')]
    """
    # Delegate to the JVM-side functions object registered on the active context.
    jvm_functions = SparkContext._active_spark_context._jvm.functions
    return Column(jvm_functions.date_format(dateCol, format))
+
+
@since(1.5)
def year(col):
    """
    Returns the year component of the given date column as an integer.

    >>> df = sqlContext.createDataFrame([('2015-04-08',)], ['a'])
    >>> df.select(year('a').alias('year')).collect()
    [Row(year=2015)]
    """
    # Forward the call to the JVM implementation of `year`.
    jvm = SparkContext._active_spark_context._jvm
    return Column(jvm.functions.year(col))
+
+
@since(1.5)
def quarter(col):
    """
    Returns the quarter (1-4) of the given date column as an integer.

    >>> df = sqlContext.createDataFrame([('2015-04-08',)], ['a'])
    >>> df.select(quarter('a').alias('quarter')).collect()
    [Row(quarter=2)]
    """
    # Forward the call to the JVM implementation of `quarter`.
    ctx = SparkContext._active_spark_context
    return Column(ctx._jvm.functions.quarter(col))
+
+
@since(1.5)
def month(col):
    """
    Returns the month component of the given date column as an integer.

    >>> df = sqlContext.createDataFrame([('2015-04-08',)], ['a'])
    >>> df.select(month('a').alias('month')).collect()
    [Row(month=4)]
    """
    # Forward the call to the JVM implementation of `month`.
    jvm = SparkContext._active_spark_context._jvm
    return Column(jvm.functions.month(col))
+
+
@since(1.5)
def day(col):
    """
    Returns the day-of-month component of the given date column as an integer.

    >>> df = sqlContext.createDataFrame([('2015-04-08',)], ['a'])
    >>> df.select(day('a').alias('day')).collect()
    [Row(day=8)]
    """
    # Forward the call to the JVM implementation of `day`.
    ctx = SparkContext._active_spark_context
    return Column(ctx._jvm.functions.day(col))
+
+
@since(1.5)
def day_of_month(col):
    """
    Returns the day-of-month component of the given date column as an integer.

    >>> df = sqlContext.createDataFrame([('2015-04-08',)], ['a'])
    >>> df.select(day_of_month('a').alias('day')).collect()
    [Row(day=8)]
    """
    # Forward the call to the JVM implementation of `day_of_month`.
    jvm = SparkContext._active_spark_context._jvm
    return Column(jvm.functions.day_of_month(col))
+
+
@since(1.5)
def day_in_year(col):
    """
    Returns the day-of-year component of the given date column as an integer.

    >>> df = sqlContext.createDataFrame([('2015-04-08',)], ['a'])
    >>> df.select(day_in_year('a').alias('day')).collect()
    [Row(day=98)]
    """
    # Forward the call to the JVM implementation of `day_in_year`.
    ctx = SparkContext._active_spark_context
    return Column(ctx._jvm.functions.day_in_year(col))
+
+
@since(1.5)
def hour(col):
    """
    Returns the hour component of the given timestamp column as an integer.

    >>> df = sqlContext.createDataFrame([('2015-04-08 13:08:15',)], ['a'])
    >>> df.select(hour('a').alias('hour')).collect()
    [Row(hour=13)]
    """
    # Forward the call to the JVM implementation of `hour`.
    jvm = SparkContext._active_spark_context._jvm
    return Column(jvm.functions.hour(col))
+
+
@since(1.5)
def minute(col):
    """
    Returns the minute component of the given timestamp column as an integer.

    >>> df = sqlContext.createDataFrame([('2015-04-08 13:08:15',)], ['a'])
    >>> df.select(minute('a').alias('minute')).collect()
    [Row(minute=8)]
    """
    # Forward the call to the JVM implementation of `minute`.
    ctx = SparkContext._active_spark_context
    return Column(ctx._jvm.functions.minute(col))
+
+
@since(1.5)
def second(col):
    """
    Returns the second component of the given timestamp column as an integer.

    >>> df = sqlContext.createDataFrame([('2015-04-08 13:08:15',)], ['a'])
    >>> df.select(second('a').alias('second')).collect()
    [Row(second=15)]
    """
    # Forward the call to the JVM implementation of `second`.
    jvm = SparkContext._active_spark_context._jvm
    return Column(jvm.functions.second(col))
+
+
@since(1.5)
def week_of_year(col):
    """
    Returns the week-of-year number of the given date column as an integer.

    >>> df = sqlContext.createDataFrame([('2015-04-08',)], ['a'])
    >>> df.select(week_of_year('a').alias('week')).collect()
    [Row(week=15)]
    """
    # Forward the call to the JVM implementation of `week_of_year`.
    ctx = SparkContext._active_spark_context
    return Column(ctx._jvm.functions.week_of_year(col))
+
+
class UserDefinedFunction(object):
"""
User defined function in Python