author     Daoyuan Wang <daoyuan.wang@intel.com>    2015-07-30 13:21:46 -0700
committer  Davies Liu <davies.liu@gmail.com>        2015-07-30 13:21:46 -0700
commit     1abf7dc16ca1ba1777fe874c8b81fe6f2b0a6de5 (patch)
tree       8f41efd630790d6b27f32780087ce4c1a2889724 /python
parent     d8cfd531c7c50c9b00ab546be458f44f84c386ae (diff)
[SPARK-8186] [SPARK-8187] [SPARK-8194] [SPARK-8198] [SPARK-9133] [SPARK-9290] [SQL] functions: date_add, date_sub, add_months, months_between, time-interval calculation
This PR is based on #7589, thanks to adrian-wang.

Added SQL functions date_add, date_sub, add_months, months_between, and a rule for add/subtract of date/timestamp and interval.

Closes #7589

cc rxin

Author: Daoyuan Wang <daoyuan.wang@intel.com>
Author: Davies Liu <davies@databricks.com>

Closes #7754 from davies/date_add and squashes the following commits:

e8c633a [Davies Liu] Merge branch 'master' of github.com:apache/spark into date_add
9e8e085 [Davies Liu] Merge branch 'master' of github.com:apache/spark into date_add
6224ce4 [Davies Liu] fix conflict
bd18cd4 [Davies Liu] Merge branch 'master' of github.com:apache/spark into date_add
e47ff2c [Davies Liu] add python api, fix date functions
01943d0 [Davies Liu] Merge branch 'master' into date_add
522e91a [Daoyuan Wang] fix
e8a639a [Daoyuan Wang] fix
42df486 [Daoyuan Wang] fix style
87c4b77 [Daoyuan Wang] function add_months, months_between and some fixes
1a68e03 [Daoyuan Wang] poc of time interval calculation
c506661 [Daoyuan Wang] function date_add, date_sub
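For context, here is a minimal usage sketch of the four Python functions this patch adds. It is not part of the patch itself; it assumes a running `sqlContext`, exactly as the module doctests below do.

# Sketch: exercising the new date functions added by this patch.
# Assumes a running sqlContext, as in the module doctests.
from pyspark.sql.functions import date_add, date_sub, add_months, months_between

df = sqlContext.createDataFrame([('2015-04-08',)], ['d'])
df.select(date_add(df.d, 7).alias('next_week')).collect()   # [Row(next_week=datetime.date(2015, 4, 15))]
df.select(date_sub(df.d, 7).alias('last_week')).collect()   # [Row(last_week=datetime.date(2015, 4, 1))]
df.select(add_months(df.d, 3).alias('later')).collect()     # [Row(later=datetime.date(2015, 7, 8))]

df2 = sqlContext.createDataFrame([('1997-02-28 10:30:00', '1996-10-30')], ['t', 'd'])
df2.select(months_between(df2.t, df2.d).alias('months')).collect()  # [Row(months=3.9495967...)]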
Diffstat (limited to 'python')
-rw-r--r--   python/pyspark/sql/functions.py   76
1 file changed, 64 insertions, 12 deletions
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index d930f7db25..a7295e25f0 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -59,7 +59,7 @@ __all__ = [
__all__ += ['lag', 'lead', 'ntile']
__all__ += [
- 'date_format',
+ 'date_format', 'date_add', 'date_sub', 'add_months', 'months_between',
'year', 'quarter', 'month', 'hour', 'minute', 'second',
'dayofmonth', 'dayofyear', 'weekofyear']
@@ -716,7 +716,7 @@ def date_format(dateCol, format):
[Row(date=u'04/08/2015')]
"""
sc = SparkContext._active_spark_context
- return Column(sc._jvm.functions.date_format(dateCol, format))
+ return Column(sc._jvm.functions.date_format(_to_java_column(dateCol), format))
@since(1.5)
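The one-line change above recurs in every hunk that follows: the Python argument is now wrapped in `_to_java_column` before crossing the Py4J boundary. `_to_java_column` (from pyspark.sql.column) normalizes either a Column object or a column-name string into the JVM-side Column that the sc._jvm.functions.* wrappers expect; previously the argument was passed through unconverted. A small sketch of the effect (assumes a running sqlContext; the doctest change to weekofyear further down shows the same point):

# Sketch: after the fix, both spellings work.
from pyspark.sql.functions import year

df = sqlContext.createDataFrame([('2015-04-08',)], ['a'])
df.select(year('a')).collect()    # string name, resolved via _to_java_column
df.select(year(df.a)).collect()   # Column object, unwrapped via _to_java_column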
@@ -729,7 +729,7 @@ def year(col):
[Row(year=2015)]
"""
sc = SparkContext._active_spark_context
- return Column(sc._jvm.functions.year(col))
+ return Column(sc._jvm.functions.year(_to_java_column(col)))
@since(1.5)
@@ -742,7 +742,7 @@ def quarter(col):
[Row(quarter=2)]
"""
sc = SparkContext._active_spark_context
- return Column(sc._jvm.functions.quarter(col))
+ return Column(sc._jvm.functions.quarter(_to_java_column(col)))
@since(1.5)
@@ -755,7 +755,7 @@ def month(col):
[Row(month=4)]
"""
sc = SparkContext._active_spark_context
- return Column(sc._jvm.functions.month(col))
+ return Column(sc._jvm.functions.month(_to_java_column(col)))
@since(1.5)
@@ -768,7 +768,7 @@ def dayofmonth(col):
[Row(day=8)]
"""
sc = SparkContext._active_spark_context
- return Column(sc._jvm.functions.dayofmonth(col))
+ return Column(sc._jvm.functions.dayofmonth(_to_java_column(col)))
@since(1.5)
@@ -781,7 +781,7 @@ def dayofyear(col):
[Row(day=98)]
"""
sc = SparkContext._active_spark_context
- return Column(sc._jvm.functions.dayofyear(col))
+ return Column(sc._jvm.functions.dayofyear(_to_java_column(col)))
@since(1.5)
@@ -794,7 +794,7 @@ def hour(col):
[Row(hour=13)]
"""
sc = SparkContext._active_spark_context
- return Column(sc._jvm.functions.hour(col))
+ return Column(sc._jvm.functions.hour(_to_java_column(col)))
@since(1.5)
@@ -807,7 +807,7 @@ def minute(col):
[Row(minute=8)]
"""
sc = SparkContext._active_spark_context
- return Column(sc._jvm.functions.minute(col))
+ return Column(sc._jvm.functions.minute(_to_java_column(col)))
@since(1.5)
@@ -820,7 +820,7 @@ def second(col):
[Row(second=15)]
"""
sc = SparkContext._active_spark_context
- return Column(sc._jvm.functions.second(col))
+ return Column(sc._jvm.functions.second(_to_java_column(col)))
@since(1.5)
@@ -829,11 +829,63 @@ def weekofyear(col):
Extract the week number of a given date as integer.
>>> df = sqlContext.createDataFrame([('2015-04-08',)], ['a'])
- >>> df.select(weekofyear('a').alias('week')).collect()
+ >>> df.select(weekofyear(df.a).alias('week')).collect()
[Row(week=15)]
"""
sc = SparkContext._active_spark_context
- return Column(sc._jvm.functions.weekofyear(col))
+ return Column(sc._jvm.functions.weekofyear(_to_java_column(col)))
+
+
+@since(1.5)
+def date_add(start, days):
+ """
+ Returns the date that is `days` days after `start`
+
+ >>> df = sqlContext.createDataFrame([('2015-04-08',)], ['d'])
+ >>> df.select(date_add(df.d, 1).alias('d')).collect()
+ [Row(d=datetime.date(2015, 4, 9))]
+ """
+ sc = SparkContext._active_spark_context
+ return Column(sc._jvm.functions.date_add(_to_java_column(start), days))
+
+
+@since(1.5)
+def date_sub(start, days):
+ """
+ Returns the date that is `days` days before `start`
+
+ >>> df = sqlContext.createDataFrame([('2015-04-08',)], ['d'])
+ >>> df.select(date_sub(df.d, 1).alias('d')).collect()
+ [Row(d=datetime.date(2015, 4, 7))]
+ """
+ sc = SparkContext._active_spark_context
+ return Column(sc._jvm.functions.date_sub(_to_java_column(start), days))
+
+
+@since(1.5)
+def add_months(start, months):
+ """
+ Returns the date that is `months` months after `start`
+
+ >>> df = sqlContext.createDataFrame([('2015-04-08',)], ['d'])
+ >>> df.select(add_months(df.d, 1).alias('d')).collect()
+ [Row(d=datetime.date(2015, 5, 8))]
+ """
+ sc = SparkContext._active_spark_context
+ return Column(sc._jvm.functions.add_months(_to_java_column(start), months))
+
+
+@since(1.5)
+def months_between(date1, date2):
+ """
+ Returns the number of months between date1 and date2.
+
+ >>> df = sqlContext.createDataFrame([('1997-02-28 10:30:00', '1996-10-30')], ['t', 'd'])
+ >>> df.select(months_between(df.t, df.d).alias('months')).collect()
+ [Row(months=3.9495967...)]
+ """
+ sc = SparkContext._active_spark_context
+ return Column(sc._jvm.functions.months_between(_to_java_column(date1), _to_java_column(date2)))
@since(1.5)
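The PR title also covers time-interval calculation (a rule for adding/subtracting an interval to/from a date or timestamp). That rule lives on the Scala/Catalyst side and does not appear in this Python diff; as a hedged sketch only, it can be exercised from Python through a SQL expression, assuming interval literal syntax is available to the 1.5 SQL parser:

# Sketch: date/timestamp +/- interval, resolved by the new analyzer rule.
# The `interval 1 day` literal syntax is an assumption about the 1.5 parser,
# not something shown in this diff.
df = sqlContext.createDataFrame([('2015-04-08 13:08:15',)], ['t'])
df.registerTempTable('times')
sqlContext.sql("SELECT cast(t AS timestamp) + interval 1 day AS later FROM times").collect()
sqlContext.sql("SELECT cast(t AS timestamp) - interval 30 minutes AS earlier FROM times").collect()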