author    Reynold Xin <rxin@databricks.com>    2015-07-19 01:17:22 -0700
committer Reynold Xin <rxin@databricks.com>    2015-07-19 01:17:22 -0700
commit    3427937ea2a4ed19142bd3d66707864879417d61 (patch)
tree      3713bf77e9894e73c2b436f9f79ccf29eabb9713 /python/pyspark/sql
parent    a53d13f7aa5d44c706e5510f57399a32c7558b80 (diff)
[SQL] Make date/time functions more consistent with other database systems.
This pull request fixes some of the problems in #6981.

- Added date functions to `__all__` so they get exposed
- Rename day_of_month -> dayofmonth
- Rename day_in_year -> dayofyear
- Rename week_of_year -> weekofyear
- Removed "day" from Scala/Python API since it is ambiguous. Only leaving the alias in SQL.

Author: Reynold Xin <rxin@databricks.com>

This patch had conflicts when merged, resolved by
Committer: Reynold Xin <rxin@databricks.com>

Closes #7506 from rxin/datetime and squashes the following commits:

0cb24d9 [Reynold Xin] Export all functions in Python.
e44a4a0 [Reynold Xin] Removed day function from Scala and Python.
9c08fdc [Reynold Xin] [SQL] Make date/time functions more consistent with other database systems.
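For reference, a minimal sketch of the renamed Python API (not part of the patch; it assumes a live `sqlContext`, as in the doctests further down, and simply combines their expected values into one row):

>>> from pyspark.sql.functions import dayofmonth, dayofyear, weekofyear
>>> df = sqlContext.createDataFrame([('2015-04-08',)], ['a'])
>>> df.select(dayofmonth('a').alias('day'), dayofyear('a').alias('doy'), weekofyear('a').alias('week')).collect()
[Row(day=8, doy=98, week=15)]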
Diffstat (limited to 'python/pyspark/sql')
-rw-r--r--  python/pyspark/sql/functions.py  35
1 file changed, 14 insertions, 21 deletions
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index 0aca378892..fd5a3ba8ad 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -55,6 +55,11 @@ __all__ = [
__all__ += ['lag', 'lead', 'ntile']
+__all__ += [
+ 'date_format',
+ 'year', 'quarter', 'month', 'hour', 'minute', 'second',
+ 'dayofmonth', 'dayofyear', 'weekofyear']
+
def _create_function(name, doc=""):
""" Create a function for aggregator by name"""
@@ -713,41 +718,29 @@ def month(col):
@since(1.5)
-def day(col):
- """
- Extract the day of the month of a given date as integer.
-
- >>> sqlContext.createDataFrame([('2015-04-08',)], ['a']).select(day('a').alias('day')).collect()
- [Row(day=8)]
- """
- sc = SparkContext._active_spark_context
- return Column(sc._jvm.functions.day(col))
-
-
-@since(1.5)
-def day_of_month(col):
+def dayofmonth(col):
"""
Extract the day of the month of a given date as integer.
>>> df = sqlContext.createDataFrame([('2015-04-08',)], ['a'])
- >>> df.select(day_of_month('a').alias('day')).collect()
+ >>> df.select(dayofmonth('a').alias('day')).collect()
[Row(day=8)]
"""
sc = SparkContext._active_spark_context
- return Column(sc._jvm.functions.day_of_month(col))
+ return Column(sc._jvm.functions.dayofmonth(col))
@since(1.5)
-def day_in_year(col):
+def dayofyear(col):
"""
Extract the day of the year of a given date as integer.
>>> df = sqlContext.createDataFrame([('2015-04-08',)], ['a'])
- >>> df.select(day_in_year('a').alias('day')).collect()
+ >>> df.select(dayofyear('a').alias('day')).collect()
[Row(day=98)]
"""
sc = SparkContext._active_spark_context
- return Column(sc._jvm.functions.day_in_year(col))
+ return Column(sc._jvm.functions.dayofyear(col))
@since(1.5)
@@ -790,16 +783,16 @@ def second(col):
@since(1.5)
-def week_of_year(col):
+def weekofyear(col):
"""
Extract the week number of a given date as integer.
>>> df = sqlContext.createDataFrame([('2015-04-08',)], ['a'])
- >>> df.select(week_of_year('a').alias('week')).collect()
+ >>> df.select(weekofyear('a').alias('week')).collect()
[Row(week=15)]
"""
sc = SparkContext._active_spark_context
- return Column(sc._jvm.functions.week_of_year(col))
+ return Column(sc._jvm.functions.weekofyear(col))
class UserDefinedFunction(object):
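Per the commit message, the bare `day` name survives only as a SQL alias. A hedged sketch of what that usage might look like (the SQL registration is not part of this diff, so treat the query below as an illustration rather than the patch's API):

>>> df = sqlContext.createDataFrame([('2015-04-08',)], ['a'])
>>> df.registerTempTable('dates')
>>> sqlContext.sql("SELECT day(a) AS day FROM dates").collect()
[Row(day=8)]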