about summary refs log tree commit diff
path: root/python
diff options
context:
space:
mode:
authorDaoyuan Wang <daoyuan.wang@intel.com>2015-07-30 19:22:38 -0700
committerReynold Xin <rxin@databricks.com>2015-07-30 19:22:38 -0700
commit83670fc9e6fc9c7a6ae68dfdd3f9335ea72f4ab0 (patch)
tree6dcb87133b02c1f9e847dbcb9b5bf667a7dadd3f /python
parent9307f5653d19a6a2fda355a675ca9ea97e35611b (diff)
downloadspark-83670fc9e6fc9c7a6ae68dfdd3f9335ea72f4ab0.tar.gz
spark-83670fc9e6fc9c7a6ae68dfdd3f9335ea72f4ab0.tar.bz2
spark-83670fc9e6fc9c7a6ae68dfdd3f9335ea72f4ab0.zip
[SPARK-8176] [SPARK-8197] [SQL] function to_date/ trunc
This PR is based on #6988 , thanks to adrian-wang . This brings two SQL functions: to_date() and trunc(). Closes #6988 Author: Daoyuan Wang <daoyuan.wang@intel.com> Author: Davies Liu <davies@databricks.com> Closes #7805 from davies/to_date and squashes the following commits: 2c7beba [Davies Liu] Merge branch 'master' of github.com:apache/spark into to_date 310dd55 [Daoyuan Wang] remove dup test in rebase 980b092 [Daoyuan Wang] resolve rebase conflict a476c5a [Daoyuan Wang] address comments from davies d44ea5f [Daoyuan Wang] function to_date, trunc
Diffstat (limited to 'python')
-rw-r--r--python/pyspark/sql/functions.py30
1 file changed, 30 insertions, 0 deletions
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index a7295e25f0..8024a8de07 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -889,6 +889,36 @@ def months_between(date1, date2):
@since(1.5)
def to_date(col):
    """
    Casts a column of StringType or TimestampType to DateType.

    >>> df = sqlContext.createDataFrame([('1997-02-28 10:30:00',)], ['t'])
    >>> df.select(to_date(df.t).alias('date')).collect()
    [Row(date=datetime.date(1997, 2, 28))]
    """
    # Delegate to the JVM-side org.apache.spark.sql.functions.to_date
    # via the active gateway, then wrap the resulting Java column.
    ctx = SparkContext._active_spark_context
    jcol = ctx._jvm.functions.to_date(_to_java_column(col))
    return Column(jcol)
+
+
@since(1.5)
def trunc(date, format):
    """
    Truncates a date column down to the unit named by ``format``.

    :param format: 'year', 'YYYY', 'yy' or 'month', 'mon', 'mm'

    >>> df = sqlContext.createDataFrame([('1997-02-28',)], ['d'])
    >>> df.select(trunc(df.d, 'year').alias('year')).collect()
    [Row(year=datetime.date(1997, 1, 1))]
    >>> df.select(trunc(df.d, 'mon').alias('month')).collect()
    [Row(month=datetime.date(1997, 2, 1))]
    """
    # Bridge to the JVM implementation: convert the Python column to its
    # Java counterpart, truncate there, and wrap the result back up.
    spark_ctx = SparkContext._active_spark_context
    truncated = spark_ctx._jvm.functions.trunc(_to_java_column(date), format)
    return Column(truncated)
+
+
+@since(1.5)
def size(col):
"""
Collection function: returns the length of the array or map stored in the column.