about summary refs log tree commit diff
path: root/python
diff options
context:
space:
mode:
authorDaoyuan Wang <daoyuan.wang@intel.com>2015-07-30 19:22:38 -0700
committerReynold Xin <rxin@databricks.com>2015-07-30 19:22:38 -0700
commit83670fc9e6fc9c7a6ae68dfdd3f9335ea72f4ab0 (patch)
tree6dcb87133b02c1f9e847dbcb9b5bf667a7dadd3f /python
parent9307f5653d19a6a2fda355a675ca9ea97e35611b (diff)
downloadspark-83670fc9e6fc9c7a6ae68dfdd3f9335ea72f4ab0.tar.gz
spark-83670fc9e6fc9c7a6ae68dfdd3f9335ea72f4ab0.tar.bz2
spark-83670fc9e6fc9c7a6ae68dfdd3f9335ea72f4ab0.zip
[SPARK-8176] [SPARK-8197] [SQL] function to_date/ trunc
This PR is based on #6988 , thanks to adrian-wang . This brings two SQL functions: to_date() and trunc(). Closes #6988 Author: Daoyuan Wang <daoyuan.wang@intel.com> Author: Davies Liu <davies@databricks.com> Closes #7805 from davies/to_date and squashes the following commits: 2c7beba [Davies Liu] Merge branch 'master' of github.com:apache/spark into to_date 310dd55 [Daoyuan Wang] remove dup test in rebase 980b092 [Daoyuan Wang] resolve rebase conflict a476c5a [Daoyuan Wang] address comments from davies d44ea5f [Daoyuan Wang] function to_date, trunc
Diffstat (limited to 'python')
-rw-r--r--python/pyspark/sql/functions.py30
1 file changed, 30 insertions, 0 deletions
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index a7295e25f0..8024a8de07 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -889,6 +889,36 @@ def months_between(date1, date2):
@since(1.5)
def to_date(col):
    """
    Casts a column of StringType or TimestampType to DateType.

    >>> df = sqlContext.createDataFrame([('1997-02-28 10:30:00',)], ['t'])
    >>> df.select(to_date(df.t).alias('date')).collect()
    [Row(date=datetime.date(1997, 2, 28))]
    """
    # Delegate to the JVM-side org.apache.spark.sql.functions.to_date
    # via the active gateway, then wrap the resulting Java column.
    ctx = SparkContext._active_spark_context
    jcol = ctx._jvm.functions.to_date(_to_java_column(col))
    return Column(jcol)
+
+
@since(1.5)
def trunc(date, format):
    """
    Truncates a date column down to the unit named by ``format``.

    :param format: 'year', 'YYYY', 'yy' or 'month', 'mon', 'mm'

    >>> df = sqlContext.createDataFrame([('1997-02-28',)], ['d'])
    >>> df.select(trunc(df.d, 'year').alias('year')).collect()
    [Row(year=datetime.date(1997, 1, 1))]
    >>> df.select(trunc(df.d, 'mon').alias('month')).collect()
    [Row(month=datetime.date(1997, 2, 1))]
    """
    # Bridge to the JVM implementation: convert the Python column to its
    # Java counterpart, truncate there, and wrap the result back up.
    spark_ctx = SparkContext._active_spark_context
    truncated = spark_ctx._jvm.functions.trunc(_to_java_column(date), format)
    return Column(truncated)
+
+
+@since(1.5)
def size(col):
"""
Collection function: returns the length of the array or map stored in the column.