diff options
author | Daoyuan Wang <daoyuan.wang@intel.com> | 2015-07-30 19:22:38 -0700 |
---|---|---|
committer | Reynold Xin <rxin@databricks.com> | 2015-07-30 19:22:38 -0700 |
commit | 83670fc9e6fc9c7a6ae68dfdd3f9335ea72f4ab0 (patch) | |
tree | 6dcb87133b02c1f9e847dbcb9b5bf667a7dadd3f /python | |
parent | 9307f5653d19a6a2fda355a675ca9ea97e35611b (diff) | |
download | spark-83670fc9e6fc9c7a6ae68dfdd3f9335ea72f4ab0.tar.gz spark-83670fc9e6fc9c7a6ae68dfdd3f9335ea72f4ab0.tar.bz2 spark-83670fc9e6fc9c7a6ae68dfdd3f9335ea72f4ab0.zip |
[SPARK-8176] [SPARK-8197] [SQL] function to_date/ trunc
This PR is based on #6988; thanks to adrian-wang.
This brings two SQL functions: to_date() and trunc().
Closes #6988
Author: Daoyuan Wang <daoyuan.wang@intel.com>
Author: Davies Liu <davies@databricks.com>
Closes #7805 from davies/to_date and squashes the following commits:
2c7beba [Davies Liu] Merge branch 'master' of github.com:apache/spark into to_date
310dd55 [Daoyuan Wang] remove dup test in rebase
980b092 [Daoyuan Wang] resolve rebase conflict
a476c5a [Daoyuan Wang] address comments from davies
d44ea5f [Daoyuan Wang] function to_date, trunc
Diffstat (limited to 'python')
-rw-r--r-- | python/pyspark/sql/functions.py | 30 |
1 files changed, 30 insertions, 0 deletions
@since(1.5)
def to_date(col):
    """
    Converts a column of StringType or TimestampType values into DateType.

    >>> df = sqlContext.createDataFrame([('1997-02-28 10:30:00',)], ['t'])
    >>> df.select(to_date(df.t).alias('date')).collect()
    [Row(date=datetime.date(1997, 2, 28))]
    """
    # Delegate to the JVM-side org.apache.spark.sql.functions.to_date.
    ctx = SparkContext._active_spark_context
    jvm_col = ctx._jvm.functions.to_date(_to_java_column(col))
    return Column(jvm_col)


@since(1.5)
def trunc(date, format):
    """
    Returns a date truncated to the unit given by the format string.

    :param format: 'year', 'YYYY', 'yy' or 'month', 'mon', 'mm'

    >>> df = sqlContext.createDataFrame([('1997-02-28',)], ['d'])
    >>> df.select(trunc(df.d, 'year').alias('year')).collect()
    [Row(year=datetime.date(1997, 1, 1))]
    >>> df.select(trunc(df.d, 'mon').alias('month')).collect()
    [Row(month=datetime.date(1997, 2, 1))]
    """
    # Delegate to the JVM-side org.apache.spark.sql.functions.trunc.
    ctx = SparkContext._active_spark_context
    jvm_col = ctx._jvm.functions.trunc(_to_java_column(date), format)
    return Column(jvm_col)