diff options
author | Cheng Hao <hao.cheng@intel.com> | 2015-07-15 21:47:21 -0700 |
---|---|---|
committer | Reynold Xin <rxin@databricks.com> | 2015-07-15 21:47:21 -0700 |
commit | 42dea3acf90ec506a0b79720b55ae1d753cc7544 (patch) | |
tree | 617b79a51b14a397fde87e412e329676566240ca /python/pyspark/sql/functions.py | |
parent | 9c64a75bfc5e2566d1b4cd0d9b4585a818086ca6 (diff) | |
download | spark-42dea3acf90ec506a0b79720b55ae1d753cc7544.tar.gz spark-42dea3acf90ec506a0b79720b55ae1d753cc7544.tar.bz2 spark-42dea3acf90ec506a0b79720b55ae1d753cc7544.zip |
[SPARK-8245][SQL] FormatNumber/Length Support for Expression
- `BinaryType` for `Length`
- `FormatNumber`
Author: Cheng Hao <hao.cheng@intel.com>
Closes #7034 from chenghao-intel/expression and squashes the following commits:
e534b87 [Cheng Hao] python api style issue
601bbf5 [Cheng Hao] add python API support
3ebe288 [Cheng Hao] update as feedback
52274f7 [Cheng Hao] add support for udf_format_number and length for binary
Diffstat (limited to 'python/pyspark/sql/functions.py')
-rw-r--r-- | python/pyspark/sql/functions.py | 25 |
1 file changed, 20 insertions, 5 deletions
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index dca39fa833..e0816b3e65 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -39,6 +39,8 @@ __all__ = [ 'coalesce', 'countDistinct', 'explode', + 'format_number', + 'length', 'log2', 'md5', 'monotonicallyIncreasingId', @@ -47,7 +49,6 @@ __all__ = [ 'sha1', 'sha2', 'sparkPartitionId', - 'strlen', 'struct', 'udf', 'when'] @@ -506,14 +507,28 @@ def sparkPartitionId(): @ignore_unicode_prefix @since(1.5) -def strlen(col): - """Calculates the length of a string expression. +def length(col): + """Calculates the length of a string or binary expression. - >>> sqlContext.createDataFrame([('ABC',)], ['a']).select(strlen('a').alias('length')).collect() + >>> sqlContext.createDataFrame([('ABC',)], ['a']).select(length('a').alias('length')).collect() [Row(length=3)] """ sc = SparkContext._active_spark_context - return Column(sc._jvm.functions.strlen(_to_java_column(col))) + return Column(sc._jvm.functions.length(_to_java_column(col))) + + +@ignore_unicode_prefix +@since(1.5) +def format_number(col, d): + """Formats the number X to a format like '#,###,###.##', rounded to d decimal places, + and returns the result as a string. + :param col: the column name of the numeric value to be formatted + :param d: the N decimal places + >>> sqlContext.createDataFrame([(5,)], ['a']).select(format_number('a', 4).alias('v')).collect() + [Row(v=u'5.0000')] + """ + sc = SparkContext._active_spark_context + return Column(sc._jvm.functions.format_number(_to_java_column(col), d)) @ignore_unicode_prefix |