diff options
author | Cheng Hao <hao.cheng@intel.com> | 2015-07-15 21:47:21 -0700 |
---|---|---|
committer | Reynold Xin <rxin@databricks.com> | 2015-07-15 21:47:21 -0700 |
commit | 42dea3acf90ec506a0b79720b55ae1d753cc7544 (patch) | |
tree | 617b79a51b14a397fde87e412e329676566240ca /python/pyspark/sql/functions.py | |
parent | 9c64a75bfc5e2566d1b4cd0d9b4585a818086ca6 (diff) | |
download | spark-42dea3acf90ec506a0b79720b55ae1d753cc7544.tar.gz spark-42dea3acf90ec506a0b79720b55ae1d753cc7544.tar.bz2 spark-42dea3acf90ec506a0b79720b55ae1d753cc7544.zip |
[SPARK-8245][SQL] FormatNumber/Length Support for Expression
- `BinaryType` for `Length`
- `FormatNumber`
Author: Cheng Hao <hao.cheng@intel.com>
Closes #7034 from chenghao-intel/expression and squashes the following commits:
e534b87 [Cheng Hao] python api style issue
601bbf5 [Cheng Hao] add python API support
3ebe288 [Cheng Hao] update as feedback
52274f7 [Cheng Hao] add support for udf_format_number and length for binary
Diffstat (limited to 'python/pyspark/sql/functions.py')
-rw-r--r-- | python/pyspark/sql/functions.py | 25 |
1 file changed, 20 insertions, 5 deletions
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index dca39fa833..e0816b3e65 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -39,6 +39,8 @@ __all__ = [ 'coalesce', 'countDistinct', 'explode', + 'format_number', + 'length', 'log2', 'md5', 'monotonicallyIncreasingId', @@ -47,7 +49,6 @@ __all__ = [ 'sha1', 'sha2', 'sparkPartitionId', - 'strlen', 'struct', 'udf', 'when'] @@ -506,14 +507,28 @@ def sparkPartitionId(): @ignore_unicode_prefix @since(1.5) -def strlen(col): - """Calculates the length of a string expression. +def length(col): + """Calculates the length of a string or binary expression. - >>> sqlContext.createDataFrame([('ABC',)], ['a']).select(strlen('a').alias('length')).collect() + >>> sqlContext.createDataFrame([('ABC',)], ['a']).select(length('a').alias('length')).collect() [Row(length=3)] """ sc = SparkContext._active_spark_context - return Column(sc._jvm.functions.strlen(_to_java_column(col))) + return Column(sc._jvm.functions.length(_to_java_column(col))) + + +@ignore_unicode_prefix +@since(1.5) +def format_number(col, d): + """Formats the number X to a format like '#,###,###.##', rounded to d decimal places, + and returns the result as a string. + :param col: the column name of the numeric value to be formatted + :param d: the N decimal places + >>> sqlContext.createDataFrame([(5,)], ['a']).select(format_number('a', 4).alias('v')).collect() + [Row(v=u'5.0000')] + """ + sc = SparkContext._active_spark_context + return Column(sc._jvm.functions.format_number(_to_java_column(col), d)) @ignore_unicode_prefix |