about | summary | refs | log | tree | commit | diff
path: root/python/pyspark
diff options
context:
space:
mode:
author: Cheng Hao <hao.cheng@intel.com> 2015-07-15 21:47:21 -0700
committer: Reynold Xin <rxin@databricks.com> 2015-07-15 21:47:21 -0700
commit: 42dea3acf90ec506a0b79720b55ae1d753cc7544 (patch)
tree: 617b79a51b14a397fde87e412e329676566240ca /python/pyspark
parent: 9c64a75bfc5e2566d1b4cd0d9b4585a818086ca6 (diff)
download: spark-42dea3acf90ec506a0b79720b55ae1d753cc7544.tar.gz
spark-42dea3acf90ec506a0b79720b55ae1d753cc7544.tar.bz2
spark-42dea3acf90ec506a0b79720b55ae1d753cc7544.zip
[SPARK-8245][SQL] FormatNumber/Length Support for Expression
- `BinaryType` for `Length`
- `FormatNumber`

Author: Cheng Hao <hao.cheng@intel.com>

Closes #7034 from chenghao-intel/expression and squashes the following commits:

e534b87 [Cheng Hao] python api style issue
601bbf5 [Cheng Hao] add python API support
3ebe288 [Cheng Hao] update as feedback
52274f7 [Cheng Hao] add support for udf_format_number and length for binary
Diffstat (limited to 'python/pyspark')
-rw-r--r-- python/pyspark/sql/functions.py | 25
1 files changed, 20 insertions, 5 deletions
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index dca39fa833..e0816b3e65 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -39,6 +39,8 @@ __all__ = [
'coalesce',
'countDistinct',
'explode',
+ 'format_number',
+ 'length',
'log2',
'md5',
'monotonicallyIncreasingId',
@@ -47,7 +49,6 @@ __all__ = [
'sha1',
'sha2',
'sparkPartitionId',
- 'strlen',
'struct',
'udf',
'when']
@@ -506,14 +507,28 @@ def sparkPartitionId():
@ignore_unicode_prefix
@since(1.5)
-def strlen(col):
- """Calculates the length of a string expression.
+def length(col):
+ """Calculates the length of a string or binary expression.
- >>> sqlContext.createDataFrame([('ABC',)], ['a']).select(strlen('a').alias('length')).collect()
+ >>> sqlContext.createDataFrame([('ABC',)], ['a']).select(length('a').alias('length')).collect()
[Row(length=3)]
"""
sc = SparkContext._active_spark_context
- return Column(sc._jvm.functions.strlen(_to_java_column(col)))
+ return Column(sc._jvm.functions.length(_to_java_column(col)))
+
+
+@ignore_unicode_prefix
+@since(1.5)
+def format_number(col, d):
+ """Formats the number X to a format like '#,###,###.##', rounded to d decimal places,
+ and returns the result as a string.
+ :param col: the column name of the numeric value to be formatted
+ :param d: the N decimal places
+ >>> sqlContext.createDataFrame([(5,)], ['a']).select(format_number('a', 4).alias('v')).collect()
+ [Row(v=u'5.0000')]
+ """
+ sc = SparkContext._active_spark_context
+ return Column(sc._jvm.functions.format_number(_to_java_column(col), d))
@ignore_unicode_prefix