aboutsummaryrefslogtreecommitdiff
path: root/python
diff options
context:
space:
mode:
authorCheng Hao <hao.cheng@intel.com>2015-08-01 08:32:29 -0700
committerDavies Liu <davies.liu@gmail.com>2015-08-01 08:32:29 -0700
commitcf6c9ca32a89422e25007d333bc8714d9b0ae6d8 (patch)
treea3bb6344de1e6351cf289e8194b3d75bd48e3136 /python
parent8765665015ef47a23e00f7d01d4d280c31bb236d (diff)
downloadspark-cf6c9ca32a89422e25007d333bc8714d9b0ae6d8.tar.gz
spark-cf6c9ca32a89422e25007d333bc8714d9b0ae6d8.tar.bz2
spark-cf6c9ca32a89422e25007d333bc8714d9b0ae6d8.zip
[SPARK-8232] [SQL] Add sort_array support
This PR is based on #7581; it only fixes the merge conflict. Author: Cheng Hao <hao.cheng@intel.com> Author: Davies Liu <davies@databricks.com> Closes #7851 from davies/sort_array and squashes the following commits: a80ef66 [Davies Liu] fix conflict 7cfda65 [Davies Liu] Merge branch 'master' of github.com:apache/spark into sort_array 664c960 [Cheng Hao] update the sort_array by using the ArrayData 276d2d5 [Cheng Hao] add empty line 0edab9c [Cheng Hao] Add asending/descending support for sort_array 80fc0f8 [Cheng Hao] Add type checking a42b678 [Cheng Hao] Add sort_array support
Diffstat (limited to 'python')
-rw-r--r--python/pyspark/sql/functions.py20
1 files changed, 20 insertions, 0 deletions
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index 89a2a5ceaa..81dc7d832e 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -51,6 +51,7 @@ __all__ = [
'sha1',
'sha2',
'size',
+ 'sort_array',
'sparkPartitionId',
'struct',
'udf',
@@ -570,8 +571,10 @@ def length(col):
def format_number(col, d):
"""Formats the number X to a format like '#,###,###.##', rounded to d decimal places,
and returns the result as a string.
+
:param col: the column name of the numeric value to be formatted
:param d: the N decimal places
+
>>> sqlContext.createDataFrame([(5,)], ['a']).select(format_number('a', 4).alias('v')).collect()
[Row(v=u'5.0000')]
"""
@@ -954,6 +957,23 @@ def size(col):
return Column(sc._jvm.functions.size(_to_java_column(col)))
+@since(1.5)
+def sort_array(col, asc=True):
+ """
+ Collection function: sorts the input array for the given column in ascending
+ order by default, or in descending order when `asc` is False.
+
+ :param col: name of column or expression
+ :param asc: if True (the default) the array is sorted in ascending order;
+ if False it is sorted in descending order
+
+ >>> df = sqlContext.createDataFrame([([2, 1, 3],),([1],),([],)], ['data'])
+ >>> df.select(sort_array(df.data).alias('r')).collect()
+ [Row(r=[1, 2, 3]), Row(r=[1]), Row(r=[])]
+ >>> df.select(sort_array(df.data, asc=False).alias('r')).collect()
+ [Row(r=[3, 2, 1]), Row(r=[1]), Row(r=[])]
+ """
+ sc = SparkContext._active_spark_context
+ # Delegate to the JVM-side functions.sort_array, forwarding the asc flag as-is.
+ return Column(sc._jvm.functions.sort_array(_to_java_column(col), asc))
+
+
@since
@ignore_unicode_prefix
def soundex(col):