diff options
author | Cheng Hao <hao.cheng@intel.com> | 2015-07-31 23:11:22 -0700 |
---|---|---|
committer | Davies Liu <davies.liu@gmail.com> | 2015-07-31 23:11:22 -0700 |
commit | 67ad4e21fc68336b0ad6f9a363fb5ebb51f592bf (patch) | |
tree | de2371bb74fec1fa6b93391809e30bbad202ca3f /python/pyspark/sql | |
parent | 3320b0ba262159c0c7209ce39b353c93c597077d (diff) | |
download | spark-67ad4e21fc68336b0ad6f9a363fb5ebb51f592bf.tar.gz spark-67ad4e21fc68336b0ad6f9a363fb5ebb51f592bf.tar.bz2 spark-67ad4e21fc68336b0ad6f9a363fb5ebb51f592bf.zip |
[SPARK-8232] [SQL] Add sort_array support
Add expression `sort_array` support.
Author: Cheng Hao <hao.cheng@intel.com>
This patch had conflicts when merged, resolved by
Committer: Davies Liu <davies.liu@gmail.com>
Closes #7581 from chenghao-intel/sort_array and squashes the following commits:
664c960 [Cheng Hao] update the sort_array by using the ArrayData
276d2d5 [Cheng Hao] add empty line
0edab9c [Cheng Hao] Add ascending/descending support for sort_array
80fc0f8 [Cheng Hao] Add type checking
a42b678 [Cheng Hao] Add sort_array support
Diffstat (limited to 'python/pyspark/sql')
-rw-r--r-- | python/pyspark/sql/functions.py | 20 |
1 file changed, 20 insertions, 0 deletions
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index 89a2a5ceaa..fb542e6cff 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -51,6 +51,7 @@ __all__ = [ 'sha1', 'sha2', 'size', + 'sort_array', 'sparkPartitionId', 'struct', 'udf', @@ -570,8 +571,10 @@ def length(col): def format_number(col, d): """Formats the number X to a format like '#,###,###.##', rounded to d decimal places, and returns the result as a string. + :param col: the column name of the numeric value to be formatted :param d: the N decimal places + >>> sqlContext.createDataFrame([(5,)], ['a']).select(format_number('a', 4).alias('v')).collect() [Row(v=u'5.0000')] """ @@ -968,6 +971,23 @@ def soundex(col): return Column(sc._jvm.functions.size(_to_java_column(col))) +@since(1.5) +def sort_array(col, asc=True): + """ + Collection function: sorts the input array for the given column in ascending order. + + :param col: name of column or expression + + >>> df = sqlContext.createDataFrame([([2, 1, 3],),([1],),([],)], ['data']) + >>> df.select(sort_array(df.data).alias('r')).collect() + [Row(r=[1, 2, 3]), Row(r=[1]), Row(r=[])] + >>> df.select(sort_array(df.data, asc=False).alias('r')).collect() + [Row(r=[3, 2, 1]), Row(r=[1]), Row(r=[])] + """ + sc = SparkContext._active_spark_context + return Column(sc._jvm.functions.sort_array(_to_java_column(col), asc)) + + class UserDefinedFunction(object): """ User defined function in Python |