From fba11c2f55dd81e4f6230e7edca3c7b2e01ccd9d Mon Sep 17 00:00:00 2001 From: Reynold Xin Date: Tue, 24 Feb 2015 18:59:23 -0800 Subject: [SPARK-5985][SQL] DataFrame sortBy -> orderBy in Python. Also added desc/asc function for constructing sorting expressions more conveniently. And added a small fix to lift alias out of cast expression. Author: Reynold Xin Closes #4752 from rxin/SPARK-5985 and squashes the following commits: aeda5ae [Reynold Xin] Added Experimental flag to ColumnName. 047ad03 [Reynold Xin] Lift alias out of cast. c9cf17c [Reynold Xin] [SPARK-5985][SQL] DataFrame sortBy -> orderBy in Python. --- python/pyspark/sql/dataframe.py | 11 ++++++++--- python/pyspark/sql/functions.py | 3 +++ 2 files changed, 11 insertions(+), 3 deletions(-) (limited to 'python/pyspark') diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index 010c38f93b..6f746d136b 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -504,13 +504,18 @@ class DataFrame(object): return DataFrame(jdf, self.sql_ctx) def sort(self, *cols): - """ Return a new :class:`DataFrame` sorted by the specified column. + """ Return a new :class:`DataFrame` sorted by the specified column(s). :param cols: The columns or expressions used for sorting >>> df.sort(df.age.desc()).collect() [Row(age=5, name=u'Bob'), Row(age=2, name=u'Alice')] - >>> df.sortBy(df.age.desc()).collect() + >>> df.orderBy(df.age.desc()).collect() + [Row(age=5, name=u'Bob'), Row(age=2, name=u'Alice')] + >>> from pyspark.sql.functions import * + >>> df.sort(asc("age")).collect() + [Row(age=2, name=u'Alice'), Row(age=5, name=u'Bob')] + >>> df.orderBy(desc("age"), "name").collect() [Row(age=5, name=u'Bob'), Row(age=2, name=u'Alice')] """ if not cols: @@ -520,7 +525,7 @@ class DataFrame(object): jdf = self._jdf.sort(self._sc._jvm.PythonUtils.toSeq(jcols)) return DataFrame(jdf, self.sql_ctx) - sortBy = sort + orderBy = sort def head(self, n=None): """ Return the first `n` rows or the first row if n is None. diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index fc61162f0b..8aa4476520 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -48,6 +48,9 @@ _functions = { 'lit': 'Creates a :class:`Column` of literal value.', 'col': 'Returns a :class:`Column` based on the given column name.', 'column': 'Returns a :class:`Column` based on the given column name.', + 'asc': 'Returns a sort expression based on the ascending order of the given column name.', + 'desc': 'Returns a sort expression based on the descending order of the given column name.', + 'upper': 'Converts a string expression to upper case.', 'lower': 'Converts a string expression to upper case.', 'sqrt': 'Computes the square root of the specified float value.', -- cgit v1.2.3