[SPARK-5985][SQL] DataFrame sortBy -> orderBy in Python.

Also added the desc/asc functions for constructing sorting expressions more conveniently, and a small fix to lift the alias out of a cast expression. Author: Reynold Xin <rxin@databricks.com> Closes #4752 from rxin/SPARK-5985 and squashes the following commits: aeda5ae [Reynold Xin] Added Experimental flag to ColumnName. 047ad03 [Reynold Xin] Lift alias out of cast. c9cf17c [Reynold Xin] [SPARK-5985][SQL] DataFrame sortBy -> orderBy in Python.
author: Reynold Xin <rxin@databricks.com> 2015-02-24 18:59:23 -0800
committer: Michael Armbrust <michael@databricks.com> 2015-02-24 18:59:23 -0800
commit: fba11c2f55dd81e4f6230e7edca3c7b2e01ccd9d (patch)
tree: 0265e1e104b5e64f77e2a72f12f91444d63810ea /python/pyspark
parent: 53a1ebf33b5c349ae3a40d7eebf357b839b363af (diff)
download: spark-fba11c2f55dd81e4f6230e7edca3c7b2e01ccd9d.tar.gz
spark-fba11c2f55dd81e4f6230e7edca3c7b2e01ccd9d.tar.bz2
spark-fba11c2f55dd81e4f6230e7edca3c7b2e01ccd9d.zip
2 files changed, 11 insertions, 3 deletions
diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index 010c38f93b..6f746d136b 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -504,13 +504,18 @@ class DataFrame(object):
         return DataFrame(jdf, self.sql_ctx)
 
     def sort(self, *cols):
-        """ Return a new :class:`DataFrame` sorted by the specified column.
+        """ Return a new :class:`DataFrame` sorted by the specified column(s).
 
         :param cols: The columns or expressions used for sorting
 
         >>> df.sort(df.age.desc()).collect()
         [Row(age=5, name=u'Bob'), Row(age=2, name=u'Alice')]
-        >>> df.sortBy(df.age.desc()).collect()
+        >>> df.orderBy(df.age.desc()).collect()
+        [Row(age=5, name=u'Bob'), Row(age=2, name=u'Alice')]
+        >>> from pyspark.sql.functions import *
+        >>> df.sort(asc("age")).collect()
+        [Row(age=2, name=u'Alice'), Row(age=5, name=u'Bob')]
+        >>> df.orderBy(desc("age"), "name").collect()
         [Row(age=5, name=u'Bob'), Row(age=2, name=u'Alice')]
         """
         if not cols:
@@ -520,7 +525,7 @@ class DataFrame(object):
         jdf = self._jdf.sort(self._sc._jvm.PythonUtils.toSeq(jcols))
         return DataFrame(jdf, self.sql_ctx)
 
-    sortBy = sort
+    orderBy = sort
 
     def head(self, n=None):
         """ Return the first `n` rows or the first row if n is None.
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index fc61162f0b..8aa4476520 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -48,6 +48,9 @@ _functions = {
     'lit': 'Creates a :class:`Column` of literal value.',
     'col': 'Returns a :class:`Column` based on the given column name.',
     'column': 'Returns a :class:`Column` based on the given column name.',
+    'asc': 'Returns a sort expression based on the ascending order of the given column name.',
+    'desc': 'Returns a sort expression based on the descending order of the given column name.',
+
     'upper': 'Converts a string expression to upper case.',
     'lower': 'Converts a string expression to upper case.',
     'sqrt': 'Computes the square root of the specified float value.',
author	Reynold Xin <rxin@databricks.com>	2015-02-24 18:59:23 -0800
committer	Michael Armbrust <michael@databricks.com>	2015-02-24 18:59:23 -0800
commit	fba11c2f55dd81e4f6230e7edca3c7b2e01ccd9d (patch)
tree	0265e1e104b5e64f77e2a72f12f91444d63810ea /python/pyspark
parent	53a1ebf33b5c349ae3a40d7eebf357b839b363af (diff)
download	spark-fba11c2f55dd81e4f6230e7edca3c7b2e01ccd9d.tar.gz spark-fba11c2f55dd81e4f6230e7edca3c7b2e01ccd9d.tar.bz2 spark-fba11c2f55dd81e4f6230e7edca3c7b2e01ccd9d.zip