aboutsummaryrefslogtreecommitdiff
path: root/sql
diff options
context:
space:
mode:
authorReynold Xin <rxin@databricks.com>2015-02-24 18:59:23 -0800
committerMichael Armbrust <michael@databricks.com>2015-02-24 18:59:23 -0800
commitfba11c2f55dd81e4f6230e7edca3c7b2e01ccd9d (patch)
tree0265e1e104b5e64f77e2a72f12f91444d63810ea /sql
parent53a1ebf33b5c349ae3a40d7eebf357b839b363af (diff)
downloadspark-fba11c2f55dd81e4f6230e7edca3c7b2e01ccd9d.tar.gz
spark-fba11c2f55dd81e4f6230e7edca3c7b2e01ccd9d.tar.bz2
spark-fba11c2f55dd81e4f6230e7edca3c7b2e01ccd9d.zip
[SPARK-5985][SQL] DataFrame sortBy -> orderBy in Python.
Also added desc/asc function for constructing sorting expressions more conveniently. And added a small fix to lift alias out of cast expression. Author: Reynold Xin <rxin@databricks.com> Closes #4752 from rxin/SPARK-5985 and squashes the following commits: aeda5ae [Reynold Xin] Added Experimental flag to ColumnName. 047ad03 [Reynold Xin] Lift alias out of cast. c9cf17c [Reynold Xin] [SPARK-5985][SQL] DataFrame sortBy -> orderBy in Python.
Diffstat (limited to 'sql')
-rw-r--r--sql/core/src/main/scala/org/apache/spark/sql/Column.scala13
-rw-r--r--sql/core/src/main/scala/org/apache/spark/sql/functions.scala29
-rw-r--r--sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala4
-rw-r--r--sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala4
4 files changed, 48 insertions, 2 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
index 980754322e..a2cc9a9b93 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
@@ -600,7 +600,11 @@ class Column(protected[sql] val expr: Expression) {
*
* @group expr_ops
*/
- def cast(to: DataType): Column = Cast(expr, to)
+ def cast(to: DataType): Column = expr match {
+ // Lift alias out of cast so we can support col.as("name").cast(IntegerType)
+ case Alias(childExpr, name) => Alias(Cast(childExpr, to), name)()
+ case _ => Cast(expr, to)
+ }
/**
* Casts the column to a different data type, using the canonical string representation
@@ -613,7 +617,7 @@ class Column(protected[sql] val expr: Expression) {
*
* @group expr_ops
*/
- def cast(to: String): Column = Cast(expr, to.toLowerCase match {
+ def cast(to: String): Column = cast(to.toLowerCase match {
case "string" | "str" => StringType
case "boolean" => BooleanType
case "byte" => ByteType
@@ -671,6 +675,11 @@ class Column(protected[sql] val expr: Expression) {
}
+/**
+ * :: Experimental ::
+ * A convenience class for constructing schemas.
+ */
+@Experimental
class ColumnName(name: String) extends Column(name) {
/** Creates a new AttributeReference of type boolean */
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
index 2a1e086891..4fdbfc6d22 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
@@ -33,6 +33,7 @@ import org.apache.spark.sql.types._
*
* @groupname udf_funcs UDF functions
* @groupname agg_funcs Aggregate functions
+ * @groupname sort_funcs Sorting functions
* @groupname normal_funcs Non-aggregate functions
* @groupname Ungrouped Support functions for DataFrames.
*/
@@ -96,6 +97,33 @@ object functions {
}
//////////////////////////////////////////////////////////////////////////////////////////////
+ // Sort functions
+ //////////////////////////////////////////////////////////////////////////////////////////////
+
+ /**
+ * Returns a sort expression based on the ascending order of the column.
+ * {{{
+ * // Sort by dept in ascending order, and then age in descending order.
+ * df.sort(asc("dept"), desc("age"))
+ * }}}
+ *
+ * @group sort_funcs
+ */
+ def asc(columnName: String): Column = Column(columnName).asc
+
+ /**
+ * Returns a sort expression based on the descending order of the column.
+ * {{{
+ * // Sort by dept in ascending order, and then age in descending order.
+ * df.sort(asc("dept"), desc("age"))
+ * }}}
+ *
+ * @group sort_funcs
+ */
+ def desc(columnName: String): Column = Column(columnName).desc
+
+ //////////////////////////////////////////////////////////////////////////////////////////////
+ // Aggregate functions
//////////////////////////////////////////////////////////////////////////////////////////////
/**
@@ -263,6 +291,7 @@ object functions {
def max(columnName: String): Column = max(Column(columnName))
//////////////////////////////////////////////////////////////////////////////////////////////
+ // Non-aggregate functions
//////////////////////////////////////////////////////////////////////////////////////////////
/**
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala
index 928b0deb61..37c02aaa54 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala
@@ -309,4 +309,8 @@ class ColumnExpressionSuite extends QueryTest {
(1 to 100).map(n => Row(null))
)
}
+
+ test("lift alias out of cast") {
+ assert(col("1234").as("name").cast("int").expr === col("1234").cast("int").as("name").expr)
+ }
}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
index 30e77e4ef3..c392a553c0 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
@@ -240,6 +240,10 @@ class DataFrameSuite extends QueryTest {
Seq(Row(1,1), Row(1,2), Row(2,1), Row(2,2), Row(3,1), Row(3,2)))
checkAnswer(
+ testData2.orderBy(asc("a"), desc("b")),
+ Seq(Row(1,2), Row(1,1), Row(2,2), Row(2,1), Row(3,2), Row(3,1)))
+
+ checkAnswer(
testData2.orderBy('a.asc, 'b.desc),
Seq(Row(1,2), Row(1,1), Row(2,2), Row(2,1), Row(3,2), Row(3,1)))