diff options
author | xin wu <xinwu@us.ibm.com> | 2016-09-25 16:46:12 -0700 |
---|---|---|
committer | Herman van Hovell <hvanhovell@databricks.com> | 2016-09-25 16:46:12 -0700 |
commit | de333d121da4cb80d45819cbcf8b4246e48ec4d0 (patch) | |
tree | 64d7b8dc9b34749574f5ddd78a9f951ca9ed8575 /sql/core/src | |
parent | 7945daed12542587d51ece8f07e5c828b40db14a (diff) | |
download | spark-de333d121da4cb80d45819cbcf8b4246e48ec4d0.tar.gz spark-de333d121da4cb80d45819cbcf8b4246e48ec4d0.tar.bz2 spark-de333d121da4cb80d45819cbcf8b4246e48ec4d0.zip |
[SPARK-17551][SQL] Add DataFrame API for null ordering
## What changes were proposed in this pull request?
This pull request adds Scala/Java DataFrame API for null ordering (NULLS FIRST | LAST).
Also did some minor clean up for related code (e.g. incorrect indentation), and renamed "orderby-nulls-ordering.sql" to be consistent with existing test files.
## How was this patch tested?
Added a new test case in DataFrameSuite.
Author: petermaxlee <petermaxlee@gmail.com>
Author: Xin Wu <xinwu@us.ibm.com>
Closes #15123 from petermaxlee/SPARK-17551.
Diffstat (limited to 'sql/core/src')
-rw-r--r-- | sql/core/src/main/scala/org/apache/spark/sql/Column.scala | 64 | ||||
-rw-r--r-- | sql/core/src/main/scala/org/apache/spark/sql/functions.scala | 51 | ||||
-rw-r--r-- | sql/core/src/test/resources/sql-tests/inputs/order-by-nulls-ordering.sql (renamed from sql/core/src/test/resources/sql-tests/inputs/orderby-nulls-ordering.sql) | 0 | ||||
-rw-r--r-- | sql/core/src/test/resources/sql-tests/results/order-by-nulls-ordering.sql.out (renamed from sql/core/src/test/resources/sql-tests/results/orderby-nulls-ordering.sql.out) | 0 | ||||
-rw-r--r-- | sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala | 18 |
5 files changed, 129 insertions, 4 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala index 844ca7a8e9..63da501f18 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala @@ -1007,7 +1007,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { /** * Returns an ordering used in sorting. * {{{ - * // Scala: sort a DataFrame by age column in descending order. + * // Scala * df.sort(df("age").desc) * * // Java @@ -1020,7 +1020,37 @@ class Column(protected[sql] val expr: Expression) extends Logging { def desc: Column = withExpr { SortOrder(expr, Descending) } /** - * Returns an ordering used in sorting. + * Returns a descending ordering used in sorting, where null values appear before non-null values. + * {{{ + * // Scala: sort a DataFrame by age column in descending order and null values appearing first. + * df.sort(df("age").desc_nulls_first) + * + * // Java + * df.sort(df.col("age").desc_nulls_first()); + * }}} + * + * @group expr_ops + * @since 2.1.0 + */ + def desc_nulls_first: Column = withExpr { SortOrder(expr, Descending, NullsFirst) } + + /** + * Returns a descending ordering used in sorting, where null values appear after non-null values. + * {{{ + * // Scala: sort a DataFrame by age column in descending order and null values appearing last. + * df.sort(df("age").desc_nulls_last) + * + * // Java + * df.sort(df.col("age").desc_nulls_last()); + * }}} + * + * @group expr_ops + * @since 2.1.0 + */ + def desc_nulls_last: Column = withExpr { SortOrder(expr, Descending, NullsLast) } + + /** + * Returns an ascending ordering used in sorting. * {{{ * // Scala: sort a DataFrame by age column in ascending order. * df.sort(df("age").asc) @@ -1035,6 +1065,36 @@ class Column(protected[sql] val expr: Expression) extends Logging { def asc: Column = withExpr { SortOrder(expr, Ascending) } /** + * Returns an ascending ordering used in sorting, where null values appear before non-null values. + * {{{ + * // Scala: sort a DataFrame by age column in ascending order and null values appearing first. + * df.sort(df("age").asc_nulls_last) + * + * // Java + * df.sort(df.col("age").asc_nulls_last()); + * }}} + * + * @group expr_ops + * @since 2.1.0 + */ + def asc_nulls_first: Column = withExpr { SortOrder(expr, Ascending, NullsFirst) } + + /** + * Returns an ordering used in sorting, where null values appear after non-null values. + * {{{ + * // Scala: sort a DataFrame by age column in ascending order and null values appearing last. + * df.sort(df("age").asc_nulls_last) + * + * // Java + * df.sort(df.col("age").asc_nulls_last()); + * }}} + * + * @group expr_ops + * @since 2.1.0 + */ + def asc_nulls_last: Column = withExpr { SortOrder(expr, Ascending, NullsLast) } + + /** * Prints the expression to the console for debugging purpose. * * @group df_ops diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index 960c87f60e..47bf41a2da 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -109,7 +109,6 @@ object functions { /** * Returns a sort expression based on ascending order of the column. * {{{ - * // Sort by dept in ascending order, and then age in descending order. * df.sort(asc("dept"), desc("age")) * }}} * @@ -119,9 +118,32 @@ object functions { def asc(columnName: String): Column = Column(columnName).asc /** + * Returns a sort expression based on ascending order of the column, + * and null values return before non-null values. + * {{{ + * df.sort(asc_nulls_last("dept"), desc("age")) + * }}} + * + * @group sort_funcs + * @since 2.1.0 + */ + def asc_nulls_first(columnName: String): Column = Column(columnName).asc_nulls_first + + /** + * Returns a sort expression based on ascending order of the column, + * and null values appear after non-null values. + * {{{ + * df.sort(asc_nulls_last("dept"), desc("age")) + * }}} + * + * @group sort_funcs + * @since 2.1.0 + */ + def asc_nulls_last(columnName: String): Column = Column(columnName).asc_nulls_last + + /** * Returns a sort expression based on the descending order of the column. * {{{ - * // Sort by dept in ascending order, and then age in descending order. * df.sort(asc("dept"), desc("age")) * }}} * @@ -130,6 +152,31 @@ object functions { */ def desc(columnName: String): Column = Column(columnName).desc + /** + * Returns a sort expression based on the descending order of the column, + * and null values appear before non-null values. + * {{{ + * df.sort(asc("dept"), desc_nulls_first("age")) + * }}} + * + * @group sort_funcs + * @since 2.1.0 + */ + def desc_nulls_first(columnName: String): Column = Column(columnName).desc_nulls_first + + /** + * Returns a sort expression based on the descending order of the column, + * and null values appear after non-null values. + * {{{ + * df.sort(asc("dept"), desc_nulls_last("age")) + * }}} + * + * @group sort_funcs + * @since 2.1.0 + */ + def desc_nulls_last(columnName: String): Column = Column(columnName).desc_nulls_last + + ////////////////////////////////////////////////////////////////////////////////////////////// // Aggregate functions ////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/sql/core/src/test/resources/sql-tests/inputs/orderby-nulls-ordering.sql b/sql/core/src/test/resources/sql-tests/inputs/order-by-nulls-ordering.sql index f7637b444b..f7637b444b 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/orderby-nulls-ordering.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/order-by-nulls-ordering.sql diff --git a/sql/core/src/test/resources/sql-tests/results/orderby-nulls-ordering.sql.out b/sql/core/src/test/resources/sql-tests/results/order-by-nulls-ordering.sql.out index c1b63dfb8c..c1b63dfb8c 100644 --- a/sql/core/src/test/resources/sql-tests/results/orderby-nulls-ordering.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/order-by-nulls-ordering.sql.out diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala index 2c60a7dd92..16cc368208 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala @@ -326,6 +326,24 @@ class DataFrameSuite extends QueryTest with SharedSQLContext { Row(6)) } + test("sorting with null ordering") { + val data = Seq[java.lang.Integer](2, 1, null).toDF("key") + + checkAnswer(data.orderBy('key.asc), Row(null) :: Row(1) :: Row(2) :: Nil) + checkAnswer(data.orderBy(asc("key")), Row(null) :: Row(1) :: Row(2) :: Nil) + checkAnswer(data.orderBy('key.asc_nulls_first), Row(null) :: Row(1) :: Row(2) :: Nil) + checkAnswer(data.orderBy(asc_nulls_first("key")), Row(null) :: Row(1) :: Row(2) :: Nil) + checkAnswer(data.orderBy('key.asc_nulls_last), Row(1) :: Row(2) :: Row(null) :: Nil) + checkAnswer(data.orderBy(asc_nulls_last("key")), Row(1) :: Row(2) :: Row(null) :: Nil) + + checkAnswer(data.orderBy('key.desc), Row(2) :: Row(1) :: Row(null) :: Nil) + checkAnswer(data.orderBy(desc("key")), Row(2) :: Row(1) :: Row(null) :: Nil) + checkAnswer(data.orderBy('key.desc_nulls_first), Row(null) :: Row(2) :: Row(1) :: Nil) + checkAnswer(data.orderBy(desc_nulls_first("key")), Row(null) :: Row(2) :: Row(1) :: Nil) + checkAnswer(data.orderBy('key.desc_nulls_last), Row(2) :: Row(1) :: Row(null) :: Nil) + checkAnswer(data.orderBy(desc_nulls_last("key")), Row(2) :: Row(1) :: Row(null) :: Nil) + } + test("global sorting") { checkAnswer( testData2.orderBy('a.asc, 'b.asc), |