Diffstat (limited to 'sql/core')
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/execution/SortPrefixUtils.scala        68
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala               2
-rw-r--r--  sql/core/src/test/resources/sql-tests/inputs/orderby-nulls-ordering.sql              83
-rw-r--r--  sql/core/src/test/resources/sql-tests/results/orderby-nulls-ordering.sql.out        254
-rw-r--r--  sql/core/src/test/scala/org/apache/spark/sql/execution/SortSuite.scala                3
5 files changed, 398 insertions(+), 12 deletions(-)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SortPrefixUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SortPrefixUtils.scala
index 940467e74d..c6665d273f 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SortPrefixUtils.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SortPrefixUtils.scala
@@ -40,22 +40,70 @@ object SortPrefixUtils {
   def getPrefixComparator(sortOrder: SortOrder): PrefixComparator = {
     sortOrder.dataType match {
-      case StringType =>
-        if (sortOrder.isAscending) PrefixComparators.STRING else PrefixComparators.STRING_DESC
-      case BinaryType =>
-        if (sortOrder.isAscending) PrefixComparators.BINARY else PrefixComparators.BINARY_DESC
+      case StringType => stringPrefixComparator(sortOrder)
+      case BinaryType => binaryPrefixComparator(sortOrder)
       case BooleanType | ByteType | ShortType | IntegerType | LongType | DateType | TimestampType =>
-        if (sortOrder.isAscending) PrefixComparators.LONG else PrefixComparators.LONG_DESC
+        longPrefixComparator(sortOrder)
       case dt: DecimalType if dt.precision - dt.scale <= Decimal.MAX_LONG_DIGITS =>
-        if (sortOrder.isAscending) PrefixComparators.LONG else PrefixComparators.LONG_DESC
-      case FloatType | DoubleType =>
-        if (sortOrder.isAscending) PrefixComparators.DOUBLE else PrefixComparators.DOUBLE_DESC
-      case dt: DecimalType =>
-        if (sortOrder.isAscending) PrefixComparators.DOUBLE else PrefixComparators.DOUBLE_DESC
+        longPrefixComparator(sortOrder)
+      case FloatType | DoubleType => doublePrefixComparator(sortOrder)
+      case dt: DecimalType => doublePrefixComparator(sortOrder)
       case _ => NoOpPrefixComparator
     }
   }
 
+  private def stringPrefixComparator(sortOrder: SortOrder): PrefixComparator = {
+    sortOrder.direction match {
+      case Ascending if (sortOrder.nullOrdering == NullsLast) =>
+        PrefixComparators.STRING_NULLS_LAST
+      case Ascending =>
+        PrefixComparators.STRING
+      case Descending if (sortOrder.nullOrdering == NullsFirst) =>
+        PrefixComparators.STRING_DESC_NULLS_FIRST
+      case Descending =>
+        PrefixComparators.STRING_DESC
+    }
+  }
+
+  private def binaryPrefixComparator(sortOrder: SortOrder): PrefixComparator = {
+    sortOrder.direction match {
+      case Ascending if (sortOrder.nullOrdering == NullsLast) =>
+        PrefixComparators.BINARY_NULLS_LAST
+      case Ascending =>
+        PrefixComparators.BINARY
+      case Descending if (sortOrder.nullOrdering == NullsFirst) =>
+        PrefixComparators.BINARY_DESC_NULLS_FIRST
+      case Descending =>
+        PrefixComparators.BINARY_DESC
+    }
+  }
+
+  private def longPrefixComparator(sortOrder: SortOrder): PrefixComparator = {
+    sortOrder.direction match {
+      case Ascending if (sortOrder.nullOrdering == NullsLast) =>
+        PrefixComparators.LONG_NULLS_LAST
+      case Ascending =>
+        PrefixComparators.LONG
+      case Descending if (sortOrder.nullOrdering == NullsFirst) =>
+        PrefixComparators.LONG_DESC_NULLS_FIRST
+      case Descending =>
+        PrefixComparators.LONG_DESC
+    }
+  }
+
+  private def doublePrefixComparator(sortOrder: SortOrder): PrefixComparator = {
+    sortOrder.direction match {
+      case Ascending if (sortOrder.nullOrdering == NullsLast) =>
+        PrefixComparators.DOUBLE_NULLS_LAST
+      case Ascending =>
+        PrefixComparators.DOUBLE
+      case Descending if (sortOrder.nullOrdering == NullsFirst) =>
+        PrefixComparators.DOUBLE_DESC_NULLS_FIRST
+      case Descending =>
+        PrefixComparators.DOUBLE_DESC
+    }
+  }
+
   /**
    * Creates the prefix comparator for the first field in the given schema, in ascending order.
    */
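
For orientation: the refactored dispatch above keys each type family on the (direction, null ordering) pair, and only the two non-default combinations need the new comparators. Below is a minimal, self-contained Scala sketch of that pattern; the sealed traits are stand-ins, not the real Catalyst classes, and the result strings merely name the PrefixComparators fields the patch references.

object PrefixDispatchSketch extends App {
  sealed trait SortDirection
  case object Ascending extends SortDirection
  case object Descending extends SortDirection

  sealed trait NullOrdering
  case object NullsFirst extends NullOrdering
  case object NullsLast extends NullOrdering

  // Mirrors longPrefixComparator above: only ASC + NULLS LAST and
  // DESC + NULLS FIRST deviate from the pre-existing comparators.
  def pickLongComparator(direction: SortDirection, nulls: NullOrdering): String =
    (direction, nulls) match {
      case (Ascending, NullsLast)   => "LONG_NULLS_LAST"
      case (Ascending, NullsFirst)  => "LONG"                  // ASC default
      case (Descending, NullsFirst) => "LONG_DESC_NULLS_FIRST"
      case (Descending, NullsLast)  => "LONG_DESC"             // DESC default
    }

  println(pickLongComparator(Descending, NullsFirst)) // LONG_DESC_NULLS_FIRST
}
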
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala
index 6a2d97c9b1..6aeefa6edd 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala
@@ -368,7 +368,7 @@ abstract class SparkPlan extends QueryPlan[SparkPlan] with Logging with Serializ
    */
   protected def newNaturalAscendingOrdering(dataTypes: Seq[DataType]): Ordering[InternalRow] = {
     val order: Seq[SortOrder] = dataTypes.zipWithIndex.map {
-      case (dt, index) => new SortOrder(BoundReference(index, dt, nullable = true), Ascending)
+      case (dt, index) => SortOrder(BoundReference(index, dt, nullable = true), Ascending)
     }
     newOrdering(order, Seq.empty)
   }
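
The one-line SparkPlan change drops `new`, presumably because SortOrder now carries a null-ordering argument and a two-argument apply on its companion fills in the direction's default, keeping existing call sites source-compatible. A hedged sketch of that companion-object pattern, with stand-in names rather than the actual Catalyst definitions:

object SortOrderSketch extends App {
  sealed trait NullOrdering
  case object NullsFirst extends NullOrdering
  case object NullsLast extends NullOrdering

  sealed trait SortDirection { def defaultNullOrdering: NullOrdering }
  case object Ascending extends SortDirection {
    val defaultNullOrdering: NullOrdering = NullsFirst
  }
  case object Descending extends SortDirection {
    val defaultNullOrdering: NullOrdering = NullsLast
  }

  // Stand-in for Catalyst's SortOrder: the two-argument apply supplies the
  // direction's default null ordering, which a bare `new` cannot do once the
  // primary constructor grows a third parameter.
  case class SortOrder(col: String, direction: SortDirection, nullOrdering: NullOrdering)
  object SortOrder {
    def apply(col: String, direction: SortDirection): SortOrder =
      SortOrder(col, direction, direction.defaultNullOrdering)
  }

  println(SortOrder("col3", Ascending)) // SortOrder(col3,Ascending,NullsFirst)
}
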
diff --git a/sql/core/src/test/resources/sql-tests/inputs/orderby-nulls-ordering.sql b/sql/core/src/test/resources/sql-tests/inputs/orderby-nulls-ordering.sql
new file mode 100644
index 0000000000..f7637b444b
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/inputs/orderby-nulls-ordering.sql
@@ -0,0 +1,83 @@
+-- Q1. testing window functions with order by
+create table spark_10747(col1 int, col2 int, col3 int) using parquet;
+
+-- Q2. insert to tables
+INSERT INTO spark_10747 VALUES (6, 12, 10), (6, 11, 4), (6, 9, 10), (6, 15, 8),
+(6, 15, 8), (6, 7, 4), (6, 7, 8), (6, 13, null), (6, 10, null);
+
+-- Q3. windowing with order by DESC NULLS LAST
+select col1, col2, col3, sum(col2)
+ over (partition by col1
+ order by col3 desc nulls last, col2
+ rows between 2 preceding and 2 following ) as sum_col2
+from spark_10747 where col1 = 6 order by sum_col2;
+
+-- Q4. windowing with order by DESC NULLS FIRST
+select col1, col2, col3, sum(col2)
+ over (partition by col1
+ order by col3 desc nulls first, col2
+ rows between 2 preceding and 2 following ) as sum_col2
+from spark_10747 where col1 = 6 order by sum_col2;
+
+-- Q5. windowing with order by ASC NULLS LAST
+select col1, col2, col3, sum(col2)
+ over (partition by col1
+ order by col3 asc nulls last, col2
+ rows between 2 preceding and 2 following ) as sum_col2
+from spark_10747 where col1 = 6 order by sum_col2;
+
+-- Q6. windowing with order by ASC NULLS FIRST
+select col1, col2, col3, sum(col2)
+ over (partition by col1
+ order by col3 asc nulls first, col2
+ rows between 2 preceding and 2 following ) as sum_col2
+from spark_10747 where col1 = 6 order by sum_col2;
+
+-- Q7. Regular query with ORDER BY ASC NULLS FIRST
+SELECT COL1, COL2, COL3 FROM spark_10747 ORDER BY COL3 ASC NULLS FIRST, COL2;
+
+-- Q8. Regular query with ORDER BY ASC NULLS LAST
+SELECT COL1, COL2, COL3 FROM spark_10747 ORDER BY COL3 NULLS LAST, COL2;
+
+-- Q9. Regular query with ORDER BY DESC NULLS FIRST
+SELECT COL1, COL2, COL3 FROM spark_10747 ORDER BY COL3 DESC NULLS FIRST, COL2;
+
+-- Q10. Regular query with ORDER BY DESC NULLS LAST
+SELECT COL1, COL2, COL3 FROM spark_10747 ORDER BY COL3 DESC NULLS LAST, COL2;
+
+-- drop the test table
+drop table spark_10747;
+
+-- Q11. mix datatype for ORDER BY NULLS FIRST|LAST
+create table spark_10747_mix(
+col1 string,
+col2 int,
+col3 double,
+col4 decimal(10,2),
+col5 decimal(20,1))
+using parquet;
+
+-- Q12. Insert to the table
+INSERT INTO spark_10747_mix VALUES
+('b', 2, 1.0, 1.00, 10.0),
+('d', 3, 2.0, 3.00, 0.0),
+('c', 3, 2.0, 2.00, 15.1),
+('d', 3, 0.0, 3.00, 1.0),
+(null, 3, 0.0, 3.00, 1.0),
+('d', 3, null, 4.00, 1.0),
+('a', 1, 1.0, 1.00, null),
+('c', 3, 2.0, 2.00, null);
+
+-- Q13. Regular query with 2 NULLS LAST columns
+select * from spark_10747_mix order by col1 nulls last, col5 nulls last;
+
+-- Q14. Regular query with 2 NULLS FIRST columns
+select * from spark_10747_mix order by col1 desc nulls first, col5 desc nulls first;
+
+-- Q15. Regular query with mixed NULLS FIRST|LAST
+select * from spark_10747_mix order by col5 desc nulls first, col3 desc nulls last;
+
+-- drop the test table
+drop table spark_10747_mix;
+
+
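Worth noting while reading Q7 and Q8: when no NULLS clause is given, Spark defaults to NULLS FIRST for ascending and NULLS LAST for descending, which is why Q8 spells NULLS LAST explicitly to pin the intended placement. For completeness, a hedged DataFrame-API counterpart to Q7/Q8; it assumes a SparkSession bound to `spark`, a build where the Column nulls-ordering methods are available, and that the test table has not yet been dropped.

// Hypothetical driver snippet, not part of the test files in this diff.
import org.apache.spark.sql.functions.col

val df = spark.table("spark_10747")
df.orderBy(col("col3").asc_nulls_first, col("col2")).show() // Q7 equivalent
df.orderBy(col("col3").asc_nulls_last, col("col2")).show()  // Q8 equivalent
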
diff --git a/sql/core/src/test/resources/sql-tests/results/orderby-nulls-ordering.sql.out b/sql/core/src/test/resources/sql-tests/results/orderby-nulls-ordering.sql.out
new file mode 100644
index 0000000000..c1b63dfb8c
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/results/orderby-nulls-ordering.sql.out
@@ -0,0 +1,254 @@
+-- Automatically generated by SQLQueryTestSuite
+-- Number of queries: 17
+
+
+-- !query 0
+create table spark_10747(col1 int, col2 int, col3 int) using parquet
+-- !query 0 schema
+struct<>
+-- !query 0 output
+
+
+
+-- !query 1
+INSERT INTO spark_10747 VALUES (6, 12, 10), (6, 11, 4), (6, 9, 10), (6, 15, 8),
+(6, 15, 8), (6, 7, 4), (6, 7, 8), (6, 13, null), (6, 10, null)
+-- !query 1 schema
+struct<>
+-- !query 1 output
+
+
+
+-- !query 2
+select col1, col2, col3, sum(col2)
+ over (partition by col1
+ order by col3 desc nulls last, col2
+ rows between 2 preceding and 2 following ) as sum_col2
+from spark_10747 where col1 = 6 order by sum_col2
+-- !query 2 schema
+struct<col1:int,col2:int,col3:int,sum_col2:bigint>
+-- !query 2 output
+6 9 10 28
+6 13 NULL 34
+6 10 NULL 41
+6 12 10 43
+6 15 8 55
+6 15 8 56
+6 11 4 56
+6 7 8 58
+6 7 4 58
+
+
+-- !query 3
+select col1, col2, col3, sum(col2)
+ over (partition by col1
+ order by col3 desc nulls first, col2
+ rows between 2 preceding and 2 following ) as sum_col2
+from spark_10747 where col1 = 6 order by sum_col2
+-- !query 3 schema
+struct<col1:int,col2:int,col3:int,sum_col2:bigint>
+-- !query 3 output
+6 10 NULL 32
+6 11 4 33
+6 13 NULL 44
+6 7 4 48
+6 9 10 51
+6 15 8 55
+6 12 10 56
+6 15 8 56
+6 7 8 58
+
+
+-- !query 4
+select col1, col2, col3, sum(col2)
+ over (partition by col1
+ order by col3 asc nulls last, col2
+ rows between 2 preceding and 2 following ) as sum_col2
+from spark_10747 where col1 = 6 order by sum_col2
+-- !query 4 schema
+struct<col1:int,col2:int,col3:int,sum_col2:bigint>
+-- !query 4 output
+6 7 4 25
+6 13 NULL 35
+6 11 4 40
+6 10 NULL 44
+6 7 8 55
+6 15 8 57
+6 15 8 58
+6 12 10 59
+6 9 10 61
+
+
+-- !query 5
+select col1, col2, col3, sum(col2)
+ over (partition by col1
+ order by col3 asc nulls first, col2
+ rows between 2 preceding and 2 following ) as sum_col2
+from spark_10747 where col1 = 6 order by sum_col2
+-- !query 5 schema
+struct<col1:int,col2:int,col3:int,sum_col2:bigint>
+-- !query 5 output
+6 10 NULL 30
+6 12 10 36
+6 13 NULL 41
+6 7 4 48
+6 9 10 51
+6 11 4 53
+6 7 8 55
+6 15 8 57
+6 15 8 58
+
+
+-- !query 6
+SELECT COL1, COL2, COL3 FROM spark_10747 ORDER BY COL3 ASC NULLS FIRST, COL2
+-- !query 6 schema
+struct<COL1:int,COL2:int,COL3:int>
+-- !query 6 output
+6 10 NULL
+6 13 NULL
+6 7 4
+6 11 4
+6 7 8
+6 15 8
+6 15 8
+6 9 10
+6 12 10
+
+
+-- !query 7
+SELECT COL1, COL2, COL3 FROM spark_10747 ORDER BY COL3 NULLS LAST, COL2
+-- !query 7 schema
+struct<COL1:int,COL2:int,COL3:int>
+-- !query 7 output
+6 7 4
+6 11 4
+6 7 8
+6 15 8
+6 15 8
+6 9 10
+6 12 10
+6 10 NULL
+6 13 NULL
+
+
+-- !query 8
+SELECT COL1, COL2, COL3 FROM spark_10747 ORDER BY COL3 DESC NULLS FIRST, COL2
+-- !query 8 schema
+struct<COL1:int,COL2:int,COL3:int>
+-- !query 8 output
+6 10 NULL
+6 13 NULL
+6 9 10
+6 12 10
+6 7 8
+6 15 8
+6 15 8
+6 7 4
+6 11 4
+
+
+-- !query 9
+SELECT COL1, COL2, COL3 FROM spark_10747 ORDER BY COL3 DESC NULLS LAST, COL2
+-- !query 9 schema
+struct<COL1:int,COL2:int,COL3:int>
+-- !query 9 output
+6 9 10
+6 12 10
+6 7 8
+6 15 8
+6 15 8
+6 7 4
+6 11 4
+6 10 NULL
+6 13 NULL
+
+
+-- !query 10
+drop table spark_10747
+-- !query 10 schema
+struct<>
+-- !query 10 output
+
+
+
+-- !query 11
+create table spark_10747_mix(
+col1 string,
+col2 int,
+col3 double,
+col4 decimal(10,2),
+col5 decimal(20,1))
+using parquet
+-- !query 11 schema
+struct<>
+-- !query 11 output
+
+
+
+-- !query 12
+INSERT INTO spark_10747_mix VALUES
+('b', 2, 1.0, 1.00, 10.0),
+('d', 3, 2.0, 3.00, 0.0),
+('c', 3, 2.0, 2.00, 15.1),
+('d', 3, 0.0, 3.00, 1.0),
+(null, 3, 0.0, 3.00, 1.0),
+('d', 3, null, 4.00, 1.0),
+('a', 1, 1.0, 1.00, null),
+('c', 3, 2.0, 2.00, null)
+-- !query 12 schema
+struct<>
+-- !query 12 output
+
+
+
+-- !query 13
+select * from spark_10747_mix order by col1 nulls last, col5 nulls last
+-- !query 13 schema
+struct<col1:string,col2:int,col3:double,col4:decimal(10,2),col5:decimal(20,1)>
+-- !query 13 output
+a 1 1.0 1 NULL
+b 2 1.0 1 10
+c 3 2.0 2 15.1
+c 3 2.0 2 NULL
+d 3 2.0 3 0
+d 3 0.0 3 1
+d 3 NULL 4 1
+NULL 3 0.0 3 1
+
+
+-- !query 14
+select * from spark_10747_mix order by col1 desc nulls first, col5 desc nulls first
+-- !query 14 schema
+struct<col1:string,col2:int,col3:double,col4:decimal(10,2),col5:decimal(20,1)>
+-- !query 14 output
+NULL 3 0.0 3 1
+d 3 0.0 3 1
+d 3 NULL 4 1
+d 3 2.0 3 0
+c 3 2.0 2 NULL
+c 3 2.0 2 15.1
+b 2 1.0 1 10
+a 1 1.0 1 NULL
+
+
+-- !query 15
+select * from spark_10747_mix order by col5 desc nulls first, col3 desc nulls last
+-- !query 15 schema
+struct<col1:string,col2:int,col3:double,col4:decimal(10,2),col5:decimal(20,1)>
+-- !query 15 output
+c 3 2.0 2 NULL
+a 1 1.0 1 NULL
+c 3 2.0 2 15.1
+b 2 1.0 1 10
+d 3 0.0 3 1
+NULL 3 0.0 3 1
+d 3 NULL 4 1
+d 3 2.0 3 0
+
+
+-- !query 16
+drop table spark_10747_mix
+-- !query 16 schema
+struct<>
+-- !query 16 output
+
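As its header notes, the golden file above is machine-generated: SQLQueryTestSuite runs each query from the input file and records its schema and result. In Spark's usual workflow it is refreshed by re-running the suite with the SPARK_GENERATE_GOLDEN_FILES=1 environment variable set, rather than edited by hand.
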
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SortSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SortSuite.scala
index ba3fa3732d..a7bbe34f4e 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SortSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SortSuite.scala
@@ -101,7 +101,8 @@ class SortSuite extends SparkPlanTest with SharedSQLContext {
   for (
     dataType <- DataTypeTestUtils.atomicTypes ++ Set(NullType);
     nullable <- Seq(true, false);
-    sortOrder <- Seq('a.asc :: Nil, 'a.desc :: Nil);
+    sortOrder <-
+      Seq('a.asc :: Nil, 'a.asc_nullsLast :: Nil, 'a.desc :: Nil, 'a.desc_nullsFirst :: Nil);
     randomDataGenerator <- RandomDataGenerator.forType(dataType, nullable)
   ) {
     test(s"sorting on $dataType with nullable=$nullable, sortOrder=$sortOrder") {
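
The SortSuite tweak widens the generated test matrix from two orderings to four per (type, nullability) combination; 'a.asc_nullsLast and 'a.desc_nullsFirst are the Catalyst DSL spellings for the two non-default combinations, i.e. exactly the cases the new prefix comparators handle. A small sketch of the loop shape, with the Spark test utilities stubbed out:

object SortMatrixSketch extends App {
  // Stand-ins only: the real suite iterates
  // DataTypeTestUtils.atomicTypes ++ Set(NullType) and registers a
  // SparkPlanTest case per combination.
  val dataTypes = Seq("IntegerType", "StringType", "DoubleType")
  val orderings = Seq("asc", "asc nulls last", "desc", "desc nulls first")
  for (dt <- dataTypes; nullable <- Seq(true, false); order <- orderings)
    println(s"sorting on $dt with nullable=$nullable, order=$order")
}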