diff options
Diffstat (limited to 'sql/core')
5 files changed, 398 insertions, 12 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SortPrefixUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SortPrefixUtils.scala index 940467e74d..c6665d273f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SortPrefixUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SortPrefixUtils.scala @@ -40,22 +40,70 @@ object SortPrefixUtils { def getPrefixComparator(sortOrder: SortOrder): PrefixComparator = { sortOrder.dataType match { - case StringType => - if (sortOrder.isAscending) PrefixComparators.STRING else PrefixComparators.STRING_DESC - case BinaryType => - if (sortOrder.isAscending) PrefixComparators.BINARY else PrefixComparators.BINARY_DESC + case StringType => stringPrefixComparator(sortOrder) + case BinaryType => binaryPrefixComparator(sortOrder) case BooleanType | ByteType | ShortType | IntegerType | LongType | DateType | TimestampType => - if (sortOrder.isAscending) PrefixComparators.LONG else PrefixComparators.LONG_DESC + longPrefixComparator(sortOrder) case dt: DecimalType if dt.precision - dt.scale <= Decimal.MAX_LONG_DIGITS => - if (sortOrder.isAscending) PrefixComparators.LONG else PrefixComparators.LONG_DESC - case FloatType | DoubleType => - if (sortOrder.isAscending) PrefixComparators.DOUBLE else PrefixComparators.DOUBLE_DESC - case dt: DecimalType => - if (sortOrder.isAscending) PrefixComparators.DOUBLE else PrefixComparators.DOUBLE_DESC + longPrefixComparator(sortOrder) + case FloatType | DoubleType => doublePrefixComparator(sortOrder) + case dt: DecimalType => doublePrefixComparator(sortOrder) case _ => NoOpPrefixComparator } } + private def stringPrefixComparator(sortOrder: SortOrder): PrefixComparator = { + sortOrder.direction match { + case Ascending if (sortOrder.nullOrdering == NullsLast) => + PrefixComparators.STRING_NULLS_LAST + case Ascending => + PrefixComparators.STRING + case Descending if (sortOrder.nullOrdering == NullsFirst) => + PrefixComparators.STRING_DESC_NULLS_FIRST + case Descending => + PrefixComparators.STRING_DESC + } + } + + private def binaryPrefixComparator(sortOrder: SortOrder): PrefixComparator = { + sortOrder.direction match { + case Ascending if (sortOrder.nullOrdering == NullsLast) => + PrefixComparators.BINARY_NULLS_LAST + case Ascending => + PrefixComparators.BINARY + case Descending if (sortOrder.nullOrdering == NullsFirst) => + PrefixComparators.BINARY_DESC_NULLS_FIRST + case Descending => + PrefixComparators.BINARY_DESC + } + } + + private def longPrefixComparator(sortOrder: SortOrder): PrefixComparator = { + sortOrder.direction match { + case Ascending if (sortOrder.nullOrdering == NullsLast) => + PrefixComparators.LONG_NULLS_LAST + case Ascending => + PrefixComparators.LONG + case Descending if (sortOrder.nullOrdering == NullsFirst) => + PrefixComparators.LONG_DESC_NULLS_FIRST + case Descending => + PrefixComparators.LONG_DESC + } + } + + private def doublePrefixComparator(sortOrder: SortOrder): PrefixComparator = { + sortOrder.direction match { + case Ascending if (sortOrder.nullOrdering == NullsLast) => + PrefixComparators.DOUBLE_NULLS_LAST + case Ascending => + PrefixComparators.DOUBLE + case Descending if (sortOrder.nullOrdering == NullsFirst) => + PrefixComparators.DOUBLE_DESC_NULLS_FIRST + case Descending => + PrefixComparators.DOUBLE_DESC + } + } + /** * Creates the prefix comparator for the first field in the given schema, in ascending order. */ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala index 6a2d97c9b1..6aeefa6edd 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala @@ -368,7 +368,7 @@ abstract class SparkPlan extends QueryPlan[SparkPlan] with Logging with Serializ */ protected def newNaturalAscendingOrdering(dataTypes: Seq[DataType]): Ordering[InternalRow] = { val order: Seq[SortOrder] = dataTypes.zipWithIndex.map { - case (dt, index) => new SortOrder(BoundReference(index, dt, nullable = true), Ascending) + case (dt, index) => SortOrder(BoundReference(index, dt, nullable = true), Ascending) } newOrdering(order, Seq.empty) } diff --git a/sql/core/src/test/resources/sql-tests/inputs/orderby-nulls-ordering.sql b/sql/core/src/test/resources/sql-tests/inputs/orderby-nulls-ordering.sql new file mode 100644 index 0000000000..f7637b444b --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/orderby-nulls-ordering.sql @@ -0,0 +1,83 @@ +-- Q1. testing window functions with order by +create table spark_10747(col1 int, col2 int, col3 int) using parquet; + +-- Q2. insert to tables +INSERT INTO spark_10747 VALUES (6, 12, 10), (6, 11, 4), (6, 9, 10), (6, 15, 8), +(6, 15, 8), (6, 7, 4), (6, 7, 8), (6, 13, null), (6, 10, null); + +-- Q3. windowing with order by DESC NULLS LAST +select col1, col2, col3, sum(col2) + over (partition by col1 + order by col3 desc nulls last, col2 + rows between 2 preceding and 2 following ) as sum_col2 +from spark_10747 where col1 = 6 order by sum_col2; + +-- Q4. windowing with order by DESC NULLS FIRST +select col1, col2, col3, sum(col2) + over (partition by col1 + order by col3 desc nulls first, col2 + rows between 2 preceding and 2 following ) as sum_col2 +from spark_10747 where col1 = 6 order by sum_col2; + +-- Q5. windowing with order by ASC NULLS LAST +select col1, col2, col3, sum(col2) + over (partition by col1 + order by col3 asc nulls last, col2 + rows between 2 preceding and 2 following ) as sum_col2 +from spark_10747 where col1 = 6 order by sum_col2; + +-- Q6. windowing with order by ASC NULLS FIRST +select col1, col2, col3, sum(col2) + over (partition by col1 + order by col3 asc nulls first, col2 + rows between 2 preceding and 2 following ) as sum_col2 +from spark_10747 where col1 = 6 order by sum_col2; + +-- Q7. Regular query with ORDER BY ASC NULLS FIRST +SELECT COL1, COL2, COL3 FROM spark_10747 ORDER BY COL3 ASC NULLS FIRST, COL2; + +-- Q8. Regular query with ORDER BY ASC NULLS LAST +SELECT COL1, COL2, COL3 FROM spark_10747 ORDER BY COL3 NULLS LAST, COL2; + +-- Q9. Regular query with ORDER BY DESC NULLS FIRST +SELECT COL1, COL2, COL3 FROM spark_10747 ORDER BY COL3 DESC NULLS FIRST, COL2; + +-- Q10. Regular query with ORDER BY DESC NULLS LAST +SELECT COL1, COL2, COL3 FROM spark_10747 ORDER BY COL3 DESC NULLS LAST, COL2; + +-- drop the test table +drop table spark_10747; + +-- Q11. mix datatype for ORDER BY NULLS FIRST|LAST +create table spark_10747_mix( +col1 string, +col2 int, +col3 double, +col4 decimal(10,2), +col5 decimal(20,1)) +using parquet; + +-- Q12. Insert to the table +INSERT INTO spark_10747_mix VALUES +('b', 2, 1.0, 1.00, 10.0), +('d', 3, 2.0, 3.00, 0.0), +('c', 3, 2.0, 2.00, 15.1), +('d', 3, 0.0, 3.00, 1.0), +(null, 3, 0.0, 3.00, 1.0), +('d', 3, null, 4.00, 1.0), +('a', 1, 1.0, 1.00, null), +('c', 3, 2.0, 2.00, null); + +-- Q13. Regular query with 2 NULLS LAST columns +select * from spark_10747_mix order by col1 nulls last, col5 nulls last; + +-- Q14. Regular query with 2 NULLS FIRST columns +select * from spark_10747_mix order by col1 desc nulls first, col5 desc nulls first; + +-- Q15. Regular query with mixed NULLS FIRST|LAST +select * from spark_10747_mix order by col5 desc nulls first, col3 desc nulls last; + +-- drop the test table +drop table spark_10747_mix; + + diff --git a/sql/core/src/test/resources/sql-tests/results/orderby-nulls-ordering.sql.out b/sql/core/src/test/resources/sql-tests/results/orderby-nulls-ordering.sql.out new file mode 100644 index 0000000000..c1b63dfb8c --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/orderby-nulls-ordering.sql.out @@ -0,0 +1,254 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 17 + + +-- !query 0 +create table spark_10747(col1 int, col2 int, col3 int) using parquet +-- !query 0 schema +struct<> +-- !query 0 output + + + +-- !query 1 +INSERT INTO spark_10747 VALUES (6, 12, 10), (6, 11, 4), (6, 9, 10), (6, 15, 8), +(6, 15, 8), (6, 7, 4), (6, 7, 8), (6, 13, null), (6, 10, null) +-- !query 1 schema +struct<> +-- !query 1 output + + + +-- !query 2 +select col1, col2, col3, sum(col2) + over (partition by col1 + order by col3 desc nulls last, col2 + rows between 2 preceding and 2 following ) as sum_col2 +from spark_10747 where col1 = 6 order by sum_col2 +-- !query 2 schema +struct<col1:int,col2:int,col3:int,sum_col2:bigint> +-- !query 2 output +6 9 10 28 +6 13 NULL 34 +6 10 NULL 41 +6 12 10 43 +6 15 8 55 +6 15 8 56 +6 11 4 56 +6 7 8 58 +6 7 4 58 + + +-- !query 3 +select col1, col2, col3, sum(col2) + over (partition by col1 + order by col3 desc nulls first, col2 + rows between 2 preceding and 2 following ) as sum_col2 +from spark_10747 where col1 = 6 order by sum_col2 +-- !query 3 schema +struct<col1:int,col2:int,col3:int,sum_col2:bigint> +-- !query 3 output +6 10 NULL 32 +6 11 4 33 +6 13 NULL 44 +6 7 4 48 +6 9 10 51 +6 15 8 55 +6 12 10 56 +6 15 8 56 +6 7 8 58 + + +-- !query 4 +select col1, col2, col3, sum(col2) + over (partition by col1 + order by col3 asc nulls last, col2 + rows between 2 preceding and 2 following ) as sum_col2 +from spark_10747 where col1 = 6 order by sum_col2 +-- !query 4 schema +struct<col1:int,col2:int,col3:int,sum_col2:bigint> +-- !query 4 output +6 7 4 25 +6 13 NULL 35 +6 11 4 40 +6 10 NULL 44 +6 7 8 55 +6 15 8 57 +6 15 8 58 +6 12 10 59 +6 9 10 61 + + +-- !query 5 +select col1, col2, col3, sum(col2) + over (partition by col1 + order by col3 asc nulls first, col2 + rows between 2 preceding and 2 following ) as sum_col2 +from spark_10747 where col1 = 6 order by sum_col2 +-- !query 5 schema +struct<col1:int,col2:int,col3:int,sum_col2:bigint> +-- !query 5 output +6 10 NULL 30 +6 12 10 36 +6 13 NULL 41 +6 7 4 48 +6 9 10 51 +6 11 4 53 +6 7 8 55 +6 15 8 57 +6 15 8 58 + + +-- !query 6 +SELECT COL1, COL2, COL3 FROM spark_10747 ORDER BY COL3 ASC NULLS FIRST, COL2 +-- !query 6 schema +struct<COL1:int,COL2:int,COL3:int> +-- !query 6 output +6 10 NULL +6 13 NULL +6 7 4 +6 11 4 +6 7 8 +6 15 8 +6 15 8 +6 9 10 +6 12 10 + + +-- !query 7 +SELECT COL1, COL2, COL3 FROM spark_10747 ORDER BY COL3 NULLS LAST, COL2 +-- !query 7 schema +struct<COL1:int,COL2:int,COL3:int> +-- !query 7 output +6 7 4 +6 11 4 +6 7 8 +6 15 8 +6 15 8 +6 9 10 +6 12 10 +6 10 NULL +6 13 NULL + + +-- !query 8 +SELECT COL1, COL2, COL3 FROM spark_10747 ORDER BY COL3 DESC NULLS FIRST, COL2 +-- !query 8 schema +struct<COL1:int,COL2:int,COL3:int> +-- !query 8 output +6 10 NULL +6 13 NULL +6 9 10 +6 12 10 +6 7 8 +6 15 8 +6 15 8 +6 7 4 +6 11 4 + + +-- !query 9 +SELECT COL1, COL2, COL3 FROM spark_10747 ORDER BY COL3 DESC NULLS LAST, COL2 +-- !query 9 schema +struct<COL1:int,COL2:int,COL3:int> +-- !query 9 output +6 9 10 +6 12 10 +6 7 8 +6 15 8 +6 15 8 +6 7 4 +6 11 4 +6 10 NULL +6 13 NULL + + +-- !query 10 +drop table spark_10747 +-- !query 10 schema +struct<> +-- !query 10 output + + + +-- !query 11 +create table spark_10747_mix( +col1 string, +col2 int, +col3 double, +col4 decimal(10,2), +col5 decimal(20,1)) +using parquet +-- !query 11 schema +struct<> +-- !query 11 output + + + +-- !query 12 +INSERT INTO spark_10747_mix VALUES +('b', 2, 1.0, 1.00, 10.0), +('d', 3, 2.0, 3.00, 0.0), +('c', 3, 2.0, 2.00, 15.1), +('d', 3, 0.0, 3.00, 1.0), +(null, 3, 0.0, 3.00, 1.0), +('d', 3, null, 4.00, 1.0), +('a', 1, 1.0, 1.00, null), +('c', 3, 2.0, 2.00, null) +-- !query 12 schema +struct<> +-- !query 12 output + + + +-- !query 13 +select * from spark_10747_mix order by col1 nulls last, col5 nulls last +-- !query 13 schema +struct<col1:string,col2:int,col3:double,col4:decimal(10,2),col5:decimal(20,1)> +-- !query 13 output +a 1 1.0 1 NULL +b 2 1.0 1 10 +c 3 2.0 2 15.1 +c 3 2.0 2 NULL +d 3 2.0 3 0 +d 3 0.0 3 1 +d 3 NULL 4 1 +NULL 3 0.0 3 1 + + +-- !query 14 +select * from spark_10747_mix order by col1 desc nulls first, col5 desc nulls first +-- !query 14 schema +struct<col1:string,col2:int,col3:double,col4:decimal(10,2),col5:decimal(20,1)> +-- !query 14 output +NULL 3 0.0 3 1 +d 3 0.0 3 1 +d 3 NULL 4 1 +d 3 2.0 3 0 +c 3 2.0 2 NULL +c 3 2.0 2 15.1 +b 2 1.0 1 10 +a 1 1.0 1 NULL + + +-- !query 15 +select * from spark_10747_mix order by col5 desc nulls first, col3 desc nulls last +-- !query 15 schema +struct<col1:string,col2:int,col3:double,col4:decimal(10,2),col5:decimal(20,1)> +-- !query 15 output +c 3 2.0 2 NULL +a 1 1.0 1 NULL +c 3 2.0 2 15.1 +b 2 1.0 1 10 +d 3 0.0 3 1 +NULL 3 0.0 3 1 +d 3 NULL 4 1 +d 3 2.0 3 0 + + +-- !query 16 +drop table spark_10747_mix +-- !query 16 schema +struct<> +-- !query 16 output + diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SortSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SortSuite.scala index ba3fa3732d..a7bbe34f4e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SortSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SortSuite.scala @@ -101,7 +101,8 @@ class SortSuite extends SparkPlanTest with SharedSQLContext { for ( dataType <- DataTypeTestUtils.atomicTypes ++ Set(NullType); nullable <- Seq(true, false); - sortOrder <- Seq('a.asc :: Nil, 'a.desc :: Nil); + sortOrder <- + Seq('a.asc :: Nil, 'a.asc_nullsLast :: Nil, 'a.desc :: Nil, 'a.desc_nullsFirst :: Nil); randomDataGenerator <- RandomDataGenerator.forType(dataType, nullable) ) { test(s"sorting on $dataType with nullable=$nullable, sortOrder=$sortOrder") { |