From 2c5b18fb0fdeabd378dd97e91f72d1eac4e21cc7 Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Mon, 21 Mar 2016 18:08:41 +0800 Subject: [SPARK-12789][SQL] Support Order By Ordinal in SQL #### What changes were proposed in this pull request? This PR is to support order by position in SQL, e.g. ```SQL select c1, c2, c3 from tbl order by 1 desc, 3 ``` should be equivalent to ```SQL select c1, c2, c3 from tbl order by c1 desc, c3 asc ``` This is controlled by config option `spark.sql.orderByOrdinal`. - When true, the ordinal numbers are treated as the position in the select list. - When false, the ordinal numbers in the order/sort by clauses are ignored. - Only convert integer literals (not foldable expressions). If foldable expressions are found, ignore them. - This also works with select *. **Question**: Do we still need to support sorting by columns that contain zero references? In this case, it will have no impact on the sorting results. IMO, we should not allow users to do it. rxin cloud-fan marmbrus yhuai hvanhovell -- Update: In these cases, such sort expressions are ignored. **Note**: This PR is taken from https://github.com/apache/spark/pull/10731. When merging this PR, please give the credit to zhichao-li Also cc all the people who are involved in the previous discussion: adrian-wang chenghao-intel tejasapatil #### How was this patch tested? Added test cases covering both positive and negative scenarios. Author: gatorsmile Closes #11815 from gatorsmile/orderByPosition. 
--- .../org/apache/spark/sql/internal/SQLConf.scala | 7 ++++ .../scala/org/apache/spark/sql/SQLQuerySuite.scala | 43 ++++++++++++++++++++++ 2 files changed, 50 insertions(+) (limited to 'sql/core') diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 9e0878a514..3d1d5b120a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -435,6 +435,11 @@ object SQLConf { defaultValue = Some(true), doc = "When false, we will treat bucketed table as normal table.") + val ORDER_BY_ORDINAL = booleanConf("spark.sql.orderByOrdinal", + defaultValue = Some(true), + doc = "When true, the ordinal numbers are treated as the position in the select list. " + + "When false, the ordinal numbers in order/sort By clause are ignored.") + // The output committer class used by HadoopFsRelation. The specified class needs to be a // subclass of org.apache.hadoop.mapreduce.OutputCommitter. // @@ -634,6 +639,8 @@ class SQLConf extends Serializable with CatalystConf with ParserConf with Loggin def supportSQL11ReservedKeywords: Boolean = getConf(PARSER_SUPPORT_SQL11_RESERVED_KEYWORDS) + override def orderByOrdinal: Boolean = getConf(ORDER_BY_ORDINAL) + /** ********************** SQLConf functionality methods ************ */ /** Set Spark SQL configuration properties. 
*/ diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 6716982118..b765fd8b66 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -21,6 +21,8 @@ import java.math.MathContext import java.sql.Timestamp import org.apache.spark.AccumulatorSuite +import org.apache.spark.sql.catalyst.analysis.UnresolvedException +import org.apache.spark.sql.catalyst.expressions.SortOrder import org.apache.spark.sql.execution.aggregate import org.apache.spark.sql.execution.joins.{BroadcastHashJoin, CartesianProduct, SortMergeJoin} import org.apache.spark.sql.functions._ @@ -2156,6 +2158,47 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { } } + test("order by ordinal number") { + checkAnswer( + sql("SELECT * FROM testData2 ORDER BY 1 DESC"), + sql("SELECT * FROM testData2 ORDER BY a DESC")) + // If the position is not an integer, ignore it. 
+ checkAnswer( + sql("SELECT * FROM testData2 ORDER BY 1 + 0 DESC, b ASC"), + sql("SELECT * FROM testData2 ORDER BY b ASC")) + + checkAnswer( + sql("SELECT * FROM testData2 ORDER BY 1 DESC, b ASC"), + sql("SELECT * FROM testData2 ORDER BY a DESC, b ASC")) + checkAnswer( + sql("SELECT * FROM testData2 SORT BY 1 DESC, 2"), + sql("SELECT * FROM testData2 SORT BY a DESC, b ASC")) + checkAnswer( + sql("SELECT * FROM testData2 ORDER BY 1 ASC, b ASC"), + Seq(Row(1, 1), Row(1, 2), Row(2, 1), Row(2, 2), Row(3, 1), Row(3, 2))) + } + + test("order by ordinal number - negative cases") { + intercept[UnresolvedException[SortOrder]] { + sql("SELECT * FROM testData2 ORDER BY 0") + } + intercept[UnresolvedException[SortOrder]] { + sql("SELECT * FROM testData2 ORDER BY -1 DESC, b ASC") + } + intercept[UnresolvedException[SortOrder]] { + sql("SELECT * FROM testData2 ORDER BY 3 DESC, b ASC") + } + } + + test("order by ordinal number with conf spark.sql.orderByOrdinal=false") { + withSQLConf(SQLConf.ORDER_BY_ORDINAL.key -> "false") { + // If spark.sql.orderByOrdinal=false, ignore the position number. + checkAnswer( + sql("SELECT * FROM testData2 ORDER BY 1 DESC, b ASC"), + sql("SELECT * FROM testData2 ORDER BY b ASC")) + } + } + test("natural join") { val df1 = Seq(("one", 1), ("two", 2), ("three", 3)).toDF("k", "v1") val df2 = Seq(("one", 1), ("two", 22), ("one", 5)).toDF("k", "v2") -- cgit v1.2.3