From 7b65030e7a0af3a0bd09370fb069d659b36ff7f0 Mon Sep 17 00:00:00 2001 From: Sean Zhong Date: Tue, 16 Aug 2016 15:51:30 +0800 Subject: [SPARK-17034][SQL] adds expression UnresolvedOrdinal to represent the ordinals in GROUP BY or ORDER BY ## What changes were proposed in this pull request? This PR adds expression `UnresolvedOrdinal` to represent the ordinal in GROUP BY or ORDER BY, and fixes the rules when resolving ordinals. Ordinals in GROUP BY or ORDER BY like `1` in `order by 1` or `group by 1` should be considered as unresolved before analysis. But the current code uses a `Literal` expression to store the ordinal. This is inappropriate because `Literal` itself is a resolved expression; it gives the user the wrong message that the ordinals have already been resolved. ### Before this change The ordinal is stored as a `Literal` expression ``` scala> sc.setLogLevel("TRACE") scala> sql("select a from t group by 1 order by 1") ... 'Sort [1 ASC], true +- 'Aggregate [1], ['a] +- 'UnresolvedRelation `t` ``` For query: ``` scala> Seq(1).toDF("a").createOrReplaceTempView("t") scala> sql("select count(a), a from t group by 2 having a > 0").show ``` During analysis, the intermediate plan before applying rule `ResolveAggregateFunctions` is: ``` 'Filter ('a > 0) +- Aggregate [2], [count(1) AS count(1)#83L, a#81] +- LocalRelation [value#7 AS a#9] ``` Before this PR, rule `ResolveAggregateFunctions` believes all expressions of `Aggregate` have already been resolved, and tries to resolve the expressions in `Filter` directly. But this is wrong, as ordinal `2` in Aggregate is not really resolved! ### After this change Ordinals are stored as `UnresolvedOrdinal`. ``` scala> sc.setLogLevel("TRACE") scala> sql("select a from t group by 1 order by 1") ... 'Sort [unresolvedordinal(1) ASC], true +- 'Aggregate [unresolvedordinal(1)], ['a] +- 'UnresolvedRelation `t` ``` ## How was this patch tested? Unit tests. Author: Sean Zhong Closes #14616 from clockfly/spark-16955. 
--- .../sql/catalyst/analysis/AnalysisSuite.scala | 2 +- .../UnresolvedOrdinalSubstitutionSuite.scala | 65 ++++++++++++++++++++++ 2 files changed, 66 insertions(+), 1 deletion(-) create mode 100644 sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/UnresolvedOrdinalSubstitutionSuite.scala (limited to 'sql/catalyst/src/test') diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala index 102c78bd72..22e1c9be05 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.catalyst.analysis -import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.{SimpleCatalystConf, TableIdentifier} import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.expressions._ diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/UnresolvedOrdinalSubstitutionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/UnresolvedOrdinalSubstitutionSuite.scala new file mode 100644 index 0000000000..23995e96e1 --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/UnresolvedOrdinalSubstitutionSuite.scala @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.analysis + +import org.apache.spark.sql.catalyst.analysis.TestRelations.testRelation2 +import org.apache.spark.sql.catalyst.dsl.expressions._ +import org.apache.spark.sql.catalyst.dsl.plans._ +import org.apache.spark.sql.catalyst.expressions.Literal +import org.apache.spark.sql.catalyst.SimpleCatalystConf + +class UnresolvedOrdinalSubstitutionSuite extends AnalysisTest { + + test("test rule UnresolvedOrdinalSubstitution, replaces ordinal in order by or group by") { + val a = testRelation2.output(0) + val b = testRelation2.output(1) + val conf = new SimpleCatalystConf(caseSensitiveAnalysis = true) + + // Expression UnresolvedOrdinal is unresolved. + assert(!UnresolvedOrdinal(0).resolved) + + // Tests order by ordinal, apply single rule. + val plan = testRelation2.orderBy(Literal(1).asc, Literal(2).asc) + comparePlans( + new UnresolvedOrdinalSubstitution(conf).apply(plan), + testRelation2.orderBy(UnresolvedOrdinal(1).asc, UnresolvedOrdinal(2).asc)) + + // Tests order by ordinal, do full analysis: ordinals 1 and 2 become a and b. + checkAnalysis(plan, testRelation2.orderBy(a.asc, b.asc)) + + // Order by ordinal can be turned off by config; the literals are then left as-is. + comparePlans( + new UnresolvedOrdinalSubstitution(conf.copy(orderByOrdinal = false)).apply(plan), + testRelation2.orderBy(Literal(1).asc, Literal(2).asc)) + + + // Tests group by ordinal, apply single rule. 
+ val plan2 = testRelation2.groupBy(Literal(1), Literal(2))('a, 'b) + comparePlans( + new UnresolvedOrdinalSubstitution(conf).apply(plan2), + testRelation2.groupBy(UnresolvedOrdinal(1), UnresolvedOrdinal(2))('a, 'b)) + + // Tests group by ordinal, do full analysis: ordinals 1 and 2 become a and b. + checkAnalysis(plan2, testRelation2.groupBy(a, b)(a, b)) + + // Group by ordinal can be turned off by config; the literals are then left as-is. + comparePlans( + new UnresolvedOrdinalSubstitution(conf.copy(groupByOrdinal = false)).apply(plan2), + testRelation2.groupBy(Literal(1), Literal(2))('a, 'b)) + } +} -- cgit v1.2.3