aboutsummaryrefslogtreecommitdiff
path: root/sql/catalyst/src
diff options
context:
space:
mode:
authorDongjoon Hyun <dongjoon@apache.org>2016-11-26 14:57:48 -0800
committerHerman van Hovell <hvanhovell@databricks.com>2016-11-26 14:57:48 -0800
commit9c03c564605783d8e94f6795432bb59c33933e52 (patch)
treecba4cfdaae34c80125365438e3d7ca404ec1b19e /sql/catalyst/src
parentf4a98e421e14434fddc3f9f1018a17124d660ef0 (diff)
downloadspark-9c03c564605783d8e94f6795432bb59c33933e52.tar.gz
spark-9c03c564605783d8e94f6795432bb59c33933e52.tar.bz2
spark-9c03c564605783d8e94f6795432bb59c33933e52.zip
[SPARK-17251][SQL] Improve `OuterReference` to be `NamedExpression`
## What changes were proposed in this pull request?

Currently, `OuterReference` is not a `NamedExpression`, so it raises a `ClassCastException` when it is used in the projection list of an IN correlated subquery. This PR makes `OuterReference` a `NamedExpression` so that the analyzer reports a correct error message instead.

```scala
scala> sql("CREATE TEMPORARY VIEW t1 AS SELECT * FROM VALUES 1, 2 AS t1(a)")
scala> sql("CREATE TEMPORARY VIEW t2 AS SELECT * FROM VALUES 1 AS t2(b)")
scala> sql("SELECT a FROM t1 WHERE a IN (SELECT a FROM t2)").show
java.lang.ClassCastException: org.apache.spark.sql.catalyst.expressions.OuterReference cannot be cast to org.apache.spark.sql.catalyst.expressions.NamedExpression
```

## How was this patch tested?

Pass the Jenkins tests with the new test cases.

Author: Dongjoon Hyun <dongjoon@apache.org>

Closes #16015 from dongjoon-hyun/SPARK-17251-2.
Diffstat (limited to 'sql/catalyst/src')
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala3
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala9
-rw-r--r--sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveSubquerySuite.scala43
3 files changed, 53 insertions, 2 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index 1db44496e6..e03cae8ecd 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -1027,7 +1027,8 @@ class Analyzer(
def failOnOuterReference(p: LogicalPlan): Unit = {
if (p.expressions.exists(containsOuter)) {
failAnalysis(
- s"Correlated predicates are not supported outside of WHERE/HAVING clauses: $p")
+ "Expressions referencing the outer query are not supported outside of WHERE/HAVING " +
+ s"clauses: $p")
}
}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala
index 1274757136..c842f85af6 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala
@@ -356,10 +356,17 @@ case class PrettyAttribute(
* A place holder used to hold a reference that has been resolved to a field outside of the current
* plan. This is used for correlated subqueries.
*/
-case class OuterReference(e: NamedExpression) extends LeafExpression with Unevaluable {
+case class OuterReference(e: NamedExpression)
+ extends LeafExpression with NamedExpression with Unevaluable {
override def dataType: DataType = e.dataType
override def nullable: Boolean = e.nullable
override def prettyName: String = "outer"
+
+ override def name: String = e.name
+ override def qualifier: Option[String] = e.qualifier
+ override def exprId: ExprId = e.exprId
+ override def toAttribute: Attribute = e.toAttribute
+ override def newInstance(): NamedExpression = OuterReference(e.newInstance())
}
object VirtualColumn {
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveSubquerySuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveSubquerySuite.scala
new file mode 100644
index 0000000000..4aafb2b83f
--- /dev/null
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveSubquerySuite.scala
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.analysis
+
+import org.apache.spark.sql.AnalysisException
+import org.apache.spark.sql.catalyst.dsl.expressions._
+import org.apache.spark.sql.catalyst.expressions.{In, ListQuery, OuterReference}
+import org.apache.spark.sql.catalyst.plans.logical.{Filter, LocalRelation, Project}
+
+/**
+ * Unit tests for [[ResolveSubquery]].
+ */
+class ResolveSubquerySuite extends AnalysisTest {
+
+ val a = 'a.int
+ val b = 'b.int
+ val t1 = LocalRelation(a)
+ val t2 = LocalRelation(b)
+
+ test("SPARK-17251 Improve `OuterReference` to be `NamedExpression`") {
+ val expr = Filter(In(a, Seq(ListQuery(Project(Seq(OuterReference(a)), t2)))), t1)
+ val m = intercept[AnalysisException] {
+ SimpleAnalyzer.ResolveSubquery(expr)
+ }.getMessage
+ assert(m.contains(
+ "Expressions referencing the outer query are not supported outside of WHERE/HAVING clauses"))
+ }
+}