aboutsummaryrefslogtreecommitdiff
path: root/sql/catalyst/src/main
diff options
context:
space:
mode:
author: Dongjoon Hyun <dongjoon@apache.org> 2016-08-01 11:12:58 +0200
committer: Herman van Hovell <hvanhovell@databricks.com> 2016-08-01 11:12:58 +0200
commit: 64d8f37c717cbc9c1c3649cae4c7cc4e628cd72d (patch)
tree: ed7205b0889da4d51d7478d28cedc2beef17455c /sql/catalyst/src/main
parent: 579fbcf3bd9717003025caecc0c0b85bcff7ac7f (diff)
download: spark-64d8f37c717cbc9c1c3649cae4c7cc4e628cd72d.tar.gz
spark-64d8f37c717cbc9c1c3649cae4c7cc4e628cd72d.tar.bz2
spark-64d8f37c717cbc9c1c3649cae4c7cc4e628cd72d.zip
[SPARK-16726][SQL] Improve `Union/Intersect/Except` error messages on incompatible types
## What changes were proposed in this pull request?

Currently, `UNION` queries on incompatible types show misleading error messages, e.g., `unresolved operator Union`. This PR reports a more accurate message instead. This will help users in the situation of [SPARK-16704](https://issues.apache.org/jira/browse/SPARK-16704).

**Before**
```scala
scala> sql("select 1,2,3 union (select 1,array(2),3)")
org.apache.spark.sql.AnalysisException: unresolved operator 'Union;
scala> sql("select 1,2,3 intersect (select 1,array(2),3)")
org.apache.spark.sql.AnalysisException: unresolved operator 'Intersect;
scala> sql("select 1,2,3 except (select 1,array(2),3)")
org.apache.spark.sql.AnalysisException: unresolved operator 'Except;
```

**After**
```scala
scala> sql("select 1,2,3 union (select 1,array(2),3)")
org.apache.spark.sql.AnalysisException: Union can only be performed on tables with the compatible column types. ArrayType(IntegerType,false) <> IntegerType at the second column of the second table;
scala> sql("select 1,2,3 intersect (select 1,array(2),3)")
org.apache.spark.sql.AnalysisException: Intersect can only be performed on tables with the compatible column types. ArrayType(IntegerType,false) <> IntegerType at the second column of the second table;
scala> sql("select 1,2,3 except (select array(1),array(2),3)")
org.apache.spark.sql.AnalysisException: Except can only be performed on tables with the compatible column types. ArrayType(IntegerType,false) <> IntegerType at the first column of the second table;
```

## How was this patch tested?

Pass the Jenkins tests with a new test case.

Author: Dongjoon Hyun <dongjoon@apache.org>

Closes #14355 from dongjoon-hyun/SPARK-16726.
Diffstat (limited to 'sql/catalyst/src/main')
-rw-r--r-- sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala 44
1 files changed, 31 insertions, 13 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
index 8b87a4e41c..41b7e62d8c 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
@@ -253,19 +253,6 @@ trait CheckAnalysis extends PredicateHelper {
}
}
- case s @ SetOperation(left, right) if left.output.length != right.output.length =>
- failAnalysis(
- s"${s.nodeName} can only be performed on tables with the same number of columns, " +
- s"but the left table has ${left.output.length} columns and the right has " +
- s"${right.output.length}")
-
- case s: Union if s.children.exists(_.output.length != s.children.head.output.length) =>
- val firstError = s.children.find(_.output.length != s.children.head.output.length).get
- failAnalysis(
- s"Unions can only be performed on tables with the same number of columns, " +
- s"but one table has '${firstError.output.length}' columns and another table has " +
- s"'${s.children.head.output.length}' columns")
-
case GlobalLimit(limitExpr, _) => checkLimitClause(limitExpr)
case LocalLimit(limitExpr, _) => checkLimitClause(limitExpr)
@@ -280,6 +267,37 @@ trait CheckAnalysis extends PredicateHelper {
case p if p.expressions.exists(PredicateSubquery.hasPredicateSubquery) =>
failAnalysis(s"Predicate sub-queries can only be used in a Filter: $p")
+ case _: Union | _: SetOperation if operator.children.length > 1 =>
+ def dataTypes(plan: LogicalPlan): Seq[DataType] = plan.output.map(_.dataType)
+ def ordinalNumber(i: Int): String = i match {
+ case 0 => "first"
+ case 1 => "second"
+ case i => s"${i}th"
+ }
+ val ref = dataTypes(operator.children.head)
+ operator.children.tail.zipWithIndex.foreach { case (child, ti) =>
+ // Check the number of columns
+ if (child.output.length != ref.length) {
+ failAnalysis(
+ s"""
+ |${operator.nodeName} can only be performed on tables with the same number
+ |of columns, but the first table has ${ref.length} columns and
+ |the ${ordinalNumber(ti + 1)} table has ${child.output.length} columns
+ """.stripMargin.replace("\n", " ").trim())
+ }
+ // Check if the data types match.
+ dataTypes(child).zip(ref).zipWithIndex.foreach { case ((dt1, dt2), ci) =>
+ if (dt1 != dt2) {
+ failAnalysis(
+ s"""
+ |${operator.nodeName} can only be performed on tables with the compatible
+ |column types. $dt1 <> $dt2 at the ${ordinalNumber(ci)} column of
+ |the ${ordinalNumber(ti + 1)} table
+ """.stripMargin.replace("\n", " ").trim())
+ }
+ }
+ }
+
case _ => // Fallbacks to the following checks
}