aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorhyukjinkwon <gurwls223@gmail.com>2017-02-13 16:08:31 +0100
committerHerman van Hovell <hvanhovell@databricks.com>2017-02-13 16:08:31 +0100
commit4321ff9edda4961273ac4a5b02dc1aed03f05e47 (patch)
treea12f4daf2b7dd1e58b92e3ee94bbb7ff2d8c807c
parent04ad822534e8ded96a9ba4b7d43320e53c6d2808 (diff)
downloadspark-4321ff9edda4961273ac4a5b02dc1aed03f05e47.tar.gz
spark-4321ff9edda4961273ac4a5b02dc1aed03f05e47.tar.bz2
spark-4321ff9edda4961273ac4a5b02dc1aed03f05e47.zip
[SPARK-19544][SQL] Improve error message when some column types are compatible and others are not in set operations
## What changes were proposed in this pull request? This PR proposes to fix the error message when some data types are compatible and others are not in set/union operation. Currently, the code below: ```scala Seq((1,("a", 1))).toDF.union(Seq((1L,("a", "b"))).toDF) ``` throws an exception saying `LongType` and `IntegerType` are incompatible types. It should say something about `StructType`s with more readable format as below: **Before** ``` Union can only be performed on tables with the compatible column types. LongType <> IntegerType at the first column of the second table;; ``` **After** ``` Union can only be performed on tables with the compatible column types. struct<_1:string,_2:string> <> struct<_1:string,_2:int> at the second column of the second table;; ``` *I manually inserted a newline in the messages above for readability only in this PR description. ## How was this patch tested? Unit tests in `AnalysisErrorSuite`, manual tests and build with Scala 2.10. Author: hyukjinkwon <gurwls223@gmail.com> Closes #16882 from HyukjinKwon/SPARK-19544.
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala6
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala24
-rw-r--r--sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala15
-rw-r--r--sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TestRelations.scala7
4 files changed, 38 insertions, 14 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
index b4a7c05ee0..532ecb8757 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
@@ -321,12 +321,12 @@ trait CheckAnalysis extends PredicateHelper {
// Check if the data types match.
dataTypes(child).zip(ref).zipWithIndex.foreach { case ((dt1, dt2), ci) =>
// SPARK-18058: we shall not care about the nullability of columns
- if (!dt1.sameType(dt2)) {
+ if (TypeCoercion.findWiderTypeForTwo(dt1.asNullable, dt2.asNullable).isEmpty) {
failAnalysis(
s"""
|${operator.nodeName} can only be performed on tables with the compatible
- |column types. $dt1 <> $dt2 at the ${ordinalNumber(ci)} column of
- |the ${ordinalNumber(ti + 1)} table
+ |column types. ${dt1.catalogString} <> ${dt2.catalogString} at the
+ |${ordinalNumber(ci)} column of the ${ordinalNumber(ti + 1)} table
""".stripMargin.replace("\n", " ").trim())
}
}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
index b636c31703..dfaac92e04 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
@@ -116,17 +116,19 @@ object TypeCoercion {
* i.e. the main difference with [[findTightestCommonType]] is that here we allow some
* loss of precision when widening decimal and double, and promotion to string.
*/
- private def findWiderTypeForTwo(t1: DataType, t2: DataType): Option[DataType] = (t1, t2) match {
- case (t1: DecimalType, t2: DecimalType) =>
- Some(DecimalPrecision.widerDecimalType(t1, t2))
- case (t: IntegralType, d: DecimalType) =>
- Some(DecimalPrecision.widerDecimalType(DecimalType.forType(t), d))
- case (d: DecimalType, t: IntegralType) =>
- Some(DecimalPrecision.widerDecimalType(DecimalType.forType(t), d))
- case (_: FractionalType, _: DecimalType) | (_: DecimalType, _: FractionalType) =>
- Some(DoubleType)
- case _ =>
- findTightestCommonTypeToString(t1, t2)
+ private[analysis] def findWiderTypeForTwo(t1: DataType, t2: DataType): Option[DataType] = {
+ (t1, t2) match {
+ case (t1: DecimalType, t2: DecimalType) =>
+ Some(DecimalPrecision.widerDecimalType(t1, t2))
+ case (t: IntegralType, d: DecimalType) =>
+ Some(DecimalPrecision.widerDecimalType(DecimalType.forType(t), d))
+ case (d: DecimalType, t: IntegralType) =>
+ Some(DecimalPrecision.widerDecimalType(DecimalType.forType(t), d))
+ case (_: FractionalType, _: DecimalType) | (_: DecimalType, _: FractionalType) =>
+ Some(DoubleType)
+ case _ =>
+ findTightestCommonTypeToString(t1, t2)
+ }
}
private def findWiderCommonType(types: Seq[DataType]) = {
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
index 96aff37a4b..c5e877d128 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
@@ -283,16 +283,31 @@ class AnalysisErrorSuite extends AnalysisTest {
"union" :: "the compatible column types" :: Nil)
errorTest(
+ "union with a incompatible column type and compatible column types",
+ testRelation3.union(testRelation4),
+ "union" :: "the compatible column types" :: "map" :: "decimal" :: Nil)
+
+ errorTest(
"intersect with incompatible column types",
testRelation.intersect(nestedRelation),
"intersect" :: "the compatible column types" :: Nil)
errorTest(
+ "intersect with a incompatible column type and compatible column types",
+ testRelation3.intersect(testRelation4),
+ "intersect" :: "the compatible column types" :: "map" :: "decimal" :: Nil)
+
+ errorTest(
"except with incompatible column types",
testRelation.except(nestedRelation),
"except" :: "the compatible column types" :: Nil)
errorTest(
+ "except with a incompatible column type and compatible column types",
+ testRelation3.except(testRelation4),
+ "except" :: "the compatible column types" :: "map" :: "decimal" :: Nil)
+
+ errorTest(
"SPARK-9955: correct error message for aggregate",
// When parse SQL string, we will wrap aggregate expressions with UnresolvedAlias.
testRelation2.where('bad_column > 1).groupBy('a)(UnresolvedAlias(max('b))),
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TestRelations.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TestRelations.scala
index 3741a6ba95..e12e272aed 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TestRelations.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TestRelations.scala
@@ -37,6 +37,13 @@ object TestRelations {
AttributeReference("g", DoubleType)(),
AttributeReference("h", DecimalType(10, 2))())
+ // This is the same with `testRelation3` but only `h` is incompatible type.
+ val testRelation4 = LocalRelation(
+ AttributeReference("e", StringType)(),
+ AttributeReference("f", StringType)(),
+ AttributeReference("g", StringType)(),
+ AttributeReference("h", MapType(IntegerType, IntegerType))())
+
val nestedRelation = LocalRelation(
AttributeReference("top", StructType(
StructField("duplicateField", StringType) ::