From a1ff72e1cce6f22249ccc4905e8cef30075beb2f Mon Sep 17 00:00:00 2001 From: petermaxlee Date: Tue, 2 Aug 2016 19:32:35 +0800 Subject: [SPARK-16850][SQL] Improve type checking error message for greatest/least ## What changes were proposed in this pull request? Greatest/least function does not have the most friendly error message for data types. This patch improves the error message to not show the Seq type, and use more human readable data types. Before: ``` org.apache.spark.sql.AnalysisException: cannot resolve 'greatest(CAST(1.0 AS DECIMAL(2,1)), "1.0")' due to data type mismatch: The expressions should all have the same type, got GREATEST (ArrayBuffer(DecimalType(2,1), StringType)).; line 1 pos 7 ``` After: ``` org.apache.spark.sql.AnalysisException: cannot resolve 'greatest(CAST(1.0 AS DECIMAL(2,1)), "1.0")' due to data type mismatch: The expressions should all have the same type, got GREATEST(decimal(2,1), string).; line 1 pos 7 ``` ## How was this patch tested? Manually verified the output and also added unit tests to ConditionalExpressionSuite. Author: petermaxlee Closes #14453 from petermaxlee/SPARK-16850. --- .../sql/catalyst/expressions/conditionalExpressions.scala | 4 ++-- .../catalyst/expressions/ConditionalExpressionSuite.scala | 13 +++++++++++++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala index e97e08947a..5f2585fc40 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala @@ -299,7 +299,7 @@ case class Least(children: Seq[Expression]) extends Expression { } else if (children.map(_.dataType).distinct.count(_ != NullType) > 1) { TypeCheckResult.TypeCheckFailure( s"The expressions should all have the same type," + - s" got LEAST (${children.map(_.dataType)}).") + s" got LEAST(${children.map(_.dataType.simpleString).mkString(", ")}).") } else { TypeUtils.checkForOrderingExpr(dataType, "function " + prettyName) } @@ -359,7 +359,7 @@ case class Greatest(children: Seq[Expression]) extends Expression { } else if (children.map(_.dataType).distinct.count(_ != NullType) > 1) { TypeCheckResult.TypeCheckFailure( s"The expressions should all have the same type," + - s" got GREATEST (${children.map(_.dataType)}).") + s" got GREATEST(${children.map(_.dataType.simpleString).mkString(", ")}).") } else { TypeUtils.checkForOrderingExpr(dataType, "function " + prettyName) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ConditionalExpressionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ConditionalExpressionSuite.scala index 3c581ecdaf..36185b8c63 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ConditionalExpressionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ConditionalExpressionSuite.scala @@ -21,6 +21,7 @@ import java.sql.{Date, Timestamp} import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.TypeCheckFailure import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.types._ @@ -181,6 +182,12 @@ class ConditionalExpressionSuite extends SparkFunSuite with ExpressionEvalHelper Literal(Timestamp.valueOf("2015-07-01 10:00:00")))), Timestamp.valueOf("2015-07-01 08:00:00"), InternalRow.empty) + // Type checking error + assert( + Least(Seq(Literal(1), Literal("1"))).checkInputDataTypes() == + TypeCheckFailure("The expressions should all have the same type, " + + "got LEAST(int, string).")) + DataTypeTestUtils.ordered.foreach { dt => checkConsistencyBetweenInterpretedAndCodegen(Least, dt, 2) } @@ -227,6 +234,12 @@ class ConditionalExpressionSuite extends SparkFunSuite with ExpressionEvalHelper Literal(Timestamp.valueOf("2015-07-01 10:00:00")))), Timestamp.valueOf("2015-07-01 10:00:00"), InternalRow.empty) + // Type checking error + assert( + Greatest(Seq(Literal(1), Literal("1"))).checkInputDataTypes() == + TypeCheckFailure("The expressions should all have the same type, " + + "got GREATEST(int, string).")) + DataTypeTestUtils.ordered.foreach { dt => checkConsistencyBetweenInterpretedAndCodegen(Greatest, dt, 2) } -- cgit v1.2.3