diff options
author | OopsOutOfMemory <victorshengli@126.com> | 2015-06-17 13:37:59 -0700 |
---|---|---|
committer | Michael Armbrust <michael@databricks.com> | 2015-06-17 13:37:59 -0700 |
commit | 98ee3512b26e87eeb22693a4a61b2c4981f38ca4 (patch) | |
tree | b6c978636f9b7ee8b8b2a9bd323cddca0303f07e /sql | |
parent | a46594435e0dd6cf86ac562bc11fc13d82b63c71 (diff) | |
download | spark-98ee3512b26e87eeb22693a4a61b2c4981f38ca4.tar.gz spark-98ee3512b26e87eeb22693a4a61b2c4981f38ca4.tar.bz2 spark-98ee3512b26e87eeb22693a4a61b2c4981f38ca4.zip |
[SPARK-8010] [SQL] Promote types to StringType as implicit conversion in non-binary expression of HiveTypeCoercion
1. Given a query
`select coalesce(null, 1, '1') from dual` will cause exception:
java.lang.RuntimeException: Could not determine return type of Coalesce for IntegerType,StringType
2. Given a query:
`select case when true then 1 else '1' end from dual` will cause exception:
java.lang.RuntimeException: Types in CASE WHEN must be the same or coercible to a common type: StringType != IntegerType
I checked the code, the main cause is the HiveTypeCoercion doesn't do implicit convert when there is a IntegerType and StringType.
Numeric types can be promoted to string type
Hive will always do this implicit conversion.
Author: OopsOutOfMemory <victorshengli@126.com>
Closes #6551 from OopsOutOfMemory/pnts and squashes the following commits:
7a209d7 [OopsOutOfMemory] rebase master
6018613 [OopsOutOfMemory] convert function to method
4cd5618 [OopsOutOfMemory] limit the data type to primitive type
df365d2 [OopsOutOfMemory] refine
95cbd58 [OopsOutOfMemory] fix style
403809c [OopsOutOfMemory] promote non-string to string when can not found tighestCommonTypeOfTwo
Diffstat (limited to 'sql')
-rw-r--r-- | sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala | 20 | ||||
-rw-r--r-- | sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala | 10 |
2 files changed, 27 insertions, 3 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala index e7bf7cc1f1..189451d0d9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala @@ -68,6 +68,19 @@ object HiveTypeCoercion { } /** + * Similar to [[findTightestCommonType]], if can not find the TightestCommonType, try to use + * [[findTightestCommonTypeToString]] to find the TightestCommonType. + */ + private def findTightestCommonTypeAndPromoteToString(types: Seq[DataType]): Option[DataType] = { + types.foldLeft[Option[DataType]](Some(NullType))((r, c) => r match { + case None => None + case Some(d) => + findTightestCommonTypeOfTwo(d, c).orElse(findTightestCommonTypeToString(d, c)) + }) + } + + + /** * Find the tightest common type of a set of types by continuously applying * `findTightestCommonTypeOfTwo` on these types. */ @@ -599,7 +612,7 @@ trait HiveTypeCoercion { // compatible with every child column. case Coalesce(es) if es.map(_.dataType).distinct.size > 1 => val types = es.map(_.dataType) - findTightestCommonType(types) match { + findTightestCommonTypeAndPromoteToString(types) match { case Some(finalDataType) => Coalesce(es.map(Cast(_, finalDataType))) case None => sys.error(s"Could not determine return type of Coalesce for ${types.mkString(",")}") @@ -634,7 +647,7 @@ trait HiveTypeCoercion { def apply(plan: LogicalPlan): LogicalPlan = plan transformAllExpressions { case c: CaseWhenLike if c.childrenResolved && !c.valueTypesEqual => logDebug(s"Input values for null casting ${c.valueTypes.mkString(",")}") - val maybeCommonType = findTightestCommonType(c.valueTypes) + val maybeCommonType = findTightestCommonTypeAndPromoteToString(c.valueTypes) maybeCommonType.map { commonType => val castedBranches = c.branches.grouped(2).map { case Seq(when, value) if value.dataType != commonType => @@ -650,7 +663,8 @@ trait HiveTypeCoercion { }.getOrElse(c) case c: CaseKeyWhen if c.childrenResolved && !c.resolved => - val maybeCommonType = findTightestCommonType((c.key +: c.whenList).map(_.dataType)) + val maybeCommonType = + findTightestCommonTypeAndPromoteToString((c.key +: c.whenList).map(_.dataType)) maybeCommonType.map { commonType => val castedBranches = c.branches.grouped(2).map { case Seq(when, then) if when.dataType != commonType => diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index a47cc30e92..1a6ee8169c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -45,6 +45,16 @@ class SQLQuerySuite extends QueryTest with BeforeAndAfterAll with SQLTestUtils { Row("one", 6) :: Row("three", 3) :: Nil) } + test("SPARK-8010: promote numeric to string") { + val df = Seq((1, 1)).toDF("key", "value") + df.registerTempTable("src") + val queryCaseWhen = sql("select case when true then 1.0 else '1' end from src ") + val queryCoalesce = sql("select coalesce(null, 1, '1') from src ") + + checkAnswer(queryCaseWhen, Row("1.0") :: Nil) + checkAnswer(queryCoalesce, Row("1") :: Nil) + } + test("SPARK-6743: no columns from cache") { Seq( (83, 0, 38), |