[SPARK-8010] [SQL] Promote types to StringType as implicit conversion in non-binary expression of HiveTypeCoercion

1. Given a query `select coalesce(null, 1, '1') from dual` will cause exception: java.lang.RuntimeException: Could not determine return type of Coalesce for IntegerType,StringType 2. Given a query: `select case when true then 1 else '1' end from dual` will cause exception: java.lang.RuntimeException: Types in CASE WHEN must be the same or coercible to a common type: StringType != IntegerType I checked the code, the main cause is the HiveTypeCoercion doesn't do implicit convert when there is a IntegerType and StringType. Numeric types can be promoted to string type Hive will always do this implicit conversion. Author: OopsOutOfMemory <victorshengli@126.com> Closes #6551 from OopsOutOfMemory/pnts and squashes the following commits: 7a209d7 [OopsOutOfMemory] rebase master 6018613 [OopsOutOfMemory] convert function to method 4cd5618 [OopsOutOfMemory] limit the data type to primitive type df365d2 [OopsOutOfMemory] refine 95cbd58 [OopsOutOfMemory] fix style 403809c [OopsOutOfMemory] promote non-string to string when can not found tighestCommonTypeOfTwo
author: OopsOutOfMemory <victorshengli@126.com> 2015-06-17 13:37:59 -0700
committer: Michael Armbrust <michael@databricks.com> 2015-06-17 13:37:59 -0700
commit: 98ee3512b26e87eeb22693a4a61b2c4981f38ca4 (patch)
tree: b6c978636f9b7ee8b8b2a9bd323cddca0303f07e /sql
parent: a46594435e0dd6cf86ac562bc11fc13d82b63c71 (diff)
download: spark-98ee3512b26e87eeb22693a4a61b2c4981f38ca4.tar.gz
spark-98ee3512b26e87eeb22693a4a61b2c4981f38ca4.tar.bz2
spark-98ee3512b26e87eeb22693a4a61b2c4981f38ca4.zip
2 files changed, 27 insertions, 3 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala
index e7bf7cc1f1..189451d0d9 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala
@@ -68,6 +68,19 @@ object HiveTypeCoercion {
   }
 
   /**
+   * Similar to [[findTightestCommonType]], if can not find the TightestCommonType, try to use
+   * [[findTightestCommonTypeToString]] to find the TightestCommonType.
+   */
+  private def findTightestCommonTypeAndPromoteToString(types: Seq[DataType]): Option[DataType] = {
+    types.foldLeft[Option[DataType]](Some(NullType))((r, c) => r match {
+      case None => None
+      case Some(d) =>
+        findTightestCommonTypeOfTwo(d, c).orElse(findTightestCommonTypeToString(d, c))
+    })
+  }
+
+
+  /**
    * Find the tightest common type of a set of types by continuously applying
    * `findTightestCommonTypeOfTwo` on these types.
    */
@@ -599,7 +612,7 @@ trait HiveTypeCoercion {
       // compatible with every child column.
       case Coalesce(es) if es.map(_.dataType).distinct.size > 1 =>
         val types = es.map(_.dataType)
-        findTightestCommonType(types) match {
+        findTightestCommonTypeAndPromoteToString(types) match {
           case Some(finalDataType) => Coalesce(es.map(Cast(_, finalDataType)))
           case None =>
             sys.error(s"Could not determine return type of Coalesce for ${types.mkString(",")}")
@@ -634,7 +647,7 @@ trait HiveTypeCoercion {
     def apply(plan: LogicalPlan): LogicalPlan = plan transformAllExpressions {
       case c: CaseWhenLike if c.childrenResolved && !c.valueTypesEqual =>
         logDebug(s"Input values for null casting ${c.valueTypes.mkString(",")}")
-        val maybeCommonType = findTightestCommonType(c.valueTypes)
+        val maybeCommonType = findTightestCommonTypeAndPromoteToString(c.valueTypes)
         maybeCommonType.map { commonType =>
           val castedBranches = c.branches.grouped(2).map {
             case Seq(when, value) if value.dataType != commonType =>
@@ -650,7 +663,8 @@ trait HiveTypeCoercion {
         }.getOrElse(c)
 
       case c: CaseKeyWhen if c.childrenResolved && !c.resolved =>
-        val maybeCommonType = findTightestCommonType((c.key +: c.whenList).map(_.dataType))
+        val maybeCommonType =
+          findTightestCommonTypeAndPromoteToString((c.key +: c.whenList).map(_.dataType))
         maybeCommonType.map { commonType =>
           val castedBranches = c.branches.grouped(2).map {
             case Seq(when, then) if when.dataType != commonType =>
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index a47cc30e92..1a6ee8169c 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -45,6 +45,16 @@ class SQLQuerySuite extends QueryTest with BeforeAndAfterAll with SQLTestUtils {
       Row("one", 6) :: Row("three", 3) :: Nil)
   }
 
+  test("SPARK-8010: promote numeric to string") {
+    val df = Seq((1, 1)).toDF("key", "value")
+    df.registerTempTable("src")
+    val queryCaseWhen = sql("select case when true then 1.0 else '1' end from src ")
+    val queryCoalesce = sql("select coalesce(null, 1, '1') from src ")
+
+    checkAnswer(queryCaseWhen, Row("1.0") :: Nil)
+    checkAnswer(queryCoalesce, Row("1") :: Nil)
+  }
+
   test("SPARK-6743: no columns from cache") {
     Seq(
       (83, 0, 38),
author	OopsOutOfMemory <victorshengli@126.com>	2015-06-17 13:37:59 -0700
committer	Michael Armbrust <michael@databricks.com>	2015-06-17 13:37:59 -0700
commit	98ee3512b26e87eeb22693a4a61b2c4981f38ca4 (patch)
tree	b6c978636f9b7ee8b8b2a9bd323cddca0303f07e /sql
parent	a46594435e0dd6cf86ac562bc11fc13d82b63c71 (diff)
download	spark-98ee3512b26e87eeb22693a4a61b2c4981f38ca4.tar.gz spark-98ee3512b26e87eeb22693a4a61b2c4981f38ca4.tar.bz2 spark-98ee3512b26e87eeb22693a4a61b2c4981f38ca4.zip