[SPARK-11447][SQL] change NullType to StringType during binaryComparison between NullType and StringType

While executing the PromoteStrings rule, if one side of a BinaryComparison is StringType and the other side is not StringType, the current code promotes (casts) the StringType side to DoubleType, and if the string does not contain a number, the cast yields a null value. So when it is doing <=> (NULL-safe equal) with Null, it will not filter anything out, which causes the problem reported in this JIRA. I propose the changes in this PR; could you review my code changes? This problem only happens for <=>; other operators work fine. scala> val filteredDF = df.filter(df("column") > (new Column(Literal(null)))) filteredDF: org.apache.spark.sql.DataFrame = [column: string] scala> filteredDF.show +------+ |column| +------+ +------+ scala> val filteredDF = df.filter(df("column") === (new Column(Literal(null)))) filteredDF: org.apache.spark.sql.DataFrame = [column: string] scala> filteredDF.show +------+ |column| +------+ +------+ scala> df.registerTempTable("DF") scala> sqlContext.sql("select * from DF where 'column' = NULL") res27: org.apache.spark.sql.DataFrame = [column: string] scala> res27.show +------+ |column| +------+ +------+ Author: Kevin Yu <qyu@us.ibm.com> Closes #9720 from kevinyu98/working_on_spark-11447.
author: Kevin Yu <qyu@us.ibm.com> 2015-11-16 22:54:29 -0800
committer: Yin Huai <yhuai@databricks.com> 2015-11-16 22:54:29 -0800
commit: e01865af0d5ebe11033de46c388c5c583876c187 (patch)
tree: 652dc7718b0c34b487743df9eeb68bb63a1226a5 /sql
parent: 75d202073143d5a7f943890d8682b5b0cf9e3092 (diff)
download: spark-e01865af0d5ebe11033de46c388c5c583876c187.tar.gz
spark-e01865af0d5ebe11033de46c388c5c583876c187.tar.bz2
spark-e01865af0d5ebe11033de46c388c5c583876c187.zip
2 files changed, 17 insertions, 0 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala
index 92188ee54f..f90fc3cc12 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala
@@ -281,6 +281,12 @@ object HiveTypeCoercion {
       case p @ BinaryComparison(left @ DateType(), right @ TimestampType()) =>
         p.makeCopy(Array(Cast(left, StringType), Cast(right, StringType)))
 
+      // Checking NullType
+      case p @ BinaryComparison(left @ StringType(), right @ NullType()) =>
+        p.makeCopy(Array(left, Literal.create(null, StringType)))
+      case p @ BinaryComparison(left @ NullType(), right @ StringType()) =>
+        p.makeCopy(Array(Literal.create(null, StringType), right))
+
       case p @ BinaryComparison(left @ StringType(), right) if right.dataType != StringType =>
         p.makeCopy(Array(Cast(left, DoubleType), right))
       case p @ BinaryComparison(left, right @ StringType()) if left.dataType != StringType =>
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala
index 3eae3f6d85..38c0eb589f 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala
@@ -368,6 +368,17 @@ class ColumnExpressionSuite extends QueryTest with SharedSQLContext {
     checkAnswer(
       nullData.filter($"a" <=> $"b"),
       Row(1, 1) :: Row(null, null) :: Nil)
+
+    val nullData2 = sqlContext.createDataFrame(sparkContext.parallelize(
+        Row("abc") ::
+        Row(null)  ::
+        Row("xyz") :: Nil),
+        StructType(Seq(StructField("a", StringType, true))))
+
+    checkAnswer(
+      nullData2.filter($"a" <=> null),
+      Row(null) :: Nil)
+
   }
 
   test(">") {
author	Kevin Yu <qyu@us.ibm.com>	2015-11-16 22:54:29 -0800
committer	Yin Huai <yhuai@databricks.com>	2015-11-16 22:54:29 -0800
commit	e01865af0d5ebe11033de46c388c5c583876c187 (patch)
tree	652dc7718b0c34b487743df9eeb68bb63a1226a5 /sql
parent	75d202073143d5a7f943890d8682b5b0cf9e3092 (diff)
download	spark-e01865af0d5ebe11033de46c388c5c583876c187.tar.gz spark-e01865af0d5ebe11033de46c388c5c583876c187.tar.bz2 spark-e01865af0d5ebe11033de46c388c5c583876c187.zip