aboutsummaryrefslogtreecommitdiff
path: root/sql
diff options
context:
space:
mode:
authorKevin Yu <qyu@us.ibm.com>2015-11-16 22:54:29 -0800
committerYin Huai <yhuai@databricks.com>2015-11-16 22:54:29 -0800
commite01865af0d5ebe11033de46c388c5c583876c187 (patch)
tree652dc7718b0c34b487743df9eeb68bb63a1226a5 /sql
parent75d202073143d5a7f943890d8682b5b0cf9e3092 (diff)
downloadspark-e01865af0d5ebe11033de46c388c5c583876c187.tar.gz
spark-e01865af0d5ebe11033de46c388c5c583876c187.tar.bz2
spark-e01865af0d5ebe11033de46c388c5c583876c187.zip
[SPARK-11447][SQL] change NullType to StringType during binaryComparison between NullType and StringType
While executing the PromoteStrings rule, if one side of a BinaryComparison is StringType and the other side is not StringType, the current code promotes (casts) the StringType side to DoubleType, and if the string does not contain a number, the cast yields a null value. So when the comparison is <=> (NULL-safe equal) against NULL, it does not filter anything, which causes the problem reported in this JIRA. I propose the changes in this PR; can you review my code changes? This problem only happens for <=>; other operators work fine. scala> val filteredDF = df.filter(df("column") > (new Column(Literal(null)))) filteredDF: org.apache.spark.sql.DataFrame = [column: string] scala> filteredDF.show +------+ |column| +------+ +------+ scala> val filteredDF = df.filter(df("column") === (new Column(Literal(null)))) filteredDF: org.apache.spark.sql.DataFrame = [column: string] scala> filteredDF.show +------+ |column| +------+ +------+ scala> df.registerTempTable("DF") scala> sqlContext.sql("select * from DF where 'column' = NULL") res27: org.apache.spark.sql.DataFrame = [column: string] scala> res27.show +------+ |column| +------+ +------+ Author: Kevin Yu <qyu@us.ibm.com> Closes #9720 from kevinyu98/working_on_spark-11447.
Diffstat (limited to 'sql')
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala6
-rw-r--r--sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala11
2 files changed, 17 insertions, 0 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala
index 92188ee54f..f90fc3cc12 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala
@@ -281,6 +281,12 @@ object HiveTypeCoercion {
case p @ BinaryComparison(left @ DateType(), right @ TimestampType()) =>
p.makeCopy(Array(Cast(left, StringType), Cast(right, StringType)))
+ // Checking NullType
+ case p @ BinaryComparison(left @ StringType(), right @ NullType()) =>
+ p.makeCopy(Array(left, Literal.create(null, StringType)))
+ case p @ BinaryComparison(left @ NullType(), right @ StringType()) =>
+ p.makeCopy(Array(Literal.create(null, StringType), right))
+
case p @ BinaryComparison(left @ StringType(), right) if right.dataType != StringType =>
p.makeCopy(Array(Cast(left, DoubleType), right))
case p @ BinaryComparison(left, right @ StringType()) if left.dataType != StringType =>
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala
index 3eae3f6d85..38c0eb589f 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala
@@ -368,6 +368,17 @@ class ColumnExpressionSuite extends QueryTest with SharedSQLContext {
checkAnswer(
nullData.filter($"a" <=> $"b"),
Row(1, 1) :: Row(null, null) :: Nil)
+
+ val nullData2 = sqlContext.createDataFrame(sparkContext.parallelize(
+ Row("abc") ::
+ Row(null) ::
+ Row("xyz") :: Nil),
+ StructType(Seq(StructField("a", StringType, true))))
+
+ checkAnswer(
+ nullData2.filter($"a" <=> null),
+ Row(null) :: Nil)
+
}
test(">") {