aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Wenchen Fan <wenchen@databricks.com> 2016-10-25 12:08:17 -0700
committer Yin Huai <yhuai@databricks.com> 2016-10-25 12:08:17 -0700
commit a21791e3164f4e6546fbe0a90017a4394a05deb1 (patch)
tree f15bfd0026fc16fb87868ec943ed4eeddef37594
parent c5fe3dd4f59c464c830b414acccd3cca0fdd877c (diff)
download spark-a21791e3164f4e6546fbe0a90017a4394a05deb1.tar.gz
spark-a21791e3164f4e6546fbe0a90017a4394a05deb1.tar.bz2
spark-a21791e3164f4e6546fbe0a90017a4394a05deb1.zip
[SPARK-18070][SQL] binary operator should not consider nullability when comparing input types
## What changes were proposed in this pull request?

A binary operator requires its inputs to be of the same type, but that comparison should not take nullability into account; e.g. `EqualTo` should be able to compare an element-nullable array with an element-non-nullable array.

## How was this patch tested?

A regression test in `DataFrameSuite`.

Author: Wenchen Fan <wenchen@databricks.com>

Closes #15606 from cloud-fan/type-bug.
-rw-r--r-- sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala 2
-rw-r--r-- sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala 9
2 files changed, 10 insertions, 1 deletion
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala
index fa1a2ad56c..9edc1ceff2 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala
@@ -511,7 +511,7 @@ abstract class BinaryOperator extends BinaryExpression with ExpectsInputTypes {
override def checkInputDataTypes(): TypeCheckResult = {
// First check whether left and right have the same type, then check if the type is acceptable.
- if (left.dataType != right.dataType) {
+ if (!left.dataType.sameType(right.dataType)) {
TypeCheckResult.TypeCheckFailure(s"differing types in '$sql' " +
s"(${left.dataType.simpleString} and ${right.dataType.simpleString}).")
} else if (!inputType.acceptsType(left.dataType)) {
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
index 3fb7eeefba..33b3b78c9f 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
@@ -1649,4 +1649,13 @@ class DataFrameSuite extends QueryTest with SharedSQLContext {
dates.except(widenTypedRows).collect()
dates.intersect(widenTypedRows).collect()
}
+
+ test("SPARK-18070 binary operator should not consider nullability when comparing input types") {
+ val rows = Seq(Row(Seq(1), Seq(1)))
+ val schema = new StructType()
+ .add("array1", ArrayType(IntegerType))
+ .add("array2", ArrayType(IntegerType, containsNull = false))
+ val df = spark.createDataFrame(spark.sparkContext.makeRDD(rows), schema)
+ assert(df.filter($"array1" === $"array2").count() == 1)
+ }
}