diff options
author | Wenchen Fan <wenchen@databricks.com> | 2016-10-25 12:08:17 -0700 |
---|---|---|
committer | Yin Huai <yhuai@databricks.com> | 2016-10-25 12:08:17 -0700 |
commit | a21791e3164f4e6546fbe0a90017a4394a05deb1 (patch) | |
tree | f15bfd0026fc16fb87868ec943ed4eeddef37594 | |
parent | c5fe3dd4f59c464c830b414acccd3cca0fdd877c (diff) | |
download | spark-a21791e3164f4e6546fbe0a90017a4394a05deb1.tar.gz spark-a21791e3164f4e6546fbe0a90017a4394a05deb1.tar.bz2 spark-a21791e3164f4e6546fbe0a90017a4394a05deb1.zip |
[SPARK-18070][SQL] binary operator should not consider nullability when comparing input types
## What changes were proposed in this pull request?
Binary operator requires its inputs to be of same type, but it should not consider nullability, e.g. `EqualTo` should be able to compare an element-nullable array and an element-non-nullable array.
## How was this patch tested?
a regression test in `DataFrameSuite`
Author: Wenchen Fan <wenchen@databricks.com>
Closes #15606 from cloud-fan/type-bug.
-rw-r--r-- | sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala | 2 | ||||
-rw-r--r-- | sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala | 9 |
2 files changed, 10 insertions, 1 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala index fa1a2ad56c..9edc1ceff2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala @@ -511,7 +511,7 @@ abstract class BinaryOperator extends BinaryExpression with ExpectsInputTypes { override def checkInputDataTypes(): TypeCheckResult = { // First check whether left and right have the same type, then check if the type is acceptable. - if (left.dataType != right.dataType) { + if (!left.dataType.sameType(right.dataType)) { TypeCheckResult.TypeCheckFailure(s"differing types in '$sql' " + s"(${left.dataType.simpleString} and ${right.dataType.simpleString}).") } else if (!inputType.acceptsType(left.dataType)) { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala index 3fb7eeefba..33b3b78c9f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala @@ -1649,4 +1649,13 @@ class DataFrameSuite extends QueryTest with SharedSQLContext { dates.except(widenTypedRows).collect() dates.intersect(widenTypedRows).collect() } + + test("SPARK-18070 binary operator should not consider nullability when comparing input types") { + val rows = Seq(Row(Seq(1), Seq(1))) + val schema = new StructType() + .add("array1", ArrayType(IntegerType)) + .add("array2", ArrayType(IntegerType, containsNull = false)) + val df = spark.createDataFrame(spark.sparkContext.makeRDD(rows), schema) + assert(df.filter($"array1" === $"array2").count() == 1) + } } |