diff options
author | petermaxlee <petermaxlee@gmail.com> | 2016-08-18 13:44:13 +0200 |
---|---|---|
committer | Herman van Hovell <hvanhovell@databricks.com> | 2016-08-18 13:44:13 +0200 |
commit | 68f5087d2107d6afec5d5745f0cb0e9e3bdd6a0b (patch) | |
tree | bf354d1a234d37e7458451000778ca9f474d07a7 | |
parent | 412dba63b511474a6db3c43c8618d803e604bc6b (diff) | |
download | spark-68f5087d2107d6afec5d5745f0cb0e9e3bdd6a0b.tar.gz spark-68f5087d2107d6afec5d5745f0cb0e9e3bdd6a0b.tar.bz2 spark-68f5087d2107d6afec5d5745f0cb0e9e3bdd6a0b.zip |
[SPARK-17117][SQL] 1 / NULL should not fail analysis
## What changes were proposed in this pull request?
This patch fixes the problem described in SPARK-17117, i.e. "SELECT 1 / NULL" throws an analysis exception:
```
org.apache.spark.sql.AnalysisException: cannot resolve '(1 / NULL)' due to data type mismatch: differing types in '(1 / NULL)' (int and null).
```
The problem is that division type coercion did not take null type into account.
## How was this patch tested?
A unit test for the type coercion, and a few end-to-end test cases using SQLQueryTestSuite.
Author: petermaxlee <petermaxlee@gmail.com>
Closes #14695 from petermaxlee/SPARK-17117.
4 files changed, 89 insertions, 23 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala index 021952e716..21e96aaf53 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala @@ -543,11 +543,14 @@ object TypeCoercion { // Decimal and Double remain the same case d: Divide if d.dataType == DoubleType => d case d: Divide if d.dataType.isInstanceOf[DecimalType] => d - case Divide(left, right) if isNumeric(left) && isNumeric(right) => + case Divide(left, right) if isNumericOrNull(left) && isNumericOrNull(right) => Divide(Cast(left, DoubleType), Cast(right, DoubleType)) } - private def isNumeric(ex: Expression): Boolean = ex.dataType.isInstanceOf[NumericType] + private def isNumericOrNull(ex: Expression): Boolean = { + // We need to handle null types in case a query contains null literals. + ex.dataType.isInstanceOf[NumericType] || ex.dataType == NullType + } } /** diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala index a13c45fe2f..9560563a8c 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.analysis import java.sql.Timestamp -import org.apache.spark.sql.catalyst.analysis.TypeCoercion.{Division, FunctionArgumentConversion} +import org.apache.spark.sql.catalyst.analysis.TypeCoercion._ import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.PlanTest @@ -730,6 +730,13 @@ class TypeCoercionSuite extends PlanTest { // the right expression to Decimal. ruleTest(rules, sum(Divide(Decimal(4.0), 3)), sum(Divide(Decimal(4.0), 3))) } + + test("SPARK-17117 null type coercion in divide") { + val rules = Seq(FunctionArgumentConversion, Division, ImplicitTypeCasts) + val nullLit = Literal.create(null, NullType) + ruleTest(rules, Divide(1L, nullLit), Divide(Cast(1L, DoubleType), Cast(nullLit, DoubleType))) + ruleTest(rules, Divide(nullLit, 1L), Divide(Cast(nullLit, DoubleType), Cast(1L, DoubleType))) + } } diff --git a/sql/core/src/test/resources/sql-tests/inputs/arithmetic.sql b/sql/core/src/test/resources/sql-tests/inputs/arithmetic.sql index cbe40410cd..f62b10ca00 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/arithmetic.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/arithmetic.sql @@ -16,11 +16,19 @@ select + + 100; select - - max(key) from testdata; select + - key from testdata where key = 33; +-- div +select 5 / 2; +select 5 / 0; +select 5 / null; +select null / 5; +select 5 div 2; +select 5 div 0; +select 5 div null; +select null div 5; + -- other arithmetics select 1 + 2; select 1 - 2; select 2 * 5; -select 5 / 2; -select 5 div 2; select 5 % 3; select pmod(-7, 3); diff --git a/sql/core/src/test/resources/sql-tests/results/arithmetic.sql.out b/sql/core/src/test/resources/sql-tests/results/arithmetic.sql.out index f2b40a00d0..6abe048af4 100644 --- a/sql/core/src/test/resources/sql-tests/results/arithmetic.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/arithmetic.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 22 +-- Number of queries: 28 -- !query 0 @@ -123,35 +123,35 @@ struct<(- key):int> -- !query 15 -select 1 + 2 +select 5 / 2 -- !query 15 schema -struct<(1 + 2):int> +struct<(CAST(5 AS DOUBLE) / CAST(2 AS DOUBLE)):double> -- !query 15 output -3 +2.5 -- !query 16 -select 1 - 2 +select 5 / 0 -- !query 16 schema -struct<(1 - 2):int> +struct<(CAST(5 AS DOUBLE) / CAST(0 AS DOUBLE)):double> -- !query 16 output --1 +NULL -- !query 17 -select 2 * 5 +select 5 / null -- !query 17 schema -struct<(2 * 5):int> +struct<(CAST(5 AS DOUBLE) / CAST(NULL AS DOUBLE)):double> -- !query 17 output -10 +NULL -- !query 18 -select 5 / 2 +select null / 5 -- !query 18 schema -struct<(CAST(5 AS DOUBLE) / CAST(2 AS DOUBLE)):double> +struct<(CAST(NULL AS DOUBLE) / CAST(5 AS DOUBLE)):double> -- !query 18 output -2.5 +NULL -- !query 19 @@ -163,16 +163,64 @@ struct<CAST((CAST(5 AS DOUBLE) / CAST(2 AS DOUBLE)) AS BIGINT):bigint> -- !query 20 -select 5 % 3 +select 5 div 0 -- !query 20 schema -struct<(5 % 3):int> +struct<CAST((CAST(5 AS DOUBLE) / CAST(0 AS DOUBLE)) AS BIGINT):bigint> -- !query 20 output -2 +NULL -- !query 21 -select pmod(-7, 3) +select 5 div null -- !query 21 schema -struct<pmod(-7, 3):int> +struct<CAST((CAST(5 AS DOUBLE) / CAST(NULL AS DOUBLE)) AS BIGINT):bigint> -- !query 21 output +NULL + + +-- !query 22 +select null div 5 +-- !query 22 schema +struct<CAST((CAST(NULL AS DOUBLE) / CAST(5 AS DOUBLE)) AS BIGINT):bigint> +-- !query 22 output +NULL + + +-- !query 23 +select 1 + 2 +-- !query 23 schema +struct<(1 + 2):int> +-- !query 23 output +3 + + +-- !query 24 +select 1 - 2 +-- !query 24 schema +struct<(1 - 2):int> +-- !query 24 output +-1 + + +-- !query 25 +select 2 * 5 +-- !query 25 schema +struct<(2 * 5):int> +-- !query 25 output +10 + + +-- !query 26 +select 5 % 3 +-- !query 26 schema +struct<(5 % 3):int> +-- !query 26 output +2 + + +-- !query 27 +select pmod(-7, 3) +-- !query 27 schema +struct<pmod(-7, 3):int> +-- !query 27 output 2 |