aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorpetermaxlee <petermaxlee@gmail.com>2016-08-18 13:44:13 +0200
committerHerman van Hovell <hvanhovell@databricks.com>2016-08-18 13:44:13 +0200
commit68f5087d2107d6afec5d5745f0cb0e9e3bdd6a0b (patch)
treebf354d1a234d37e7458451000778ca9f474d07a7
parent412dba63b511474a6db3c43c8618d803e604bc6b (diff)
downloadspark-68f5087d2107d6afec5d5745f0cb0e9e3bdd6a0b.tar.gz
spark-68f5087d2107d6afec5d5745f0cb0e9e3bdd6a0b.tar.bz2
spark-68f5087d2107d6afec5d5745f0cb0e9e3bdd6a0b.zip
[SPARK-17117][SQL] 1 / NULL should not fail analysis
## What changes were proposed in this pull request?

This patch fixes the problem described in SPARK-17117, i.e. "SELECT 1 / NULL" throws an analysis exception:

```
org.apache.spark.sql.AnalysisException: cannot resolve '(1 / NULL)' due to data type mismatch: differing types in '(1 / NULL)' (int and null).
```

The problem is that division type coercion did not take null type into account.

## How was this patch tested?

A unit test for the type coercion, and a few end-to-end test cases using SQLQueryTestSuite.

Author: petermaxlee <petermaxlee@gmail.com>

Closes #14695 from petermaxlee/SPARK-17117.
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala7
-rw-r--r--sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala9
-rw-r--r--sql/core/src/test/resources/sql-tests/inputs/arithmetic.sql12
-rw-r--r--sql/core/src/test/resources/sql-tests/results/arithmetic.sql.out84
4 files changed, 89 insertions, 23 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
index 021952e716..21e96aaf53 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
@@ -543,11 +543,14 @@ object TypeCoercion {
// Decimal and Double remain the same
case d: Divide if d.dataType == DoubleType => d
case d: Divide if d.dataType.isInstanceOf[DecimalType] => d
- case Divide(left, right) if isNumeric(left) && isNumeric(right) =>
+ case Divide(left, right) if isNumericOrNull(left) && isNumericOrNull(right) =>
Divide(Cast(left, DoubleType), Cast(right, DoubleType))
}
- private def isNumeric(ex: Expression): Boolean = ex.dataType.isInstanceOf[NumericType]
+ private def isNumericOrNull(ex: Expression): Boolean = {
+ // We need to handle null types in case a query contains null literals.
+ ex.dataType.isInstanceOf[NumericType] || ex.dataType == NullType
+ }
}
/**
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala
index a13c45fe2f..9560563a8c 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala
@@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.analysis
import java.sql.Timestamp
-import org.apache.spark.sql.catalyst.analysis.TypeCoercion.{Division, FunctionArgumentConversion}
+import org.apache.spark.sql.catalyst.analysis.TypeCoercion._
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.plans.PlanTest
@@ -730,6 +730,13 @@ class TypeCoercionSuite extends PlanTest {
// the right expression to Decimal.
ruleTest(rules, sum(Divide(Decimal(4.0), 3)), sum(Divide(Decimal(4.0), 3)))
}
+
+ test("SPARK-17117 null type coercion in divide") {
+ val rules = Seq(FunctionArgumentConversion, Division, ImplicitTypeCasts)
+ val nullLit = Literal.create(null, NullType)
+ ruleTest(rules, Divide(1L, nullLit), Divide(Cast(1L, DoubleType), Cast(nullLit, DoubleType)))
+ ruleTest(rules, Divide(nullLit, 1L), Divide(Cast(nullLit, DoubleType), Cast(1L, DoubleType)))
+ }
}
diff --git a/sql/core/src/test/resources/sql-tests/inputs/arithmetic.sql b/sql/core/src/test/resources/sql-tests/inputs/arithmetic.sql
index cbe40410cd..f62b10ca00 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/arithmetic.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/arithmetic.sql
@@ -16,11 +16,19 @@ select + + 100;
select - - max(key) from testdata;
select + - key from testdata where key = 33;
+-- div
+select 5 / 2;
+select 5 / 0;
+select 5 / null;
+select null / 5;
+select 5 div 2;
+select 5 div 0;
+select 5 div null;
+select null div 5;
+
-- other arithmetics
select 1 + 2;
select 1 - 2;
select 2 * 5;
-select 5 / 2;
-select 5 div 2;
select 5 % 3;
select pmod(-7, 3);
diff --git a/sql/core/src/test/resources/sql-tests/results/arithmetic.sql.out b/sql/core/src/test/resources/sql-tests/results/arithmetic.sql.out
index f2b40a00d0..6abe048af4 100644
--- a/sql/core/src/test/resources/sql-tests/results/arithmetic.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/arithmetic.sql.out
@@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
--- Number of queries: 22
+-- Number of queries: 28
-- !query 0
@@ -123,35 +123,35 @@ struct<(- key):int>
-- !query 15
-select 1 + 2
+select 5 / 2
-- !query 15 schema
-struct<(1 + 2):int>
+struct<(CAST(5 AS DOUBLE) / CAST(2 AS DOUBLE)):double>
-- !query 15 output
-3
+2.5
-- !query 16
-select 1 - 2
+select 5 / 0
-- !query 16 schema
-struct<(1 - 2):int>
+struct<(CAST(5 AS DOUBLE) / CAST(0 AS DOUBLE)):double>
-- !query 16 output
--1
+NULL
-- !query 17
-select 2 * 5
+select 5 / null
-- !query 17 schema
-struct<(2 * 5):int>
+struct<(CAST(5 AS DOUBLE) / CAST(NULL AS DOUBLE)):double>
-- !query 17 output
-10
+NULL
-- !query 18
-select 5 / 2
+select null / 5
-- !query 18 schema
-struct<(CAST(5 AS DOUBLE) / CAST(2 AS DOUBLE)):double>
+struct<(CAST(NULL AS DOUBLE) / CAST(5 AS DOUBLE)):double>
-- !query 18 output
-2.5
+NULL
-- !query 19
@@ -163,16 +163,64 @@ struct<CAST((CAST(5 AS DOUBLE) / CAST(2 AS DOUBLE)) AS BIGINT):bigint>
-- !query 20
-select 5 % 3
+select 5 div 0
-- !query 20 schema
-struct<(5 % 3):int>
+struct<CAST((CAST(5 AS DOUBLE) / CAST(0 AS DOUBLE)) AS BIGINT):bigint>
-- !query 20 output
-2
+NULL
-- !query 21
-select pmod(-7, 3)
+select 5 div null
-- !query 21 schema
-struct<pmod(-7, 3):int>
+struct<CAST((CAST(5 AS DOUBLE) / CAST(NULL AS DOUBLE)) AS BIGINT):bigint>
-- !query 21 output
+NULL
+
+
+-- !query 22
+select null div 5
+-- !query 22 schema
+struct<CAST((CAST(NULL AS DOUBLE) / CAST(5 AS DOUBLE)) AS BIGINT):bigint>
+-- !query 22 output
+NULL
+
+
+-- !query 23
+select 1 + 2
+-- !query 23 schema
+struct<(1 + 2):int>
+-- !query 23 output
+3
+
+
+-- !query 24
+select 1 - 2
+-- !query 24 schema
+struct<(1 - 2):int>
+-- !query 24 output
+-1
+
+
+-- !query 25
+select 2 * 5
+-- !query 25 schema
+struct<(2 * 5):int>
+-- !query 25 output
+10
+
+
+-- !query 26
+select 5 % 3
+-- !query 26 schema
+struct<(5 % 3):int>
+-- !query 26 output
+2
+
+
+-- !query 27
+select pmod(-7, 3)
+-- !query 27 schema
+struct<pmod(-7, 3):int>
+-- !query 27 output
2