diff options
author | Cheng Hao <hao.cheng@intel.com> | 2014-07-25 01:30:22 -0700 |
---|---|---|
committer | Michael Armbrust <michael@databricks.com> | 2014-07-25 01:30:22 -0700 |
commit | 184aa1c6c0ddf26b703bcabf55397ade17497465 (patch) | |
tree | 10213ed590cd5b6d6a76dfe8b838466d8315c272 /sql/catalyst | |
parent | eb82abd8e3d25c912fa75201cf4f429aab8d73c7 (diff) | |
download | spark-184aa1c6c0ddf26b703bcabf55397ade17497465.tar.gz spark-184aa1c6c0ddf26b703bcabf55397ade17497465.tar.bz2 spark-184aa1c6c0ddf26b703bcabf55397ade17497465.zip |
[SPARK-2665] [SQL] Add EqualNS & Unit Tests
Hive Supports the operator "<=>", which returns same result with EQUAL(=) operator for non-null operands, but returns TRUE if both are NULL, FALSE if one of the them is NULL.
Author: Cheng Hao <hao.cheng@intel.com>
Closes #1570 from chenghao-intel/equalns and squashes the following commits:
8d6c789 [Cheng Hao] Remove the test case orc_predicate_pushdown
5b2ca88 [Cheng Hao] Add cases into whitelist
8e66cdd [Cheng Hao] Rename the EqualNSTo ==> EqualNullSafe
7af4b0b [Cheng Hao] Add EqualNS & Unit Tests
Diffstat (limited to 'sql/catalyst')
5 files changed, 30 insertions, 1 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala index 9887856b9c..67a8ce9b88 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala @@ -246,6 +246,8 @@ trait HiveTypeCoercion { // No need to change other EqualTo operators as that actually makes sense for boolean types. case e: EqualTo => e + // No need to change the EqualNullSafe operators, too + case e: EqualNullSafe => e // Otherwise turn them to Byte types so that there exists and ordering. case p: BinaryComparison if p.left.dataType == BooleanType && p.right.dataType == BooleanType => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala index 15c98efbca..5c8c810d91 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala @@ -77,6 +77,7 @@ package object dsl { def > (other: Expression) = GreaterThan(expr, other) def >= (other: Expression) = GreaterThanOrEqual(expr, other) def === (other: Expression) = EqualTo(expr, other) + def <=> (other: Expression) = EqualNullSafe(expr, other) def !== (other: Expression) = Not(EqualTo(expr, other)) def in(list: Expression*) = In(expr, list) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala index b63406b94a..06b94a98d3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala @@ -153,6 +153,22 @@ case class EqualTo(left: Expression, right: Expression) extends BinaryComparison } } +case class EqualNullSafe(left: Expression, right: Expression) extends BinaryComparison { + def symbol = "<=>" + override def nullable = false + override def eval(input: Row): Any = { + val l = left.eval(input) + val r = right.eval(input) + if (l == null && r == null) { + true + } else if (l == null || r == null) { + false + } else { + l == r + } + } +} + case class LessThan(left: Expression, right: Expression) extends BinaryComparison { def symbol = "<" override def eval(input: Row): Any = c2(input, left, right, _.lt(_, _)) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index c65987b712..5f86d6047c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -153,6 +153,8 @@ object NullPropagation extends Rule[LogicalPlan] { case e @ GetItem(Literal(null, _), _) => Literal(null, e.dataType) case e @ GetItem(_, Literal(null, _)) => Literal(null, e.dataType) case e @ GetField(Literal(null, _), _) => Literal(null, e.dataType) + case e @ EqualNullSafe(Literal(null, _), r) => IsNull(r) + case e @ EqualNullSafe(l, Literal(null, _)) => IsNull(l) // For Coalesce, remove null literals. case e @ Coalesce(children) => diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala index c3f5c26fdb..58f8c341e6 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala @@ -451,11 +451,13 @@ class ExpressionEvaluationSuite extends FunSuite { } test("BinaryComparison") { - val row = new GenericRow(Array[Any](1, 2, 3, null)) + val row = new GenericRow(Array[Any](1, 2, 3, null, 3, null)) val c1 = 'a.int.at(0) val c2 = 'a.int.at(1) val c3 = 'a.int.at(2) val c4 = 'a.int.at(3) + val c5 = 'a.int.at(4) + val c6 = 'a.int.at(5) checkEvaluation(LessThan(c1, c4), null, row) checkEvaluation(LessThan(c1, c2), true, row) @@ -469,6 +471,12 @@ class ExpressionEvaluationSuite extends FunSuite { checkEvaluation(c1 >= c2, false, row) checkEvaluation(c1 === c2, false, row) checkEvaluation(c1 !== c2, true, row) + checkEvaluation(c4 <=> c1, false, row) + checkEvaluation(c1 <=> c4, false, row) + checkEvaluation(c4 <=> c6, true, row) + checkEvaluation(c3 <=> c5, true, row) + checkEvaluation(Literal(true) <=> Literal(null, BooleanType), false, row) + checkEvaluation(Literal(null, BooleanType) <=> Literal(true), false, row) } test("StringComparison") { |