aboutsummaryrefslogtreecommitdiff
path: root/sql/catalyst
diff options
context:
space:
mode:
author Cheng Hao <hao.cheng@intel.com> 2014-07-25 01:30:22 -0700
committer Michael Armbrust <michael@databricks.com> 2014-07-25 01:30:22 -0700
commit 184aa1c6c0ddf26b703bcabf55397ade17497465 (patch)
tree 10213ed590cd5b6d6a76dfe8b838466d8315c272 /sql/catalyst
parent eb82abd8e3d25c912fa75201cf4f429aab8d73c7 (diff)
download spark-184aa1c6c0ddf26b703bcabf55397ade17497465.tar.gz
spark-184aa1c6c0ddf26b703bcabf55397ade17497465.tar.bz2
spark-184aa1c6c0ddf26b703bcabf55397ade17497465.zip
[SPARK-2665] [SQL] Add EqualNS & Unit Tests
Hive supports the operator "<=>", which returns the same result as the EQUAL (=) operator for non-null operands, but returns TRUE if both operands are NULL and FALSE if exactly one of them is NULL.
Author: Cheng Hao <hao.cheng@intel.com>
Closes #1570 from chenghao-intel/equalns and squashes the following commits:
8d6c789 [Cheng Hao] Remove the test case orc_predicate_pushdown
5b2ca88 [Cheng Hao] Add cases into whitelist
8e66cdd [Cheng Hao] Rename the EqualNSTo ==> EqualNullSafe
7af4b0b [Cheng Hao] Add EqualNS & Unit Tests
Diffstat (limited to 'sql/catalyst')
-rw-r--r-- sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala 2
-rw-r--r-- sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala 1
-rw-r--r-- sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala 16
-rw-r--r-- sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala 2
-rw-r--r-- sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala 10
5 files changed, 30 insertions, 1 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala
index 9887856b9c..67a8ce9b88 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala
@@ -246,6 +246,8 @@ trait HiveTypeCoercion {
// No need to change other EqualTo operators as that actually makes sense for boolean types.
case e: EqualTo => e
+ // No need to change the EqualNullSafe operators either.
+ case e: EqualNullSafe => e
// Otherwise turn them to Byte types so that there exists an ordering.
case p: BinaryComparison
if p.left.dataType == BooleanType && p.right.dataType == BooleanType =>
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala
index 15c98efbca..5c8c810d91 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala
@@ -77,6 +77,7 @@ package object dsl {
def > (other: Expression) = GreaterThan(expr, other)
def >= (other: Expression) = GreaterThanOrEqual(expr, other)
def === (other: Expression) = EqualTo(expr, other)
+ def <=> (other: Expression) = EqualNullSafe(expr, other)
def !== (other: Expression) = Not(EqualTo(expr, other))
def in(list: Expression*) = In(expr, list)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala
index b63406b94a..06b94a98d3 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala
@@ -153,6 +153,22 @@ case class EqualTo(left: Expression, right: Expression) extends BinaryComparison
}
}
+case class EqualNullSafe(left: Expression, right: Expression) extends BinaryComparison {
+ def symbol = "<=>"
+ override def nullable = false
+ override def eval(input: Row): Any = {
+ val l = left.eval(input)
+ val r = right.eval(input)
+ if (l == null && r == null) {
+ true
+ } else if (l == null || r == null) {
+ false
+ } else {
+ l == r
+ }
+ }
+}
+
case class LessThan(left: Expression, right: Expression) extends BinaryComparison {
def symbol = "<"
override def eval(input: Row): Any = c2(input, left, right, _.lt(_, _))
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
index c65987b712..5f86d6047c 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
@@ -153,6 +153,8 @@ object NullPropagation extends Rule[LogicalPlan] {
case e @ GetItem(Literal(null, _), _) => Literal(null, e.dataType)
case e @ GetItem(_, Literal(null, _)) => Literal(null, e.dataType)
case e @ GetField(Literal(null, _), _) => Literal(null, e.dataType)
+ case e @ EqualNullSafe(Literal(null, _), r) => IsNull(r)
+ case e @ EqualNullSafe(l, Literal(null, _)) => IsNull(l)
// For Coalesce, remove null literals.
case e @ Coalesce(children) =>
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala
index c3f5c26fdb..58f8c341e6 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala
@@ -451,11 +451,13 @@ class ExpressionEvaluationSuite extends FunSuite {
}
test("BinaryComparison") {
- val row = new GenericRow(Array[Any](1, 2, 3, null))
+ val row = new GenericRow(Array[Any](1, 2, 3, null, 3, null))
val c1 = 'a.int.at(0)
val c2 = 'a.int.at(1)
val c3 = 'a.int.at(2)
val c4 = 'a.int.at(3)
+ val c5 = 'a.int.at(4)
+ val c6 = 'a.int.at(5)
checkEvaluation(LessThan(c1, c4), null, row)
checkEvaluation(LessThan(c1, c2), true, row)
@@ -469,6 +471,12 @@ class ExpressionEvaluationSuite extends FunSuite {
checkEvaluation(c1 >= c2, false, row)
checkEvaluation(c1 === c2, false, row)
checkEvaluation(c1 !== c2, true, row)
+ checkEvaluation(c4 <=> c1, false, row)
+ checkEvaluation(c1 <=> c4, false, row)
+ checkEvaluation(c4 <=> c6, true, row)
+ checkEvaluation(c3 <=> c5, true, row)
+ checkEvaluation(Literal(true) <=> Literal(null, BooleanType), false, row)
+ checkEvaluation(Literal(null, BooleanType) <=> Literal(true), false, row)
}
test("StringComparison") {