From 7594b3f94388a04bfe10743855ef0cb810e482d3 Mon Sep 17 00:00:00 2001 From: Reynold Xin Date: Fri, 20 Jun 2014 00:34:59 -0700 Subject: [SPARK-2218] rename Equals to EqualTo in Spark SQL expressions. Due to the existence of scala.Equals, it is very error prone to name the expression Equals, especially because we use a lot of partial functions and pattern matching in the optimizer. Note that this sits on top of #1144. Author: Reynold Xin Closes #1146 from rxin/equals and squashes the following commits: f8583fd [Reynold Xin] Merge branch 'master' of github.com:apache/spark into equals 326b388 [Reynold Xin] Merge branch 'master' of github.com:apache/spark into equals bd19807 [Reynold Xin] Rename EqualsTo to EqualTo. 81148d1 [Reynold Xin] [SPARK-2218] rename Equals to EqualsTo in Spark SQL expressions. c4e543d [Reynold Xin] [SPARK-2210] boolean cast on boolean value should be removed. (cherry picked from commit 2f6a835e1a039a0b1ba6e184b3350444b70f91df) Signed-off-by: Reynold Xin --- .../src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala | 6 +++--- .../apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala | 9 ++++++--- .../main/scala/org/apache/spark/sql/catalyst/dsl/package.scala | 6 +++--- .../org/apache/spark/sql/catalyst/expressions/package.scala | 2 +- .../org/apache/spark/sql/catalyst/expressions/predicates.scala | 4 ++-- .../scala/org/apache/spark/sql/catalyst/planning/patterns.scala | 6 +++--- .../spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala | 4 ++-- 7 files changed, 20 insertions(+), 17 deletions(-) (limited to 'sql/catalyst') diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala index 2ad2d04af5..0cc4592047 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala @@ -258,13 +258,13 @@ class SqlParser extends StandardTokenParsers with PackratParsers { comparisonExpression * (AND ^^^ { (e1: Expression, e2: Expression) => And(e1,e2) }) protected lazy val comparisonExpression: Parser[Expression] = - termExpression ~ "=" ~ termExpression ^^ { case e1 ~ _ ~ e2 => Equals(e1, e2) } | + termExpression ~ "=" ~ termExpression ^^ { case e1 ~ _ ~ e2 => EqualTo(e1, e2) } | termExpression ~ "<" ~ termExpression ^^ { case e1 ~ _ ~ e2 => LessThan(e1, e2) } | termExpression ~ "<=" ~ termExpression ^^ { case e1 ~ _ ~ e2 => LessThanOrEqual(e1, e2) } | termExpression ~ ">" ~ termExpression ^^ { case e1 ~ _ ~ e2 => GreaterThan(e1, e2) } | termExpression ~ ">=" ~ termExpression ^^ { case e1 ~ _ ~ e2 => GreaterThanOrEqual(e1, e2) } | - termExpression ~ "!=" ~ termExpression ^^ { case e1 ~ _ ~ e2 => Not(Equals(e1, e2)) } | - termExpression ~ "<>" ~ termExpression ^^ { case e1 ~ _ ~ e2 => Not(Equals(e1, e2)) } | + termExpression ~ "!=" ~ termExpression ^^ { case e1 ~ _ ~ e2 => Not(EqualTo(e1, e2)) } | + termExpression ~ "<>" ~ termExpression ^^ { case e1 ~ _ ~ e2 => Not(EqualTo(e1, e2)) } | termExpression ~ RLIKE ~ termExpression ^^ { case e1 ~ _ ~ e2 => RLike(e1, e2) } | termExpression ~ REGEXP ~ termExpression ^^ { case e1 ~ _ ~ e2 => RLike(e1, e2) } | termExpression ~ LIKE ~ termExpression ^^ { case e1 ~ _ ~ e2 => Like(e1, e2) } | diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala index c0714bcdd0..76ddeba9cb 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala @@ -234,8 +234,8 @@ trait HiveTypeCoercion { def apply(plan: LogicalPlan): LogicalPlan = plan transformAllExpressions { // Skip nodes who's children have not been resolved yet. case e if !e.childrenResolved => e - // No need to change Equals operators as that actually makes sense for boolean types. - case e: Equals => e + // No need to change EqualTo operators as that actually makes sense for boolean types. + case e: EqualTo => e // Otherwise turn them to Byte types so that there exists and ordering. case p: BinaryComparison if p.left.dataType == BooleanType && p.right.dataType == BooleanType => @@ -254,7 +254,10 @@ trait HiveTypeCoercion { // Skip if the type is boolean type already. Note that this extra cast should be removed // by optimizer.SimplifyCasts. case Cast(e, BooleanType) if e.dataType == BooleanType => e - case Cast(e, BooleanType) => Not(Equals(e, Literal(0))) + // If the data type is not boolean and is being cast boolean, turn it into a comparison + // with the numeric value, i.e. x != 0. This will coerce the type into numeric type. + case Cast(e, BooleanType) if e.dataType != BooleanType => Not(EqualTo(e, Literal(0))) + // Turn true into 1, and false into 0 if casting boolean into other types. case Cast(e, dataType) if e.dataType == BooleanType => Cast(If(e, Literal(1), Literal(0)), dataType) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala index d177339d40..26ad4837b0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala @@ -44,7 +44,7 @@ import org.apache.spark.sql.catalyst.types._ * * // These unresolved attributes can be used to create more complicated expressions. * scala> 'a === 'b - * res2: org.apache.spark.sql.catalyst.expressions.Equals = ('a = 'b) + * res2: org.apache.spark.sql.catalyst.expressions.EqualTo = ('a = 'b) * * // SQL verbs can be used to construct logical query plans. * scala> import org.apache.spark.sql.catalyst.plans.logical._ @@ -76,8 +76,8 @@ package object dsl { def <= (other: Expression) = LessThanOrEqual(expr, other) def > (other: Expression) = GreaterThan(expr, other) def >= (other: Expression) = GreaterThanOrEqual(expr, other) - def === (other: Expression) = Equals(expr, other) - def !== (other: Expression) = Not(Equals(expr, other)) + def === (other: Expression) = EqualTo(expr, other) + def !== (other: Expression) = Not(EqualTo(expr, other)) def like(other: Expression) = Like(expr, other) def rlike(other: Expression) = RLike(expr, other) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala index 573ec052f4..b6f2451b52 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala @@ -24,7 +24,7 @@ package org.apache.spark.sql.catalyst * expression, a [[NamedExpression]] in addition to the standard collection of expressions. * * ==Standard Expressions== - * A library of standard expressions (e.g., [[Add]], [[Equals]]), aggregates (e.g., SUM, COUNT), + * A library of standard expressions (e.g., [[Add]], [[EqualTo]]), aggregates (e.g., SUM, COUNT), * and other computations (e.g. UDFs). Each expression type is capable of determining its output * schema as a function of its children's output schema. * diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala index 2718d43646..b63406b94a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala @@ -52,7 +52,7 @@ trait PredicateHelper { * * For example consider a join between two relations R(a, b) and S(c, d). * - * `canEvaluate(Equals(a,b), R)` returns `true` where as `canEvaluate(Equals(a,c), R)` returns + * `canEvaluate(EqualTo(a,b), R)` returns `true` where as `canEvaluate(EqualTo(a,c), R)` returns * `false`. */ protected def canEvaluate(expr: Expression, plan: LogicalPlan): Boolean = @@ -140,7 +140,7 @@ abstract class BinaryComparison extends BinaryPredicate { self: Product => } -case class Equals(left: Expression, right: Expression) extends BinaryComparison { +case class EqualTo(left: Expression, right: Expression) extends BinaryComparison { def symbol = "=" override def eval(input: Row): Any = { val l = left.eval(input) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/patterns.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/patterns.scala index 820ecfb78b..a43bef389c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/patterns.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/patterns.scala @@ -136,14 +136,14 @@ object HashFilteredJoin extends Logging with PredicateHelper { val Join(left, right, joinType, _) = join val (joinPredicates, otherPredicates) = allPredicates.flatMap(splitConjunctivePredicates).partition { - case Equals(l, r) if (canEvaluate(l, left) && canEvaluate(r, right)) || + case EqualTo(l, r) if (canEvaluate(l, left) && canEvaluate(r, right)) || (canEvaluate(l, right) && canEvaluate(r, left)) => true case _ => false } val joinKeys = joinPredicates.map { - case Equals(l, r) if canEvaluate(l, left) && canEvaluate(r, right) => (l, r) - case Equals(l, r) if canEvaluate(l, right) && canEvaluate(r, left) => (r, l) + case EqualTo(l, r) if canEvaluate(l, left) && canEvaluate(r, right) => (l, r) + case EqualTo(l, r) if canEvaluate(l, right) && canEvaluate(r, left) => (r, l) } // Do not consider this strategy if there are no join keys. diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala index cea97c584f..0ff8206401 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala @@ -195,8 +195,8 @@ class ConstantFoldingSuite extends PlanTest { Add(Literal(null, IntegerType), 1) as 'c9, Add(1, Literal(null, IntegerType)) as 'c10, - Equals(Literal(null, IntegerType), 1) as 'c11, - Equals(1, Literal(null, IntegerType)) as 'c12, + EqualTo(Literal(null, IntegerType), 1) as 'c11, + EqualTo(1, Literal(null, IntegerType)) as 'c12, Like(Literal(null, StringType), "abc") as 'c13, Like("abc", Literal(null, StringType)) as 'c14, -- cgit v1.2.3