about summary refs log tree commit diff
path: root/sql
diff options
context:
space:
mode:
author Takuya UESHIN <ueshin@happy-camper.st> 2014-07-10 19:20:00 -0700
committer Michael Armbrust <michael@databricks.com> 2014-07-10 19:20:00 -0700
commit f62c42728990266d5d5099abe241f699189ba025 (patch)
tree fb090e9ca9a50d1ec29f3137e3975d5fcefe8bdf /sql
parent ae8ca4dfbacd5a5197fb41722607ad99c190f768 (diff)
download spark-f62c42728990266d5d5099abe241f699189ba025.tar.gz
spark-f62c42728990266d5d5099abe241f699189ba025.tar.bz2
spark-f62c42728990266d5d5099abe241f699189ba025.zip
[SPARK-2431][SQL] Refine StringComparison and related codes.
Refine `StringComparison` and related codes as follows:

- `StringComparison` could be similar to `StringRegexExpression` or `CaseConversionExpression`.
- Nullability of `StringRegexExpression` could depend on children's nullabilities.
- Add a case that the like condition includes no wildcard to `LikeSimplification`.

Author: Takuya UESHIN <ueshin@happy-camper.st>

Closes #1357 from ueshin/issues/SPARK-2431 and squashes the following commits:

77766f5 [Takuya UESHIN] Add a case that the like condition includes no wildcard to LikeSimplification.
b9da9d2 [Takuya UESHIN] Fix nullability of StringRegexExpression.
680bb72 [Takuya UESHIN] Refine StringComparison.
Diffstat (limited to 'sql')
-rw-r--r-- sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala 28
-rw-r--r-- sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala 3
2 files changed, 16 insertions, 15 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala
index 347471cebd..b3850533c3 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala
@@ -23,7 +23,6 @@ import org.apache.spark.sql.catalyst.types.DataType
import org.apache.spark.sql.catalyst.types.StringType
import org.apache.spark.sql.catalyst.types.BooleanType
-
trait StringRegexExpression {
self: BinaryExpression =>
@@ -32,7 +31,7 @@ trait StringRegexExpression {
def escape(v: String): String
def matches(regex: Pattern, str: String): Boolean
- def nullable: Boolean = true
+ def nullable: Boolean = left.nullable || right.nullable
def dataType: DataType = BooleanType
// try cache the pattern for Literal
@@ -157,19 +156,13 @@ case class Lower(child: Expression) extends UnaryExpression with CaseConversionE
override def toString() = s"Lower($child)"
}
-/** A base class for functions that compare two strings, returning a boolean. */
-abstract class StringComparison extends Expression {
- self: Product =>
+/** A base trait for functions that compare two strings, returning a boolean. */
+trait StringComparison {
+ self: BinaryExpression =>
type EvaluatedType = Any
- def left: Expression
- def right: Expression
-
- override def references = children.flatMap(_.references).toSet
- override def children = left :: right :: Nil
-
- override def nullable: Boolean = true
+ def nullable: Boolean = left.nullable || right.nullable
override def dataType: DataType = BooleanType
def compare(l: String, r: String): Boolean
@@ -184,26 +177,31 @@ abstract class StringComparison extends Expression {
}
}
+ def symbol: String = nodeName
+
override def toString() = s"$nodeName($left, $right)"
}
/**
* A function that returns true if the string `left` contains the string `right`.
*/
-case class Contains(left: Expression, right: Expression) extends StringComparison {
+case class Contains(left: Expression, right: Expression)
+ extends BinaryExpression with StringComparison {
override def compare(l: String, r: String) = l.contains(r)
}
/**
* A function that returns true if the string `left` starts with the string `right`.
*/
-case class StartsWith(left: Expression, right: Expression) extends StringComparison {
+case class StartsWith(left: Expression, right: Expression)
+ extends BinaryExpression with StringComparison {
def compare(l: String, r: String) = l.startsWith(r)
}
/**
* A function that returns true if the string `left` ends with the string `right`.
*/
-case class EndsWith(left: Expression, right: Expression) extends StringComparison {
+case class EndsWith(left: Expression, right: Expression)
+ extends BinaryExpression with StringComparison {
def compare(l: String, r: String) = l.endsWith(r)
}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
index f0904f59d0..a142310c50 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
@@ -123,6 +123,7 @@ object LikeSimplification extends Rule[LogicalPlan] {
val startsWith = "([^_%]+)%".r
val endsWith = "%([^_%]+)".r
val contains = "%([^_%]+)%".r
+ val equalTo = "([^_%]*)".r
def apply(plan: LogicalPlan): LogicalPlan = plan transformAllExpressions {
case Like(l, Literal(startsWith(pattern), StringType)) if !pattern.endsWith("\\") =>
@@ -131,6 +132,8 @@ object LikeSimplification extends Rule[LogicalPlan] {
EndsWith(l, Literal(pattern))
case Like(l, Literal(contains(pattern), StringType)) if !pattern.endsWith("\\") =>
Contains(l, Literal(pattern))
+ case Like(l, Literal(equalTo(pattern), StringType)) =>
+ EqualTo(l, Literal(pattern))
}
}