aboutsummaryrefslogtreecommitdiff
path: root/sql/catalyst
diff options
context:
space:
mode:
authorKousuke Saruta <sarutak@oss.nttdata.co.jp>2014-10-26 16:54:07 -0700
committerMichael Armbrust <michael@databricks.com>2014-10-26 16:54:07 -0700
commit3a9d66cf59ab7c9aee090e4c6067c73510e2ac26 (patch)
tree52556058b5872274204874ae76d71e6a7578b42e /sql/catalyst
parentace41e8bf2f4538115b28b90757b8424ca614682 (diff)
downloadspark-3a9d66cf59ab7c9aee090e4c6067c73510e2ac26.tar.gz
spark-3a9d66cf59ab7c9aee090e4c6067c73510e2ac26.tar.bz2
spark-3a9d66cf59ab7c9aee090e4c6067c73510e2ac26.zip
[SPARK-4061][SQL] We cannot use EOL character in the operand of LIKE predicate.
An EOL character such as \n or \r in the operand of a LIKE predicate is never matched, so the following condition is never true: -- someStr is 'hoge\nfuga' where someStr LIKE 'hoge_fuga' Author: Kousuke Saruta <sarutak@oss.nttdata.co.jp> Closes #2908 from sarutak/spark-sql-like-match-modification and squashes the following commits: d15798b [Kousuke Saruta] Remove test setting for thriftserver f99a2f4 [Kousuke Saruta] Fixed LIKE predicate so that we can use EOL character as in a operand
Diffstat (limited to 'sql/catalyst')
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala42
-rw-r--r--sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala6
2 files changed, 25 insertions, 23 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala
index c2a3a5ca3c..f634976776 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala
@@ -102,31 +102,27 @@ case class Like(left: Expression, right: Expression)
// replace the _ with .{1} exactly match 1 time of any character
// replace the % with .*, match 0 or more times with any character
- override def escape(v: String) = {
- val sb = new StringBuilder()
- var i = 0;
- while (i < v.length) {
- // Make a special case for "\\_" and "\\%"
- val n = v.charAt(i);
- if (n == '\\' && i + 1 < v.length && (v.charAt(i + 1) == '_' || v.charAt(i + 1) == '%')) {
- sb.append(v.charAt(i + 1))
- i += 1
- } else {
- if (n == '_') {
- sb.append(".");
- } else if (n == '%') {
- sb.append(".*");
- } else {
- sb.append(Pattern.quote(Character.toString(n)));
- }
- }
-
- i += 1
+ override def escape(v: String) =
+ if (!v.isEmpty) {
+ "(?s)" + (' ' +: v.init).zip(v).flatMap {
+ case (prev, '\\') => ""
+ case ('\\', c) =>
+ c match {
+ case '_' => "_"
+ case '%' => "%"
+ case _ => Pattern.quote("\\" + c)
+ }
+ case (prev, c) =>
+ c match {
+ case '_' => "."
+ case '%' => ".*"
+ case _ => Pattern.quote(Character.toString(c))
+ }
+ }.mkString
+ } else {
+ v
}
- sb.toString()
- }
-
override def matches(regex: Pattern, str: String): Boolean = regex.matcher(str).matches()
}
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala
index f134d73450..53c53481f9 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala
@@ -191,6 +191,9 @@ class ExpressionEvaluationSuite extends FunSuite {
checkEvaluation("abc" like "a%", true)
checkEvaluation("abc" like "b%", false)
checkEvaluation("abc" like "bc%", false)
+ checkEvaluation("a\nb" like "a_b", true)
+ checkEvaluation("ab" like "a%b", true)
+ checkEvaluation("a\nb" like "a%b", true)
}
test("LIKE Non-literal Regular Expression") {
@@ -207,6 +210,9 @@ class ExpressionEvaluationSuite extends FunSuite {
checkEvaluation("abc" like regEx, true, new GenericRow(Array[Any]("a%")))
checkEvaluation("abc" like regEx, false, new GenericRow(Array[Any]("b%")))
checkEvaluation("abc" like regEx, false, new GenericRow(Array[Any]("bc%")))
+ checkEvaluation("a\nb" like regEx, true, new GenericRow(Array[Any]("a_b")))
+ checkEvaluation("ab" like regEx, true, new GenericRow(Array[Any]("a%b")))
+ checkEvaluation("a\nb" like regEx, true, new GenericRow(Array[Any]("a%b")))
checkEvaluation(Literal(null, StringType) like regEx, null, new GenericRow(Array[Any]("bc%")))
}