diff options
author | Kousuke Saruta <sarutak@oss.nttdata.co.jp> | 2014-10-26 16:54:07 -0700 |
---|---|---|
committer | Michael Armbrust <michael@databricks.com> | 2014-10-26 16:54:07 -0700 |
commit | 3a9d66cf59ab7c9aee090e4c6067c73510e2ac26 (patch) | |
tree | 52556058b5872274204874ae76d71e6a7578b42e /sql | |
parent | ace41e8bf2f4538115b28b90757b8424ca614682 (diff) | |
download | spark-3a9d66cf59ab7c9aee090e4c6067c73510e2ac26.tar.gz spark-3a9d66cf59ab7c9aee090e4c6067c73510e2ac26.tar.bz2 spark-3a9d66cf59ab7c9aee090e4c6067c73510e2ac26.zip |
[SPARK-4061][SQL] We cannot use EOL character in the operand of LIKE predicate.
We cannot use EOL character like \n or \r in the operand of LIKE predicate.
So following condition is never true.
-- someStr is 'hoge\nfuga'
where someStr LIKE 'hoge_fuga'
Author: Kousuke Saruta <sarutak@oss.nttdata.co.jp>
Closes #2908 from sarutak/spark-sql-like-match-modification and squashes the following commits:
d15798b [Kousuke Saruta] Remove test setting for thriftserver
f99a2f4 [Kousuke Saruta] Fixed LIKE predicate so that we can use EOL character as in a operand
Diffstat (limited to 'sql')
2 files changed, 25 insertions, 23 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala index c2a3a5ca3c..f634976776 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala @@ -102,31 +102,27 @@ case class Like(left: Expression, right: Expression) // replace the _ with .{1} exactly match 1 time of any character // replace the % with .*, match 0 or more times with any character - override def escape(v: String) = { - val sb = new StringBuilder() - var i = 0; - while (i < v.length) { - // Make a special case for "\\_" and "\\%" - val n = v.charAt(i); - if (n == '\\' && i + 1 < v.length && (v.charAt(i + 1) == '_' || v.charAt(i + 1) == '%')) { - sb.append(v.charAt(i + 1)) - i += 1 - } else { - if (n == '_') { - sb.append("."); - } else if (n == '%') { - sb.append(".*"); - } else { - sb.append(Pattern.quote(Character.toString(n))); - } - } - - i += 1 + override def escape(v: String) = + if (!v.isEmpty) { + "(?s)" + (' ' +: v.init).zip(v).flatMap { + case (prev, '\\') => "" + case ('\\', c) => + c match { + case '_' => "_" + case '%' => "%" + case _ => Pattern.quote("\\" + c) + } + case (prev, c) => + c match { + case '_' => "." + case '%' => ".*" + case _ => Pattern.quote(Character.toString(c)) + } + }.mkString + } else { + v } - sb.toString() - } - override def matches(regex: Pattern, str: String): Boolean = regex.matcher(str).matches() } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala index f134d73450..53c53481f9 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala @@ -191,6 +191,9 @@ class ExpressionEvaluationSuite extends FunSuite { checkEvaluation("abc" like "a%", true) checkEvaluation("abc" like "b%", false) checkEvaluation("abc" like "bc%", false) + checkEvaluation("a\nb" like "a_b", true) + checkEvaluation("ab" like "a%b", true) + checkEvaluation("a\nb" like "a%b", true) } test("LIKE Non-literal Regular Expression") { @@ -207,6 +210,9 @@ class ExpressionEvaluationSuite extends FunSuite { checkEvaluation("abc" like regEx, true, new GenericRow(Array[Any]("a%"))) checkEvaluation("abc" like regEx, false, new GenericRow(Array[Any]("b%"))) checkEvaluation("abc" like regEx, false, new GenericRow(Array[Any]("bc%"))) + checkEvaluation("a\nb" like regEx, true, new GenericRow(Array[Any]("a_b"))) + checkEvaluation("ab" like regEx, true, new GenericRow(Array[Any]("a%b"))) + checkEvaluation("a\nb" like regEx, true, new GenericRow(Array[Any]("a%b"))) checkEvaluation(Literal(null, StringType) like regEx, null, new GenericRow(Array[Any]("bc%"))) } |