about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Jakob Odersky <jakob@odersky.com> 2016-10-07 17:24:22 -0700
committer: Jakob Odersky <jakob@odersky.com> 2016-10-07 17:36:06 -0700
commit: 5f190eb49e2a70131637a1c439a73066cf612069 (patch)
tree: 9627acc7a7334845b5ce35800bf3757c074a718d
parent: 79accf45ace5549caa0cbab02f94fc87bedb5587 (diff)
download: spark-5f190eb49e2a70131637a1c439a73066cf612069.tar.gz
spark-5f190eb49e2a70131637a1c439a73066cf612069.tar.bz2
spark-5f190eb49e2a70131637a1c439a73066cf612069.zip
Fix backslash escaping in 'LIKE' patterns.
-rw-r--r-- sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/StringUtils.scala 36
-rw-r--r-- sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala 2
2 files changed, 19 insertions, 19 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/StringUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/StringUtils.scala
index cde8bd5b96..5ff87b6e5f 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/StringUtils.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/StringUtils.scala
@@ -25,26 +25,24 @@ object StringUtils {
// replace the _ with .{1} exactly match 1 time of any character
// replace the % with .*, match 0 or more times with any character
- def escapeLikeRegex(v: String): String = {
- if (!v.isEmpty) {
- "(?s)" + (' ' +: v.init).zip(v).flatMap {
- case (prev, '\\') => ""
- case ('\\', c) =>
- c match {
- case '_' => "_"
- case '%' => "%"
- case _ => Pattern.quote("\\" + c)
- }
- case (prev, c) =>
- c match {
- case '_' => "."
- case '%' => ".*"
- case _ => Pattern.quote(Character.toString(c))
- }
- }.mkString
- } else {
- v
+ def escapeLikeRegex(str: String): String = {
+ val builder = new StringBuilder()
+ str.foldLeft(false) { case (escaping, next) =>
+ if (escaping) {
+ builder ++= Pattern.quote(Character.toString(next))
+ false
+ } else if (next == '\\') {
+ true
+ } else {
+ builder ++= (next match {
+ case '_' => "."
+ case '%' => ".*"
+ case _ => Pattern.quote(Character.toString(next))
+ })
+ false
+ }
}
+ "(?s)" + builder.result() // (?s) enables dotall mode, causing "." to match new lines
}
private[this] val trueStrings = Set("t", "true", "y", "yes", "1").map(UTF8String.fromString)
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala
index 5299549e7b..9b42489cb2 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala
@@ -53,6 +53,8 @@ class RegexpExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
checkEvaluation("a\nb" like "a_b", true)
checkEvaluation("ab" like "a%b", true)
checkEvaluation("a\nb" like "a%b", true)
+
+ checkEvaluation("""\\\\""" like """%\\%""", true) // triple quotes to avoid java string escaping
}
test("LIKE Non-literal Regular Expression") {