From 5f190eb49e2a70131637a1c439a73066cf612069 Mon Sep 17 00:00:00 2001 From: Jakob Odersky Date: Fri, 7 Oct 2016 17:24:22 -0700 Subject: Fix backslash escaping in 'LIKE' patterns. --- .../spark/sql/catalyst/util/StringUtils.scala | 36 ++++++++++------------ .../expressions/RegexpExpressionsSuite.scala | 2 ++ 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/StringUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/StringUtils.scala index cde8bd5b96..5ff87b6e5f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/StringUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/StringUtils.scala @@ -25,26 +25,24 @@ object StringUtils { // replace the _ with .{1} exactly match 1 time of any character // replace the % with .*, match 0 or more times with any character - def escapeLikeRegex(v: String): String = { - if (!v.isEmpty) { - "(?s)" + (' ' +: v.init).zip(v).flatMap { - case (prev, '\\') => "" - case ('\\', c) => - c match { - case '_' => "_" - case '%' => "%" - case _ => Pattern.quote("\\" + c) - } - case (prev, c) => - c match { - case '_' => "." - case '%' => ".*" - case _ => Pattern.quote(Character.toString(c)) - } - }.mkString - } else { - v + def escapeLikeRegex(str: String): String = { + val builder = new StringBuilder() + str.foldLeft(false) { case (escaping, next) => + if (escaping) { + builder ++= Pattern.quote(Character.toString(next)) + false + } else if (next == '\\') { + true + } else { + builder ++= (next match { + case '_' => "." + case '%' => ".*" + case _ => Pattern.quote(Character.toString(next)) + }) + false + } } + "(?s)" + builder.result() // (?s) enables dotall mode, causing "." to match new lines } private[this] val trueStrings = Set("t", "true", "y", "yes", "1").map(UTF8String.fromString) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala index 5299549e7b..9b42489cb2 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala @@ -53,6 +53,8 @@ class RegexpExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation("a\nb" like "a_b", true) checkEvaluation("ab" like "a%b", true) checkEvaluation("a\nb" like "a%b", true) + + checkEvaluation("""\\\\""" like """%\\%""", true) // triple quotes to avoid java string escaping } test("LIKE Non-literal Regular Expression") { -- cgit v1.2.3