From 405b63bdceb6ff26864e65b84da4cd5af45aec48 Mon Sep 17 00:00:00 2001 From: Jakob Odersky Date: Thu, 13 Oct 2016 11:36:43 -0700 Subject: Add more documentation --- .../catalyst/expressions/regexpExpressions.scala | 9 ++++++++- .../expressions/RegexpExpressionsSuite.scala | 22 +++++++++++++++++----- 2 files changed, 25 insertions(+), 6 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala index d25da3fd58..4147ace511 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala @@ -68,7 +68,14 @@ trait StringRegexExpression extends ImplicitCastInputTypes { * Simple RegEx pattern matching function */ @ExpressionDescription( - usage = "str _FUNC_ pattern - Returns true if str matches pattern and false otherwise.") + usage = "str _FUNC_ pattern - Returns true if str matches pattern, null if any arguments are null, false otherwise.", + extended = + "The pattern is a string which is matched literally, with the exception of the following symbols:\n" + + " _ matches any one character in the input (similar to . in posix regular expressions)\n" + + " % matches zero or more characters in the input (similar to .* in posix regular expressions)\n" + + "The default escape character is '\\'. Any character after the escape character will be matched against literally. " + + "Ending a pattern in an escape character is invalid and will throw a ?? 
exception.\n" + + "Use RLIKE to match with standard regular expressions.") case class Like(left: Expression, right: Expression) extends BinaryExpression with StringRegexExpression { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala index 9316f76190..9db3abacfa 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.expressions import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.dsl.expressions._ -import org.apache.spark.sql.types.StringType +import org.apache.spark.sql.types.{IntegerType, StringType} /** * Unit tests for regular expression (regexp) related SQL expressions. @@ -54,6 +54,7 @@ class RegexpExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation("ab" like "a%b", true) checkEvaluation("a\nb" like "a%b", true) + // SI-17647 double-escaping backslash checkEvaluation("""\\\\""" like """%\\%""", true) // triple quotes to avoid java string escaping checkEvaluation("""\_%""" like """%\\__""", true) checkEvaluation("""\_%""" like "%\\\\__", true) @@ -63,12 +64,9 @@ class RegexpExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation("\\\n\n%\\" like "\\\\___\\\\", true) checkEvaluation("""%%""" like """%%""", true) checkEvaluation("""\__""" like """\\\__""", true) - checkEvaluation("""\\\__""" like """%\\%\%""", false) - checkEvaluation("""\_""" like """\_\_""", false) checkEvaluation("""_\\\%""" like """%\\""", false) - checkEvaluation("""_\\__""" like """_\___""", false) - + } test("LIKE Non-literal Regular Expression") { @@ -90,6 +88,20 @@ class RegexpExpressionsSuite extends SparkFunSuite with 
ExpressionEvalHelper { checkEvaluation("a\nb" like regEx, true, create_row("a%b")) checkEvaluation(Literal.create(null, StringType) like regEx, null, create_row("bc%")) + + + checkEvaluation("""\\\\""" like regEx, true, create_row("""%\\%""")) + checkEvaluation("""\_%""" like regEx, true, create_row("""%\\__""")) + checkEvaluation("""\_%""" like regEx, true, create_row("%\\\\__")) + checkEvaluation("""\_%""" like regEx, true, create_row("""%\\_%""")) + checkEvaluation("""\\\\%%""" like regEx, true, create_row("""\\%""")) + checkEvaluation("""\%\""" like regEx, true, create_row("""%\%%""")) + checkEvaluation("\\\n\n%\\" like regEx, true, create_row("\\\\___\\\\")) + checkEvaluation("""%%""" like regEx, true, create_row("""%%""")) + checkEvaluation("""\__""" like regEx, true, create_row("""\\\__""")) + checkEvaluation("""\\\__""" like regEx, false, create_row("""%\\%\%""")) + checkEvaluation("""_\\\%""" like regEx, false, create_row("""%\\""")) + } test("RLIKE literal Regular Expression") { -- cgit v1.2.3