aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJakob Odersky <jakob@odersky.com>2016-10-13 11:36:43 -0700
committerJakob Odersky <jakob@odersky.com>2016-10-13 11:36:43 -0700
commit405b63bdceb6ff26864e65b84da4cd5af45aec48 (patch)
treed1f4c8f095c5c4e0357183ca74b6cb21aa4480fb
parent64df4cfc730dbc8c8085a414e620036dcbc92f3e (diff)
downloadspark-WIP-SPARK-17647.tar.gz
spark-WIP-SPARK-17647.tar.bz2
spark-WIP-SPARK-17647.zip
Add more documentationWIP-SPARK-17647
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala9
-rw-r--r--sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala22
2 files changed, 25 insertions, 6 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala
index d25da3fd58..4147ace511 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala
@@ -68,7 +68,14 @@ trait StringRegexExpression extends ImplicitCastInputTypes {
* Simple RegEx pattern matching function
*/
@ExpressionDescription(
- usage = "str _FUNC_ pattern - Returns true if str matches pattern and false otherwise.")
+ usage = "str _FUNC_ pattern - Returns true if str matches pattern, null if any arguments are null, false otherwise.",
+ extended =
+ "The pattern is a string which is matched literally, with the exception of the following symbols:\n" +
+ " _ matches any one character in the input (similar to . in posix regular expressions)\n" +
+ " % matches zero or more characters in the input (similar to .* in posix regular expressions)\n" +
+ "The default escape character is '\\'. Any character after the escape character will be matched against literally. " +
+ "Ending a pattern in an escape character is invalid and will throw a ?? exception.\n" +
+ "Use RLIKE to match with standard regular expressions.")
case class Like(left: Expression, right: Expression)
extends BinaryExpression with StringRegexExpression {
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala
index 9316f76190..9db3abacfa 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala
@@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.expressions
import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.catalyst.dsl.expressions._
-import org.apache.spark.sql.types.StringType
+import org.apache.spark.sql.types.{IntegerType, StringType}
/**
* Unit tests for regular expression (regexp) related SQL expressions.
@@ -54,6 +54,7 @@ class RegexpExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
checkEvaluation("ab" like "a%b", true)
checkEvaluation("a\nb" like "a%b", true)
+ // SPARK-17647 double-escaping backslash
checkEvaluation("""\\\\""" like """%\\%""", true) // triple quotes to avoid java string escaping
checkEvaluation("""\_%""" like """%\\__""", true)
checkEvaluation("""\_%""" like "%\\\\__", true)
@@ -63,12 +64,9 @@ class RegexpExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
checkEvaluation("\\\n\n%\\" like "\\\\___\\\\", true)
checkEvaluation("""%%""" like """%%""", true)
checkEvaluation("""\__""" like """\\\__""", true)
-
checkEvaluation("""\\\__""" like """%\\%\%""", false)
- checkEvaluation("""\_""" like """\_\_""", false)
checkEvaluation("""_\\\%""" like """%\\""", false)
- checkEvaluation("""_\\__""" like """_\___""", false)
-
+
}
test("LIKE Non-literal Regular Expression") {
@@ -90,6 +88,20 @@ class RegexpExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
checkEvaluation("a\nb" like regEx, true, create_row("a%b"))
checkEvaluation(Literal.create(null, StringType) like regEx, null, create_row("bc%"))
+
+
+ checkEvaluation("""\\\\""" like regEx, true, create_row("""%\\%"""))
+ checkEvaluation("""\_%""" like regEx, true, create_row("""%\\__"""))
+ checkEvaluation("""\_%""" like regEx, true, create_row("%\\\\__"))
+ checkEvaluation("""\_%""" like regEx, true, create_row("""%\\_%"""))
+ checkEvaluation("""\\\\%%""" like regEx, true, create_row("""\\%"""))
+ checkEvaluation("""\%\""" like regEx, true, create_row("""%\%%"""))
+ checkEvaluation("\\\n\n%\\" like regEx, true, create_row("\\\\___\\\\"))
+ checkEvaluation("""%%""" like regEx, true, create_row("""%%"""))
+ checkEvaluation("""\__""" like regEx, true, create_row("""\\\__"""))
+ checkEvaluation("""\\\__""" like regEx, false, create_row("""%\\%\%"""))
+ checkEvaluation("""_\\\%""" like regEx, false, create_row("""%\\"""))
+
}
test("RLIKE literal Regular Expression") {