aboutsummaryrefslogtreecommitdiff
path: root/sql/catalyst/src/main/scala/org
diff options
context:
space:
mode:
Diffstat (limited to 'sql/catalyst/src/main/scala/org')
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala34
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/StringUtils.scala18
2 files changed, 36 insertions, 16 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala
index 3df6effb6f..0325d0e837 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala
@@ -70,18 +70,28 @@ trait StringRegexExpression extends ImplicitCastInputTypes {
@ExpressionDescription(
usage = "str _FUNC_ pattern - Returns true if str matches pattern, " +
"null if any arguments are null, false otherwise.",
- extended =
- "The pattern is a string which is matched literally, with exception to the following " +
- "special symbols:\n\n" +
- " _ matches any one character in the input (similar to . in posix " +
- "regular expressions)\n\n" +
- " % matches zero ore more characters in the input (similar to .* in " +
- "posix regular expressions)\n\n" +
- "The escape character is '\\'. If an escape character precedes a special symbol or " +
- "another escape character, the following character is matched literally, For example, " +
- "the expression `<path> like \\%SystemDrive\\%\\\\Users%` will match any `<path>` that " +
- "starts with '%SystemDrive%\\Users'. It is invalid to escape any other character.\n\n" +
- "Use RLIKE to match with standard regular expressions.")
+ extended = """
+ Arguments:
+ str - a string expression
+ pattern - a string expression. The pattern is a string which is matched literally, with
+ the exception of the following special symbols:
+
+ _ matches any one character in the input (similar to . in posix regular expressions)
+
+ % matches zero or more characters in the input (similar to .* in posix regular
+ expressions)
+
+ The escape character is '\'. If an escape character precedes a special symbol or another
+ escape character, the following character is matched literally. It is invalid to escape
+ any other character.
+
+ Examples:
+ > SELECT '%SystemDrive%\Users\John' _FUNC_ '\%SystemDrive\%\\Users%'
+ true
+
+ See also:
+ Use RLIKE to match with standard regular expressions.
+""")
case class Like(left: Expression, right: Expression)
extends BinaryExpression with StringRegexExpression {
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/StringUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/StringUtils.scala
index b760b994f2..ca22ea2420 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/StringUtils.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/StringUtils.scala
@@ -24,13 +24,23 @@ import org.apache.spark.unsafe.types.UTF8String
object StringUtils {
- /** Convert 'like' pattern to Java regex. */
- def escapeLikeRegex(str: String): String = {
- val in = str.toIterator
+ /**
+ * Validate and convert SQL 'like' pattern to a Java regular expression.
+ *
+ * Underscores (_) are converted to '.' and percent signs (%) are converted to '.*'; other
+ * characters are quoted literally. Escaping is done according to the rules specified in
+ * [[org.apache.spark.sql.catalyst.expressions.Like]] usage documentation. An invalid pattern will
+ * throw an [[AnalysisException]].
+ *
+ * @param pattern the SQL pattern to convert
+ * @return the equivalent Java regular expression of the pattern
+ */
+ def escapeLikeRegex(pattern: String): String = {
+ val in = pattern.toIterator
val out = new StringBuilder()
def fail(message: String) = throw new AnalysisException(
- s"the pattern '$str' is invalid, $message")
+ s"the pattern '$pattern' is invalid, $message")
while (in.hasNext) {
in.next match {