diff options
Diffstat (limited to 'sql/catalyst/src/main/scala/org')
-rw-r--r-- | sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala | 34 | ||||
-rw-r--r-- | sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/StringUtils.scala | 18 |
2 files changed, 36 insertions, 16 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala index 3df6effb6f..0325d0e837 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala @@ -70,18 +70,28 @@ trait StringRegexExpression extends ImplicitCastInputTypes { @ExpressionDescription( usage = "str _FUNC_ pattern - Returns true if str matches pattern, " + "null if any arguments are null, false otherwise.", - extended = - "The pattern is a string which is matched literally, with exception to the following " + - "special symbols:\n\n" + - " _ matches any one character in the input (similar to . in posix " + - "regular expressions)\n\n" + - " % matches zero ore more characters in the input (similar to .* in " + - "posix regular expressions)\n\n" + - "The escape character is '\\'. If an escape character precedes a special symbol or " + - "another escape character, the following character is matched literally, For example, " + - "the expression `<path> like \\%SystemDrive\\%\\\\Users%` will match any `<path>` that " + - "starts with '%SystemDrive%\\Users'. It is invalid to escape any other character.\n\n" + - "Use RLIKE to match with standard regular expressions.") + extended = """ + Arguments: + str - a string expression + pattern - a string expression. The pattern is a string which is matched literally, with + exception to the following special symbols: + + _ matches any one character in the input (similar to . in posix regular expressions) + + % matches zero or more characters in the input (similar to .* in posix regular + expressions) + + The escape character is '\'. If an escape character precedes a special symbol or another + escape character, the following character is matched literally. 
It is invalid to escape + any other character. + + Examples: + > SELECT '%SystemDrive%\Users\John' _FUNC_ '\%SystemDrive\%\\Users%' + true + + See also: + Use RLIKE to match with standard regular expressions. +""") case class Like(left: Expression, right: Expression) extends BinaryExpression with StringRegexExpression { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/StringUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/StringUtils.scala index b760b994f2..ca22ea2420 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/StringUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/StringUtils.scala @@ -24,13 +24,23 @@ import org.apache.spark.unsafe.types.UTF8String object StringUtils { - /** Convert 'like' pattern to Java regex. */ - def escapeLikeRegex(str: String): String = { - val in = str.toIterator + /** + * Validate and convert SQL 'like' pattern to a Java regular expression. + * + * Underscores (_) are converted to '.' and percent signs (%) are converted to '.*', other + * characters are quoted literally. Escaping is done according to the rules specified in + * [[org.apache.spark.sql.catalyst.expressions.Like]] usage documentation. An invalid pattern will + * throw an [[AnalysisException]]. + * + * @param pattern the SQL pattern to convert + * @return the equivalent Java regular expression of the pattern + */ + def escapeLikeRegex(pattern: String): String = { + val in = pattern.toIterator val out = new StringBuilder() def fail(message: String) = throw new AnalysisException( - s"the pattern '$str' is invalid, $message") + s"the pattern '$pattern' is invalid, $message") while (in.hasNext) { in.next match { |