aboutsummaryrefslogtreecommitdiff
path: root/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala
diff options
context:
space:
mode:
authorEric Liang <ekl@databricks.com>2016-07-12 23:09:02 -0700
committerReynold Xin <rxin@databricks.com>2016-07-12 23:09:02 -0700
commit1c58fa905b6543d366d00b2e5394dfd633987f6d (patch)
treecd75fb6d7e58f2563501afe5a46ff8de6f2246b5 /sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala
parent56bd399a86c4e92be412d151200cb5e4a5f6a48a (diff)
downloadspark-1c58fa905b6543d366d00b2e5394dfd633987f6d.tar.gz
spark-1c58fa905b6543d366d00b2e5394dfd633987f6d.tar.bz2
spark-1c58fa905b6543d366d00b2e5394dfd633987f6d.zip
[SPARK-16514][SQL] Fix various regex codegen bugs
## What changes were proposed in this pull request? RegexExtract and RegexReplace currently crash on non-nullable input due use of a hard-coded local variable name (e.g. compiles fail with `java.lang.Exception: failed to compile: org.codehaus.commons.compiler.CompileException: File 'generated.java', Line 85, Column 26: Redefinition of local variable "m" `). This changes those variables to use fresh names, and also in a few other places. ## How was this patch tested? Unit tests. rxin Author: Eric Liang <ekl@databricks.com> Closes #14168 from ericl/sc-3906.
Diffstat (limited to 'sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala')
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala48
1 files changed, 33 insertions, 15 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala
index 541b8601a3..be82b3b8f4 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala
@@ -108,10 +108,11 @@ case class Like(left: Expression, right: Expression)
""")
}
} else {
+ val rightStr = ctx.freshName("rightStr")
nullSafeCodeGen(ctx, ev, (eval1, eval2) => {
s"""
- String rightStr = ${eval2}.toString();
- ${patternClass} $pattern = ${patternClass}.compile($escapeFunc(rightStr));
+ String $rightStr = ${eval2}.toString();
+ ${patternClass} $pattern = ${patternClass}.compile($escapeFunc($rightStr));
${ev.value} = $pattern.matcher(${eval1}.toString()).matches();
"""
})
@@ -157,10 +158,11 @@ case class RLike(left: Expression, right: Expression)
""")
}
} else {
+ val rightStr = ctx.freshName("rightStr")
nullSafeCodeGen(ctx, ev, (eval1, eval2) => {
s"""
- String rightStr = ${eval2}.toString();
- ${patternClass} $pattern = ${patternClass}.compile(rightStr);
+ String $rightStr = ${eval2}.toString();
+ ${patternClass} $pattern = ${patternClass}.compile($rightStr);
${ev.value} = $pattern.matcher(${eval1}.toString()).find(0);
"""
})
@@ -259,6 +261,8 @@ case class RegExpReplace(subject: Expression, regexp: Expression, rep: Expressio
val classNamePattern = classOf[Pattern].getCanonicalName
val classNameStringBuffer = classOf[java.lang.StringBuffer].getCanonicalName
+ val matcher = ctx.freshName("matcher")
+
ctx.addMutableState("UTF8String", termLastRegex, s"${termLastRegex} = null;")
ctx.addMutableState(classNamePattern, termPattern, s"${termPattern} = null;")
ctx.addMutableState("String", termLastReplacement, s"${termLastReplacement} = null;")
@@ -267,6 +271,12 @@ case class RegExpReplace(subject: Expression, regexp: Expression, rep: Expressio
ctx.addMutableState(classNameStringBuffer,
termResult, s"${termResult} = new $classNameStringBuffer();")
+ val setEvNotNull = if (nullable) {
+ s"${ev.isNull} = false;"
+ } else {
+ ""
+ }
+
nullSafeCodeGen(ctx, ev, (subject, regexp, rep) => {
s"""
if (!$regexp.equals(${termLastRegex})) {
@@ -280,14 +290,14 @@ case class RegExpReplace(subject: Expression, regexp: Expression, rep: Expressio
${termLastReplacement} = ${termLastReplacementInUTF8}.toString();
}
${termResult}.delete(0, ${termResult}.length());
- java.util.regex.Matcher m = ${termPattern}.matcher($subject.toString());
+ java.util.regex.Matcher ${matcher} = ${termPattern}.matcher($subject.toString());
- while (m.find()) {
- m.appendReplacement(${termResult}, ${termLastReplacement});
+ while (${matcher}.find()) {
+ ${matcher}.appendReplacement(${termResult}, ${termLastReplacement});
}
- m.appendTail(${termResult});
+ ${matcher}.appendTail(${termResult});
${ev.value} = UTF8String.fromString(${termResult}.toString());
- ${ev.isNull} = false;
+ $setEvNotNull
"""
})
}
@@ -334,10 +344,18 @@ case class RegExpExtract(subject: Expression, regexp: Expression, idx: Expressio
val termLastRegex = ctx.freshName("lastRegex")
val termPattern = ctx.freshName("pattern")
val classNamePattern = classOf[Pattern].getCanonicalName
+ val matcher = ctx.freshName("matcher")
+ val matchResult = ctx.freshName("matchResult")
ctx.addMutableState("UTF8String", termLastRegex, s"${termLastRegex} = null;")
ctx.addMutableState(classNamePattern, termPattern, s"${termPattern} = null;")
+ val setEvNotNull = if (nullable) {
+ s"${ev.isNull} = false;"
+ } else {
+ ""
+ }
+
nullSafeCodeGen(ctx, ev, (subject, regexp, idx) => {
s"""
if (!$regexp.equals(${termLastRegex})) {
@@ -345,15 +363,15 @@ case class RegExpExtract(subject: Expression, regexp: Expression, idx: Expressio
${termLastRegex} = $regexp.clone();
${termPattern} = ${classNamePattern}.compile(${termLastRegex}.toString());
}
- java.util.regex.Matcher m =
+ java.util.regex.Matcher ${matcher} =
${termPattern}.matcher($subject.toString());
- if (m.find()) {
- java.util.regex.MatchResult mr = m.toMatchResult();
- ${ev.value} = UTF8String.fromString(mr.group($idx));
- ${ev.isNull} = false;
+ if (${matcher}.find()) {
+ java.util.regex.MatchResult ${matchResult} = ${matcher}.toMatchResult();
+ ${ev.value} = UTF8String.fromString(${matchResult}.group($idx));
+ $setEvNotNull
} else {
${ev.value} = UTF8String.EMPTY_UTF8;
- ${ev.isNull} = false;
+ $setEvNotNull
}"""
})
}