aboutsummaryrefslogtreecommitdiff
path: root/sql/catalyst/src/main/scala/org/apache
diff options
context:
space:
mode:
author Sean Owen <sowen@cloudera.com> 2016-08-07 12:20:07 +0100
committer Sean Owen <sowen@cloudera.com> 2016-08-07 12:20:07 +0100
commit 8d8725208771a8815a60160a5a30dc6ea87a7e6a (patch)
tree 191042d5279e59aeaadc60f3585c32ebaf5ca3ee /sql/catalyst/src/main/scala/org/apache
parent bdfab9f942dcad7c1f3de9b6df5c01dee2392055 (diff)
download spark-8d8725208771a8815a60160a5a30dc6ea87a7e6a.tar.gz
spark-8d8725208771a8815a60160a5a30dc6ea87a7e6a.tar.bz2
spark-8d8725208771a8815a60160a5a30dc6ea87a7e6a.zip
[SPARK-16409][SQL] regexp_extract with optional groups causes NPE
## What changes were proposed in this pull request? regexp_extract incorrectly returned null when the regex matched but the requested optional group did not participate in the match. This change makes it return an empty string in that case, as apparently designed. ## How was this patch tested? Additional unit test. Author: Sean Owen <sowen@cloudera.com> Closes #14504 from srowen/SPARK-16409.
Diffstat (limited to 'sql/catalyst/src/main/scala/org/apache')
-rw-r--r-- sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala 13
1 files changed, 11 insertions, 2 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala
index be82b3b8f4..d25da3fd58 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala
@@ -329,7 +329,12 @@ case class RegExpExtract(subject: Expression, regexp: Expression, idx: Expressio
val m = pattern.matcher(s.toString)
if (m.find) {
val mr: MatchResult = m.toMatchResult
- UTF8String.fromString(mr.group(r.asInstanceOf[Int]))
+ val group = mr.group(r.asInstanceOf[Int])
+ if (group == null) { // Pattern matched, but not optional group
+ UTF8String.EMPTY_UTF8
+ } else {
+ UTF8String.fromString(group)
+ }
} else {
UTF8String.EMPTY_UTF8
}
@@ -367,7 +372,11 @@ case class RegExpExtract(subject: Expression, regexp: Expression, idx: Expressio
${termPattern}.matcher($subject.toString());
if (${matcher}.find()) {
java.util.regex.MatchResult ${matchResult} = ${matcher}.toMatchResult();
- ${ev.value} = UTF8String.fromString(${matchResult}.group($idx));
+ if (${matchResult}.group($idx) == null) {
+ ${ev.value} = UTF8String.EMPTY_UTF8;
+ } else {
+ ${ev.value} = UTF8String.fromString(${matchResult}.group($idx));
+ }
$setEvNotNull
} else {
${ev.value} = UTF8String.EMPTY_UTF8;