aboutsummaryrefslogtreecommitdiff
path: root/python
diff options
context:
space:
mode:
author Sean Owen <sowen@cloudera.com> 2016-08-07 12:20:07 +0100
committer Sean Owen <sowen@cloudera.com> 2016-08-07 12:20:07 +0100
commit 8d8725208771a8815a60160a5a30dc6ea87a7e6a (patch)
tree 191042d5279e59aeaadc60f3585c32ebaf5ca3ee /python
parent bdfab9f942dcad7c1f3de9b6df5c01dee2392055 (diff)
download spark-8d8725208771a8815a60160a5a30dc6ea87a7e6a.tar.gz
spark-8d8725208771a8815a60160a5a30dc6ea87a7e6a.tar.bz2
spark-8d8725208771a8815a60160a5a30dc6ea87a7e6a.zip
[SPARK-16409][SQL] regexp_extract with optional groups causes NPE
## What changes were proposed in this pull request? regexp_extract incorrectly returns null when the regex matches but the requested optional group did not participate in the match. This change makes it return an empty string in that case, as apparently designed. ## How was this patch tested? Additional unit test. Author: Sean Owen <sowen@cloudera.com>. Closes #14504 from srowen/SPARK-16409.
Diffstat (limited to 'python')
-rw-r--r-- python/pyspark/sql/functions.py 3
1 files changed, 3 insertions, 0 deletions
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index e422363ec1..8a01805ec8 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -1445,6 +1445,9 @@ def regexp_extract(str, pattern, idx):
>>> df = spark.createDataFrame([('100-200',)], ['str'])
>>> df.select(regexp_extract('str', '(\d+)-(\d+)', 1).alias('d')).collect()
[Row(d=u'100')]
+ >>> df = spark.createDataFrame([('aaaac',)], ['str'])
+ >>> df.select(regexp_extract('str', '(a+)(b)?(c)', 2).alias('d')).collect()
+ [Row(d=u'')]
"""
sc = SparkContext._active_spark_context
jc = sc._jvm.functions.regexp_extract(_to_java_column(str), pattern, idx)