From 0578ff9681edbaab4ae68f67272dc3d4d890d53b Mon Sep 17 00:00:00 2001
From: Sean Owen <sowen@cloudera.com>
Date: Wed, 10 Aug 2016 10:14:43 +0100
Subject: [SPARK-16324][SQL] regexp_extract should doc that it returns empty
 string when match fails

## What changes were proposed in this pull request?

Doc that regexp_extract returns empty string when regex or group does not match

## How was this patch tested?

Jenkins test, with a few new test cases

Author: Sean Owen <sowen@cloudera.com>

Closes #14525 from srowen/SPARK-16324.
---
 python/pyspark/sql/functions.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'python')

diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index 8a01805ec8..4ea83e24bb 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -1440,11 +1440,15 @@ def split(str, pattern):
 @ignore_unicode_prefix
 @since(1.5)
 def regexp_extract(str, pattern, idx):
-    """Extract a specific(idx) group identified by a java regex, from the specified string column.
+    """Extract a specific group matched by a Java regex, from the specified string column.
+    If the regex did not match, or the specified group did not match, an empty string is returned.
 
     >>> df = spark.createDataFrame([('100-200',)], ['str'])
     >>> df.select(regexp_extract('str', '(\d+)-(\d+)', 1).alias('d')).collect()
     [Row(d=u'100')]
+    >>> df = spark.createDataFrame([('foo',)], ['str'])
+    >>> df.select(regexp_extract('str', '(\d+)', 1).alias('d')).collect()
+    [Row(d=u'')]
     >>> df = spark.createDataFrame([('aaaac',)], ['str'])
     >>> df.select(regexp_extract('str', '(a+)(b)?(c)', 2).alias('d')).collect()
     [Row(d=u'')]
-- 
cgit v1.2.3