aboutsummaryrefslogtreecommitdiff
path: root/python/pyspark/sql/functions.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/pyspark/sql/functions.py')
-rw-r--r--python/pyspark/sql/functions.py19
1 files changed, 19 insertions, 0 deletions
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index bb9926ce8c..89a2a5ceaa 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -921,6 +921,25 @@ def trunc(date, format):
@since(1.5)
+@ignore_unicode_prefix
+def substring_index(str, delim, count):
+ """
+ Returns the substring from string str before count occurrences of the delimiter delim.
+ If count is positive, everything to the left of the final delimiter (counting from the left) is
+ returned. If count is negative, everything to the right of the final delimiter (counting from
+ the right) is returned. substring_index performs a case-sensitive search for delim.
+
+ >>> df = sqlContext.createDataFrame([('a.b.c.d',)], ['s'])
+ >>> df.select(substring_index(df.s, '.', 2).alias('s')).collect()
+ [Row(s=u'a.b')]
+ >>> df.select(substring_index(df.s, '.', -3).alias('s')).collect()
+ [Row(s=u'b.c.d')]
+ """
+ sc = SparkContext._active_spark_context
+ return Column(sc._jvm.functions.substring_index(_to_java_column(str), delim, count))
+
+
+@since(1.5)
def size(col):
"""
Collection function: returns the length of the array or map stored in the column.