diff options
Diffstat (limited to 'python')
-rw-r--r-- | python/pyspark/sql/functions.py | 19 |
1 files changed, 19 insertions, 0 deletions
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index bb9926ce8c..89a2a5ceaa 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -921,6 +921,25 @@ def trunc(date, format): @since(1.5) +@ignore_unicode_prefix +def substring_index(str, delim, count): + """ + Returns the substring from string str before count occurrences of the delimiter delim. + If count is positive, everything to the left of the final delimiter (counting from the left) is + returned. If count is negative, everything to the right of the final delimiter (counting from the + right) is returned. substring_index performs a case-sensitive match when searching for delim. + + >>> df = sqlContext.createDataFrame([('a.b.c.d',)], ['s']) + >>> df.select(substring_index(df.s, '.', 2).alias('s')).collect() + [Row(s=u'a.b')] + >>> df.select(substring_index(df.s, '.', -3).alias('s')).collect() + [Row(s=u'b.c.d')] + """ + sc = SparkContext._active_spark_context + return Column(sc._jvm.functions.substring_index(_to_java_column(str), delim, count)) + + +@since(1.5) def size(col): """ Collection function: returns the length of the array or map stored in the column. |