From 6996bd2e81bf6597dcda499d9a9a80927a43e30f Mon Sep 17 00:00:00 2001 From: "zhichao.li" Date: Fri, 31 Jul 2015 21:18:01 -0700 Subject: [SPARK-8264][SQL]add substring_index function This PR is based on #7533 , thanks to zhichao-li Closes #7533 Author: zhichao.li Author: Davies Liu Closes #7843 from davies/str_index and squashes the following commits: 391347b [Davies Liu] add python api 3ce7802 [Davies Liu] fix substringIndex f2d29a1 [Davies Liu] Merge branch 'master' of github.com:apache/spark into str_index 515519b [zhichao.li] add foldable and remove null checking 9546991 [zhichao.li] scala style 67c253a [zhichao.li] hide some apis and clean code b19b013 [zhichao.li] add codegen and clean code ac863e9 [zhichao.li] reduce the calling of numChars 12e108f [zhichao.li] refine unittest d92951b [zhichao.li] add lastIndexOf 52d7b03 [zhichao.li] add substring_index function --- python/pyspark/sql/functions.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'python') diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index bb9926ce8c..89a2a5ceaa 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -920,6 +920,25 @@ def trunc(date, format): return Column(sc._jvm.functions.trunc(_to_java_column(date), format)) +@since(1.5) +@ignore_unicode_prefix +def substring_index(str, delim, count): + """ + Returns the substring from string str before count occurrences of the delimiter delim. + If count is positive, everything the left of the final delimiter (counting from left) is + returned. If count is negative, every to the right of the final delimiter (counting from the + right) is returned. substring_index performs a case-sensitive match when searching for delim. + + >>> df = sqlContext.createDataFrame([('a.b.c.d',)], ['s']) + >>> df.select(substring_index(df.s, '.', 2).alias('s')).collect() + [Row(s=u'a.b')] + >>> df.select(substring_index(df.s, '.', -3).alias('s')).collect() + [Row(s=u'b.c.d')] + """ + sc = SparkContext._active_spark_context + return Column(sc._jvm.functions.substring_index(_to_java_column(str), delim, count)) + + @since(1.5) def size(col): """ -- cgit v1.2.3