aboutsummaryrefslogtreecommitdiff
path: root/python/pyspark/sql/functions.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/pyspark/sql/functions.py')
-rw-r--r--python/pyspark/sql/functions.py18
1 files changed, 17 insertions, 1 deletions
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index 81dc7d832e..96975f54ff 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -64,7 +64,7 @@ __all__ += [
'year', 'quarter', 'month', 'hour', 'minute', 'second',
'dayofmonth', 'dayofyear', 'weekofyear']
-__all__ += ['soundex']
+__all__ += ['soundex', 'substring', 'substring_index']
def _create_function(name, doc=""):
@@ -925,6 +925,22 @@ def trunc(date, format):
@since(1.5)
@ignore_unicode_prefix
+def substring(str, pos, len):
+ """
+ Return the substring that starts at `pos` and has length `len` when `str` is String
+ type, or the slice of the byte array that starts at byte `pos` and has length `len`
+ when `str` is Binary type. `pos` is 1-based, as the example below shows.
+
+ >>> df = sqlContext.createDataFrame([('abcd',)], ['s',])
+ >>> df.select(substring(df.s, 1, 2).alias('s')).collect()
+ [Row(s=u'ab')]
+ """
+ sc = SparkContext._active_spark_context
+ return Column(sc._jvm.functions.substring(_to_java_column(str), pos, len))
+
+
+@since(1.5)
+@ignore_unicode_prefix
def substring_index(str, delim, count):
"""
Returns the substring from string str before count occurrences of the delimiter delim.