about summary refs log tree commit diff
path: root/python
diff options
context:
space:
mode:
authorzhichao.li <zhichao.li@intel.com>2015-08-01 08:48:46 -0700
committerDavies Liu <davies.liu@gmail.com>2015-08-01 08:48:46 -0700
commitc5166f7a69faeaa8a41a774c73c1ed4d4c2cf0ce (patch)
treec1b3ddbb2f8697743cd8e11aaeedcdf80d1adec7 /python
parentcf6c9ca32a89422e25007d333bc8714d9b0ae6d8 (diff)
downloadspark-c5166f7a69faeaa8a41a774c73c1ed4d4c2cf0ce.tar.gz
spark-c5166f7a69faeaa8a41a774c73c1ed4d4c2cf0ce.tar.bz2
spark-c5166f7a69faeaa8a41a774c73c1ed4d4c2cf0ce.zip
[SPARK-8263] [SQL] substr/substring should also support binary type
This is based on #7641, thanks to zhichao-li Closes #7641 Author: zhichao.li <zhichao.li@intel.com> Author: Davies Liu <davies@databricks.com> Closes #7848 from davies/substr and squashes the following commits: 461b709 [Davies Liu] remove bytearry from tests b45377a [Davies Liu] Merge branch 'master' of github.com:apache/spark into substr 01d795e [zhichao.li] scala style 99aa130 [zhichao.li] add substring to dataframe 4f68bfe [zhichao.li] add binary type support for substring
Diffstat (limited to 'python')
-rw-r--r--  python/pyspark/sql/functions.py | 18
1 file changed, 17 insertions, 1 deletion
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index 81dc7d832e..96975f54ff 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -64,7 +64,7 @@ __all__ += [
'year', 'quarter', 'month', 'hour', 'minute', 'second',
'dayofmonth', 'dayofyear', 'weekofyear']
-__all__ += ['soundex']
+__all__ += ['soundex', 'substring', 'substring_index']
def _create_function(name, doc=""):
@@ -925,6 +925,22 @@ def trunc(date, format):
@since(1.5)
@ignore_unicode_prefix
+def substring(str, pos, len):
+ """
+ Substring starts at `pos` and is of length `len` when str is String type or
+ returns the slice of byte array that starts at `pos` in byte and is of length `len`
+ when str is Binary type
+
+ >>> df = sqlContext.createDataFrame([('abcd',)], ['s',])
+ >>> df.select(substring(df.s, 1, 2).alias('s')).collect()
+ [Row(s=u'ab')]
+ """
+ sc = SparkContext._active_spark_context
+ return Column(sc._jvm.functions.substring(_to_java_column(str), pos, len))
+
+
+@since(1.5)
+@ignore_unicode_prefix
def substring_index(str, delim, count):
"""
Returns the substring from string str before count occurrences of the delimiter delim.