aboutsummaryrefslogtreecommitdiff
path: root/python/pyspark/sql/functions.py
diff options
context:
space:
mode:
authorHuJiayin <jiayin.hu@intel.com>2015-07-31 16:05:26 -0700
committerReynold Xin <rxin@databricks.com>2015-07-31 16:05:26 -0700
commit4d5a6e7b60b315968973e2298eeee5eb174ec721 (patch)
tree8967ec9a096760ab45668136bb070f5d9d72179e /python/pyspark/sql/functions.py
parent3fc0cb92001798167a14c1377362a3335397dd4c (diff)
downloadspark-4d5a6e7b60b315968973e2298eeee5eb174ec721.tar.gz
spark-4d5a6e7b60b315968973e2298eeee5eb174ec721.tar.bz2
spark-4d5a6e7b60b315968973e2298eeee5eb174ec721.zip
[SPARK-8271][SQL]string function: soundex
This PR brings SQL function soundex(), see https://issues.apache.org/jira/browse/HIVE-9738. It's based on #7115, thanks to HuJiayin. Author: HuJiayin <jiayin.hu@intel.com> Author: Davies Liu <davies@databricks.com> Closes #7812 from davies/soundex and squashes the following commits: fa75941 [Davies Liu] Merge branch 'master' of github.com:apache/spark into soundex a4bd6d8 [Davies Liu] fix soundex 2538908 [HuJiayin] add codegen soundex d15d329 [HuJiayin] add back ut ded1a14 [HuJiayin] Merge branch 'master' of https://github.com/apache/spark e2dec2c [HuJiayin] support soundex rebase code
Diffstat (limited to 'python/pyspark/sql/functions.py')
-rw-r--r--python/pyspark/sql/functions.py17
1 files changed, 17 insertions, 0 deletions
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index 8024a8de07..bb9926ce8c 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -63,6 +63,8 @@ __all__ += [
'year', 'quarter', 'month', 'hour', 'minute', 'second',
'dayofmonth', 'dayofyear', 'weekofyear']
+__all__ += ['soundex']
+
def _create_function(name, doc=""):
""" Create a function for aggregator by name"""
@@ -922,6 +924,7 @@ def trunc(date, format):
def size(col):
"""
Collection function: returns the length of the array or map stored in the column.
+
:param col: name of column or expression
>>> df = sqlContext.createDataFrame([([1, 2, 3],),([1],),([],)], ['data'])
@@ -932,6 +935,20 @@ def size(col):
return Column(sc._jvm.functions.size(_to_java_column(col)))
+@since(1.5)
+@ignore_unicode_prefix
+def soundex(col):
+    """
+    Returns the SoundEx encoding for a string
+
+    >>> df = sqlContext.createDataFrame([("Peters",),("Uhrbach",)], ['name'])
+    >>> df.select(soundex(df.name).alias("soundex")).collect()
+    [Row(soundex=u'P362'), Row(soundex=u'U612')]
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.soundex(_to_java_column(col)))
+
+
class UserDefinedFunction(object):
"""
User defined function in Python