about | summary | refs | log | tree | commit | diff
path: root/python/pyspark
diff options
context:
space:
mode:
Diffstat (limited to 'python/pyspark')
-rw-r--r-- python/pyspark/sql/functions.py 14
1 files changed, 14 insertions, 0 deletions
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index 69e563ef36..49dd0332af 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -325,6 +325,20 @@ def explode(col):
@ignore_unicode_prefix
@since(1.5)
+def levenshtein(left, right):
+ """Computes the Levenshtein distance between the string values of columns `left` and `right`.
+
+ >>> df0 = sqlContext.createDataFrame([('kitten', 'sitting',)], ['l', 'r'])
+ >>> df0.select(levenshtein('l', 'r').alias('d')).collect()
+ [Row(d=3)]
+ """
+ sc = SparkContext._active_spark_context  # the JVM-side functions are reached through the active context
+ jc = sc._jvm.functions.levenshtein(_to_java_column(left), _to_java_column(right))
+ return Column(jc)  # wrap the JVM result in a Python Column
+
+
+@ignore_unicode_prefix
+@since(1.5)
def md5(col):
"""Calculates the MD5 digest and returns the value as a 32 character hex string.