aboutsummaryrefslogtreecommitdiff
path: root/python
diff options
context:
space:
mode:
authorLiang-Chi Hsieh <viirya@gmail.com>2015-06-25 22:07:37 -0700
committerDavies Liu <davies@databricks.com>2015-06-25 22:07:37 -0700
commit47c874babe7779c7a2f32e0b891503ef6bebcab0 (patch)
tree40a848f19d98db6c01cd2dccfe716a36f5c491fd /python
parentc392a9efabcb1ec2a2c53f001ecdae33c245ba35 (diff)
downloadspark-47c874babe7779c7a2f32e0b891503ef6bebcab0.tar.gz
spark-47c874babe7779c7a2f32e0b891503ef6bebcab0.tar.bz2
spark-47c874babe7779c7a2f32e0b891503ef6bebcab0.zip
[SPARK-8237] [SQL] Add misc function sha2
JIRA: https://issues.apache.org/jira/browse/SPARK-8237 Author: Liang-Chi Hsieh <viirya@gmail.com> Closes #6934 from viirya/expr_sha2 and squashes the following commits: 35e0bb3 [Liang-Chi Hsieh] For comments. 68b5284 [Liang-Chi Hsieh] Merge remote-tracking branch 'upstream/master' into expr_sha2 8573aff [Liang-Chi Hsieh] Remove unnecessary Product. ee61e06 [Liang-Chi Hsieh] Merge remote-tracking branch 'upstream/master' into expr_sha2 59e41aa [Liang-Chi Hsieh] Add misc function: sha2.
Diffstat (limited to 'python')
-rw-r--r--python/pyspark/sql/functions.py19
1 file changed, 19 insertions, 0 deletions
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index cfa87aeea1..7d3d036161 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -42,6 +42,7 @@ __all__ = [
'monotonicallyIncreasingId',
'rand',
'randn',
+ 'sha2',
'sparkPartitionId',
'struct',
'udf',
@@ -363,6 +364,24 @@ def randn(seed=None):
return Column(jc)
+@ignore_unicode_prefix
+@since(1.5)
+def sha2(col, numBits):
+ """Returns the hex string result of SHA-2 family of hash functions (SHA-224, SHA-256, SHA-384,
+ and SHA-512). The numBits indicates the desired bit length of the result, which must have a
+ value of 224, 256, 384, 512, or 0 (which is equivalent to 256).
+
+ >>> digests = df.select(sha2(df.name, 256).alias('s')).collect()
+ >>> digests[0]
+ Row(s=u'3bc51062973c458d5a6f2d8d64a023246354ad7e064b1e4e009ec8a0699a3043')
+ >>> digests[1]
+ Row(s=u'cd9fb1e148ccd8442e5aa74904cc73bf6fb54d1d54d333bd596aa9bb4bb4e961')
+ """
+ sc = SparkContext._active_spark_context
+ jc = sc._jvm.functions.sha2(_to_java_column(col), numBits)
+ return Column(jc)
+
+
@since(1.4)
def sparkPartitionId():
"""A column for partition ID of the Spark task.