about summary refs log tree commit diff
path: root/python
diff options
context:
space:
mode:
author    Davies Liu <davies@databricks.com>  2015-07-02 15:43:02 -0700
committer Reynold Xin <rxin@databricks.com>  2015-07-02 15:43:02 -0700
commit    fc7aebd94a3c09657fc4dbded0997ed068304e0a (patch)
tree      fd7e7ec8c29f96641a1706000542f5a64c48f8a7 /python
parent    7d9cc9673e47227f58411ca1f4e647cd8233a219 (diff)
download  spark-fc7aebd94a3c09657fc4dbded0997ed068304e0a.tar.gz
          spark-fc7aebd94a3c09657fc4dbded0997ed068304e0a.tar.bz2
          spark-fc7aebd94a3c09657fc4dbded0997ed068304e0a.zip
[SPARK-8784] [SQL] Add Python API for hex and unhex
Also improve the performance of hex/unhex Author: Davies Liu <davies@databricks.com> Closes #7181 from davies/hex and squashes the following commits: f032fbb [Davies Liu] Merge branch 'hex' of github.com:davies/spark into hex 49e325f [Davies Liu] Merge branch 'master' of github.com:apache/spark into hex b31fc9a [Davies Liu] Update math.scala 25156b7 [Davies Liu] address comments and fix test c3af78c [Davies Liu] address commments 1a24082 [Davies Liu] Add Python API for hex and unhex
Diffstat (limited to 'python')
-rw-r--r--  python/pyspark/sql/functions.py | 28
1 file changed, 28 insertions, 0 deletions
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index 12263e6a75..8a470ce19b 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -383,6 +383,34 @@ def randn(seed=None):
@ignore_unicode_prefix
@since(1.5)
def hex(col):
    """Returns the hexadecimal string representation of the given column.
    The input column may be of StringType, BinaryType, IntegerType or
    LongType.

    >>> sqlContext.createDataFrame([('ABC', 3)], ['a', 'b']).select(hex('a'), hex('b')).collect()
    [Row(hex(a)=u'414243', hex(b)=u'3')]
    """
    # Delegate to the JVM-side implementation via the Py4J gateway and
    # wrap the resulting Java column in a Python Column.
    ctx = SparkContext._active_spark_context
    return Column(ctx._jvm.functions.hex(_to_java_column(col)))

+
@ignore_unicode_prefix
@since(1.5)
def unhex(col):
    """The inverse of hex: interprets each pair of characters in the column
    as a hexadecimal number and returns the byte representation of that
    number.

    >>> sqlContext.createDataFrame([('414243',)], ['a']).select(unhex('a')).collect()
    [Row(unhex(a)=bytearray(b'ABC'))]
    """
    # Forward to the Scala implementation through the active JVM gateway.
    ctx = SparkContext._active_spark_context
    return Column(ctx._jvm.functions.unhex(_to_java_column(col)))
+
+
+@ignore_unicode_prefix
+@since(1.5)
def sha1(col):
"""Returns the hex string result of SHA-1.