about summary refs log tree commit diff
path: root/python
diff options
context:
space:
mode:
author    Davies Liu <davies@databricks.com>  2015-07-02 15:43:02 -0700
committer Reynold Xin <rxin@databricks.com>  2015-07-02 15:43:02 -0700
commit    fc7aebd94a3c09657fc4dbded0997ed068304e0a (patch)
tree      fd7e7ec8c29f96641a1706000542f5a64c48f8a7 /python
parent    7d9cc9673e47227f58411ca1f4e647cd8233a219 (diff)
download  spark-fc7aebd94a3c09657fc4dbded0997ed068304e0a.tar.gz
          spark-fc7aebd94a3c09657fc4dbded0997ed068304e0a.tar.bz2
          spark-fc7aebd94a3c09657fc4dbded0997ed068304e0a.zip
[SPARK-8784] [SQL] Add Python API for hex and unhex
Also improve the performance of hex/unhex Author: Davies Liu <davies@databricks.com> Closes #7181 from davies/hex and squashes the following commits: f032fbb [Davies Liu] Merge branch 'hex' of github.com:davies/spark into hex 49e325f [Davies Liu] Merge branch 'master' of github.com:apache/spark into hex b31fc9a [Davies Liu] Update math.scala 25156b7 [Davies Liu] address comments and fix test c3af78c [Davies Liu] address commments 1a24082 [Davies Liu] Add Python API for hex and unhex
Diffstat (limited to 'python')
-rw-r--r--  python/pyspark/sql/functions.py | 28
1 file changed, 28 insertions, 0 deletions
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index 12263e6a75..8a470ce19b 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -383,6 +383,34 @@ def randn(seed=None):
@ignore_unicode_prefix
@since(1.5)
def hex(col):
    """Returns the hexadecimal string representation of the given column.
    The input column may be of StringType, BinaryType, IntegerType or
    LongType.

    >>> sqlContext.createDataFrame([('ABC', 3)], ['a', 'b']).select(hex('a'), hex('b')).collect()
    [Row(hex(a)=u'414243', hex(b)=u'3')]
    """
    # Delegate to the JVM-side implementation via the Py4J gateway and
    # wrap the resulting Java column in a Python Column.
    ctx = SparkContext._active_spark_context
    return Column(ctx._jvm.functions.hex(_to_java_column(col)))

+
@ignore_unicode_prefix
@since(1.5)
def unhex(col):
    """The inverse of hex: interprets each pair of characters in the column
    as a hexadecimal number and returns the byte representation of that
    number.

    >>> sqlContext.createDataFrame([('414243',)], ['a']).select(unhex('a')).collect()
    [Row(unhex(a)=bytearray(b'ABC'))]
    """
    # Forward to the Scala implementation through the active JVM gateway.
    ctx = SparkContext._active_spark_context
    return Column(ctx._jvm.functions.unhex(_to_java_column(col)))
+
+
+@ignore_unicode_prefix
+@since(1.5)
def sha1(col):
"""Returns the hex string result of SHA-1.