diff options
author | Davies Liu <davies@databricks.com> | 2015-07-02 15:43:02 -0700 |
---|---|---|
committer | Reynold Xin <rxin@databricks.com> | 2015-07-02 15:43:02 -0700 |
commit | fc7aebd94a3c09657fc4dbded0997ed068304e0a (patch) | |
tree | fd7e7ec8c29f96641a1706000542f5a64c48f8a7 /python | |
parent | 7d9cc9673e47227f58411ca1f4e647cd8233a219 (diff) | |
download | spark-fc7aebd94a3c09657fc4dbded0997ed068304e0a.tar.gz spark-fc7aebd94a3c09657fc4dbded0997ed068304e0a.tar.bz2 spark-fc7aebd94a3c09657fc4dbded0997ed068304e0a.zip |
[SPARK-8784] [SQL] Add Python API for hex and unhex
Also improve the performance of hex/unhex
Author: Davies Liu <davies@databricks.com>
Closes #7181 from davies/hex and squashes the following commits:
f032fbb [Davies Liu] Merge branch 'hex' of github.com:davies/spark into hex
49e325f [Davies Liu] Merge branch 'master' of github.com:apache/spark into hex
b31fc9a [Davies Liu] Update math.scala
25156b7 [Davies Liu] address comments and fix test
c3af78c [Davies Liu] address comments
1a24082 [Davies Liu] Add Python API for hex and unhex
Diffstat (limited to 'python')
-rw-r--r-- | python/pyspark/sql/functions.py | 28 |
1 files changed, 28 insertions, 0 deletions
@ignore_unicode_prefix
@since(1.5)
def hex(col):
    """Computes the hexadecimal string value of the given column. The input
    column may be of StringType, BinaryType, IntegerType or LongType.

    >>> sqlContext.createDataFrame([('ABC', 3)], ['a', 'b']).select(hex('a'), hex('b')).collect()
    [Row(hex(a)=u'414243', hex(b)=u'3')]
    """
    # Delegate to the JVM-side org.apache.spark.sql.functions.hex and wrap
    # the resulting Java column in a Python Column.
    context = SparkContext._active_spark_context
    return Column(context._jvm.functions.hex(_to_java_column(col)))


@ignore_unicode_prefix
@since(1.5)
def unhex(col):
    """Inverse of hex. Interprets each pair of characters as a hexadecimal number
    and converts to the byte representation of number.

    >>> sqlContext.createDataFrame([('414243',)], ['a']).select(unhex('a')).collect()
    [Row(unhex(a)=bytearray(b'ABC'))]
    """
    # Delegate to the JVM-side org.apache.spark.sql.functions.unhex and wrap
    # the resulting Java column in a Python Column.
    context = SparkContext._active_spark_context
    return Column(context._jvm.functions.unhex(_to_java_column(col)))