aboutsummaryrefslogtreecommitdiff
path: root/python
diff options
context:
space:
mode:
authorWenchen Fan <wenchen@databricks.com>2016-01-05 10:23:36 -0800
committerReynold Xin <rxin@databricks.com>2016-01-05 10:23:36 -0800
commit76768337beec6842660db7522ad15c25ee66d346 (patch)
treed58976552a906917b9126712e75a7a2136fbd01c /python
parent9a6ba7e2c538124f539b50512a7f95059f81cc16 (diff)
downloadspark-76768337beec6842660db7522ad15c25ee66d346.tar.gz
spark-76768337beec6842660db7522ad15c25ee66d346.tar.bz2
spark-76768337beec6842660db7522ad15c25ee66d346.zip
[SPARK-12480][FOLLOW-UP] use a single column vararg for hash
Address comments in #10435. This makes the API easier to use when users programmatically generate the call to hash, and they will get an analysis exception if the arguments to hash are empty. Author: Wenchen Fan <wenchen@databricks.com> Closes #10588 from cloud-fan/hash.
Diffstat (limited to 'python')
-rw-r--r--python/pyspark/sql/functions.py12
1 files changed, 12 insertions, 0 deletions
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index 7c15e38458..b0390cb994 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -1018,6 +1018,18 @@ def sha2(col, numBits):
return Column(jc)
+@since(2.0)
+def hash(*cols):
+ """Calculates the hash code of the given columns, and returns the result as an int column.
+
+ :param cols: the columns to hash (the example below passes a column name);
+ each one is converted with ``_to_java_column`` before the JVM call.
+ :return: a :class:`Column` wrapping the result of the JVM ``functions.hash``.
+
+ NOTE(review): calling this with no columns is expected to be rejected with an
+ analysis exception on the JVM side -- not enforced here; confirm.
+
+ >>> sqlContext.createDataFrame([('ABC',)], ['a']).select(hash('a').alias('hash')).collect()
+ [Row(hash=1358996357)]
+ """
+ sc = SparkContext._active_spark_context
+ # Pack every column into one sequence: the JVM-side hash takes a single column vararg.
+ jc = sc._jvm.functions.hash(_to_seq(sc, cols, _to_java_column))
+ return Column(jc)
+
+
# ---------------------- String/Binary functions ------------------------------
_string_functions = {