diff options
author | Wenchen Fan <wenchen@databricks.com> | 2016-01-05 10:23:36 -0800 |
---|---|---|
committer | Reynold Xin <rxin@databricks.com> | 2016-01-05 10:23:36 -0800 |
commit | 76768337beec6842660db7522ad15c25ee66d346 (patch) | |
tree | d58976552a906917b9126712e75a7a2136fbd01c /python/pyspark/sql | |
parent | 9a6ba7e2c538124f539b50512a7f95059f81cc16 (diff) | |
download | spark-76768337beec6842660db7522ad15c25ee66d346.tar.gz spark-76768337beec6842660db7522ad15c25ee66d346.tar.bz2 spark-76768337beec6842660db7522ad15c25ee66d346.zip |
[SPARK-12480][FOLLOW-UP] use a single column vararg for hash
address comments in #10435
This makes the API easier to use when users programmatically generate the call to hash, and they will get an analysis exception if the arguments to hash are empty.
Author: Wenchen Fan <wenchen@databricks.com>
Closes #10588 from cloud-fan/hash.
Diffstat (limited to 'python/pyspark/sql')
-rw-r--r-- | python/pyspark/sql/functions.py | 12 |
1 files changed, 12 insertions, 0 deletions
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index 7c15e38458..b0390cb994 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -1018,6 +1018,18 @@ def sha2(col, numBits): return Column(jc) +@since(2.0) +def hash(*cols): + """Calculates the hash code of given columns, and returns the result as a int column. + + >>> sqlContext.createDataFrame([('ABC',)], ['a']).select(hash('a').alias('hash')).collect() + [Row(hash=1358996357)] + """ + sc = SparkContext._active_spark_context + jc = sc._jvm.functions.hash(_to_seq(sc, cols, _to_java_column)) + return Column(jc) + + # ---------------------- String/Binary functions ------------------------------ _string_functions = { |