aboutsummaryrefslogtreecommitdiff
path: root/sql/core
diff options
context:
space:
mode:
author: Tarek Auel <tarek.auel@gmail.com> 2015-06-29 11:57:19 -0700
committer: Davies Liu <davies@databricks.com> 2015-06-29 11:57:19 -0700
commit: a5c2961caaafd751f11bdd406bb6885443d7572e (patch)
tree: 8cdb6288d459f82e155e4510baa0a2523a76b6ad /sql/core
parent: 3664ee25f0a67de5ba76e9487a55a55216ae589f (diff)
download: spark-a5c2961caaafd751f11bdd406bb6885443d7572e.tar.gz
spark-a5c2961caaafd751f11bdd406bb6885443d7572e.tar.bz2
spark-a5c2961caaafd751f11bdd406bb6885443d7572e.zip
[SPARK-8235] [SQL] misc function sha / sha1
Jira: https://issues.apache.org/jira/browse/SPARK-8235 I added the support for sha1. If I understood rxin correctly, sha and sha1 should execute the same algorithm, shouldn't they? Please take a close look at the Python part. This is adopted from #6934 Author: Tarek Auel <tarek.auel@gmail.com> Author: Tarek Auel <tarek.auel@googlemail.com> Closes #6963 from tarekauel/SPARK-8235 and squashes the following commits: f064563 [Tarek Auel] change to shaHex 7ce3cdc [Tarek Auel] rely on automatic cast a1251d6 [Tarek Auel] Merge remote-tracking branch 'upstream/master' into SPARK-8235 68eb043 [Tarek Auel] added docstring be5aff1 [Tarek Auel] improved error message 7336c96 [Tarek Auel] added type check cf23a80 [Tarek Auel] simplified example ebf75ef [Tarek Auel] [SPARK-8301] updated the python documentation. Removed sha in python and scala 6d6ff0d [Tarek Auel] [SPARK-8233] added docstring ea191a9 [Tarek Auel] [SPARK-8233] fixed signature of python function. Added expected type to misc e3fd7c3 [Tarek Auel] SPARK[8235] added sha to the list of __all__ e5dad4e [Tarek Auel] SPARK[8235] sha / sha1
Diffstat (limited to 'sql/core')
-rw-r--r-- sql/core/src/main/scala/org/apache/spark/sql/functions.scala 16
-rw-r--r-- sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala 12
2 files changed, 28 insertions, 0 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
index 355ce0e342..ef92801548 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
@@ -1415,6 +1415,22 @@ object functions {
def md5(columnName: String): Column = md5(Column(columnName))
/**
+ * Calculates the SHA-1 digest of column `e` and returns it as a 40 character hex string.
+ *
+ * @group misc_funcs
+ * @since 1.5.0
+ */
+ def sha1(e: Column): Column = Sha1(e.expr)
+
+ /**
+ * Calculates the SHA-1 digest of the named column and returns it as a 40 character hex string.
+ *
+ * @group misc_funcs
+ * @since 1.5.0
+ */
+ def sha1(columnName: String): Column = sha1(Column(columnName))
+
+ /**
* Calculates the SHA-2 family of hash functions and returns the value as a hex string.
*
* @group misc_funcs
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala
index 8baed57a7f..abfd47c811 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala
@@ -144,6 +144,18 @@ class DataFrameFunctionsSuite extends QueryTest {
Row("902fbdd2b1df0c4f70b4a5d23525e932", "6ac1e56bc78f031059be7be854522c4c"))
}
+ test("misc sha1 function") {
+ val df = Seq(("ABC", "ABC".getBytes)).toDF("a", "b") // a: String, b: Array[Byte] of the same text
+ checkAnswer(
+ df.select(sha1($"a"), sha1("b")), // Column overload on "a", column-name overload on "b"
+ Row("3c01bdbb26f358bab27f267924aa2c9a03fcfdb8", "3c01bdbb26f358bab27f267924aa2c9a03fcfdb8"))
+
+ val dfEmpty = Seq(("", "".getBytes)).toDF("a", "b") // empty-input case
+ checkAnswer(
+ dfEmpty.selectExpr("sha1(a)", "sha1(b)"), // same function invoked through SQL expression strings
+ Row("da39a3ee5e6b4b0d3255bfef95601890afd80709", "da39a3ee5e6b4b0d3255bfef95601890afd80709"))
+ }
+
test("misc sha2 function") {
val df = Seq(("ABC", Array[Byte](1, 2, 3, 4, 5, 6))).toDF("a", "b")
checkAnswer(