aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Spiro Michaylov <spiro@michaylov.com> 2015-07-03 20:15:58 -0700
committer Reynold Xin <rxin@databricks.com> 2015-07-03 20:15:58 -0700
commit e92c24d37cae54634e7af20cbfe313d023786f87 (patch)
tree db154ca834049bd07b267da0f282911c0d8d9b94
parent f0fac2aa80da7c739b88043571e5d49ba40f9413 (diff)
download spark-e92c24d37cae54634e7af20cbfe313d023786f87.tar.gz
spark-e92c24d37cae54634e7af20cbfe313d023786f87.tar.bz2
spark-e92c24d37cae54634e7af20cbfe313d023786f87.zip
[SPARK-8810] [SQL] Added several UDF unit tests for Spark SQL
One test for each of the GROUP BY, WHERE and HAVING clauses, and one that combines all three with an additional UDF in the SELECT.

(Since this is my first attempt at contributing to SPARK, meta-level guidance on anything I've screwed up would be greatly appreciated, whether important or minor.)

Author: Spiro Michaylov <spiro@michaylov.com>

Closes #7207 from spirom/udf-test-branch and squashes the following commits:

6bbba9e [Spiro Michaylov] Responded to review comments on UDF unit tests
1a3c5ff [Spiro Michaylov] Added several UDF unit tests for Spark SQL
-rw-r--r-- sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala 70
1 files changed, 70 insertions, 0 deletions
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala
index 703a34c47e..8e5da3ac14 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala
@@ -82,6 +82,76 @@ class UDFSuite extends QueryTest {
assert(ctx.sql("SELECT strLenScala('test', 1)").head().getInt(0) === 5)
}
+ test("UDF in a WHERE") { // a registered UDF used directly as the WHERE predicate
+ ctx.udf.register("oneArgFilter", (n: Int) => { n > 80 }) // true only for n > 80
+
+ val df = ctx.sparkContext.parallelize(
+ (1 to 100).map(i => TestData(i, i.toString))).toDF() // one TestData row per i in 1..100
+ df.registerTempTable("integerData") // makes df queryable by name in the SQL below
+
+ val result =
+ ctx.sql("SELECT * FROM integerData WHERE oneArgFilter(key)") // NOTE(review): assumes `key` is TestData's first field - confirm
+ assert(result.count() === 20) // expects exactly the rows with key 81..100
+ }
+
+ test("UDF in a HAVING") { // a UDF applied to an aggregate alias in HAVING
+ ctx.udf.register("havingFilter", (n: Long) => { n > 5 }) // true only for n > 5
+
+ val df = Seq(("red", 1), ("red", 2), ("blue", 10),
+ ("green", 100), ("green", 200)).toDF("g", "v") // v sums: red 3, blue 10, green 300
+ df.registerTempTable("groupData") // makes df queryable by name in the SQL below
+
+ val result =
+ ctx.sql(
+ """
+ | SELECT g, SUM(v) as s
+ | FROM groupData
+ | GROUP BY g
+ | HAVING havingFilter(s)
+ """.stripMargin)
+
+ assert(result.count() === 2) // blue and green have sums above 5; red does not
+ }
+
+ test("UDF in a GROUP BY") { // a UDF result used as the grouping key
+ ctx.udf.register("groupFunction", (n: Int) => { n > 10 }) // buckets values by n > 10
+
+ val df = Seq(("red", 1), ("red", 2), ("blue", 10),
+ ("green", 100), ("green", 200)).toDF("g", "v") // v <= 10: 1, 2, 10; v > 10: 100, 200
+ df.registerTempTable("groupData") // makes df queryable by name in the SQL below
+
+ val result =
+ ctx.sql(
+ """
+ | SELECT SUM(v)
+ | FROM groupData
+ | GROUP BY groupFunction(v)
+ """.stripMargin)
+ assert(result.count() === 2) // one output row per bucket (false and true)
+ }
+
+ test("UDFs everywhere") { // UDFs in SELECT, WHERE, GROUP BY and HAVING of one query
+ ctx.udf.register("groupFunction", (n: Int) => { n > 10 }) // grouping key: n > 10
+ ctx.udf.register("havingFilter", (n: Long) => { n > 2000 }) // true only for n > 2000
+ ctx.udf.register("whereFilter", (n: Int) => { n < 150 }) // true only for n < 150
+ ctx.udf.register("timesHundred", (n: Long) => { n * 100 }) // scales n by 100
+
+ val df = Seq(("red", 1), ("red", 2), ("blue", 10),
+ ("green", 100), ("green", 200)).toDF("g", "v") // whereFilter rejects only v = 200
+ df.registerTempTable("groupData") // makes df queryable by name in the SQL below
+
+ val result =
+ ctx.sql(
+ """
+ | SELECT timesHundred(SUM(v)) as v100
+ | FROM groupData
+ | WHERE whereFilter(v)
+ | GROUP BY groupFunction(v)
+ | HAVING havingFilter(v100)
+ """.stripMargin)
+ assert(result.count() === 1) // v100 is 1300 and 10000; only 10000 > 2000
+ }
+
test("struct UDF") {
ctx.udf.register("returnStruct", (f1: String, f2: String) => FunctionResult(f1, f2))