[SPARK-8810] [SQL] Added several UDF unit tests for Spark SQL

One test for each of the GROUP BY, WHERE and HAVING clauses, and one that combines all three with an additional UDF in the SELECT. (Since this is my first attempt at contributing to SPARK, meta-level guidance on anything I've screwed up would be greatly appreciated, whether important or minor.) Author: Spiro Michaylov <spiro@michaylov.com> Closes #7207 from spirom/udf-test-branch and squashes the following commits: 6bbba9e [Spiro Michaylov] Responded to review comments on UDF unit tests 1a3c5ff [Spiro Michaylov] Added several UDF unit tests for Spark SQL
author: Spiro Michaylov <spiro@michaylov.com> 2015-07-03 20:15:58 -0700
committer: Reynold Xin <rxin@databricks.com> 2015-07-03 20:15:58 -0700
commit: e92c24d37cae54634e7af20cbfe313d023786f87 (patch)
tree: db154ca834049bd07b267da0f282911c0d8d9b94
parent: f0fac2aa80da7c739b88043571e5d49ba40f9413 (diff)
download: spark-e92c24d37cae54634e7af20cbfe313d023786f87.tar.gz
spark-e92c24d37cae54634e7af20cbfe313d023786f87.tar.bz2
spark-e92c24d37cae54634e7af20cbfe313d023786f87.zip
1 files changed, 70 insertions, 0 deletions
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala
index 703a34c47e..8e5da3ac14 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala
@@ -82,6 +82,76 @@ class UDFSuite extends QueryTest {
     assert(ctx.sql("SELECT strLenScala('test', 1)").head().getInt(0) === 5)
   }
 
+  test("UDF in a WHERE") {
+    ctx.udf.register("oneArgFilter", (n: Int) => { n > 80 })
+
+    val df = ctx.sparkContext.parallelize(
+      (1 to 100).map(i => TestData(i, i.toString))).toDF()
+    df.registerTempTable("integerData")
+
+    val result =
+      ctx.sql("SELECT * FROM integerData WHERE oneArgFilter(key)")
+    assert(result.count() === 20)
+  }
+
+  test("UDF in a HAVING") {
+    ctx.udf.register("havingFilter", (n: Long) => { n > 5 })
+
+    val df = Seq(("red", 1), ("red", 2), ("blue", 10),
+      ("green", 100), ("green", 200)).toDF("g", "v")
+    df.registerTempTable("groupData")
+
+    val result =
+      ctx.sql(
+        """
+         | SELECT g, SUM(v) as s
+         | FROM groupData
+         | GROUP BY g
+         | HAVING havingFilter(s)
+        """.stripMargin)
+
+    assert(result.count() === 2)
+  }
+
+  test("UDF in a GROUP BY") {
+    ctx.udf.register("groupFunction", (n: Int) => { n > 10 })
+
+    val df = Seq(("red", 1), ("red", 2), ("blue", 10),
+      ("green", 100), ("green", 200)).toDF("g", "v")
+    df.registerTempTable("groupData")
+
+    val result =
+      ctx.sql(
+        """
+         | SELECT SUM(v)
+         | FROM groupData
+         | GROUP BY groupFunction(v)
+        """.stripMargin)
+    assert(result.count() === 2)
+  }
+
+  test("UDFs everywhere") {
+    ctx.udf.register("groupFunction", (n: Int) => { n > 10 })
+    ctx.udf.register("havingFilter", (n: Long) => { n > 2000 })
+    ctx.udf.register("whereFilter", (n: Int) => { n < 150 })
+    ctx.udf.register("timesHundred", (n: Long) => { n * 100 })
+
+    val df = Seq(("red", 1), ("red", 2), ("blue", 10),
+      ("green", 100), ("green", 200)).toDF("g", "v")
+    df.registerTempTable("groupData")
+
+    val result =
+      ctx.sql(
+        """
+         | SELECT timesHundred(SUM(v)) as v100
+         | FROM groupData
+         | WHERE whereFilter(v)
+         | GROUP BY groupFunction(v)
+         | HAVING havingFilter(v100)
+        """.stripMargin)
+    assert(result.count() === 1)
+  }
+
   test("struct UDF") {
     ctx.udf.register("returnStruct", (f1: String, f2: String) => FunctionResult(f1, f2))
author	Spiro Michaylov <spiro@michaylov.com>	2015-07-03 20:15:58 -0700
committer	Reynold Xin <rxin@databricks.com>	2015-07-03 20:15:58 -0700
commit	e92c24d37cae54634e7af20cbfe313d023786f87 (patch)
tree	db154ca834049bd07b267da0f282911c0d8d9b94
parent	f0fac2aa80da7c739b88043571e5d49ba40f9413 (diff)
download	spark-e92c24d37cae54634e7af20cbfe313d023786f87.tar.gz spark-e92c24d37cae54634e7af20cbfe313d023786f87.tar.bz2 spark-e92c24d37cae54634e7af20cbfe313d023786f87.zip