aboutsummaryrefslogtreecommitdiff
path: root/sql/core
diff options
context:
space:
mode:
authorDongjoon Hyun <dongjoon@apache.org>2016-07-06 10:54:43 +0800
committerWenchen Fan <wenchen@databricks.com>2016-07-06 10:54:43 +0800
commitd0d28507cacfca5919dbfb4269892d58b62e8662 (patch)
tree474edb76ba7642ca385c743a5aad5bf437f46ef4 /sql/core
parentfdde7d0aa0ef69d0e9a88cf712601bba1d5b0706 (diff)
downloadspark-d0d28507cacfca5919dbfb4269892d58b62e8662.tar.gz
spark-d0d28507cacfca5919dbfb4269892d58b62e8662.tar.bz2
spark-d0d28507cacfca5919dbfb4269892d58b62e8662.zip
[SPARK-16286][SQL] Implement stack table generating function
## What changes were proposed in this pull request? This PR implements the `stack` table-generating function. ## How was this patch tested? Passes the Jenkins tests, including new test cases. Author: Dongjoon Hyun <dongjoon@apache.org> Closes #14033 from dongjoon-hyun/SPARK-16286.
Diffstat (limited to 'sql/core')
-rw-r--r--sql/core/src/test/scala/org/apache/spark/sql/GeneratorFunctionSuite.scala53
1 files changed, 53 insertions, 0 deletions
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/GeneratorFunctionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/GeneratorFunctionSuite.scala
index d8a0aa4d52..aedc0a8d6f 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/GeneratorFunctionSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/GeneratorFunctionSuite.scala
@@ -23,6 +23,59 @@ import org.apache.spark.sql.test.SharedSQLContext
class GeneratorFunctionSuite extends QueryTest with SharedSQLContext {
import testImplicits._
+ test("stack") { // stack(n, v1, ..., vk): lays the k values out over n rows, padding with null
+ val df = spark.range(1) // single-row input, so each query below generates its rows once
+
+ // An empty DataFrame has no input rows, so stack produces no output at all
+ checkAnswer(spark.emptyDataFrame.selectExpr("stack(1, 1, 2, 3)"), Nil)
+
+ // Rows & columns: values fill the n rows in order; missing trailing cells become null
+ checkAnswer(df.selectExpr("stack(1, 1, 2, 3)"), Row(1, 2, 3) :: Nil)
+ checkAnswer(df.selectExpr("stack(2, 1, 2, 3)"), Row(1, 2) :: Row(3, null) :: Nil)
+ checkAnswer(df.selectExpr("stack(3, 1, 2, 3)"), Row(1) :: Row(2) :: Row(3) :: Nil)
+ checkAnswer(df.selectExpr("stack(4, 1, 2, 3)"), Row(1) :: Row(2) :: Row(3) :: Row(null) :: Nil)
+
+ // Different output columns may have different types (integer, fractional, string here)
+ checkAnswer(df.selectExpr("stack(3, 1, 1.1, 'a', 2, 2.2, 'b', 3, 3.3, 'c')"),
+ Row(1, 1.1, "a") :: Row(2, 2.2, "b") :: Row(3, 3.3, "c") :: Nil)
+
+ // The generated rows are repeated once for every input row
+ checkAnswer(spark.range(2).selectExpr("stack(2, 1, 2, 3)"),
+ Row(1, 2) :: Row(3, null) :: Row(1, 2) :: Row(3, null) :: Nil)
+
+ // The first argument must be a positive constant integer: 1.1 and -1 are both rejected.
+ val m = intercept[AnalysisException] {
+ df.selectExpr("stack(1.1, 1, 2, 3)")
+ }.getMessage
+ assert(m.contains("The number of rows must be a positive constant integer."))
+ val m2 = intercept[AnalysisException] {
+ df.selectExpr("stack(-1, 1, 2, 3)")
+ }.getMessage
+ assert(m2.contains("The number of rows must be a positive constant integer."))
+
+ // Values that land in the same output column must have the same type; a mismatch is an error.
+ val m3 = intercept[AnalysisException] {
+ df.selectExpr("stack(2, 1, '2.2')")
+ }.getMessage
+ assert(m3.contains("data type mismatch: Argument 1 (IntegerType) != Argument 2 (StringType)"))
+
+ // The value arguments may be column references instead of literals
+ val df2 = Seq((2, 1, 2, 3)).toDF("n", "a", "b", "c")
+ checkAnswer(df2.selectExpr("stack(2, a, b, c)"), Row(1, 2) :: Row(3, null) :: Nil)
+
+ val m4 = intercept[AnalysisException] { // a column reference is not accepted as the row count
+ df2.selectExpr("stack(n, a, b, c)")
+ }.getMessage
+ assert(m4.contains("The number of rows must be a positive constant integer."))
+
+ val df3 = Seq((2, 1, 2.0)).toDF("n", "a", "b") // a is an Int column, b is a Double column
+ val m5 = intercept[AnalysisException] { // a and b would share one output column
+ df3.selectExpr("stack(2, a, b)")
+ }.getMessage
+ assert(m5.contains("data type mismatch: Argument 1 (IntegerType) != Argument 2 (DoubleType)"))
+
+ }
+
test("single explode") {
val df = Seq((1, Seq(1, 2, 3))).toDF("a", "intList")
checkAnswer(