[SPARK-7320] [SQL] Add Cube / Rollup for dataframe

Add `cube` & `rollup` for DataFrame For example: ```scala testData.rollup($"a" + $"b", $"b").agg(sum($"a" - $"b")) testData.cube($"a" + $"b", $"b").agg(sum($"a" - $"b")) ``` Author: Cheng Hao <hao.cheng@intel.com> Closes #6257 from chenghao-intel/rollup and squashes the following commits: 7302319 [Cheng Hao] cancel the implicit keyword a66e38f [Cheng Hao] remove the unnecessary code changes a2869d4 [Cheng Hao] update the code as comments c441777 [Cheng Hao] update the code as suggested 84c9564 [Cheng Hao] Remove the CubedData & RollupedData 279584c [Cheng Hao] hiden the CubedData & RollupedData ef357e1 [Cheng Hao] Add Cube / Rollup for dataframe
author: Cheng Hao <hao.cheng@intel.com> 2015-05-20 19:09:47 +0800
committer: Cheng Lian <lian@databricks.com> 2015-05-20 19:09:47 +0800
commit: 09265ad7c85c6de6b568ec329daad632d4a79fa3 (patch)
tree: 456111dab186b98723d6c443624ee64a42237ad2 /sql/hive/src
parent: b3abf0b8d9bca13840eb759953d76905c2ba9b8a (diff)
download: spark-09265ad7c85c6de6b568ec329daad632d4a79fa3.tar.gz
spark-09265ad7c85c6de6b568ec329daad632d4a79fa3.tar.bz2
spark-09265ad7c85c6de6b568ec329daad632d4a79fa3.zip
1 files changed, 62 insertions, 0 deletions
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDataFrameAnalyticsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDataFrameAnalyticsSuite.scala
new file mode 100644
index 0000000000..3ad05f4825
--- /dev/null
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDataFrameAnalyticsSuite.scala
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.hive
+
+import org.apache.spark.sql.QueryTest
+import org.apache.spark.sql.functions._
+import org.apache.spark.sql.hive.test.TestHive
+import org.apache.spark.sql.hive.test.TestHive._
+import org.apache.spark.sql.hive.test.TestHive.implicits._
+
+case class TestData2Int(a: Int, b: Int)
+
+// TODO ideally we should put the test suite into the package `sql`, as
+// `hive` package is optional in compiling, however, `SQLContext.sql` doesn't
+// support the `cube` or `rollup` yet.
+class HiveDataFrameAnalyticsSuite extends QueryTest {
+  val testData =
+    TestHive.sparkContext.parallelize(
+      TestData2Int(1, 2) ::
+        TestData2Int(2, 4) :: Nil).toDF()
+
+  testData.registerTempTable("mytable")
+
+  test("rollup") {
+    checkAnswer(
+      testData.rollup($"a" + $"b", $"b").agg(sum($"a" - $"b")),
+      sql("select a + b, b, sum(a - b) from mytable group by a + b, b with rollup").collect()
+    )
+
+    checkAnswer(
+      testData.rollup("a", "b").agg(sum("b")),
+      sql("select a, b, sum(b) from mytable group by a, b with rollup").collect()
+    )
+  }
+
+  test("cube") {
+    checkAnswer(
+      testData.cube($"a" + $"b", $"b").agg(sum($"a" - $"b")),
+      sql("select a + b, b, sum(a - b) from mytable group by a + b, b with cube").collect()
+    )
+
+    checkAnswer(
+      testData.cube("a", "b").agg(sum("b")),
+      sql("select a, b, sum(b) from mytable group by a, b with cube").collect()
+    )
+  }
+}
author	Cheng Hao <hao.cheng@intel.com>	2015-05-20 19:09:47 +0800
committer	Cheng Lian <lian@databricks.com>	2015-05-20 19:09:47 +0800
commit	09265ad7c85c6de6b568ec329daad632d4a79fa3 (patch)
tree	456111dab186b98723d6c443624ee64a42237ad2 /sql/hive/src
parent	b3abf0b8d9bca13840eb759953d76905c2ba9b8a (diff)
download	spark-09265ad7c85c6de6b568ec329daad632d4a79fa3.tar.gz spark-09265ad7c85c6de6b568ec329daad632d4a79fa3.tar.bz2 spark-09265ad7c85c6de6b568ec329daad632d4a79fa3.zip