aboutsummaryrefslogtreecommitdiff
path: root/sql/hive
diff options
context:
space:
mode:
authorCheng Hao <hao.cheng@intel.com>2015-05-20 19:09:47 +0800
committerCheng Lian <lian@databricks.com>2015-05-20 19:09:47 +0800
commit09265ad7c85c6de6b568ec329daad632d4a79fa3 (patch)
tree456111dab186b98723d6c443624ee64a42237ad2 /sql/hive
parentb3abf0b8d9bca13840eb759953d76905c2ba9b8a (diff)
downloadspark-09265ad7c85c6de6b568ec329daad632d4a79fa3.tar.gz
spark-09265ad7c85c6de6b568ec329daad632d4a79fa3.tar.bz2
spark-09265ad7c85c6de6b568ec329daad632d4a79fa3.zip
[SPARK-7320] [SQL] Add Cube / Rollup for dataframe
Add `cube` & `rollup` for DataFrame. For example: ```scala testData.rollup($"a" + $"b", $"b").agg(sum($"a" - $"b")) testData.cube($"a" + $"b", $"b").agg(sum($"a" - $"b")) ``` Author: Cheng Hao <hao.cheng@intel.com> Closes #6257 from chenghao-intel/rollup and squashes the following commits: 7302319 [Cheng Hao] cancel the implicit keyword a66e38f [Cheng Hao] remove the unnecessary code changes a2869d4 [Cheng Hao] update the code as comments c441777 [Cheng Hao] update the code as suggested 84c9564 [Cheng Hao] Remove the CubedData & RollupedData 279584c [Cheng Hao] hiden the CubedData & RollupedData ef357e1 [Cheng Hao] Add Cube / Rollup for dataframe
Diffstat (limited to 'sql/hive')
-rw-r--r--sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDataFrameAnalyticsSuite.scala62
1 files changed, 62 insertions, 0 deletions
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDataFrameAnalyticsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDataFrameAnalyticsSuite.scala
new file mode 100644
index 0000000000..3ad05f4825
--- /dev/null
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDataFrameAnalyticsSuite.scala
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.hive
+
+import org.apache.spark.sql.QueryTest
+import org.apache.spark.sql.functions._
+import org.apache.spark.sql.hive.test.TestHive
+import org.apache.spark.sql.hive.test.TestHive._
+import org.apache.spark.sql.hive.test.TestHive.implicits._
+
+/**
+ * Two-integer record used to build the test fixture in this file; its fields
+ * `a` and `b` are referenced by name in the suite's grouping queries.
+ */
+case class TestData2Int(
+    a: Int,
+    b: Int)
+
+// TODO: Ideally this test suite should live in the `sql` package, because the
+// `hive` package is optional at compile time. However, `SQLContext.sql` doesn't
+// support `cube` or `rollup` yet.
+/**
+ * Compares the DataFrame `rollup` / `cube` operators against the equivalent
+ * `GROUP BY ... WITH ROLLUP` / `WITH CUBE` SQL issued through `TestHive`,
+ * using `checkAnswer` to compare the two results.
+ */
+class HiveDataFrameAnalyticsSuite extends QueryTest {
+  // Two-row fixture: (a, b) = (1, 2) and (2, 4).
+  val testData =
+    TestHive.sparkContext.parallelize(List(TestData2Int(1, 2), TestData2Int(2, 4))).toDF()
+
+  // Registered under a table name so the reference SQL below reads the same rows.
+  testData.registerTempTable("mytable")
+
+  test("rollup") {
+    // Grouping keys supplied as Column expressions.
+    val actualByColumns = testData.rollup($"a" + $"b", $"b").agg(sum($"a" - $"b"))
+    val expectedByColumns =
+      sql("select a + b, b, sum(a - b) from mytable group by a + b, b with rollup").collect()
+    checkAnswer(actualByColumns, expectedByColumns)
+
+    // Grouping keys supplied as column names.
+    val actualByNames = testData.rollup("a", "b").agg(sum("b"))
+    val expectedByNames =
+      sql("select a, b, sum(b) from mytable group by a, b with rollup").collect()
+    checkAnswer(actualByNames, expectedByNames)
+  }
+
+  test("cube") {
+    // Grouping keys supplied as Column expressions.
+    val actualByColumns = testData.cube($"a" + $"b", $"b").agg(sum($"a" - $"b"))
+    val expectedByColumns =
+      sql("select a + b, b, sum(a - b) from mytable group by a + b, b with cube").collect()
+    checkAnswer(actualByColumns, expectedByColumns)
+
+    // Grouping keys supplied as column names.
+    val actualByNames = testData.cube("a", "b").agg(sum("b"))
+    val expectedByNames =
+      sql("select a, b, sum(b) from mytable group by a, b with cube").collect()
+    checkAnswer(actualByNames, expectedByNames)
+  }
+}