aboutsummaryrefslogtreecommitdiff
path: root/sql/hive
diff options
context:
space:
mode:
authorCheng Hao <hao.cheng@intel.com>2015-07-15 23:35:27 -0700
committerYin Huai <yhuai@databricks.com>2015-07-15 23:35:27 -0700
commite27212317c7341852c52d9a85137b8f94cb0d935 (patch)
tree91d508dff350e5122f7e662b0f36e15bc15ecc86 /sql/hive
parentba33096846dc8061e97a7bf8f3b46f899d530159 (diff)
downloadspark-e27212317c7341852c52d9a85137b8f94cb0d935.tar.gz
spark-e27212317c7341852c52d9a85137b8f94cb0d935.tar.bz2
spark-e27212317c7341852c52d9a85137b8f94cb0d935.zip
[SPARK-8972] [SQL] Incorrect result for rollup
Complex expressions are not supported as GROUP BY keys in ROLLUP/CUBE, and no error is reported when such keys are used, which leads to very confusing or incorrect results, e.g. `SELECT key%100 FROM src GROUP BY key%100 WITH ROLLUP`.
This PR adds an additional projection during analysis for complex GROUP BY keys. That projection becomes the child of `Expand`, so from the point of view of `Expand` the GROUP BY keys are always simple keys (attribute names).
Author: Cheng Hao <hao.cheng@intel.com>
Closes #7343 from chenghao-intel/expand and squashes the following commits:
1ebbb59 [Cheng Hao] update the comment
827873f [Cheng Hao] update as feedback
34def69 [Cheng Hao] Add more unit test and comments
c695760 [Cheng Hao] fix bug of incorrect result for rollup
Diffstat (limited to 'sql/hive')
-rw-r--r--sql/hive/src/test/resources/golden/SPARK-8976 Wrong Result for CUBE #1-0-63b61fb3f0e74226001ad279be4408646
-rw-r--r--sql/hive/src/test/resources/golden/SPARK-8976 Wrong Result for CUBE #2-0-7a511f02a16f0af4f810b1666cfcd89610
-rw-r--r--sql/hive/src/test/resources/golden/SPARK-8976 Wrong Result for GroupingSet-0-8c14c24670a4b06c440346277ce9cf1c10
-rw-r--r--sql/hive/src/test/resources/golden/SPARK-8976 Wrong Result for Rollup #1-0-a78e3dbf242f240249e36b3d3fd0926a6
-rw-r--r--sql/hive/src/test/resources/golden/SPARK-8976 Wrong Result for Rollup #2-0-bf180c9d1a18f61b9d9f31bb0115cf8910
-rw-r--r--sql/hive/src/test/resources/golden/SPARK-8976 Wrong Result for Rollup #3-0-9257085d123728730be96b6d9fbb84ce10
-rw-r--r--sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala54
7 files changed, 106 insertions, 0 deletions
diff --git a/sql/hive/src/test/resources/golden/SPARK-8976 Wrong Result for CUBE #1-0-63b61fb3f0e74226001ad279be440864 b/sql/hive/src/test/resources/golden/SPARK-8976 Wrong Result for CUBE #1-0-63b61fb3f0e74226001ad279be440864
new file mode 100644
index 0000000000..dac1b84b91
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/SPARK-8976 Wrong Result for CUBE #1-0-63b61fb3f0e74226001ad279be440864
@@ -0,0 +1,6 @@
+500 NULL 0
+91 0 1
+84 1 1
+105 2 1
+113 3 1
+107 4 1
diff --git a/sql/hive/src/test/resources/golden/SPARK-8976 Wrong Result for CUBE #2-0-7a511f02a16f0af4f810b1666cfcd896 b/sql/hive/src/test/resources/golden/SPARK-8976 Wrong Result for CUBE #2-0-7a511f02a16f0af4f810b1666cfcd896
new file mode 100644
index 0000000000..c7cb747c0a
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/SPARK-8976 Wrong Result for CUBE #2-0-7a511f02a16f0af4f810b1666cfcd896
@@ -0,0 +1,10 @@
+1 NULL -3 2
+1 NULL -1 2
+1 NULL 3 2
+1 NULL 4 2
+1 NULL 5 2
+1 NULL 6 2
+1 NULL 12 2
+1 NULL 14 2
+1 NULL 15 2
+1 NULL 22 2
diff --git a/sql/hive/src/test/resources/golden/SPARK-8976 Wrong Result for GroupingSet-0-8c14c24670a4b06c440346277ce9cf1c b/sql/hive/src/test/resources/golden/SPARK-8976 Wrong Result for GroupingSet-0-8c14c24670a4b06c440346277ce9cf1c
new file mode 100644
index 0000000000..c7cb747c0a
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/SPARK-8976 Wrong Result for GroupingSet-0-8c14c24670a4b06c440346277ce9cf1c
@@ -0,0 +1,10 @@
+1 NULL -3 2
+1 NULL -1 2
+1 NULL 3 2
+1 NULL 4 2
+1 NULL 5 2
+1 NULL 6 2
+1 NULL 12 2
+1 NULL 14 2
+1 NULL 15 2
+1 NULL 22 2
diff --git a/sql/hive/src/test/resources/golden/SPARK-8976 Wrong Result for Rollup #1-0-a78e3dbf242f240249e36b3d3fd0926a b/sql/hive/src/test/resources/golden/SPARK-8976 Wrong Result for Rollup #1-0-a78e3dbf242f240249e36b3d3fd0926a
new file mode 100644
index 0000000000..dac1b84b91
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/SPARK-8976 Wrong Result for Rollup #1-0-a78e3dbf242f240249e36b3d3fd0926a
@@ -0,0 +1,6 @@
+500 NULL 0
+91 0 1
+84 1 1
+105 2 1
+113 3 1
+107 4 1
diff --git a/sql/hive/src/test/resources/golden/SPARK-8976 Wrong Result for Rollup #2-0-bf180c9d1a18f61b9d9f31bb0115cf89 b/sql/hive/src/test/resources/golden/SPARK-8976 Wrong Result for Rollup #2-0-bf180c9d1a18f61b9d9f31bb0115cf89
new file mode 100644
index 0000000000..1eea4a9b23
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/SPARK-8976 Wrong Result for Rollup #2-0-bf180c9d1a18f61b9d9f31bb0115cf89
@@ -0,0 +1,10 @@
+1 0 5 3
+1 0 15 3
+1 0 25 3
+1 0 60 3
+1 0 75 3
+1 0 80 3
+1 0 100 3
+1 0 140 3
+1 0 145 3
+1 0 150 3
diff --git a/sql/hive/src/test/resources/golden/SPARK-8976 Wrong Result for Rollup #3-0-9257085d123728730be96b6d9fbb84ce b/sql/hive/src/test/resources/golden/SPARK-8976 Wrong Result for Rollup #3-0-9257085d123728730be96b6d9fbb84ce
new file mode 100644
index 0000000000..1eea4a9b23
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/SPARK-8976 Wrong Result for Rollup #3-0-9257085d123728730be96b6d9fbb84ce
@@ -0,0 +1,10 @@
+1 0 5 3
+1 0 15 3
+1 0 25 3
+1 0 60 3
+1 0 75 3
+1 0 80 3
+1 0 100 3
+1 0 140 3
+1 0 145 3
+1 0 150 3
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
index 991da2f829..11a843becc 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
@@ -85,6 +85,60 @@ class HiveQuerySuite extends HiveComparisonTest with BeforeAndAfter {
}
}
+ createQueryTest("SPARK-8976 Wrong Result for Rollup #1",
+ """
+ SELECT count(*) AS cnt, key % 5,GROUPING__ID FROM src group by key%5 WITH ROLLUP
+ """.stripMargin)
+
+ createQueryTest("SPARK-8976 Wrong Result for Rollup #2",
+ """
+ SELECT
+ count(*) AS cnt,
+ key % 5 as k1,
+ key-5 as k2,
+ GROUPING__ID as k3
+ FROM src group by key%5, key-5
+ WITH ROLLUP ORDER BY cnt, k1, k2, k3 LIMIT 10
+ """.stripMargin)
+
+ createQueryTest("SPARK-8976 Wrong Result for Rollup #3",
+ """
+ SELECT
+ count(*) AS cnt,
+ key % 5 as k1,
+ key-5 as k2,
+ GROUPING__ID as k3
+ FROM (SELECT key, key%2, key - 5 FROM src) t group by key%5, key-5
+ WITH ROLLUP ORDER BY cnt, k1, k2, k3 LIMIT 10
+ """.stripMargin)
+
+ createQueryTest("SPARK-8976 Wrong Result for CUBE #1",
+ """
+ SELECT count(*) AS cnt, key % 5,GROUPING__ID FROM src group by key%5 WITH CUBE
+ """.stripMargin)
+
+ createQueryTest("SPARK-8976 Wrong Result for CUBE #2",
+ """
+ SELECT
+ count(*) AS cnt,
+ key % 5 as k1,
+ key-5 as k2,
+ GROUPING__ID as k3
+ FROM (SELECT key, key%2, key - 5 FROM src) t group by key%5, key-5
+ WITH CUBE ORDER BY cnt, k1, k2, k3 LIMIT 10
+ """.stripMargin)
+
+ createQueryTest("SPARK-8976 Wrong Result for GroupingSet",
+ """
+ SELECT
+ count(*) AS cnt,
+ key % 5 as k1,
+ key-5 as k2,
+ GROUPING__ID as k3
+ FROM (SELECT key, key%2, key - 5 FROM src) t group by key%5, key-5
+ GROUPING SETS (key%5, key-5) ORDER BY cnt, k1, k2, k3 LIMIT 10
+ """.stripMargin)
+
createQueryTest("insert table with generator with column name",
"""
| CREATE TABLE gen_tmp (key Int);