aboutsummaryrefslogtreecommitdiff
path: root/sql
diff options
context:
space:
mode:
authorgatorsmile <gatorsmile@gmail.com>2016-06-14 09:58:06 -0700
committerWenchen Fan <wenchen@databricks.com>2016-06-14 09:58:06 -0700
commitbc02d011294fcd1ab07b9baf1011c3f2bdf749d9 (patch)
tree2d34c878c10e91a16046cb28c4b4f4c2a53d27cb /sql
parent6151d2641f91c8e3ec0c324e78afb46cdb2ef111 (diff)
downloadspark-bc02d011294fcd1ab07b9baf1011c3f2bdf749d9.tar.gz
spark-bc02d011294fcd1ab07b9baf1011c3f2bdf749d9.tar.bz2
spark-bc02d011294fcd1ab07b9baf1011c3f2bdf749d9.zip
[SPARK-15655][SQL] Fix Wrong Partition Column Order when Fetching Partitioned Tables
#### What changes were proposed in this pull request? When fetching a partitioned table, the output contains wrong results: the order of the partition key values does not match the order of the partition key columns in the output schema. For example, ```SQL CREATE TABLE table_with_partition(c1 string) PARTITIONED BY (p1 string,p2 string,p3 string,p4 string,p5 string) INSERT OVERWRITE TABLE table_with_partition PARTITION (p1='a',p2='b',p3='c',p4='d',p5='e') SELECT 'blarr' SELECT p1, p2, p3, p4, p5, c1 FROM table_with_partition ``` ``` +---+---+---+---+---+-----+ | p1| p2| p3| p4| p5| c1| +---+---+---+---+---+-----+ | d| e| c| b| a|blarr| +---+---+---+---+---+-----+ ``` The expected result is ``` +---+---+---+---+---+-----+ | p1| p2| p3| p4| p5| c1| +---+---+---+---+---+-----+ | a| b| c| d| e|blarr| +---+---+---+---+---+-----+ ``` This PR fixes the problem by enforcing that the order of the partition values matches the table's partition definition. #### How was this patch tested? Added a test case to `SQLQuerySuite`. Author: gatorsmile <gatorsmile@gmail.com> Closes #13400 from gatorsmile/partitionedTableFetch.
Diffstat (limited to 'sql')
-rw-r--r--sql/hive/src/main/scala/org/apache/spark/sql/hive/MetastoreRelation.scala2
-rw-r--r--sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala32
2 files changed, 33 insertions, 1 deletions
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/MetastoreRelation.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/MetastoreRelation.scala
index 9c820144ae..5596a4470f 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/MetastoreRelation.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/MetastoreRelation.scala
@@ -160,7 +160,7 @@ private[hive] case class MetastoreRelation(
val tPartition = new org.apache.hadoop.hive.metastore.api.Partition
tPartition.setDbName(databaseName)
tPartition.setTableName(tableName)
- tPartition.setValues(p.spec.values.toList.asJava)
+ tPartition.setValues(partitionKeys.map(a => p.spec(a.name)).asJava)
val sd = new org.apache.hadoop.hive.metastore.api.StorageDescriptor()
tPartition.setSd(sd)
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
index 1a0eaa66c1..9c1f218253 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
@@ -1610,6 +1610,38 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
assert(fs.exists(path), "This is an external table, so the data should not have been dropped")
}
+ test("select partitioned table") {
+ val table = "table_with_partition"
+ withTable(table) {
+ sql(
+ s"""
+ |CREATE TABLE $table(c1 string)
+ |PARTITIONED BY (p1 string,p2 string,p3 string,p4 string,p5 string)
+ """.stripMargin)
+ sql(
+ s"""
+ |INSERT OVERWRITE TABLE $table
+ |PARTITION (p1='a',p2='b',p3='c',p4='d',p5='e')
+ |SELECT 'blarr'
+ """.stripMargin)
+
+ // project list is in the same order as the partitioning columns in the table definition
+ checkAnswer(
+ sql(s"SELECT p1, p2, p3, p4, p5, c1 FROM $table"),
+ Row("a", "b", "c", "d", "e", "blarr") :: Nil)
+
+ // project list orders the partitioning columns differently from the table definition
+ checkAnswer(
+ sql(s"SELECT p2, p3, p4, p1, p5, c1 FROM $table"),
+ Row("b", "c", "d", "a", "e", "blarr") :: Nil)
+
+ // project list contains only a subset of the partitioning columns in the table definition
+ checkAnswer(
+ sql(s"SELECT p2, p1, p5, c1 FROM $table"),
+ Row("b", "a", "e", "blarr") :: Nil)
+ }
+ }
+
test("SPARK-14981: DESC not supported for sorting columns") {
withTable("t") {
val cause = intercept[ParseException] {