diff options
author | gatorsmile <gatorsmile@gmail.com> | 2017-01-03 11:43:47 -0800 |
---|---|---|
committer | gatorsmile <gatorsmile@gmail.com> | 2017-01-03 11:43:47 -0800 |
commit | b67b35f76b684c5176dc683e7491fd01b43f4467 (patch) | |
tree | 22c57087665b0e490c692ebe3f87fa6bd189b4d3 /sql/hive | |
parent | 89bf370e4f53c02b018b23adc653cd718869489e (diff) | |
download | spark-b67b35f76b684c5176dc683e7491fd01b43f4467.tar.gz spark-b67b35f76b684c5176dc683e7491fd01b43f4467.tar.bz2 spark-b67b35f76b684c5176dc683e7491fd01b43f4467.zip |
[SPARK-19048][SQL] Delete Partition Location when Dropping Managed Partitioned Tables in InMemoryCatalog
### What changes were proposed in this pull request?
The data in a managed table should be deleted after the table is dropped. However, if the partition location is not under the location of the partitioned table, it is not deleted as expected. Users can specify any location for a partition when they add it.
This PR is to delete partition location when dropping managed partitioned tables stored in `InMemoryCatalog`.
### How was this patch tested?
Added test cases for both HiveExternalCatalog and InMemoryCatalog
Author: gatorsmile <gatorsmile@gmail.com>
Closes #16448 from gatorsmile/unsetSerdeProp.
Diffstat (limited to 'sql/hive')
-rw-r--r-- | sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala | 56 |
1 file changed, 54 insertions, 2 deletions
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index f313db641b..8b34219530 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -199,6 +199,52 @@ class HiveDDLSuite assert(e.message == "Found duplicate column(s) in table definition of `tbl`: a") } + test("add/drop partition with location - managed table") { + val tab = "tab_with_partitions" + withTempDir { tmpDir => + val basePath = new File(tmpDir.getCanonicalPath) + val part1Path = new File(basePath + "/part1") + val part2Path = new File(basePath + "/part2") + val dirSet = part1Path :: part2Path :: Nil + + // Before data insertion, all the directory are empty + assert(dirSet.forall(dir => dir.listFiles == null || dir.listFiles.isEmpty)) + + withTable(tab) { + sql( + s""" + |CREATE TABLE $tab (key INT, value STRING) + |PARTITIONED BY (ds STRING, hr STRING) + """.stripMargin) + sql( + s""" + |ALTER TABLE $tab ADD + |PARTITION (ds='2008-04-08', hr=11) LOCATION '$part1Path' + |PARTITION (ds='2008-04-08', hr=12) LOCATION '$part2Path' + """.stripMargin) + assert(dirSet.forall(dir => dir.listFiles == null || dir.listFiles.isEmpty)) + + sql(s"INSERT OVERWRITE TABLE $tab partition (ds='2008-04-08', hr=11) SELECT 1, 'a'") + sql(s"INSERT OVERWRITE TABLE $tab partition (ds='2008-04-08', hr=12) SELECT 2, 'b'") + // add partition will not delete the data + assert(dirSet.forall(dir => dir.listFiles.nonEmpty)) + checkAnswer( + spark.table(tab), + Row(1, "a", "2008-04-08", "11") :: Row(2, "b", "2008-04-08", "12") :: Nil + ) + + sql(s"ALTER TABLE $tab DROP PARTITION (ds='2008-04-08', hr=11)") + // drop partition will delete the data + assert(part1Path.listFiles == null || part1Path.listFiles.isEmpty) + assert(part2Path.listFiles.nonEmpty) + + sql(s"DROP TABLE $tab") + // drop table 
will delete the data of the managed table + assert(dirSet.forall(dir => dir.listFiles == null || dir.listFiles.isEmpty)) + } + } + } + test("add/drop partitions - external table") { val catalog = spark.sessionState.catalog withTempDir { tmpDir => @@ -257,9 +303,15 @@ class HiveDDLSuite // drop partition will not delete the data of external table assert(dirSet.forall(dir => dir.listFiles.nonEmpty)) - sql(s"ALTER TABLE $externalTab ADD PARTITION (ds='2008-04-08', hr='12')") + sql( + s""" + |ALTER TABLE $externalTab ADD PARTITION (ds='2008-04-08', hr='12') + |PARTITION (ds='2008-04-08', hr=11) + """.stripMargin) assert(catalog.listPartitions(TableIdentifier(externalTab)).map(_.spec).toSet == - Set(Map("ds" -> "2008-04-08", "hr" -> "12"), Map("ds" -> "2008-04-09", "hr" -> "11"))) + Set(Map("ds" -> "2008-04-08", "hr" -> "11"), + Map("ds" -> "2008-04-08", "hr" -> "12"), + Map("ds" -> "2008-04-09", "hr" -> "11"))) // add partition will not delete the data assert(dirSet.forall(dir => dir.listFiles.nonEmpty)) |