aboutsummaryrefslogtreecommitdiff
path: root/sql/hive/src/test/scala/org
diff options
context:
space:
mode:
authorgatorsmile <gatorsmile@gmail.com>2017-01-03 11:43:47 -0800
committergatorsmile <gatorsmile@gmail.com>2017-01-03 11:43:47 -0800
commitb67b35f76b684c5176dc683e7491fd01b43f4467 (patch)
tree22c57087665b0e490c692ebe3f87fa6bd189b4d3 /sql/hive/src/test/scala/org
parent89bf370e4f53c02b018b23adc653cd718869489e (diff)
downloadspark-b67b35f76b684c5176dc683e7491fd01b43f4467.tar.gz
spark-b67b35f76b684c5176dc683e7491fd01b43f4467.tar.bz2
spark-b67b35f76b684c5176dc683e7491fd01b43f4467.zip
[SPARK-19048][SQL] Delete Partition Location when Dropping Managed Partitioned Tables in InMemoryCatalog
### What changes were proposed in this pull request? The data in a managed table should be deleted after the table is dropped. However, if the partition location is not under the location of the partitioned table, it is not deleted as expected. Users can specify any location for a partition when adding a partition. This PR deletes the partition location when dropping managed partitioned tables stored in `InMemoryCatalog`. ### How was this patch tested? Added test cases for both HiveExternalCatalog and InMemoryCatalog. Author: gatorsmile <gatorsmile@gmail.com> Closes #16448 from gatorsmile/unsetSerdeProp.
Diffstat (limited to 'sql/hive/src/test/scala/org')
-rw-r--r--sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala56
1 files changed, 54 insertions, 2 deletions
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
index f313db641b..8b34219530 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
@@ -199,6 +199,52 @@ class HiveDDLSuite
assert(e.message == "Found duplicate column(s) in table definition of `tbl`: a")
}
+ test("add/drop partition with location - managed table") {
+ val tab = "tab_with_partitions"
+ withTempDir { tmpDir =>
+ val basePath = new File(tmpDir.getCanonicalPath)
+ val part1Path = new File(basePath + "/part1")
+ val part2Path = new File(basePath + "/part2")
+ val dirSet = part1Path :: part2Path :: Nil
+
+ // Before data insertion, all the directory are empty
+ assert(dirSet.forall(dir => dir.listFiles == null || dir.listFiles.isEmpty))
+
+ withTable(tab) {
+ sql(
+ s"""
+ |CREATE TABLE $tab (key INT, value STRING)
+ |PARTITIONED BY (ds STRING, hr STRING)
+ """.stripMargin)
+ sql(
+ s"""
+ |ALTER TABLE $tab ADD
+ |PARTITION (ds='2008-04-08', hr=11) LOCATION '$part1Path'
+ |PARTITION (ds='2008-04-08', hr=12) LOCATION '$part2Path'
+ """.stripMargin)
+ assert(dirSet.forall(dir => dir.listFiles == null || dir.listFiles.isEmpty))
+
+ sql(s"INSERT OVERWRITE TABLE $tab partition (ds='2008-04-08', hr=11) SELECT 1, 'a'")
+ sql(s"INSERT OVERWRITE TABLE $tab partition (ds='2008-04-08', hr=12) SELECT 2, 'b'")
+ // add partition will not delete the data
+ assert(dirSet.forall(dir => dir.listFiles.nonEmpty))
+ checkAnswer(
+ spark.table(tab),
+ Row(1, "a", "2008-04-08", "11") :: Row(2, "b", "2008-04-08", "12") :: Nil
+ )
+
+ sql(s"ALTER TABLE $tab DROP PARTITION (ds='2008-04-08', hr=11)")
+ // drop partition will delete the data
+ assert(part1Path.listFiles == null || part1Path.listFiles.isEmpty)
+ assert(part2Path.listFiles.nonEmpty)
+
+ sql(s"DROP TABLE $tab")
+ // drop table will delete the data of the managed table
+ assert(dirSet.forall(dir => dir.listFiles == null || dir.listFiles.isEmpty))
+ }
+ }
+ }
+
test("add/drop partitions - external table") {
val catalog = spark.sessionState.catalog
withTempDir { tmpDir =>
@@ -257,9 +303,15 @@ class HiveDDLSuite
// drop partition will not delete the data of external table
assert(dirSet.forall(dir => dir.listFiles.nonEmpty))
- sql(s"ALTER TABLE $externalTab ADD PARTITION (ds='2008-04-08', hr='12')")
+ sql(
+ s"""
+ |ALTER TABLE $externalTab ADD PARTITION (ds='2008-04-08', hr='12')
+ |PARTITION (ds='2008-04-08', hr=11)
+ """.stripMargin)
assert(catalog.listPartitions(TableIdentifier(externalTab)).map(_.spec).toSet ==
- Set(Map("ds" -> "2008-04-08", "hr" -> "12"), Map("ds" -> "2008-04-09", "hr" -> "11")))
+ Set(Map("ds" -> "2008-04-08", "hr" -> "11"),
+ Map("ds" -> "2008-04-08", "hr" -> "12"),
+ Map("ds" -> "2008-04-09", "hr" -> "11")))
// add partition will not delete the data
assert(dirSet.forall(dir => dir.listFiles.nonEmpty))