author     windpiger <songjun@outlook.com>      2017-01-27 17:17:17 -0800
committer  gatorsmile <gatorsmile@gmail.com>    2017-01-27 17:17:17 -0800
commit     1b5ee2003c368d18a5f8c17c2a869ef5770c60a1 (patch)
tree       d73dcb492c9e0b14e868a68eff748a498af5ecc9 /sql/hive/src/test/scala/org
parent     bb1a1fe05e293c480c88123d4c83a6b8c25f6e2e (diff)
[SPARK-19359][SQL] Clear useless path after renaming a partition with upper-case names in HiveExternalCatalog
## What changes were proposed in this pull request?

The Hive metastore is not case preserving and stores partition column names in lower case. If Spark SQL creates a table with an upper-case partition column name through HiveExternalCatalog, then on a partition rename it first calls the HiveClient's renamePartition, which creates a new lower-case partition path, and Spark SQL then renames that lower-case path to the upper-case one. However, if the renamed partition is more than one level deep, e.g. A=1/B=2, Hive's renamePartition produces a=1/b=2 and Spark SQL renames it to A=1/B=2, but the stale a=1 directory still exists in the filesystem; we should delete it as well.

## How was this patch tested?

A unit test was added.

Author: windpiger <songjun@outlook.com>

Closes #16700 from windpiger/clearUselessPathAfterRenamPartition.
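The fix itself lives in HiveExternalCatalog.renamePartitions, which is outside this diffstat (limited to the test directory). As a rough illustration of the cleanup idea only — the helper name, signature, and traversal below are assumptions, not the commit's actual code — a minimal sketch could look like this:

```scala
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path

// Hypothetical helper (not the commit's actual code): after Hive's
// renamePartition has materialized a lower-cased destination such as
// a=4/b=5 and Spark has renamed it to the case-preserving A=4/B=5,
// the lower-case parent directories (a=4) may be left behind empty.
// Walk upward from the stale leaf's parent and remove empty levels,
// stopping at the table root.
def deleteEmptyLowerCaseDirs(
    tablePath: Path,
    lowerCasedPartitionPath: Path,
    hadoopConf: Configuration): Unit = {
  val fs = tablePath.getFileSystem(hadoopConf)
  var current = lowerCasedPartitionPath.getParent
  while (current != null && current != tablePath &&
      fs.exists(current) && fs.listStatus(current).isEmpty) {
    fs.delete(current, false) // directory is empty, non-recursive delete suffices
    current = current.getParent
  }
}
```

The emptiness check matters: in the `t2` case in the test below, where the first partition column is genuinely lower-case (`a`), the `a=4` directory still holds the renamed `B=5` child and therefore must survive, which the test's third block asserts.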
Diffstat (limited to 'sql/hive/src/test/scala/org')
-rw-r--r--  sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionProviderCompatibilitySuite.scala | 36 ++++++
1 file changed, 36 insertions(+), 0 deletions(-)
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionProviderCompatibilitySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionProviderCompatibilitySuite.scala
index dca207a72d..1214a92b76 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionProviderCompatibilitySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionProviderCompatibilitySuite.scala
@@ -19,8 +19,11 @@ package org.apache.spark.sql.hive
import java.io.File
+import org.apache.hadoop.fs.Path
+
import org.apache.spark.metrics.source.HiveCatalogMetrics
import org.apache.spark.sql.{AnalysisException, QueryTest}
+import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.hive.test.TestHiveSingleton
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.test.SQLTestUtils
@@ -481,4 +484,37 @@ class PartitionProviderCompatibilitySuite
assert(spark.sql("show partitions test").count() == 5)
}
}
+
+ test("partition path created by Hive should be deleted after renamePartitions with upper-case") {
+ withTable("t", "t1", "t2") {
+ Seq((1, 2, 3)).toDF("id", "A", "B").write.partitionBy("A", "B").saveAsTable("t")
+ spark.sql("alter table t partition(A=2, B=3) rename to partition(A=4, B=5)")
+
+ var table = spark.sessionState.catalog.getTableMetadata(TableIdentifier("t"))
+ var extraHivePath = new Path(table.location + "/a=4")
+ assert(!extraHivePath.getFileSystem(spark.sessionState.newHadoopConf())
+ .exists(extraHivePath), "partition path created by Hive should be deleted " +
+ "after renamePartitions with upper-case")
+
+ Seq((1, 2, 3, 4)).toDF("id", "A", "B", "C").write.partitionBy("A", "B", "C").saveAsTable("t1")
+ spark.sql("alter table t1 partition(A=2, B=3, C=4) rename to partition(A=5, B=6, C=7)")
+ table = spark.sessionState.catalog.getTableMetadata(TableIdentifier("t1"))
+ extraHivePath = new Path(table.location + "/a=5")
+ assert(!extraHivePath.getFileSystem(spark.sessionState.newHadoopConf())
+ .exists(extraHivePath), "partition path created by Hive should be deleted " +
+ "after renamePartitions with upper-case")
+
+ Seq((1, 2, 3, 4)).toDF("id", "a", "B", "C").write.partitionBy("a", "B", "C").saveAsTable("t2")
+ spark.sql("alter table t2 partition(a=2, B=3, C=4) rename to partition(a=4, B=5, C=6)")
+ table = spark.sessionState.catalog.getTableMetadata(TableIdentifier("t2"))
+ val partPath = new Path(table.location + "/a=4")
+ assert(partPath.getFileSystem(spark.sessionState.newHadoopConf())
+ .exists(partPath), "partition path of lower-case partition name should not be deleted")
+
+ extraHivePath = new Path(table.location + "/a=4/b=5")
+ assert(!extraHivePath.getFileSystem(spark.sessionState.newHadoopConf())
+ .exists(extraHivePath), "partition path created by Hive should be deleted " +
+ "after renamePartitions with upper-case")
+ }
+ }
}
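For a hands-on check outside the suite, the leftover directory can be inspected directly. A sketch, assuming a Hive-enabled SparkSession named `spark` and the two-level table `t` from the first test block; exact paths depend on the warehouse location:

```scala
import org.apache.hadoop.fs.Path
import org.apache.spark.sql.catalyst.TableIdentifier

// List the table's root directory after the rename. Before this fix the
// listing shows both the case-preserving A=4 and a stale, empty a=4
// created by Hive's renamePartition; after the fix only A=4 remains.
val table = spark.sessionState.catalog.getTableMetadata(TableIdentifier("t"))
val tablePath = new Path(table.location.toString)
val fs = tablePath.getFileSystem(spark.sessionState.newHadoopConf())
fs.listStatus(tablePath).foreach(status => println(status.getPath.getName))
```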