[SPARK-19359][SQL] Revert Clear useless path after rename a partition with upper-case by HiveExternalCatalog

### What changes were proposed in this pull request? This PR reverts the changes made in https://github.com/apache/spark/pull/16700. Those changes could cause data loss after a partition rename, because we have a bug in the file renaming. Not all OSs behave the same way. For example, on macOS, if we rename a path from `.../tbl/a=5/b=6` to `.../tbl/A=5/B=6`, the result is `.../tbl/a=5/B=6`, whereas the expected result is `.../tbl/A=5/B=6`. Thus, renaming on macOS is not recursive. However, the systems used in Jenkins do not have such an issue. Although the reverted PR is not the root cause, it exposes an existing issue in the code `tablePath.getFileSystem(hadoopConf).rename(wrongPath, rightPath)` --- The Hive metastore is not case-preserving and keeps partition columns with lower-case names. If SparkSQL creates a table with upper-case partition names using HiveExternalCatalog, then when we rename a partition, it first calls the HiveClient to renamePartition, which creates a new lower-case partition path, and then SparkSQL renames the lower-case path to the upper-case one. However, if the renamed partition contains more than one level of partitioning, e.g. A=1/B=2, Hive's renamePartition changes it to a=1/b=2, and then SparkSQL renames it to A=1/B=2, but a=1 still exists in the filesystem; we should also delete it. ### How was this patch tested? N/A Author: gatorsmile <gatorsmile@gmail.com> Closes #16728 from gatorsmile/revert-pr-16700.
author: gatorsmile <gatorsmile@gmail.com> 2017-01-28 13:32:30 -0800
committer: gatorsmile <gatorsmile@gmail.com> 2017-01-28 13:32:30 -0800
commit: cfcfc92f7bbdfd3a8b5e3948ae2f95c74d470434 (patch)
tree: dd98fc80135028838c8bca14c83902fdc1ebd5d7 /sql/hive/src/test/scala/org
parent: 42ad93b2c9047a68c14cbf681508157101f43c0e (diff)
download: spark-cfcfc92f7bbdfd3a8b5e3948ae2f95c74d470434.tar.gz
spark-cfcfc92f7bbdfd3a8b5e3948ae2f95c74d470434.tar.bz2
spark-cfcfc92f7bbdfd3a8b5e3948ae2f95c74d470434.zip
1 files changed, 0 insertions, 36 deletions
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionProviderCompatibilitySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionProviderCompatibilitySuite.scala
index 1214a92b76..dca207a72d 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionProviderCompatibilitySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionProviderCompatibilitySuite.scala
@@ -19,11 +19,8 @@ package org.apache.spark.sql.hive
 
 import java.io.File
 
-import org.apache.hadoop.fs.Path
-
 import org.apache.spark.metrics.source.HiveCatalogMetrics
 import org.apache.spark.sql.{AnalysisException, QueryTest}
-import org.apache.spark.sql.catalyst.TableIdentifier
 import org.apache.spark.sql.hive.test.TestHiveSingleton
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.test.SQLTestUtils
@@ -484,37 +481,4 @@ class PartitionProviderCompatibilitySuite
       assert(spark.sql("show partitions test").count() == 5)
     }
   }
-
-  test("partition path created by Hive should be deleted after renamePartitions with upper-case") {
-    withTable("t", "t1", "t2") {
-      Seq((1, 2, 3)).toDF("id", "A", "B").write.partitionBy("A", "B").saveAsTable("t")
-      spark.sql("alter table t partition(A=2, B=3) rename to partition(A=4, B=5)")
-
-      var table = spark.sessionState.catalog.getTableMetadata(TableIdentifier("t"))
-      var extraHivePath = new Path(table.location + "/a=4")
-      assert(!extraHivePath.getFileSystem(spark.sessionState.newHadoopConf())
-        .exists(extraHivePath), "partition path created by Hive should be deleted " +
-        "after renamePartitions with upper-case")
-
-      Seq((1, 2, 3, 4)).toDF("id", "A", "B", "C").write.partitionBy("A", "B", "C").saveAsTable("t1")
-      spark.sql("alter table t1 partition(A=2, B=3, C=4) rename to partition(A=5, B=6, C=7)")
-      table = spark.sessionState.catalog.getTableMetadata(TableIdentifier("t1"))
-      extraHivePath = new Path(table.location + "/a=5")
-      assert(!extraHivePath.getFileSystem(spark.sessionState.newHadoopConf())
-        .exists(extraHivePath), "partition path created by Hive should be deleted " +
-        "after renamePartitions with upper-case")
-
-      Seq((1, 2, 3, 4)).toDF("id", "a", "B", "C").write.partitionBy("a", "B", "C").saveAsTable("t2")
-      spark.sql("alter table t2 partition(a=2, B=3, C=4) rename to partition(a=4, B=5, C=6)")
-      table = spark.sessionState.catalog.getTableMetadata(TableIdentifier("t2"))
-      val partPath = new Path(table.location + "/a=4")
-      assert(partPath.getFileSystem(spark.sessionState.newHadoopConf())
-        .exists(partPath), "partition path of lower-case partition name should not be deleted")
-
-      extraHivePath = new Path(table.location + "/a=4/b=5")
-      assert(!extraHivePath.getFileSystem(spark.sessionState.newHadoopConf())
-        .exists(extraHivePath), "partition path created by Hive should be deleted " +
-        "after renamePartitions with upper-case")
-    }
-  }
 }
author	gatorsmile <gatorsmile@gmail.com>	2017-01-28 13:32:30 -0800
committer	gatorsmile <gatorsmile@gmail.com>	2017-01-28 13:32:30 -0800
commit	cfcfc92f7bbdfd3a8b5e3948ae2f95c74d470434 (patch)
tree	dd98fc80135028838c8bca14c83902fdc1ebd5d7 /sql/hive/src/test/scala/org
parent	42ad93b2c9047a68c14cbf681508157101f43c0e (diff)
download	spark-cfcfc92f7bbdfd3a8b5e3948ae2f95c74d470434.tar.gz spark-cfcfc92f7bbdfd3a8b5e3948ae2f95c74d470434.tar.bz2 spark-cfcfc92f7bbdfd3a8b5e3948ae2f95c74d470434.zip