From e2318ede04fa7a756d1c8151775e1f2406a176ca Mon Sep 17 00:00:00 2001
From: Eric Liang
Date: Mon, 28 Nov 2016 21:58:01 -0800
Subject: [SPARK-18544][SQL] Append with df.saveAsTable writes data to wrong location

## What changes were proposed in this pull request?

We failed to properly propagate table metadata for existing tables for the saveAsTable command. This caused a downstream component to think the table was MANAGED, writing data to the wrong location.

## How was this patch tested?

Unit test that fails before the patch.

Author: Eric Liang

Closes #15983 from ericl/spark-18544.
---
 .../hive/PartitionProviderCompatibilitySuite.scala | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

(limited to 'sql/hive/src/test')

diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionProviderCompatibilitySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionProviderCompatibilitySuite.scala
index a1aa07456f..cace5fa95c 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionProviderCompatibilitySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionProviderCompatibilitySuite.scala
@@ -188,6 +188,25 @@ class PartitionProviderCompatibilitySuite
     }
   }
 
+  for (enabled <- Seq(true, false)) {
+    test(s"SPARK-18544 append with saveAsTable - partition management $enabled") {
+      withSQLConf(SQLConf.HIVE_MANAGE_FILESOURCE_PARTITIONS.key -> enabled.toString) {
+        withTable("test") {
+          withTempDir { dir =>
+            setupPartitionedDatasourceTable("test", dir)
+            if (enabled) {
+              spark.sql("msck repair table test")
+            }
+            assert(spark.sql("select * from test").count() == 5)
+            spark.range(10).selectExpr("id as fieldOne", "id as partCol")
+              .write.partitionBy("partCol").mode("append").saveAsTable("test")
+            assert(spark.sql("select * from test").count() == 15)
+          }
+        }
+      }
+    }
+  }
+
   /**
    * Runs a test against a multi-level partitioned table, then validates that the custom locations
    * were respected by the output writer.
-- cgit v1.2.3