author    Wenchen Fan <wenchen@databricks.com>  2016-12-19 20:03:33 -0800
committer gatorsmile <gatorsmile@gmail.com>    2016-12-19 20:03:33 -0800
commit    f923c849e5b8f7e7aeafee59db598a9bf4970f50 (patch)
tree      f4e44fd588215880e00bc481e4dce4f08cf80594 /sql/hive/src/test
parent    fa829ce21fb84028d90b739a49c4ece70a17ccfd (diff)
[SPARK-18899][SPARK-18912][SPARK-18913][SQL] refactor the error checking when appending data to an existing table
## What changes were proposed in this pull request?

When we append data to an existing table with `DataFrameWriter.saveAsTable`, we run various checks to make sure the appended data is consistent with the existing data. However, we previously obtained the information about the existing table by matching the table relation instead of looking at the table metadata. This is error-prone: for example, we only checked the number of columns for `HadoopFsRelation`, and we forgot to check bucketing entirely.

This PR refactors the error checking to look at the metadata of the existing table, and fixes several bugs:

* SPARK-18899: We forgot to check whether the specified bucketing matches the existing table, which could produce a problematic table with different bucketing in different data files.
* SPARK-18912: We forgot to check the number of columns for non-file-based data source tables.
* SPARK-18913: We didn't support appending data to a table with special column names.

## How was this patch tested?

New regression tests.

Author: Wenchen Fan <wenchen@databricks.com>

Closes #16313 from cloud-fan/bug1.
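To make the SPARK-18899 check concrete, here is a minimal, hypothetical sketch (the table, column, and app names are invented for illustration; this is not code from the patch): appending with a bucketing spec that differs from the existing table's metadata should now fail with an `AnalysisException` instead of silently mixing bucket layouts.

```scala
import org.apache.spark.sql.{AnalysisException, SaveMode, SparkSession}

object BucketingAppendSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("bucketing-append-sketch")
      .master("local[*]")
      .enableHiveSupport()
      .getOrCreate()
    import spark.implicits._

    // Create a table bucketed into 4 buckets on `id`.
    val df = Seq((1, "a"), (2, "b")).toDF("id", "name")
    df.write.bucketBy(4, "id").saveAsTable("bucketed_tab")

    // Appending with a different bucketing spec (8 buckets) is now checked
    // against the table metadata and rejected.
    try {
      df.write.mode(SaveMode.Append).bucketBy(8, "id").saveAsTable("bucketed_tab")
    } catch {
      case e: AnalysisException => println(s"Rejected as expected: ${e.getMessage}")
    }

    spark.stop()
  }
}
```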
Diffstat (limited to 'sql/hive/src/test')
-rw-r--r--  sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala | 17
-rw-r--r--  sql/hive/src/test/scala/org/apache/spark/sql/sources/HadoopFsRelationTest.scala   |  2
2 files changed, 8 insertions, 11 deletions
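The updated assertions in the first file exercise the format-mismatch error message. As a rough usage sketch (the table name is hypothetical, and this reuses the `spark` session from the sketch above), appending ORC data to a Parquet-backed table fails along these lines:

```scala
import org.apache.spark.sql.{AnalysisException, SaveMode}

val df = spark.range(10).toDF("id")
df.write.format("parquet").saveAsTable("parquet_tab")

try {
  // Appending with a mismatched file format is rejected.
  df.write.mode(SaveMode.Append).format("orc").saveAsTable("parquet_tab")
} catch {
  case e: AnalysisException =>
    // Expected message shape, per the assertions below:
    // "The format of the existing table default.parquet_tab is `ParquetFileFormat`.
    //  It doesn't match the specified format `OrcFileFormat`"
    println(e.getMessage)
}
```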
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala
index a45f4b5d63..deb40f0464 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala
@@ -422,7 +422,7 @@ class MetastoreDataSourcesSuite extends QueryTest with SQLTestUtils with TestHiv
       val e = intercept[AnalysisException] {
         df.write.mode(SaveMode.Append).saveAsTable(tableName)
       }.getMessage
-      assert(e.contains("Saving data in the Hive serde table `default`.`tab1` is not supported " +
+      assert(e.contains("Saving data in the Hive serde table default.tab1 is not supported " +
         "yet. Please use the insertInto() API as an alternative."))
 
       df.write.insertInto(tableName)
@@ -928,9 +928,8 @@ class MetastoreDataSourcesSuite extends QueryTest with SQLTestUtils with TestHiv
         createDF(10, 19).write.mode(SaveMode.Append).format("orc").saveAsTable("appendOrcToParquet")
       }
       assert(e.getMessage.contains(
-        "The file format of the existing table default.appendOrcToParquet " +
-          "is `org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat`. " +
-          "It doesn't match the specified format `orc`"))
+        "The format of the existing table default.appendOrcToParquet is `ParquetFileFormat`. " +
+          "It doesn't match the specified format `OrcFileFormat`"))
     }
 
     withTable("appendParquetToJson") {
@@ -940,9 +939,8 @@ class MetastoreDataSourcesSuite extends QueryTest with SQLTestUtils with TestHiv
           .saveAsTable("appendParquetToJson")
       }
       assert(e.getMessage.contains(
-        "The file format of the existing table default.appendParquetToJson " +
-          "is `org.apache.spark.sql.execution.datasources.json.JsonFileFormat`. " +
-          "It doesn't match the specified format `parquet`"))
+        "The format of the existing table default.appendParquetToJson is `JsonFileFormat`. " +
+          "It doesn't match the specified format `ParquetFileFormat`"))
     }
 
     withTable("appendTextToJson") {
@@ -952,9 +950,8 @@ class MetastoreDataSourcesSuite extends QueryTest with SQLTestUtils with TestHiv
           .saveAsTable("appendTextToJson")
       }
       assert(e.getMessage.contains(
-        "The file format of the existing table default.appendTextToJson is " +
-          "`org.apache.spark.sql.execution.datasources.json.JsonFileFormat`. " +
-          "It doesn't match the specified format `text`"))
+        "The format of the existing table default.appendTextToJson is `JsonFileFormat`. " +
+          "It doesn't match the specified format `TextFileFormat`"))
     }
   }
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/sources/HadoopFsRelationTest.scala b/sql/hive/src/test/scala/org/apache/spark/sql/sources/HadoopFsRelationTest.scala
index 22f13a494c..224b2c6c6f 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/sources/HadoopFsRelationTest.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/sources/HadoopFsRelationTest.scala
@@ -446,7 +446,7 @@ abstract class HadoopFsRelationTest extends QueryTest with SQLTestUtils with Tes
         .saveAsTable("t")
 
       // Using only a subset of all partition columns
-      intercept[Throwable] {
+      intercept[AnalysisException] {
         partitionedTestDF2.write
           .format(dataSourceName)
           .mode(SaveMode.Append)
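The final hunk (truncated here by the diff viewer) tightens the expected exception for appends that declare only a subset of the table's partition columns. A rough, hypothetical sketch of the behavior it pins down (the table and column names are invented; this again reuses the `spark` session and implicits from the first sketch):

```scala
import org.apache.spark.sql.{AnalysisException, SaveMode}
import spark.implicits._

val full = Seq((1, "a", "x", 2016)).toDF("id", "name", "p1", "p2")
full.write.partitionBy("p1", "p2").saveAsTable("partitioned_tab")

try {
  // Partitioning by only `p1` (a subset) no longer matches the table
  // metadata, so this now fails with a specific AnalysisException rather
  // than some arbitrary Throwable.
  full.write.mode(SaveMode.Append).partitionBy("p1").saveAsTable("partitioned_tab")
} catch {
  case e: AnalysisException => println(e.getMessage)
}
```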