author    Wenchen Fan <wenchen@databricks.com>  2016-12-19 20:03:33 -0800
committer gatorsmile <gatorsmile@gmail.com>    2016-12-19 20:03:33 -0800
commit    f923c849e5b8f7e7aeafee59db598a9bf4970f50 (patch)
tree      f4e44fd588215880e00bc481e4dce4f08cf80594 /sql/hive/src/test
parent    fa829ce21fb84028d90b739a49c4ece70a17ccfd (diff)
[SPARK-18899][SPARK-18912][SPARK-18913][SQL] refactor the error checking when appending data to an existing table
## What changes were proposed in this pull request?

When we append data to an existing table with `DataFrameWriter.saveAsTable`, we run various checks to make sure the appended data is consistent with the existing data. However, we previously obtained the information about the existing table by matching the table relation instead of looking at the table metadata. This is error-prone: for example, we only checked the number of columns for `HadoopFsRelation`, and we forgot to check bucketing entirely.

This PR refactors the error checking to look at the metadata of the existing table, and fixes several bugs:

* SPARK-18899: We forgot to check whether the specified bucketing matches the existing table, which could produce a problematic table with different bucketing in different data files.
* SPARK-18912: We forgot to check the number of columns for non-file-based data source tables.
* SPARK-18913: We didn't support appending data to a table with special column names.

## How was this patch tested?

New regression tests.

Author: Wenchen Fan <wenchen@databricks.com>

Closes #16313 from cloud-fan/bug1.
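To make the SPARK-18899 check concrete, here is a minimal, hypothetical sketch (the table, column, and app names are invented for illustration; this is not code from the patch): appending with a bucketing spec that differs from the existing table's metadata should now fail with an `AnalysisException` instead of silently mixing bucket layouts.

```scala
import org.apache.spark.sql.{AnalysisException, SaveMode, SparkSession}

object BucketingAppendSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("bucketing-append-sketch")
      .master("local[*]")
      .enableHiveSupport()
      .getOrCreate()
    import spark.implicits._

    // Create a table bucketed into 4 buckets on `id`.
    val df = Seq((1, "a"), (2, "b")).toDF("id", "name")
    df.write.bucketBy(4, "id").saveAsTable("bucketed_tab")

    // Appending with a different bucketing spec (8 buckets) is now checked
    // against the table metadata and rejected.
    try {
      df.write.mode(SaveMode.Append).bucketBy(8, "id").saveAsTable("bucketed_tab")
    } catch {
      case e: AnalysisException => println(s"Rejected as expected: ${e.getMessage}")
    }

    spark.stop()
  }
}
```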
Diffstat (limited to 'sql/hive/src/test')
-rw-r--r--  sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala | 17
-rw-r--r--  sql/hive/src/test/scala/org/apache/spark/sql/sources/HadoopFsRelationTest.scala   |  2
2 files changed, 8 insertions, 11 deletions
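The updated assertions in the first file exercise the format-mismatch error message. As a rough usage sketch (the table name is hypothetical, and this reuses the `spark` session from the sketch above), appending ORC data to a Parquet-backed table fails along these lines:

```scala
import org.apache.spark.sql.{AnalysisException, SaveMode}

val df = spark.range(10).toDF("id")
df.write.format("parquet").saveAsTable("parquet_tab")

try {
  // Appending with a mismatched file format is rejected.
  df.write.mode(SaveMode.Append).format("orc").saveAsTable("parquet_tab")
} catch {
  case e: AnalysisException =>
    // Expected message shape, per the assertions below:
    // "The format of the existing table default.parquet_tab is `ParquetFileFormat`.
    //  It doesn't match the specified format `OrcFileFormat`"
    println(e.getMessage)
}
```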
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala
index a45f4b5d63..deb40f0464 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala
@@ -422,7 +422,7 @@ class MetastoreDataSourcesSuite extends QueryTest with SQLTestUtils with TestHiv
       val e = intercept[AnalysisException] {
         df.write.mode(SaveMode.Append).saveAsTable(tableName)
       }.getMessage
-      assert(e.contains("Saving data in the Hive serde table `default`.`tab1` is not supported " +
+      assert(e.contains("Saving data in the Hive serde table default.tab1 is not supported " +
         "yet. Please use the insertInto() API as an alternative."))
 
       df.write.insertInto(tableName)
@@ -928,9 +928,8 @@ class MetastoreDataSourcesSuite extends QueryTest with SQLTestUtils with TestHiv
         createDF(10, 19).write.mode(SaveMode.Append).format("orc").saveAsTable("appendOrcToParquet")
       }
       assert(e.getMessage.contains(
-        "The file format of the existing table default.appendOrcToParquet " +
-          "is `org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat`. " +
-          "It doesn't match the specified format `orc`"))
+        "The format of the existing table default.appendOrcToParquet is `ParquetFileFormat`. " +
+          "It doesn't match the specified format `OrcFileFormat`"))
     }
 
     withTable("appendParquetToJson") {
@@ -940,9 +939,8 @@ class MetastoreDataSourcesSuite extends QueryTest with SQLTestUtils with TestHiv
           .saveAsTable("appendParquetToJson")
       }
       assert(e.getMessage.contains(
-        "The file format of the existing table default.appendParquetToJson " +
-          "is `org.apache.spark.sql.execution.datasources.json.JsonFileFormat`. " +
-          "It doesn't match the specified format `parquet`"))
+        "The format of the existing table default.appendParquetToJson is `JsonFileFormat`. " +
+          "It doesn't match the specified format `ParquetFileFormat`"))
     }
 
     withTable("appendTextToJson") {
@@ -952,9 +950,8 @@ class MetastoreDataSourcesSuite extends QueryTest with SQLTestUtils with TestHiv
           .saveAsTable("appendTextToJson")
       }
       assert(e.getMessage.contains(
-        "The file format of the existing table default.appendTextToJson is " +
-          "`org.apache.spark.sql.execution.datasources.json.JsonFileFormat`. " +
-          "It doesn't match the specified format `text`"))
+        "The format of the existing table default.appendTextToJson is `JsonFileFormat`. " +
+          "It doesn't match the specified format `TextFileFormat`"))
     }
   }
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/sources/HadoopFsRelationTest.scala b/sql/hive/src/test/scala/org/apache/spark/sql/sources/HadoopFsRelationTest.scala
index 22f13a494c..224b2c6c6f 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/sources/HadoopFsRelationTest.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/sources/HadoopFsRelationTest.scala
@@ -446,7 +446,7 @@ abstract class HadoopFsRelationTest extends QueryTest with SQLTestUtils with Tes
         .saveAsTable("t")
 
       // Using only a subset of all partition columns
-      intercept[Throwable] {
+      intercept[AnalysisException] {
         partitionedTestDF2.write
           .format(dataSourceName)
           .mode(SaveMode.Append)
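The final hunk (truncated here by the diff viewer) tightens the expected exception for appends that declare only a subset of the table's partition columns. A rough, hypothetical sketch of the behavior it pins down (the table and column names are invented; this again reuses the `spark` session and implicits from the first sketch):

```scala
import org.apache.spark.sql.{AnalysisException, SaveMode}
import spark.implicits._

val full = Seq((1, "a", "x", 2016)).toDF("id", "name", "p1", "p2")
full.write.partitionBy("p1", "p2").saveAsTable("partitioned_tab")

try {
  // Partitioning by only `p1` (a subset) no longer matches the table
  // metadata, so this now fails with a specific AnalysisException rather
  // than some arbitrary Throwable.
  full.write.mode(SaveMode.Append).partitionBy("p1").saveAsTable("partitioned_tab")
} catch {
  case e: AnalysisException => println(e.getMessage)
}
```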