aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorgatorsmile <gatorsmile@gmail.com>2016-11-22 15:10:49 -0800
committergatorsmile <gatorsmile@gmail.com>2016-11-22 15:10:49 -0800
commit9c42d4a76ca8046fcca2e20067f2aa461977e65a (patch)
tree09e95ee4fa540b5da785d31910ccf6f5f1a81000
parent2fd101b2f0028e005fbb0bdd29e59af37aa637da (diff)
downloadspark-9c42d4a76ca8046fcca2e20067f2aa461977e65a.tar.gz
spark-9c42d4a76ca8046fcca2e20067f2aa461977e65a.tar.bz2
spark-9c42d4a76ca8046fcca2e20067f2aa461977e65a.zip
[SPARK-16803][SQL] SaveAsTable does not work when target table is a Hive serde table
### What changes were proposed in this pull request? In Spark 2.0, `saveAsTable` does not work when the target table is a Hive serde table, although it works in Spark 1.6. **Spark 1.6** ``` Scala scala> sql("create table sample.sample stored as SEQUENCEFILE as select 1 as key, 'abc' as value") res2: org.apache.spark.sql.DataFrame = [] scala> val df = sql("select key, value as value from sample.sample") df: org.apache.spark.sql.DataFrame = [key: int, value: string] scala> df.write.mode("append").saveAsTable("sample.sample") scala> sql("select * from sample.sample").show() +---+-----+ |key|value| +---+-----+ | 1| abc| | 1| abc| +---+-----+ ``` **Spark 2.0** ``` Scala scala> df.write.mode("append").saveAsTable("sample.sample") org.apache.spark.sql.AnalysisException: Saving data in MetastoreRelation sample, sample is not supported.; ``` For now, we do not plan to support it in Spark 2.1 because of the risk involved. Spark 1.6 works because it internally uses insertInto. However, changing it back would break the semantics of saveAsTable (this method uses by-name resolution instead of the by-position resolution used by insertInto). Additional changes are also needed to support `hive` as a `format` in DataFrameWriter. Instead, users should use the insertInto API. This PR corrects the error message so that users can understand how to work around the limitation until it is supported in a separate PR. ### How was this patch tested? Test cases are added. Author: gatorsmile <gatorsmile@gmail.com> Closes #15926 from gatorsmile/saveAsTableFix5.
-rw-r--r--sql/core/src/main/scala/org/apache/spark/sql/execution/command/createDataSourceTables.scala4
-rw-r--r--sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala20
2 files changed, 24 insertions, 0 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/createDataSourceTables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/createDataSourceTables.scala
index 7e16e43f2b..add732c1af 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/createDataSourceTables.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/createDataSourceTables.scala
@@ -175,6 +175,10 @@ case class CreateDataSourceTableAsSelectCommand(
existingSchema = Some(l.schema)
case s: SimpleCatalogRelation if DDLUtils.isDatasourceTable(s.metadata) =>
existingSchema = Some(s.metadata.schema)
+      // Hive serde tables get a dedicated error (instead of the generic fallback below)
+      // so that the message can point users at insertInto() as the alternative.
+      case c: CatalogRelation if c.catalogTable.provider == Some(DDLUtils.HIVE_PROVIDER) =>
+        throw new AnalysisException("Saving data in the Hive serde table " +
+          s"${c.catalogTable.identifier} is not supported yet. Please use the " +
+          "insertInto() API as an alternative.")
case o =>
throw new AnalysisException(s"Saving data in ${o.toString} is not supported.")
}
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala
index 4ab1a54edc..c7cc75fbc8 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala
@@ -413,6 +413,26 @@ class MetastoreDataSourcesSuite extends QueryTest with SQLTestUtils with TestHiv
}
}
+  test("saveAsTable(CTAS) using append and insertInto when the target table is Hive serde") {
+    // saveAsTable in append mode is rejected for a Hive serde table, while insertInto succeeds.
+    val hiveTable = "tab1"
+    withTable(hiveTable) {
+      sql(s"CREATE TABLE $hiveTable STORED AS SEQUENCEFILE AS SELECT 1 AS key, 'abc' AS value")
+      val source = sql(s"SELECT key, value FROM $hiveTable")
+
+      // The append through saveAsTable must fail with a message that mentions insertInto().
+      val error = intercept[AnalysisException] {
+        source.write.mode(SaveMode.Append).saveAsTable(hiveTable)
+      }
+      assert(error.getMessage.contains(
+        "Saving data in the Hive serde table `default`.`tab1` is not supported " +
+        "yet. Please use the insertInto() API as an alternative."))
+
+      // insertInto writes the table's single row back into it, so two identical rows result.
+      source.write.insertInto(hiveTable)
+      val expectedRows = Seq(Row(1, "abc"), Row(1, "abc"))
+      checkAnswer(sql(s"SELECT * FROM $hiveTable"), expectedRows)
+    }
+  }
+
test("SPARK-5839 HiveMetastoreCatalog does not recognize table aliases of data source tables.") {
withTable("savedJsonTable") {
// Save the df as a managed table (by not specifying the path).