aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorgatorsmile <gatorsmile@gmail.com>2016-09-18 15:37:15 +0800
committerWenchen Fan <wenchen@databricks.com>2016-09-18 15:37:15 +0800
commit3a3c9ffbd282244407e9437c2b02ae7e062dd183 (patch)
treeb591e2ca7096f92e012ed1a3717f2ac13cf8a28b
parent8faa5217b44e8d52eab7eb2d53d0652abaaf43cd (diff)
downloadspark-3a3c9ffbd282244407e9437c2b02ae7e062dd183.tar.gz
spark-3a3c9ffbd282244407e9437c2b02ae7e062dd183.tar.bz2
spark-3a3c9ffbd282244407e9437c2b02ae7e062dd183.zip
[SPARK-17518][SQL] Block Users to Specify the Internal Data Source Provider Hive
### What changes were proposed in this pull request? In Spark 2.1, we introduced a new internal provider `hive` for telling Hive serde tables from data source tables. This PR is to block users to specify this in `DataFrameWriter` and SQL APIs. ### How was this patch tested? Added a test case Author: gatorsmile <gatorsmile@gmail.com> Closes #15073 from gatorsmile/formatHive.
-rw-r--r--sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala3
-rw-r--r--sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala5
-rw-r--r--sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala2
-rw-r--r--sql/core/src/test/scala/org/apache/spark/sql/internal/CatalogSuite.scala7
-rw-r--r--sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala2
-rw-r--r--sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala51
6 files changed, 67 insertions, 3 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala
index e137f076a0..64d3422cb4 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala
@@ -357,6 +357,9 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) {
}
private def saveAsTable(tableIdent: TableIdentifier): Unit = {
+ if (source.toLowerCase == "hive") {
+ throw new AnalysisException("Cannot create hive serde table with saveAsTable API")
+ }
val tableExists = df.sparkSession.sessionState.catalog.tableExists(tableIdent)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
index 7ba1a9ff22..5359cedc80 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
@@ -22,7 +22,7 @@ import scala.collection.JavaConverters._
import org.antlr.v4.runtime.{ParserRuleContext, Token}
import org.antlr.v4.runtime.tree.TerminalNode
-import org.apache.spark.sql.SaveMode
+import org.apache.spark.sql.{AnalysisException, SaveMode}
import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier}
import org.apache.spark.sql.catalyst.catalog._
import org.apache.spark.sql.catalyst.parser._
@@ -316,6 +316,9 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder {
}
val options = Option(ctx.tablePropertyList).map(visitPropertyKeyValues).getOrElse(Map.empty)
val provider = ctx.tableProvider.qualifiedName.getText
+ if (provider.toLowerCase == "hive") {
+ throw new AnalysisException("Cannot create hive serde table with CREATE TABLE USING")
+ }
val schema = Option(ctx.colTypeList()).map(createStructType)
val partitionColumnNames =
Option(ctx.partitionColumnNames)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala
index 1f87f0e73a..78ad710a62 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala
@@ -258,7 +258,7 @@ class CatalogImpl(sparkSession: SparkSession) extends Catalog {
source: String,
schema: StructType,
options: Map[String, String]): DataFrame = {
- if (source == "hive") {
+ if (source.toLowerCase == "hive") {
throw new AnalysisException("Cannot create hive serde table with createExternalTable API.")
}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/internal/CatalogSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/internal/CatalogSuite.scala
index b221eed7b2..549fd63f74 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/internal/CatalogSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/internal/CatalogSuite.scala
@@ -322,6 +322,13 @@ class CatalogSuite
assert(e2.message == "Cannot create a file-based external data source table without path")
}
+ test("createExternalTable should fail if provider is hive") {
+ val e = intercept[AnalysisException] {
+ spark.catalog.createExternalTable("tbl", "HiVe", Map.empty[String, String])
+ }
+ assert(e.message.contains("Cannot create hive serde table with createExternalTable API"))
+ }
+
// TODO: add tests for the rest of them
}
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala
index fb11c849ed..9d2930948d 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala
@@ -61,7 +61,7 @@ private[hive] trait HiveStrategies {
// `ErrorIfExists` mode, and `DataFrameWriter.saveAsTable` doesn't support hive serde
// tables yet.
if (mode == SaveMode.Append || mode == SaveMode.Overwrite) {
- throw new AnalysisException("" +
+ throw new AnalysisException(
"CTAS for hive serde tables does not support append or overwrite semantics.")
}
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala
index 3466733d7f..0f331bae93 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala
@@ -26,6 +26,7 @@ import org.apache.hadoop.fs.Path
import org.apache.spark.sql._
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType}
+import org.apache.spark.sql.catalyst.parser.ParseException
import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, LogicalRelation}
import org.apache.spark.sql.hive.HiveExternalCatalog._
import org.apache.spark.sql.hive.client.HiveClient
@@ -1151,6 +1152,56 @@ class MetastoreDataSourcesSuite extends QueryTest with SQLTestUtils with TestHiv
}
}
+ test("save API - format hive") {
+ withTempDir { dir =>
+ val path = dir.getCanonicalPath
+ val e = intercept[ClassNotFoundException] {
+ spark.range(10).write.format("hive").mode(SaveMode.Ignore).save(path)
+ }.getMessage
+ assert(e.contains("Failed to find data source: hive"))
+ }
+ }
+
+ test("saveAsTable API - format hive") {
+ val tableName = "tab1"
+ withTable(tableName) {
+ val e = intercept[AnalysisException] {
+ spark.range(10).write.format("hive").mode(SaveMode.Overwrite).saveAsTable(tableName)
+ }.getMessage
+ assert(e.contains("Cannot create hive serde table with saveAsTable API"))
+ }
+ }
+
+ test("create a data source table using hive") {
+ val tableName = "tab1"
+ withTable (tableName) {
+ val e = intercept[AnalysisException] {
+ sql(
+ s"""
+ |CREATE TABLE $tableName
+ |(col1 int)
+ |USING hive
+ """.stripMargin)
+ }.getMessage
+ assert(e.contains("Cannot create hive serde table with CREATE TABLE USING"))
+ }
+ }
+
+ test("create a temp view using hive") {
+ val tableName = "tab1"
+ withTable (tableName) {
+ val e = intercept[ClassNotFoundException] {
+ sql(
+ s"""
+ |CREATE TEMPORARY VIEW $tableName
+ |(col1 int)
+ |USING hive
+ """.stripMargin)
+ }.getMessage
+ assert(e.contains("Failed to find data source: hive"))
+ }
+ }
+
test("saveAsTable - source and target are the same table") {
val tableName = "tab1"
withTable(tableName) {