author     gatorsmile <gatorsmile@gmail.com>           2016-04-09 17:40:36 -0700
committer  Yin Huai <yhuai@databricks.com>             2016-04-09 17:40:36 -0700
commit     dfce9665c4b2b29a19e6302216dae2800da68ff9 (patch)
tree       a1a1c2e14c14a28249eb4a2b29ca47ea1a07947f /sql/hive/src
parent     9be5558e009069925d1f2d737d42e1683ed6b47f (diff)
download   spark-dfce9665c4b2b29a19e6302216dae2800da68ff9.tar.gz
           spark-dfce9665c4b2b29a19e6302216dae2800da68ff9.tar.bz2
           spark-dfce9665c4b2b29a19e6302216dae2800da68ff9.zip
[SPARK-14362][SPARK-14406][SQL] DDL Native Support: Drop View and Drop Table
#### What changes were proposed in this pull request?

This PR adds native support for the DDL commands `DROP VIEW` and `DROP TABLE`, covering both native parsing and native analysis.

Based on the Hive DDL document for [DROP VIEW](https://cwiki.apache.org/confluence/display/Hive/LanguageManual+DDL#LanguageManualDDL-DropView), `DROP VIEW` is defined as:

**Syntax:**
```SQL
DROP VIEW [IF EXISTS] [db_name.]view_name;
```
- Removes the metadata for the specified view.
- It is illegal to use `DROP TABLE` on a view.
- It is illegal to use `DROP VIEW` on a table.
- This command only works in `HiveContext`; in `SQLContext`, it throws an exception.

This PR also handles `DROP TABLE`:

**Syntax:**
```SQL
DROP TABLE [IF EXISTS] table_name [PURGE];
```
- Previously, `DROP TABLE` could only drop Hive tables in `HiveContext`. After this PR, it can also drop temporary tables, external tables, and external data source tables in `SQLContext`.
- In `HiveContext`, if the to-be-dropped table does not exist and `IF EXISTS` was not specified, no exception is thrown; an error message is logged instead. If `IF EXISTS` is specified, neither an error message nor an exception is issued.
- In `SQLContext`, an exception is thrown when the to-be-dropped table does not exist, unless `IF EXISTS` is specified.
- Table data is deleted only when the table type is `managed_table`; the data of `external` tables is never deleted.

#### How was this patch tested?

To verify command parsing, test cases were added in `spark/sql/hive/HiveDDLCommandSuite.scala`. To verify command analysis, test cases were added in `spark/sql/hive/execution/HiveDDLSuite.scala`.

Author: gatorsmile <gatorsmile@gmail.com>
Author: xiaoli <lixiao1983@gmail.com>
Author: Xiao Li <xiaoli@Xiaos-MacBook-Pro.local>

Closes #12146 from gatorsmile/dropView.
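A short, hypothetical SQL session sketching the behavior described above (the names `tab1` and `view1` are illustrative, mirroring the test suite; the error messages are the ones asserted in `HiveDDLSuite` below):

```SQL
CREATE TABLE tab1(c1 INT);
CREATE VIEW view1 AS SELECT * FROM tab1;

-- Mixing up the two commands fails analysis:
DROP VIEW tab1;    -- AnalysisException: Cannot drop a table with DROP VIEW. Please use DROP TABLE instead
DROP TABLE view1;  -- AnalysisException: Cannot drop a view with DROP TABLE. Please use DROP VIEW instead

-- The matching commands succeed; tab1 is a managed table, so its data is deleted as well:
DROP VIEW view1;
DROP TABLE tab1;

-- IF EXISTS suppresses any error for relations that no longer exist:
DROP TABLE IF EXISTS tab1;
DROP VIEW IF EXISTS view1;
```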
Diffstat (limited to 'sql/hive/src')
-rw-r--r--  sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala      |   4
-rw-r--r--  sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala       |   2
-rw-r--r--  sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveSqlParser.scala  |  13
-rw-r--r--  sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala       |  30
-rw-r--r--  sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala      |  10
-rw-r--r--  sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala   | 156
6 files changed, 169 insertions, 46 deletions
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
index b1156fb3e2..a49ce33ba1 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
@@ -182,6 +182,10 @@ private[spark] class HiveExternalCatalog(client: HiveClient) extends ExternalCat
client.getTable(db, table)
}
+ override def getTableOption(db: String, table: String): Option[CatalogTable] = withClient {
+ client.getTableOption(db, table)
+ }
+
override def tableExists(db: String, table: String): Boolean = withClient {
client.getTableOption(db, table).isDefined
}
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala
index 0cccc22e5a..875652c226 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala
@@ -70,6 +70,8 @@ private[sql] class HiveSessionCatalog(
}
}
+ override def isViewSupported: Boolean = true
+
// ----------------------------------------------------------------
// | Methods and fields for interacting with HiveMetastoreCatalog |
// ----------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveSqlParser.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveSqlParser.scala
index 657edb493a..7a435117e7 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveSqlParser.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveSqlParser.scala
@@ -104,19 +104,6 @@ class HiveSqlAstBuilder extends SparkSqlAstBuilder {
}
/**
- * Create a [[DropTable]] command.
- */
- override def visitDropTable(ctx: DropTableContext): LogicalPlan = withOrigin(ctx) {
- if (ctx.PURGE != null) {
- logWarning("PURGE option is ignored.")
- }
- if (ctx.REPLICATION != null) {
- logWarning("REPLICATION clause is ignored.")
- }
- DropTable(visitTableIdentifier(ctx.tableIdentifier).toString, ctx.EXISTS != null)
- }
-
- /**
* Create an [[AnalyzeTable]] command. This currently only implements the NOSCAN option (other
* options are passed on to Hive) e.g.:
* {{{
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala
index 64d1341a47..06badff474 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala
@@ -46,36 +46,6 @@ case class AnalyzeTable(tableName: String) extends RunnableCommand {
}
}
-/**
- * Drops a table from the metastore and removes it if it is cached.
- */
-private[hive]
-case class DropTable(
- tableName: String,
- ifExists: Boolean) extends RunnableCommand {
-
- override def run(sqlContext: SQLContext): Seq[Row] = {
- val hiveContext = sqlContext.asInstanceOf[HiveContext]
- val ifExistsClause = if (ifExists) "IF EXISTS " else ""
- try {
- hiveContext.cacheManager.tryUncacheQuery(hiveContext.table(tableName))
- } catch {
- // This table's metadata is not in Hive metastore (e.g. the table does not exist).
- case _: org.apache.hadoop.hive.ql.metadata.InvalidTableException =>
- case _: org.apache.spark.sql.catalyst.analysis.NoSuchTableException =>
- // Other Throwables can be caused by users providing wrong parameters in OPTIONS
- // (e.g. invalid paths). We catch it and log a warning message.
- // Users should be able to drop such kinds of tables regardless if there is an error.
- case e: Throwable => log.warn(s"${e.getMessage}", e)
- }
- hiveContext.invalidateTable(tableName)
- hiveContext.runSqlHive(s"DROP TABLE $ifExistsClause$tableName")
- hiveContext.sessionState.catalog.dropTable(
- TableIdentifier(tableName), ignoreIfNotExists = true)
- Seq.empty[Row]
- }
-}
-
private[hive]
case class AddJar(path: String) extends RunnableCommand {
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala
index 12a582c10a..a144da4997 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala
@@ -72,6 +72,7 @@ class HiveDDLCommandSuite extends PlanTest {
CatalogColumn("country", "string", comment = Some("country of origination")) :: Nil)
// TODO will be SQLText
assert(desc.viewText == Option("This is the staging page view table"))
+ assert(desc.viewOriginalText.isEmpty)
assert(desc.partitionColumns ==
CatalogColumn("dt", "string", comment = Some("date type")) ::
CatalogColumn("hour", "string", comment = Some("hour of the day")) :: Nil)
@@ -118,6 +119,7 @@ class HiveDDLCommandSuite extends PlanTest {
CatalogColumn("country", "string", comment = Some("country of origination")) :: Nil)
// TODO will be SQLText
assert(desc.viewText == Option("This is the staging page view table"))
+ assert(desc.viewOriginalText.isEmpty)
assert(desc.partitionColumns ==
CatalogColumn("dt", "string", comment = Some("date type")) ::
CatalogColumn("hour", "string", comment = Some("hour of the day")) :: Nil)
@@ -138,6 +140,7 @@ class HiveDDLCommandSuite extends PlanTest {
assert(desc.storage.locationUri == None)
assert(desc.schema == Seq.empty[CatalogColumn])
assert(desc.viewText == None) // TODO will be SQLText
+ assert(desc.viewOriginalText.isEmpty)
assert(desc.storage.serdeProperties == Map())
assert(desc.storage.inputFormat == Some("org.apache.hadoop.mapred.TextInputFormat"))
assert(desc.storage.outputFormat ==
@@ -173,6 +176,7 @@ class HiveDDLCommandSuite extends PlanTest {
assert(desc.storage.locationUri == None)
assert(desc.schema == Seq.empty[CatalogColumn])
assert(desc.viewText == None) // TODO will be SQLText
+ assert(desc.viewOriginalText.isEmpty)
assert(desc.storage.serdeProperties == Map(("serde_p1" -> "p1"), ("serde_p2" -> "p2")))
assert(desc.storage.inputFormat == Some("org.apache.hadoop.hive.ql.io.RCFileInputFormat"))
assert(desc.storage.outputFormat == Some("org.apache.hadoop.hive.ql.io.RCFileOutputFormat"))
@@ -286,7 +290,7 @@ class HiveDDLCommandSuite extends PlanTest {
}
test("use backticks in output of Script Transform") {
- val plan = parser.parsePlan(
+ parser.parsePlan(
"""SELECT `t`.`thing1`
|FROM (SELECT TRANSFORM (`parquet_t1`.`key`, `parquet_t1`.`value`)
|USING 'cat' AS (`thing1` int, `thing2` string) FROM `default`.`parquet_t1`) AS t
@@ -294,7 +298,7 @@ class HiveDDLCommandSuite extends PlanTest {
}
test("use backticks in output of Generator") {
- val plan = parser.parsePlan(
+ parser.parsePlan(
"""
|SELECT `gentab2`.`gencol2`
|FROM `default`.`src`
@@ -304,7 +308,7 @@ class HiveDDLCommandSuite extends PlanTest {
}
test("use escaped backticks in output of Generator") {
- val plan = parser.parsePlan(
+ parser.parsePlan(
"""
|SELECT `gen``tab2`.`gen``col2`
|FROM `default`.`src`
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
new file mode 100644
index 0000000000..78ccdc7adb
--- /dev/null
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
@@ -0,0 +1,156 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.hive.execution
+
+import org.apache.hadoop.fs.Path
+
+import org.apache.spark.sql.{AnalysisException, QueryTest, SaveMode}
+import org.apache.spark.sql.catalyst.catalog.CatalogTableType
+import org.apache.spark.sql.catalyst.TableIdentifier
+import org.apache.spark.sql.hive.test.TestHiveSingleton
+import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.test.SQLTestUtils
+
+class HiveDDLSuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
+ import hiveContext.implicits._
+
+ // Checks whether the directory holding the table's data exists.
+ private def tableDirectoryExists(tableIdentifier: TableIdentifier): Boolean = {
+ val expectedTablePath =
+ hiveContext.sessionState.catalog.hiveDefaultTableFilePath(tableIdentifier)
+ val filesystemPath = new Path(expectedTablePath)
+ val fs = filesystemPath.getFileSystem(sparkContext.hadoopConfiguration)
+ fs.exists(filesystemPath)
+ }
+
+ test("drop tables") {
+ withTable("tab1") {
+ val tabName = "tab1"
+
+ assert(!tableDirectoryExists(TableIdentifier(tabName)))
+ sql(s"CREATE TABLE $tabName(c1 int)")
+
+ assert(tableDirectoryExists(TableIdentifier(tabName)))
+ sql(s"DROP TABLE $tabName")
+
+ assert(!tableDirectoryExists(TableIdentifier(tabName)))
+ sql(s"DROP TABLE IF EXISTS $tabName")
+ sql(s"DROP VIEW IF EXISTS $tabName")
+ }
+ }
+
+ test("drop managed tables") {
+ withTempDir { tmpDir =>
+ val tabName = "tab1"
+ withTable(tabName) {
+ assert(tmpDir.listFiles.isEmpty)
+ sql(
+ s"""
+ |create table $tabName
+ |stored as parquet
+ |location '$tmpDir'
+ |as select 1, '3'
+ """.stripMargin)
+
+ val hiveTable =
+ hiveContext.sessionState.catalog
+ .getTableMetadata(TableIdentifier(tabName, Some("default")))
+ // The table is managed, even though a LOCATION is specified in the SQL
+ assert(hiveTable.tableType == CatalogTableType.MANAGED_TABLE)
+
+ assert(tmpDir.listFiles.nonEmpty)
+ sql(s"DROP TABLE $tabName")
+ // The data is deleted since the table type is not EXTERNAL
+ assert(tmpDir.listFiles == null)
+ }
+ }
+ }
+
+ test("drop external data source table") {
+ withTempDir { tmpDir =>
+ val tabName = "tab1"
+ withTable(tabName) {
+ assert(tmpDir.listFiles.isEmpty)
+
+ withSQLConf(SQLConf.PARQUET_WRITE_LEGACY_FORMAT.key -> "true") {
+ Seq(1 -> "a").toDF("i", "j")
+ .write
+ .mode(SaveMode.Overwrite)
+ .format("parquet")
+ .option("path", tmpDir.toString)
+ .saveAsTable(tabName)
+ }
+
+ val hiveTable =
+ hiveContext.sessionState.catalog
+ .getTableMetadata(TableIdentifier(tabName, Some("default")))
+ // This data source table is an external table
+ assert(hiveTable.tableType == CatalogTableType.EXTERNAL_TABLE)
+
+ assert(tmpDir.listFiles.nonEmpty)
+ sql(s"DROP TABLE $tabName")
+ // The data is not deleted since the table type is EXTERNAL
+ assert(tmpDir.listFiles.nonEmpty)
+ }
+ }
+ }
+
+ test("drop views") {
+ withTable("tab1") {
+ val tabName = "tab1"
+ sqlContext.range(10).write.saveAsTable("tab1")
+ withView("view1") {
+ val viewName = "view1"
+
+ assert(tableDirectoryExists(TableIdentifier(tabName)))
+ assert(!tableDirectoryExists(TableIdentifier(viewName)))
+ sql(s"CREATE VIEW $viewName AS SELECT * FROM tab1")
+
+ assert(tableDirectoryExists(TableIdentifier(tabName)))
+ assert(!tableDirectoryExists(TableIdentifier(viewName)))
+ sql(s"DROP VIEW $viewName")
+
+ assert(tableDirectoryExists(TableIdentifier(tabName)))
+ sql(s"DROP VIEW IF EXISTS $viewName")
+ }
+ }
+ }
+
+ test("drop table using drop view") {
+ withTable("tab1") {
+ sql("CREATE TABLE tab1(c1 int)")
+ val message = intercept[AnalysisException] {
+ sql("DROP VIEW tab1")
+ }.getMessage
+ assert(message.contains("Cannot drop a table with DROP VIEW. Please use DROP TABLE instead"))
+ }
+ }
+
+ test("drop view using drop table") {
+ withTable("tab1") {
+ sqlContext.range(10).write.saveAsTable("tab1")
+ withView("view1") {
+ sql("CREATE VIEW view1 AS SELECT * FROM tab1")
+ val message = intercept[AnalysisException] {
+ sql("DROP TABLE view1")
+ }.getMessage
+ assert(message.contains("Cannot drop a view with DROP TABLE. Please use DROP VIEW instead"))
+ }
+ }
+ }
+}