path: root/sql/catalyst
author     Andrew Or <andrew@databricks.com>    2016-04-05 14:54:07 -0700
committer  Yin Huai <yhuai@databricks.com>      2016-04-05 14:54:07 -0700
commit     45d8cdee3945bf94d0f1bd93a12e4cb0d416468e (patch)
tree       13e700eeb175a0cbe5419ec62f72a6acfd4c96ea /sql/catalyst
parent     c59abad052b7beec4ef550049413e95578e545be (diff)
[SPARK-14129][SPARK-14128][SQL] Alter table DDL commands
## What changes were proposed in this pull request?

In Spark 2.0, we want to handle the most common `ALTER TABLE` commands ourselves instead of passing the entire query text to Hive. This is done using the new `SessionCatalog` API introduced recently.

The commands supported in this patch include:
```
ALTER TABLE ... RENAME TO ...
ALTER TABLE ... SET TBLPROPERTIES ...
ALTER TABLE ... UNSET TBLPROPERTIES ...
ALTER TABLE ... SET LOCATION ...
ALTER TABLE ... SET SERDE ...
```

The commands we explicitly do not support are:
```
ALTER TABLE ... CLUSTERED BY ...
ALTER TABLE ... SKEWED BY ...
ALTER TABLE ... NOT CLUSTERED
ALTER TABLE ... NOT SORTED
ALTER TABLE ... NOT SKEWED
ALTER TABLE ... NOT STORED AS DIRECTORIES
```

For these we throw exceptions complaining that they are not supported.

## How was this patch tested?

`DDLSuite`

Author: Andrew Or <andrew@databricks.com>

Closes #12121 from andrewor14/alter-table-ddl.
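As a hedged usage sketch (not part of the patch), the newly supported commands can be issued through `SQLContext.sql`; the table names, property key, location, and SerDe class below are illustrative assumptions:

```scala
// Hedged usage sketch: table names, property key, path, and SerDe class are
// made up for illustration.
sqlContext.sql("ALTER TABLE boxes RENAME TO crates")
sqlContext.sql("ALTER TABLE crates SET TBLPROPERTIES ('owner' = 'andrew')")
sqlContext.sql("ALTER TABLE crates UNSET TBLPROPERTIES ('owner')")
sqlContext.sql("ALTER TABLE crates SET LOCATION '/path/to/data'")
sqlContext.sql("ALTER TABLE crates SET SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'")

// The explicitly unsupported forms now fail with an exception saying the
// operation is not supported, instead of being forwarded to Hive, e.g.:
// sqlContext.sql("ALTER TABLE crates NOT CLUSTERED")
```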
Diffstat (limited to 'sql/catalyst')
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala  12
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala     42
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/StringKeyHashMap.scala       2
3 files changed, 56 insertions(+), 0 deletions(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
index 1ebdf49348..f239b33e44 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
@@ -54,6 +54,10 @@ trait FunctionRegistry {
/** Checks if a function with a given name exists. */
def functionExists(name: String): Boolean = lookupFunction(name).isDefined
+
+ /** Clear all registered functions. */
+ def clear(): Unit
+
}
class SimpleFunctionRegistry extends FunctionRegistry {
@@ -93,6 +97,10 @@ class SimpleFunctionRegistry extends FunctionRegistry {
functionBuilders.remove(name).isDefined
}
+ override def clear(): Unit = {
+ functionBuilders.clear()
+ }
+
def copy(): SimpleFunctionRegistry = synchronized {
val registry = new SimpleFunctionRegistry
functionBuilders.iterator.foreach { case (name, (info, builder)) =>
@@ -132,6 +140,10 @@ object EmptyFunctionRegistry extends FunctionRegistry {
throw new UnsupportedOperationException
}
+ override def clear(): Unit = {
+ throw new UnsupportedOperationException
+ }
+
}
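For context, a minimal sketch (not part of the patch) of what the new `clear()` method does to a `SimpleFunctionRegistry`; `copy()` and `functionExists` are the members visible in this file, and `abs` is assumed to be among the registered built-ins:

```scala
// Minimal sketch under the assumptions above, exercising the new clear() method.
val registry: SimpleFunctionRegistry = FunctionRegistry.builtin.copy()
assert(registry.functionExists("abs"))    // built-ins are carried over by copy()
registry.clear()                          // drops every registered builder
assert(!registry.functionExists("abs"))   // the registry is now empty
```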
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
index c08ffbb235..62a3b1c105 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
@@ -304,12 +304,19 @@ class SessionCatalog(
dbTables ++ _tempTables
}
+ // TODO: It's strange that we have both refresh and invalidate here.
+
/**
* Refresh the cache entry for a metastore table, if any.
*/
def refreshTable(name: TableIdentifier): Unit = { /* no-op */ }
/**
+ * Invalidate the cache entry for a metastore table, if any.
+ */
+ def invalidateTable(name: TableIdentifier): Unit = { /* no-op */ }
+
+ /**
* Drop all existing temporary tables.
* For testing only.
*/
@@ -596,6 +603,11 @@ class SessionCatalog(
}
/**
+ * List all functions in the specified database, including temporary functions.
+ */
+ def listFunctions(db: String): Seq[FunctionIdentifier] = listFunctions(db, "*")
+
+ /**
* List all matching functions in the specified database, including temporary functions.
*/
def listFunctions(db: String, pattern: String): Seq[FunctionIdentifier] = {
@@ -609,4 +621,34 @@ class SessionCatalog(
// So, the returned list may have two entries for the same function.
dbFunctions ++ loadedFunctions
}
+
+
+ // -----------------
+ // | Other methods |
+ // -----------------
+
+ /**
+ * Drop all existing databases (except "default") along with all associated tables,
+ * partitions and functions, and set the current database to "default".
+ *
+ * This is mainly used for tests.
+ */
+ private[sql] def reset(): Unit = {
+ val default = "default"
+ listDatabases().filter(_ != default).foreach { db =>
+ dropDatabase(db, ignoreIfNotExists = false, cascade = true)
+ }
+ tempTables.clear()
+ functionRegistry.clear()
+ // restore built-in functions
+ FunctionRegistry.builtin.listFunction().foreach { f =>
+ val expressionInfo = FunctionRegistry.builtin.lookupFunction(f)
+ val functionBuilder = FunctionRegistry.builtin.lookupFunctionBuilder(f)
+ require(expressionInfo.isDefined, s"built-in function '$f' is missing expression info")
+ require(functionBuilder.isDefined, s"built-in function '$f' is missing function builder")
+ functionRegistry.registerFunction(f, expressionInfo.get, functionBuilder.get)
+ }
+ setCurrentDatabase(default)
+ }
+
}
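A hedged sketch (not from the patch) of how `reset()` is meant to isolate catalog-mutating tests such as `DDLSuite`; the `catalog` value and the `CatalogDatabase` arguments are assumptions, while `createDatabase`, `setCurrentDatabase`, `listDatabases`, and `reset` are the `SessionCatalog` methods referenced above:

```scala
// Hedged sketch: "catalog" is assumed to be a SessionCatalog backed by an
// in-memory ExternalCatalog set up elsewhere in the test.
catalog.createDatabase(
  CatalogDatabase("mydb", description = "", locationUri = "", properties = Map.empty),
  ignoreIfExists = false)
catalog.setCurrentDatabase("mydb")
// ... exercise ALTER TABLE and other DDL commands against "mydb" ...
catalog.reset()  // drops "mydb", clears temp tables, restores built-in functions
assert(catalog.listDatabases().toSet == Set("default"))
```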
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/StringKeyHashMap.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/StringKeyHashMap.scala
index 191d5e6399..d5d151a580 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/StringKeyHashMap.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/StringKeyHashMap.scala
@@ -41,4 +41,6 @@ class StringKeyHashMap[T](normalizer: (String) => String) {
def remove(key: String): Option[T] = base.remove(normalizer(key))
def iterator: Iterator[(String, T)] = base.toIterator
+
+ def clear(): Unit = base.clear()
}