author    ouyangxiaochen <ou.yangxiaochen@zte.com.cn>  2017-02-13 19:41:44 -0800
committer Xiao Li <gatorsmile@gmail.com>               2017-02-13 19:41:44 -0800
commit    6e45b547ceadbbe8394bf149945b7942df82660a
tree      fda868dd2b8ee22c1ef941926f0ad07deb1cfc72
parent    e02ac303c6356cdf7fffec7361311d828a723afe
[SPARK-19115][SQL] Supporting Create Table Like Location
What changes were proposed in this pull request?

Support the CREATE [EXTERNAL] TABLE ... LIKE ... LOCATION ... syntax for Hive serde and data source tables. Following the Spark SQL design rules, CREATE TABLE LIKE now accepts a view, a physical table, or a temporary view as its source together with an optional location, and a table created with a user-specified location becomes an external table rather than a managed one.

How was this patch tested?

Added new test cases and updated existing test cases.

Author: ouyangxiaochen <ou.yangxiaochen@zte.com.cn>

Closes #16868 from ouyangxiaochen/spark19115.
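A minimal usage sketch of the syntax this change enables, written against a SparkSession; the session name, table names, and path below are illustrative assumptions, not taken from the patch:

// Assumed setup: `spark` is an active SparkSession and `src` is a demo table.
spark.sql("CREATE TABLE src (key INT, value STRING) USING parquet")

// Without LOCATION, the copy stays a MANAGED table, as before this change.
spark.sql("CREATE TABLE tgt_managed LIKE src")

// With LOCATION, the copy becomes an EXTERNAL table rooted at the given path.
spark.sql("CREATE TABLE tgt_external LIKE src LOCATION '/tmp/spark/warehouse'")

// IF NOT EXISTS composes with LOCATION in the same way.
spark.sql("CREATE TABLE IF NOT EXISTS tgt_external LIKE src LOCATION '/tmp/spark/warehouse'")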
Diffstat (limited to 'sql/hive')
-rw-r--r--  sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala     |  32
-rw-r--r--  sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala  | 143
2 files changed, 146 insertions, 29 deletions
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala
index 0bd08877a3..6d7a1c3937 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala
@@ -524,24 +524,48 @@ class HiveDDLCommandSuite extends PlanTest with SQLTestUtils with TestHiveSingle
test("create table like") {
val v1 = "CREATE TABLE table1 LIKE table2"
- val (target, source, exists) = parser.parsePlan(v1).collect {
- case CreateTableLikeCommand(t, s, allowExisting) => (t, s, allowExisting)
+ val (target, source, location, exists) = parser.parsePlan(v1).collect {
+ case CreateTableLikeCommand(t, s, l, allowExisting) => (t, s, l, allowExisting)
}.head
assert(exists == false)
assert(target.database.isEmpty)
assert(target.table == "table1")
assert(source.database.isEmpty)
assert(source.table == "table2")
+ assert(location.isEmpty)
val v2 = "CREATE TABLE IF NOT EXISTS table1 LIKE table2"
- val (target2, source2, exists2) = parser.parsePlan(v2).collect {
- case CreateTableLikeCommand(t, s, allowExisting) => (t, s, allowExisting)
+ val (target2, source2, location2, exists2) = parser.parsePlan(v2).collect {
+ case CreateTableLikeCommand(t, s, l, allowExisting) => (t, s, l, allowExisting)
}.head
assert(exists2)
assert(target2.database.isEmpty)
assert(target2.table == "table1")
assert(source2.database.isEmpty)
assert(source2.table == "table2")
+ assert(location2.isEmpty)
+
+ val v3 = "CREATE TABLE table1 LIKE table2 LOCATION '/spark/warehouse'"
+ val (target3, source3, location3, exists3) = parser.parsePlan(v3).collect {
+ case CreateTableLikeCommand(t, s, l, allowExisting) => (t, s, l, allowExisting)
+ }.head
+ assert(!exists3)
+ assert(target3.database.isEmpty)
+ assert(target3.table == "table1")
+ assert(source3.database.isEmpty)
+ assert(source3.table == "table2")
+ assert(location3 == Some("/spark/warehouse"))
+
+ val v4 = "CREATE TABLE IF NOT EXISTS table1 LIKE table2 LOCATION '/spark/warehouse'"
+ val (target4, source4, location4, exists4) = parser.parsePlan(v4).collect {
+ case CreateTableLikeCommand(t, s, l, allowExisting) => (t, s, l, allowExisting)
+ }.head
+ assert(exists4)
+ assert(target4.database.isEmpty)
+ assert(target4.table == "table1")
+ assert(source4.database.isEmpty)
+ assert(source4.table == "table2")
+ assert(location4 == Some("/spark/warehouse"))
}
test("load data") {
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
index 9d9f3a620d..c04b9ee0f2 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
@@ -833,54 +833,95 @@ class HiveDDLSuite
}
test("CREATE TABLE LIKE a temporary view") {
+ // CREATE TABLE LIKE a temporary view.
+ withCreateTableLikeTempView(location = None)
+
+ // CREATE TABLE LIKE a temporary view LOCATION ...
+ withTempDir { tmpDir =>
+ withCreateTableLikeTempView(Some(tmpDir.toURI.toString))
+ }
+ }
+
+ private def withCreateTableLikeTempView(location: Option[String]): Unit = {
val sourceViewName = "tab1"
val targetTabName = "tab2"
+ val tableType = if (location.isDefined) CatalogTableType.EXTERNAL else CatalogTableType.MANAGED
withTempView(sourceViewName) {
withTable(targetTabName) {
spark.range(10).select('id as 'a, 'id as 'b, 'id as 'c, 'id as 'd)
.createTempView(sourceViewName)
- sql(s"CREATE TABLE $targetTabName LIKE $sourceViewName")
+
+ val locationClause = if (location.nonEmpty) s"LOCATION '${location.getOrElse("")}'" else ""
+ sql(s"CREATE TABLE $targetTabName LIKE $sourceViewName $locationClause")
val sourceTable = spark.sessionState.catalog.getTempViewOrPermanentTableMetadata(
TableIdentifier(sourceViewName))
val targetTable = spark.sessionState.catalog.getTableMetadata(
TableIdentifier(targetTabName, Some("default")))
- checkCreateTableLike(sourceTable, targetTable)
+ checkCreateTableLike(sourceTable, targetTable, tableType)
}
}
}
test("CREATE TABLE LIKE a data source table") {
+ // CREATE TABLE LIKE a data source table.
+ withCreateTableLikeDSTable(location = None)
+
+ // CREATE TABLE LIKE a data source table LOCATION ...
+ withTempDir { tmpDir =>
+ withCreateTableLikeDSTable(Some(tmpDir.toURI.toString))
+ }
+ }
+
+ private def withCreateTableLikeDSTable(location: Option[String]): Unit = {
val sourceTabName = "tab1"
val targetTabName = "tab2"
+ val tableType = if (location.isDefined) CatalogTableType.EXTERNAL else CatalogTableType.MANAGED
withTable(sourceTabName, targetTabName) {
spark.range(10).select('id as 'a, 'id as 'b, 'id as 'c, 'id as 'd)
.write.format("json").saveAsTable(sourceTabName)
- sql(s"CREATE TABLE $targetTabName LIKE $sourceTabName")
+
+ val locationClause = if (location.nonEmpty) s"LOCATION '${location.getOrElse("")}'" else ""
+ sql(s"CREATE TABLE $targetTabName LIKE $sourceTabName $locationClause")
val sourceTable =
- spark.sessionState.catalog.getTableMetadata(TableIdentifier(sourceTabName, Some("default")))
+ spark.sessionState.catalog.getTableMetadata(
+ TableIdentifier(sourceTabName, Some("default")))
val targetTable =
- spark.sessionState.catalog.getTableMetadata(TableIdentifier(targetTabName, Some("default")))
+ spark.sessionState.catalog.getTableMetadata(
+ TableIdentifier(targetTabName, Some("default")))
// The table type of the source table should be a Hive-managed data source table
assert(DDLUtils.isDatasourceTable(sourceTable))
assert(sourceTable.tableType == CatalogTableType.MANAGED)
- checkCreateTableLike(sourceTable, targetTable)
+ checkCreateTableLike(sourceTable, targetTable, tableType)
}
}
test("CREATE TABLE LIKE an external data source table") {
+ // CREATE TABLE LIKE an external data source table.
+ withCreateTableLikeExtDSTable(location = None)
+
+ // CREATE TABLE LIKE an external data source table LOCATION ...
+ withTempDir { tmpDir =>
+ withCreateTableLikeExtDSTable(Some(tmpDir.toURI.toString))
+ }
+ }
+
+ private def withCreateTableLikeExtDSTable(location: Option[String]): Unit = {
val sourceTabName = "tab1"
val targetTabName = "tab2"
+ val tableType = if (location.isDefined) CatalogTableType.EXTERNAL else CatalogTableType.MANAGED
withTable(sourceTabName, targetTabName) {
withTempPath { dir =>
val path = dir.getCanonicalPath
spark.range(10).select('id as 'a, 'id as 'b, 'id as 'c, 'id as 'd)
.write.format("parquet").save(path)
sql(s"CREATE TABLE $sourceTabName USING parquet OPTIONS (PATH '${dir.toURI}')")
- sql(s"CREATE TABLE $targetTabName LIKE $sourceTabName")
+
+ val locationClause = if (location.nonEmpty) s"LOCATION '${location.getOrElse("")}'" else ""
+ sql(s"CREATE TABLE $targetTabName LIKE $sourceTabName $locationClause")
// The source table should be an external data source table
val sourceTable = spark.sessionState.catalog.getTableMetadata(
@@ -891,30 +932,56 @@ class HiveDDLSuite
assert(DDLUtils.isDatasourceTable(sourceTable))
assert(sourceTable.tableType == CatalogTableType.EXTERNAL)
- checkCreateTableLike(sourceTable, targetTable)
+ checkCreateTableLike(sourceTable, targetTable, tableType)
}
}
}
test("CREATE TABLE LIKE a managed Hive serde table") {
- val catalog = spark.sessionState.catalog
+ // CREATE TABLE LIKE a managed Hive serde table.
+ withCreateTableLikeManagedHiveTable(location = None)
+
+ // CREATE TABLE LIKE a managed Hive serde table LOCATION ...
+ withTempDir { tmpDir =>
+ withCreateTableLikeManagedHiveTable(Some(tmpDir.toURI.toString))
+ }
+ }
+
+ private def withCreateTableLikeManagedHiveTable(location: Option[String]): Unit = {
val sourceTabName = "tab1"
val targetTabName = "tab2"
+ val tableType = if (location.isDefined) CatalogTableType.EXTERNAL else CatalogTableType.MANAGED
+ val catalog = spark.sessionState.catalog
withTable(sourceTabName, targetTabName) {
sql(s"CREATE TABLE $sourceTabName TBLPROPERTIES('prop1'='value1') AS SELECT 1 key, 'a'")
- sql(s"CREATE TABLE $targetTabName LIKE $sourceTabName")
- val sourceTable = catalog.getTableMetadata(TableIdentifier(sourceTabName, Some("default")))
+ val locationClause = if (location.nonEmpty) s"LOCATION '${location.getOrElse("")}'" else ""
+ sql(s"CREATE TABLE $targetTabName LIKE $sourceTabName $locationClause")
+
+ val sourceTable = catalog.getTableMetadata(
+ TableIdentifier(sourceTabName, Some("default")))
assert(sourceTable.tableType == CatalogTableType.MANAGED)
assert(sourceTable.properties.get("prop1").nonEmpty)
- val targetTable = catalog.getTableMetadata(TableIdentifier(targetTabName, Some("default")))
+ val targetTable = catalog.getTableMetadata(
+ TableIdentifier(targetTabName, Some("default")))
- checkCreateTableLike(sourceTable, targetTable)
+ checkCreateTableLike(sourceTable, targetTable, tableType)
}
}
test("CREATE TABLE LIKE an external Hive serde table") {
+ // CREATE TABLE LIKE an external Hive serde table.
+ withCreateTableLikeExtHiveTable(location = None)
+
+ // CREATE TABLE LIKE an external Hive serde table LOCATION ...
+ withTempDir { tmpDir =>
+ withCreateTableLikeExtHiveTable(Some(tmpDir.toURI.toString))
+ }
+ }
+
+ private def withCreateTableLikeExtHiveTable(location: Option[String]): Unit = {
val catalog = spark.sessionState.catalog
+ val tableType = if (location.isDefined) CatalogTableType.EXTERNAL else CatalogTableType.MANAGED
withTempDir { tmpDir =>
val basePath = tmpDir.toURI
val sourceTabName = "tab1"
@@ -936,28 +1003,45 @@ class HiveDDLSuite
|SELECT 1, 'a'
""".stripMargin)
}
- sql(s"CREATE TABLE $targetTabName LIKE $sourceTabName")
- val sourceTable = catalog.getTableMetadata(TableIdentifier(sourceTabName, Some("default")))
+ val locationClause = if (location.nonEmpty) s"LOCATION '${location.getOrElse("")}'" else ""
+ sql(s"CREATE TABLE $targetTabName LIKE $sourceTabName $locationClause")
+
+ val sourceTable = catalog.getTableMetadata(
+ TableIdentifier(sourceTabName, Some("default")))
assert(sourceTable.tableType == CatalogTableType.EXTERNAL)
assert(sourceTable.comment == Option("Apache Spark"))
- val targetTable = catalog.getTableMetadata(TableIdentifier(targetTabName, Some("default")))
+ val targetTable = catalog.getTableMetadata(
+ TableIdentifier(targetTabName, Some("default")))
- checkCreateTableLike(sourceTable, targetTable)
+ checkCreateTableLike(sourceTable, targetTable, tableType)
}
}
}
test("CREATE TABLE LIKE a view") {
+ // CREATE TABLE LIKE a view.
+ withCreateTableLikeView(location = None)
+
+ // CREATE TABLE LIKE a view LOCATION ...
+ withTempDir { tmpDir =>
+ withCreateTableLikeView(Some(tmpDir.toURI.toString))
+ }
+ }
+
+ private def withCreateTableLikeView(location: Option[String]): Unit = {
val sourceTabName = "tab1"
val sourceViewName = "view"
val targetTabName = "tab2"
+ val tableType = if (location.isDefined) CatalogTableType.EXTERNAL else CatalogTableType.MANAGED
withTable(sourceTabName, targetTabName) {
withView(sourceViewName) {
spark.range(10).select('id as 'a, 'id as 'b, 'id as 'c, 'id as 'd)
.write.format("json").saveAsTable(sourceTabName)
sql(s"CREATE VIEW $sourceViewName AS SELECT * FROM $sourceTabName")
- sql(s"CREATE TABLE $targetTabName LIKE $sourceViewName")
+
+ val locationClause = if (location.nonEmpty) s"LOCATION '${location.getOrElse("")}'" else ""
+ sql(s"CREATE TABLE $targetTabName LIKE $sourceViewName $locationClause")
val sourceView = spark.sessionState.catalog.getTableMetadata(
TableIdentifier(sourceViewName, Some("default")))
@@ -969,15 +1053,19 @@ class HiveDDLSuite
val targetTable = spark.sessionState.catalog.getTableMetadata(
TableIdentifier(targetTabName, Some("default")))
- checkCreateTableLike(sourceView, targetTable)
+ checkCreateTableLike(sourceView, targetTable, tableType)
}
}
}
- private def checkCreateTableLike(sourceTable: CatalogTable, targetTable: CatalogTable): Unit = {
- // The created table should be a MANAGED table with empty view text and original text.
- assert(targetTable.tableType == CatalogTableType.MANAGED,
- "the created table must be a Hive managed table")
+ private def checkCreateTableLike(
+ sourceTable: CatalogTable,
+ targetTable: CatalogTable,
+ tableType: CatalogTableType): Unit = {
+ // The created table should be a MANAGED or EXTERNAL table, with empty view text
+ // and empty original text.
+ assert(targetTable.tableType == tableType,
+ s"the created table must be a table of type ${tableType.name}")
assert(targetTable.viewText.isEmpty,
"the view text in the created table must be empty")
assert(targetTable.viewDefaultDatabase.isEmpty,
@@ -1027,8 +1115,13 @@ class HiveDDLSuite
}
assert(targetTable.storage.locationUri.nonEmpty, "target table path should not be empty")
- assert(sourceTable.storage.locationUri != targetTable.storage.locationUri,
- "source table/view path should be different from target table path")
+
+ // For an external table, the user-specified location may legitimately equal the
+ // source table's location, so the path-difference check only applies otherwise.
+ if (tableType != CatalogTableType.EXTERNAL) {
+ assert(sourceTable.storage.locationUri != targetTable.storage.locationUri,
+ "source table/view path should be different from target table path")
+ }
// The source table contents should not be seen in the target table.
assert(spark.table(sourceTable.identifier).count() != 0, "the source table should be nonempty")
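The rule the updated assertions encode can be checked directly against the catalog after running the earlier sketch; `spark` and the table name are again illustrative:

import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.catalog.CatalogTableType

// A table created with LIKE ... LOCATION should come back EXTERNAL,
// while one created without a location should come back MANAGED.
val meta = spark.sessionState.catalog
  .getTableMetadata(TableIdentifier("tgt_external", Some("default")))
assert(meta.tableType == CatalogTableType.EXTERNAL)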