author | Andrew Or <andrew@databricks.com> | 2016-04-13 11:08:34 -0700 |
---|---|---|
committer | Yin Huai <yhuai@databricks.com> | 2016-04-13 11:08:34 -0700 |
commit | 7d2ed8cc030f3d84fea47fded072c320c3d87ca7 (patch) | |
tree | c85aedad5a7fb97ca4b02f1c8f81983f89b90f97 /sql/core/src | |
parent | 1018a1c1eb33eefbfb9025fac7a1cdafc5cbf8f8 (diff) | |
[SPARK-14388][SQL] Implement CREATE TABLE
## What changes were proposed in this pull request?
This patch implements the `CREATE TABLE` command using the `SessionCatalog`. Previously we handled only `CTAS` and `CREATE TABLE ... USING`. This requires us to refactor `CatalogTable` to accept various fields (e.g. bucket and skew columns) and pass them to Hive.
WIP: Note that I haven't verified whether this actually works yet! But I believe it does.
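To make the `CatalogTable` refactoring concrete, here is a minimal sketch of the new code path from the caller's side: build a `CatalogTable` describing the table and hand it to the `CreateTable` command introduced by this patch. The exact `CatalogTable`/`CatalogStorageFormat` field names and defaults shown here are assumptions for illustration and may not match this commit verbatim.

```scala
// Illustrative sketch only: CatalogTable / CatalogStorageFormat field names are
// assumptions and may differ from the exact catalog API at this commit.
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.catalog.{CatalogColumn, CatalogStorageFormat, CatalogTable, CatalogTableType}
import org.apache.spark.sql.execution.command.CreateTable

def createStudentsTable(sqlContext: SQLContext): Unit = {
  // Metadata for a simple managed Hive table with two columns.
  val table = CatalogTable(
    identifier = TableIdentifier("students", Some("school")),
    tableType = CatalogTableType.MANAGED_TABLE,
    schema = Seq(
      CatalogColumn("name", "string"),
      CatalogColumn("age", "int")),
    storage = CatalogStorageFormat(
      locationUri = None,
      inputFormat = Some("org.apache.hadoop.mapred.TextInputFormat"),
      outputFormat = Some("org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"),
      serde = None,
      serdeProperties = Map.empty))

  // The command itself is thin: it forwards the metadata to the SessionCatalog,
  // which is what this patch wires up for plain CREATE TABLE statements.
  CreateTable(table, ifNotExists = true).run(sqlContext)
}
```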
## How was this patch tested?
Tests will come in a future commit.
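A future test could follow the existing `DDLCommandSuite` pattern of parsing a statement and checking the resulting command. The sketch below is hypothetical and not part of this commit; the statement, identifiers, and assertions are illustrative only.

```scala
// Hypothetical follow-up test, written in the style of DDLCommandSuite and meant
// to live inside that suite (so `parser`, `test`, and `fail` are in scope).
test("create table: parses into a CreateTable command") {
  val sql = "CREATE TABLE IF NOT EXISTS my_db.my_tab (id INT, name STRING)"
  parser.parsePlan(sql) match {
    case CreateTable(table, ifNotExists) =>
      assert(table.identifier == TableIdentifier("my_tab", Some("my_db")))
      assert(ifNotExists)
    case other =>
      fail(s"Expected a CreateTable command, but got: $other")
  }
}
```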
Author: Andrew Or <andrew@databricks.com>
Author: Yin Huai <yhuai@databricks.com>
Closes #12271 from andrewor14/create-table-ddl.
Diffstat (limited to 'sql/core/src')
5 files changed, 88 insertions, 62 deletions
```diff
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
index 73d9640c35..af92cecee5 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
@@ -179,7 +179,9 @@ class SparkSqlAstBuilder extends AstBuilder {
     }
   }
 
-  /** Type to keep track of a table header. */
+  /**
+   * Type to keep track of a table header: (identifier, isTemporary, ifNotExists, isExternal).
+   */
   type TableHeader = (TableIdentifier, Boolean, Boolean, Boolean)
 
   /**
@@ -616,10 +618,7 @@ class SparkSqlAstBuilder extends AstBuilder {
       case s: GenericFileFormatContext =>
         (Seq.empty[String], Option(s.identifier.getText))
       case s: TableFileFormatContext =>
-        val elements = Seq(s.inFmt, s.outFmt) ++
-          Option(s.serdeCls).toSeq ++
-          Option(s.inDriver).toSeq ++
-          Option(s.outDriver).toSeq
+        val elements = Seq(s.inFmt, s.outFmt) ++ Option(s.serdeCls).toSeq
         (elements.map(string), None)
     }
     AlterTableSetFileFormat(
@@ -774,22 +773,6 @@ class SparkSqlAstBuilder extends AstBuilder {
   }
 
   /**
-   * Create a skew specification. This contains three components:
-   * - The Skewed Columns
-   * - Values for which are skewed. The size of each entry must match the number of skewed columns.
-   * - A store in directory flag.
-   */
-  override def visitSkewSpec(
-      ctx: SkewSpecContext): (Seq[String], Seq[Seq[String]], Boolean) = withOrigin(ctx) {
-    val skewedValues = if (ctx.constantList != null) {
-      Seq(visitConstantList(ctx.constantList))
-    } else {
-      visitNestedConstantList(ctx.nestedConstantList)
-    }
-    (visitIdentifierList(ctx.identifierList), skewedValues, ctx.DIRECTORIES != null)
-  }
-
-  /**
    * Convert a nested constants list into a sequence of string sequences.
    */
   override def visitNestedConstantList(
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala
index 5137bd11d8..234099ad15 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala
@@ -225,29 +225,6 @@ case class DropTable(
 }
 
 /**
- * A command that renames a table/view.
- *
- * The syntax of this command is:
- * {{{
- *    ALTER TABLE table1 RENAME TO table2;
- *    ALTER VIEW view1 RENAME TO view2;
- * }}}
- */
-case class AlterTableRename(
-    oldName: TableIdentifier,
-    newName: TableIdentifier)
-  extends RunnableCommand {
-
-  override def run(sqlContext: SQLContext): Seq[Row] = {
-    val catalog = sqlContext.sessionState.catalog
-    catalog.invalidateTable(oldName)
-    catalog.renameTable(oldName, newName)
-    Seq.empty[Row]
-  }
-
-}
-
-/**
  * A command that sets table/view properties.
  *
  * The syntax of this command is:
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
new file mode 100644
index 0000000000..9c6030502d
--- /dev/null
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.command
+
+import org.apache.spark.sql.{Row, SQLContext}
+import org.apache.spark.sql.catalyst.TableIdentifier
+import org.apache.spark.sql.catalyst.catalog.CatalogTable
+
+
+// TODO: move the rest of the table commands from ddl.scala to this file
+
+/**
+ * A command to create a table.
+ *
+ * Note: This is currently used only for creating Hive tables.
+ * This is not intended for temporary tables.
+ *
+ * The syntax of using this command in SQL is:
+ * {{{
+ *   CREATE [EXTERNAL] TABLE [IF NOT EXISTS] [db_name.]table_name
+ *   [(col1 data_type [COMMENT col_comment], ...)]
+ *   [COMMENT table_comment]
+ *   [PARTITIONED BY (col3 data_type [COMMENT col_comment], ...)]
+ *   [CLUSTERED BY (col1, ...) [SORTED BY (col1 [ASC|DESC], ...)] INTO num_buckets BUCKETS]
+ *   [SKEWED BY (col1, col2, ...) ON ((col_value, col_value, ...), ...)
+ *   [STORED AS DIRECTORIES]
+ *   [ROW FORMAT row_format]
+ *   [STORED AS file_format | STORED BY storage_handler_class [WITH SERDEPROPERTIES (...)]]
+ *   [LOCATION path]
+ *   [TBLPROPERTIES (property_name=property_value, ...)]
+ *   [AS select_statement];
+ * }}}
+ */
+case class CreateTable(table: CatalogTable, ifNotExists: Boolean) extends RunnableCommand {
+
+  override def run(sqlContext: SQLContext): Seq[Row] = {
+    sqlContext.sessionState.catalog.createTable(table, ifNotExists)
+    Seq.empty[Row]
+  }
+
+}
+
+
+/**
+ * A command that renames a table/view.
+ *
+ * The syntax of this command is:
+ * {{{
+ *    ALTER TABLE table1 RENAME TO table2;
+ *    ALTER VIEW view1 RENAME TO view2;
+ * }}}
+ */
+case class AlterTableRename(
+    oldName: TableIdentifier,
+    newName: TableIdentifier)
+  extends RunnableCommand {
+
+  override def run(sqlContext: SQLContext): Seq[Row] = {
+    val catalog = sqlContext.sessionState.catalog
+    catalog.invalidateTable(oldName)
+    catalog.renameTable(oldName, newName)
+    Seq.empty[Row]
+  }
+
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala
index 1c8dd68286..6e6475ee29 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala
@@ -440,37 +440,25 @@ class DDLCommandSuite extends PlanTest {
   }
 
   test("alter table: set file format") {
-    val sql1 =
-      """
-       |ALTER TABLE table_name SET FILEFORMAT INPUTFORMAT 'test'
-       |OUTPUTFORMAT 'test' SERDE 'test' INPUTDRIVER 'test' OUTPUTDRIVER 'test'
-      """.stripMargin
-    val sql2 = "ALTER TABLE table_name SET FILEFORMAT INPUTFORMAT 'test' " +
+    val sql1 = "ALTER TABLE table_name SET FILEFORMAT INPUTFORMAT 'test' " +
       "OUTPUTFORMAT 'test' SERDE 'test'"
-    val sql3 = "ALTER TABLE table_name PARTITION (dt='2008-08-08', country='us') " +
+    val sql2 = "ALTER TABLE table_name PARTITION (dt='2008-08-08', country='us') " +
       "SET FILEFORMAT PARQUET"
     val parsed1 = parser.parsePlan(sql1)
     val parsed2 = parser.parsePlan(sql2)
-    val parsed3 = parser.parsePlan(sql3)
     val tableIdent = TableIdentifier("table_name", None)
     val expected1 = AlterTableSetFileFormat(
       tableIdent,
       None,
-      List("test", "test", "test", "test", "test"),
+      List("test", "test", "test"),
      None)(sql1)
     val expected2 = AlterTableSetFileFormat(
       tableIdent,
-      None,
-      List("test", "test", "test"),
-      None)(sql2)
-    val expected3 = AlterTableSetFileFormat(
-      tableIdent,
       Some(Map("dt" -> "2008-08-08", "country" -> "us")),
       Seq(),
-      Some("PARQUET"))(sql3)
+      Some("PARQUET"))(sql2)
     comparePlans(parsed1, expected1)
     comparePlans(parsed2, expected2)
-    comparePlans(parsed3, expected3)
   }
 
   test("alter table: set location") {
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
index 40a8b0e614..9ffffa0bdd 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
@@ -380,8 +380,6 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach {
     }
   }
 
-  // TODO: ADD a testcase for Drop Database in Restric when we can create tables in SQLContext
-
   test("show tables") {
     withTempTable("show1a", "show2b") {
       sql(
```
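The Scaladoc on the new `CreateTable` command above spells out the accepted grammar. As a rough illustration, a statement exercising several of the optional clauses might look like the sketch below; the table, column, and property names are made up, and not every clause is guaranteed to be supported end-to-end at this commit.

```scala
// Example CREATE TABLE statement following the grammar documented on the new
// CreateTable command; identifiers and the path are illustrative only, and
// `sqlContext` is assumed to be an in-scope SQLContext.
sqlContext.sql(
  """CREATE EXTERNAL TABLE IF NOT EXISTS school.students (name STRING, age INT)
    |COMMENT 'student roster'
    |PARTITIONED BY (grade INT)
    |CLUSTERED BY (name) SORTED BY (age ASC) INTO 4 BUCKETS
    |STORED AS TEXTFILE
    |LOCATION '/tmp/students'
    |TBLPROPERTIES ('owner'='registrar')
  """.stripMargin)
```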