aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDilip Biswal <dbiswal@us.ibm.com>2016-11-22 15:57:07 -0800
committergatorsmile <gatorsmile@gmail.com>2016-11-22 15:57:07 -0800
commit39a1d30636857715247c82d551b200e1c331ad69 (patch)
tree1ccc38bdcd0ff7c337da88ba5120b6693198ca49
parent9c42d4a76ca8046fcca2e20067f2aa461977e65a (diff)
downloadspark-39a1d30636857715247c82d551b200e1c331ad69.tar.gz
spark-39a1d30636857715247c82d551b200e1c331ad69.tar.bz2
spark-39a1d30636857715247c82d551b200e1c331ad69.zip
[SPARK-18533] Raise correct error upon specification of schema for datasource tables created using CTAS
## What changes were proposed in this pull request? Fixes the inconsistency of the errors raised between data source and Hive serde tables when a schema is specified in the CTAS scenario. In the process, the grammar for create table (datasource) is simplified. **Before:** ``` SQL spark-sql> create table t2 (c1 int, c2 int) using parquet as select * from t1; Error in query: mismatched input 'as' expecting {<EOF>, '.', 'OPTIONS', 'CLUSTERED', 'PARTITIONED'}(line 1, pos 64) == SQL == create table t2 (c1 int, c2 int) using parquet as select * from t1 ----------------------------------------------------------------^^^ ``` **After:** ```SQL spark-sql> create table t2 (c1 int, c2 int) using parquet as select * from t1 > ; Error in query: Operation not allowed: Schema may not be specified in a Create Table As Select (CTAS) statement(line 1, pos 0) == SQL == create table t2 (c1 int, c2 int) using parquet as select * from t1 ^^^ ``` ## How was this patch tested? Added a new test in CreateTableAsSelectSuite Author: Dilip Biswal <dbiswal@us.ibm.com> Closes #15968 from dilipbiswal/ctas.
-rw-r--r--sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g46
-rw-r--r--sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala24
-rw-r--r--sql/core/src/test/scala/org/apache/spark/sql/sources/CreateTableAsSelectSuite.scala9
3 files changed, 32 insertions, 7 deletions
diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
index bd05855f0a..4531fe4a0e 100644
--- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
+++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
@@ -71,11 +71,7 @@ statement
| createTableHeader ('(' colTypeList ')')? tableProvider
(OPTIONS tablePropertyList)?
(PARTITIONED BY partitionColumnNames=identifierList)?
- bucketSpec? #createTableUsing
- | createTableHeader tableProvider
- (OPTIONS tablePropertyList)?
- (PARTITIONED BY partitionColumnNames=identifierList)?
- bucketSpec? AS? query #createTableUsing
+ bucketSpec? (AS? query)? #createTableUsing
| createTableHeader ('(' columns=colTypeList ')')?
(COMMENT STRING)?
(PARTITIONED BY '(' partitionColumns=colTypeList ')')?
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
index df509a5679..0300bfe1ec 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
@@ -322,7 +322,20 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder {
}
/**
- * Create a [[CreateTable]] logical plan.
+ * Create a data source table, returning a [[CreateTable]] logical plan.
+ *
+ * Expected format:
+ * {{{
+ * CREATE [EXTERNAL] TABLE [IF NOT EXISTS] [db_name.]table_name
+ * USING table_provider
+ * [OPTIONS table_property_list]
+ * [PARTITIONED BY (col_name, col_name, ...)]
+ * [CLUSTERED BY (col_name, col_name, ...)
+ * [SORTED BY (col_name [ASC|DESC], ...)]
+ * INTO num_buckets BUCKETS
+ * ]
+ * [AS select_statement];
+ * }}}
*/
override def visitCreateTableUsing(ctx: CreateTableUsingContext): LogicalPlan = withOrigin(ctx) {
val (table, temp, ifNotExists, external) = visitCreateTableHeader(ctx.createTableHeader)
@@ -371,6 +384,12 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder {
operationNotAllowed("CREATE TEMPORARY TABLE ... USING ... AS query", ctx)
}
+ // Don't allow explicit specification of schema for CTAS
+ if (schema.nonEmpty) {
+ operationNotAllowed(
+ "Schema may not be specified in a Create Table As Select (CTAS) statement",
+ ctx)
+ }
CreateTable(tableDesc, mode, Some(query))
} else {
if (temp) {
@@ -1052,7 +1071,8 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder {
"CTAS statement."
operationNotAllowed(errorMessage, ctx)
}
- // Just use whatever is projected in the select statement as our schema
+
+ // Don't allow explicit specification of schema for CTAS.
if (schema.nonEmpty) {
operationNotAllowed(
"Schema may not be specified in a Create Table As Select (CTAS) statement",
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/CreateTableAsSelectSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/CreateTableAsSelectSuite.scala
index 5cc9467395..61939fe5ef 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/sources/CreateTableAsSelectSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/CreateTableAsSelectSuite.scala
@@ -249,4 +249,13 @@ class CreateTableAsSelectSuite
}
}
}
+
+ test("specifying the column list for CTAS") {
+ withTable("t") {
+ val e = intercept[ParseException] {
+ sql("CREATE TABLE t (a int, b int) USING parquet AS SELECT 1, 2")
+ }.getMessage
+ assert(e.contains("Schema may not be specified in a Create Table As Select (CTAS)"))
+ }
+ }
}