From b1cc7da3e3c36cee59825e7b95e14b00ab1fe196 Mon Sep 17 00:00:00 2001 From: Andrew Or Date: Fri, 3 Jun 2016 14:39:41 -0700 Subject: [SPARK-15722][SQL] Disallow specifying schema in CTAS statement ## What changes were proposed in this pull request? As of this patch, the following throws an exception because the schemas may not match: ``` CREATE TABLE students (age INT, name STRING) AS SELECT * FROM boxes ``` but this is OK: ``` CREATE TABLE students AS SELECT * FROM boxes ``` ## How was this patch tested? SQLQuerySuite, HiveDDLCommandSuite Author: Andrew Or Closes #13490 from andrewor14/ctas-no-column. --- .../spark/sql/catalyst/LogicalPlanToSQLSuite.scala | 14 ++------- .../spark/sql/hive/HiveDDLCommandSuite.scala | 33 +++++----------------- .../spark/sql/hive/execution/HiveUDFSuite.scala | 5 +--- .../spark/sql/hive/execution/SQLQuerySuite.scala | 14 ++++----- .../spark/sql/hive/execution/SQLViewSuite.scala | 10 ++----- 5 files changed, 18 insertions(+), 58 deletions(-) (limited to 'sql/hive') diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/catalyst/LogicalPlanToSQLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/catalyst/LogicalPlanToSQLSuite.scala index 4315197e12..0827b04252 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/catalyst/LogicalPlanToSQLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/catalyst/LogicalPlanToSQLSuite.scala @@ -744,24 +744,14 @@ class LogicalPlanToSQLSuite extends SQLBuilderTest with SQLTestUtils { test("SPARK-14933 - select parquet table") { withTable("parquet_t") { - sql( - """ - |create table parquet_t (c1 int, c2 string) - |stored as parquet select 1, 'abc' - """.stripMargin) - + sql("create table parquet_t stored as parquet as select 1 as c1, 'abc' as c2") checkHiveQl("select * from parquet_t") } } test("SPARK-14933 - select orc table") { withTable("orc_t") { - sql( - """ - |create table orc_t (c1 int, c2 string) - |stored as orc select 1, 'abc' - """.stripMargin) - + sql("create table orc_t stored as orc as select 1 as c1, 'abc' as c2") checkHiveQl("select * from orc_t") } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala index ba9fe54db8..867aadb5f5 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala @@ -51,12 +51,6 @@ class HiveDDLCommandSuite extends PlanTest { test("Test CTAS #1") { val s1 = """CREATE EXTERNAL TABLE IF NOT EXISTS mydb.page_view - |(viewTime INT, - |userid BIGINT, - |page_url STRING, - |referrer_url STRING, - |ip STRING COMMENT 'IP Address of the User', - |country STRING COMMENT 'country of origination') |COMMENT 'This is the staging page view table' |STORED AS RCFILE |LOCATION '/user/external/page_view' @@ -69,13 +63,7 @@ class HiveDDLCommandSuite extends PlanTest { assert(desc.identifier.table == "page_view") assert(desc.tableType == CatalogTableType.EXTERNAL) assert(desc.storage.locationUri == Some("/user/external/page_view")) - assert(desc.schema == - CatalogColumn("viewtime", "int") :: - CatalogColumn("userid", "bigint") :: - CatalogColumn("page_url", "string") :: - CatalogColumn("referrer_url", "string") :: - CatalogColumn("ip", "string", comment = Some("IP Address of the User")) :: - CatalogColumn("country", "string", comment = Some("country of origination")) :: Nil) + assert(desc.schema.isEmpty) // will be populated later when the table is actually created assert(desc.comment == Some("This is the staging page view table")) // TODO will be SQLText assert(desc.viewText.isEmpty) @@ -91,12 +79,6 @@ class HiveDDLCommandSuite extends PlanTest { test("Test CTAS #2") { val s2 = """CREATE EXTERNAL TABLE IF NOT EXISTS mydb.page_view - |(viewTime INT, - |userid BIGINT, - |page_url STRING, - |referrer_url STRING, - |ip STRING COMMENT 'IP Address of the User', - |country STRING COMMENT 'country of origination') |COMMENT 'This is the staging page view table' |ROW FORMAT SERDE 'parquet.hive.serde.ParquetHiveSerDe' | STORED AS @@ -112,13 +94,7 @@ class HiveDDLCommandSuite extends PlanTest { assert(desc.identifier.table == "page_view") assert(desc.tableType == CatalogTableType.EXTERNAL) assert(desc.storage.locationUri == Some("/user/external/page_view")) - assert(desc.schema == - CatalogColumn("viewtime", "int") :: - CatalogColumn("userid", "bigint") :: - CatalogColumn("page_url", "string") :: - CatalogColumn("referrer_url", "string") :: - CatalogColumn("ip", "string", comment = Some("IP Address of the User")) :: - CatalogColumn("country", "string", comment = Some("country of origination")) :: Nil) + assert(desc.schema.isEmpty) // will be populated later when the table is actually created // TODO will be SQLText assert(desc.comment == Some("This is the staging page view table")) assert(desc.viewText.isEmpty) @@ -190,6 +166,11 @@ class HiveDDLCommandSuite extends PlanTest { " AS SELECT key, value FROM (SELECT 1 as key, 2 as value) tmp") } + test("CTAS statement with schema") { + assertUnsupported(s"CREATE TABLE ctas1 (age INT, name STRING) AS SELECT * FROM src") + assertUnsupported(s"CREATE TABLE ctas1 (age INT, name STRING) AS SELECT 1, 'hello'") + } + test("unsupported operations") { intercept[ParseException] { parser.parsePlan( diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala index ffeed63695..0f56b2c0d1 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala @@ -435,10 +435,7 @@ class HiveUDFSuite extends QueryTest with TestHiveSingleton with SQLTestUtils { } // Non-External parquet pointing to /tmp/... - - sql("CREATE TABLE parquet_tmp(c1 int, c2 int) " + - " STORED AS parquet " + - " AS SELECT 1, 2") + sql("CREATE TABLE parquet_tmp STORED AS parquet AS SELECT 1, 2") val answer4 = sql("SELECT input_file_name() as file FROM parquet_tmp").head().getString(0) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index 499819f32b..8244ff4ce0 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -608,17 +608,15 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton { test("specifying the column list for CTAS") { Seq((1, "111111"), (2, "222222")).toDF("key", "value").createOrReplaceTempView("mytable1") - sql("create table gen__tmp(a int, b string) as select key, value from mytable1") + sql("create table gen__tmp as select key as a, value as b from mytable1") checkAnswer( sql("SELECT a, b from gen__tmp"), sql("select key, value from mytable1").collect()) sql("DROP TABLE gen__tmp") - sql("create table gen__tmp(a double, b double) as select key, value from mytable1") - checkAnswer( - sql("SELECT a, b from gen__tmp"), - sql("select cast(key as double), cast(value as double) from mytable1").collect()) - sql("DROP TABLE gen__tmp") + intercept[AnalysisException] { + sql("create table gen__tmp(a int, b string) as select key, value from mytable1") + } sql("drop table mytable1") } @@ -1225,8 +1223,8 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton { test("SPARK-10741: Sort on Aggregate using parquet") { withTable("test10741") { withTempTable("src") { - Seq("a" -> 5, "a" -> 9, "b" -> 6).toDF().createOrReplaceTempView("src") - sql("CREATE TABLE test10741(c1 STRING, c2 INT) STORED AS PARQUET AS SELECT * FROM src") + Seq("a" -> 5, "a" -> 9, "b" -> 6).toDF("c1", "c2").createOrReplaceTempView("src") + sql("CREATE TABLE test10741 STORED AS PARQUET AS SELECT * FROM src") } checkAnswer(sql( diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLViewSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLViewSuite.scala index 72db3618e0..39846f145c 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLViewSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLViewSuite.scala @@ -308,10 +308,7 @@ class SQLViewSuite extends QueryTest with SQLTestUtils with TestHiveSingleton { test("SPARK-14933 - create view from hive parquet tabale") { withTable("t_part") { withView("v_part") { - spark.sql( - """create table t_part (c1 int, c2 int) - |stored as parquet as select 1 as a, 2 as b - """.stripMargin) + spark.sql("create table t_part stored as parquet as select 1 as a, 2 as b") spark.sql("create view v_part as select * from t_part") checkAnswer( sql("select * from t_part"), @@ -323,10 +320,7 @@ class SQLViewSuite extends QueryTest with SQLTestUtils with TestHiveSingleton { test("SPARK-14933 - create view from hive orc tabale") { withTable("t_orc") { withView("v_orc") { - spark.sql( - """create table t_orc (c1 int, c2 int) - |stored as orc as select 1 as a, 2 as b - """.stripMargin) + spark.sql("create table t_orc stored as orc as select 1 as a, 2 as b") spark.sql("create view v_orc as select * from t_orc") checkAnswer( sql("select * from t_orc"), -- cgit v1.2.3