aboutsummaryrefslogtreecommitdiff
path: root/sql/hive
diff options
context:
space:
mode:
authorAndrew Or <andrew@databricks.com>2016-06-03 14:39:41 -0700
committerWenchen Fan <wenchen@databricks.com>2016-06-03 14:39:41 -0700
commitb1cc7da3e3c36cee59825e7b95e14b00ab1fe196 (patch)
tree994249ba2c4681303b734ef6012fad40c4ea9439 /sql/hive
parent11c83f83d5172167cb64513d5311b4178797d40e (diff)
downloadspark-b1cc7da3e3c36cee59825e7b95e14b00ab1fe196.tar.gz
spark-b1cc7da3e3c36cee59825e7b95e14b00ab1fe196.tar.bz2
spark-b1cc7da3e3c36cee59825e7b95e14b00ab1fe196.zip
[SPARK-15722][SQL] Disallow specifying schema in CTAS statement
## What changes were proposed in this pull request? As of this patch, the following throws an exception because the schemas may not match: ``` CREATE TABLE students (age INT, name STRING) AS SELECT * FROM boxes ``` but this is OK: ``` CREATE TABLE students AS SELECT * FROM boxes ``` ## How was this patch tested? SQLQuerySuite, HiveDDLCommandSuite Author: Andrew Or <andrew@databricks.com> Closes #13490 from andrewor14/ctas-no-column.
Diffstat (limited to 'sql/hive')
-rw-r--r--sql/hive/src/test/scala/org/apache/spark/sql/catalyst/LogicalPlanToSQLSuite.scala14
-rw-r--r--sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala33
-rw-r--r--sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala5
-rw-r--r--sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala14
-rw-r--r--sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLViewSuite.scala10
5 files changed, 18 insertions, 58 deletions
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/catalyst/LogicalPlanToSQLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/catalyst/LogicalPlanToSQLSuite.scala
index 4315197e12..0827b04252 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/catalyst/LogicalPlanToSQLSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/catalyst/LogicalPlanToSQLSuite.scala
@@ -744,24 +744,14 @@ class LogicalPlanToSQLSuite extends SQLBuilderTest with SQLTestUtils {
test("SPARK-14933 - select parquet table") {
withTable("parquet_t") {
- sql(
- """
- |create table parquet_t (c1 int, c2 string)
- |stored as parquet select 1, 'abc'
- """.stripMargin)
-
+ sql("create table parquet_t stored as parquet as select 1 as c1, 'abc' as c2")
checkHiveQl("select * from parquet_t")
}
}
test("SPARK-14933 - select orc table") {
withTable("orc_t") {
- sql(
- """
- |create table orc_t (c1 int, c2 string)
- |stored as orc select 1, 'abc'
- """.stripMargin)
-
+ sql("create table orc_t stored as orc as select 1 as c1, 'abc' as c2")
checkHiveQl("select * from orc_t")
}
}
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala
index ba9fe54db8..867aadb5f5 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala
@@ -51,12 +51,6 @@ class HiveDDLCommandSuite extends PlanTest {
test("Test CTAS #1") {
val s1 =
"""CREATE EXTERNAL TABLE IF NOT EXISTS mydb.page_view
- |(viewTime INT,
- |userid BIGINT,
- |page_url STRING,
- |referrer_url STRING,
- |ip STRING COMMENT 'IP Address of the User',
- |country STRING COMMENT 'country of origination')
|COMMENT 'This is the staging page view table'
|STORED AS RCFILE
|LOCATION '/user/external/page_view'
@@ -69,13 +63,7 @@ class HiveDDLCommandSuite extends PlanTest {
assert(desc.identifier.table == "page_view")
assert(desc.tableType == CatalogTableType.EXTERNAL)
assert(desc.storage.locationUri == Some("/user/external/page_view"))
- assert(desc.schema ==
- CatalogColumn("viewtime", "int") ::
- CatalogColumn("userid", "bigint") ::
- CatalogColumn("page_url", "string") ::
- CatalogColumn("referrer_url", "string") ::
- CatalogColumn("ip", "string", comment = Some("IP Address of the User")) ::
- CatalogColumn("country", "string", comment = Some("country of origination")) :: Nil)
+ assert(desc.schema.isEmpty) // will be populated later when the table is actually created
assert(desc.comment == Some("This is the staging page view table"))
// TODO will be SQLText
assert(desc.viewText.isEmpty)
@@ -91,12 +79,6 @@ class HiveDDLCommandSuite extends PlanTest {
test("Test CTAS #2") {
val s2 =
"""CREATE EXTERNAL TABLE IF NOT EXISTS mydb.page_view
- |(viewTime INT,
- |userid BIGINT,
- |page_url STRING,
- |referrer_url STRING,
- |ip STRING COMMENT 'IP Address of the User',
- |country STRING COMMENT 'country of origination')
|COMMENT 'This is the staging page view table'
|ROW FORMAT SERDE 'parquet.hive.serde.ParquetHiveSerDe'
| STORED AS
@@ -112,13 +94,7 @@ class HiveDDLCommandSuite extends PlanTest {
assert(desc.identifier.table == "page_view")
assert(desc.tableType == CatalogTableType.EXTERNAL)
assert(desc.storage.locationUri == Some("/user/external/page_view"))
- assert(desc.schema ==
- CatalogColumn("viewtime", "int") ::
- CatalogColumn("userid", "bigint") ::
- CatalogColumn("page_url", "string") ::
- CatalogColumn("referrer_url", "string") ::
- CatalogColumn("ip", "string", comment = Some("IP Address of the User")) ::
- CatalogColumn("country", "string", comment = Some("country of origination")) :: Nil)
+ assert(desc.schema.isEmpty) // will be populated later when the table is actually created
// TODO will be SQLText
assert(desc.comment == Some("This is the staging page view table"))
assert(desc.viewText.isEmpty)
@@ -190,6 +166,11 @@ class HiveDDLCommandSuite extends PlanTest {
" AS SELECT key, value FROM (SELECT 1 as key, 2 as value) tmp")
}
+ test("CTAS statement with schema") {
+ assertUnsupported(s"CREATE TABLE ctas1 (age INT, name STRING) AS SELECT * FROM src")
+ assertUnsupported(s"CREATE TABLE ctas1 (age INT, name STRING) AS SELECT 1, 'hello'")
+ }
+
test("unsupported operations") {
intercept[ParseException] {
parser.parsePlan(
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala
index ffeed63695..0f56b2c0d1 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala
@@ -435,10 +435,7 @@ class HiveUDFSuite extends QueryTest with TestHiveSingleton with SQLTestUtils {
}
// Non-External parquet pointing to /tmp/...
-
- sql("CREATE TABLE parquet_tmp(c1 int, c2 int) " +
- " STORED AS parquet " +
- " AS SELECT 1, 2")
+ sql("CREATE TABLE parquet_tmp STORED AS parquet AS SELECT 1, 2")
val answer4 =
sql("SELECT input_file_name() as file FROM parquet_tmp").head().getString(0)
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
index 499819f32b..8244ff4ce0 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
@@ -608,17 +608,15 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
test("specifying the column list for CTAS") {
Seq((1, "111111"), (2, "222222")).toDF("key", "value").createOrReplaceTempView("mytable1")
- sql("create table gen__tmp(a int, b string) as select key, value from mytable1")
+ sql("create table gen__tmp as select key as a, value as b from mytable1")
checkAnswer(
sql("SELECT a, b from gen__tmp"),
sql("select key, value from mytable1").collect())
sql("DROP TABLE gen__tmp")
- sql("create table gen__tmp(a double, b double) as select key, value from mytable1")
- checkAnswer(
- sql("SELECT a, b from gen__tmp"),
- sql("select cast(key as double), cast(value as double) from mytable1").collect())
- sql("DROP TABLE gen__tmp")
+ intercept[AnalysisException] {
+ sql("create table gen__tmp(a int, b string) as select key, value from mytable1")
+ }
sql("drop table mytable1")
}
@@ -1225,8 +1223,8 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
test("SPARK-10741: Sort on Aggregate using parquet") {
withTable("test10741") {
withTempTable("src") {
- Seq("a" -> 5, "a" -> 9, "b" -> 6).toDF().createOrReplaceTempView("src")
- sql("CREATE TABLE test10741(c1 STRING, c2 INT) STORED AS PARQUET AS SELECT * FROM src")
+ Seq("a" -> 5, "a" -> 9, "b" -> 6).toDF("c1", "c2").createOrReplaceTempView("src")
+ sql("CREATE TABLE test10741 STORED AS PARQUET AS SELECT * FROM src")
}
checkAnswer(sql(
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLViewSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLViewSuite.scala
index 72db3618e0..39846f145c 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLViewSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLViewSuite.scala
@@ -308,10 +308,7 @@ class SQLViewSuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
test("SPARK-14933 - create view from hive parquet tabale") {
withTable("t_part") {
withView("v_part") {
- spark.sql(
- """create table t_part (c1 int, c2 int)
- |stored as parquet as select 1 as a, 2 as b
- """.stripMargin)
+ spark.sql("create table t_part stored as parquet as select 1 as a, 2 as b")
spark.sql("create view v_part as select * from t_part")
checkAnswer(
sql("select * from t_part"),
@@ -323,10 +320,7 @@ class SQLViewSuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
test("SPARK-14933 - create view from hive orc tabale") {
withTable("t_orc") {
withView("v_orc") {
- spark.sql(
- """create table t_orc (c1 int, c2 int)
- |stored as orc as select 1 as a, 2 as b
- """.stripMargin)
+ spark.sql("create table t_orc stored as orc as select 1 as a, 2 as b")
spark.sql("create view v_orc as select * from t_orc")
checkAnswer(
sql("select * from t_orc"),