author     Cheng Hao <hao.cheng@intel.com>            2015-06-11 14:03:08 -0700
committer  Michael Armbrust <michael@databricks.com>  2015-06-11 14:03:08 -0700
commit     040f223c5b9ca724c9f2b4abb59c21b3a23720ba (patch)
tree       a05325ba5a7ef0fbd58996dfbaa0608bdfe54b2c
parent     c8d551d546979e126c91925487e30c353185e3ba (diff)
[SPARK-7915] [SQL] Support specifying the column list for target table in CTAS
```
create table t1 (a int, b string) as select key, value from src;
desc t1;
key     int     NULL
value   string  NULL
```
As the example shows, Hive doesn't support specifying the column list for the target table in CTAS: the declared columns (a, b) are silently ignored. We should either throw an exception explicitly or support the feature; this patch picks the latter, which seems useful and straightforward.

Author: Cheng Hao <hao.cheng@intel.com>

Closes #6458 from chenghao-intel/ctas_column and squashes the following commits:

d1fa9b6 [Cheng Hao] bug in unittest
4e701aa [Cheng Hao] update as feedback
f305ec1 [Cheng Hao] support specifying the column list for target table in CTAS
-rw-r--r--  sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateTableAsSelect.scala  16
-rw-r--r--  sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala        18
2 files changed, 30 insertions(+), 4 deletions(-)
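For context, here is a minimal, hypothetical sketch of the behavior this patch enables, written against the Spark 1.4-era `HiveContext` API. The application name, local master, and the `src(key INT, value STRING)` table are assumptions for illustration, not part of the patch:

```scala
// Minimal sketch, assuming a Spark 1.4-era build with Hive support and an
// existing Hive table `src(key INT, value STRING)`; all names are illustrative.
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.hive.HiveContext

object CtasColumnListExample {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("ctas-column-list").setMaster("local[*]"))
    val hiveCtx = new HiveContext(sc)

    // With this patch, the declared column list (a, b) defines the target table's
    // schema; previously the schema was always taken from the SELECT output.
    hiveCtx.sql("CREATE TABLE t1 (a INT, b STRING) AS SELECT key, value FROM src")
    hiveCtx.sql("DESC t1").show()   // expected to list columns `a int` and `b string`

    sc.stop()
  }
}
```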
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateTableAsSelect.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateTableAsSelect.scala
index 7d3ec12c4e..87c36a8b61 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateTableAsSelect.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateTableAsSelect.scala
@@ -50,17 +50,25 @@ case class CreateTableAsSelect(
import org.apache.hadoop.io.Text
import org.apache.hadoop.mapred.TextInputFormat
- val withSchema =
+ val withFormat =
tableDesc.copy(
- schema =
- query.output.map(c =>
- HiveColumn(c.name, HiveMetastoreTypes.toMetastoreType(c.dataType), null)),
inputFormat =
tableDesc.inputFormat.orElse(Some(classOf[TextInputFormat].getName)),
outputFormat =
tableDesc.outputFormat
.orElse(Some(classOf[HiveIgnoreKeyTextOutputFormat[Text, Text]].getName)),
serde = tableDesc.serde.orElse(Some(classOf[LazySimpleSerDe].getName())))
+
+ val withSchema = if (withFormat.schema.isEmpty) {
+ // Hive doesn't support specifying the column list for the target table in CTAS.
+ // However, we don't think Spark SQL should follow that restriction.
+ tableDesc.copy(schema =
+ query.output.map(c =>
+ HiveColumn(c.name, HiveMetastoreTypes.toMetastoreType(c.dataType), null)))
+ } else {
+ withFormat
+ }
+
hiveContext.catalog.client.createTable(withSchema)
// Get the Metastore Relation
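To spell out the rule the hunk above introduces: when the CTAS statement declares a column list, the table descriptor's schema is already populated and is kept; only when it is empty is the schema derived from the query output. The standalone sketch below captures that decision; the `Column` type and `resolveSchema` helper are simplified stand-ins for illustration, not actual Spark SQL classes:

```scala
// A minimal sketch of the schema-selection rule; `Column` and `resolveSchema`
// are simplified stand-ins, not Spark SQL classes.
object CtasSchemaSketch {
  case class Column(name: String, dataType: String)

  def resolveSchema(declared: Seq[Column], queryOutput: Seq[Column]): Seq[Column] =
    if (declared.isEmpty) {
      // No column list in the CTAS statement: fall back to the query's output
      // schema (the `withFormat.schema.isEmpty` branch above).
      queryOutput
    } else {
      // An explicit column list wins; the inserted rows are cast to these types,
      // as exercised by the double/double case in the test below.
      declared
    }

  def main(args: Array[String]): Unit = {
    val fromQuery = Seq(Column("key", "int"), Column("value", "string"))
    println(resolveSchema(Nil, fromQuery))                                          // query schema is used
    println(resolveSchema(Seq(Column("a", "double"), Column("b", "double")), fromQuery)) // declared schema wins
  }
}
```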
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
index 40a35674e4..8bd4900497 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
@@ -360,6 +360,24 @@ class SQLQuerySuite extends QueryTest {
}
}
+ test("specifying the column list for CTAS") {
+ Seq((1, "111111"), (2, "222222")).toDF("key", "value").registerTempTable("mytable1")
+
+ sql("create table gen__tmp(a int, b string) as select key, value from mytable1")
+ checkAnswer(
+ sql("SELECT a, b from gen__tmp"),
+ sql("select key, value from mytable1").collect())
+ sql("DROP TABLE gen__tmp")
+
+ sql("create table gen__tmp(a double, b double) as select key, value from mytable1")
+ checkAnswer(
+ sql("SELECT a, b from gen__tmp"),
+ sql("select cast(key as double), cast(value as double) from mytable1").collect())
+ sql("DROP TABLE gen__tmp")
+
+ sql("drop table mytable1")
+ }
+
test("command substitution") {
sql("set tbl=src")
checkAnswer(