author     Cheng Lian <lian@databricks.com>    2016-04-27 13:55:07 -0700
committer  Yin Huai <yhuai@databricks.com>     2016-04-27 13:55:13 -0700
commit  24bea000476cdd0b43be5160a76bc5b170ef0b42 (patch)
tree    5336028911f2db913d333bbfbf17b54e1b843f5c /sql/core/src
parent  f405de87c878c49b17acb2c874be1084465384e9 (diff)
[SPARK-14954] [SQL] Add PARTITION BY and BUCKET BY clause for data source CTAS syntax
Currently, we can only create persisted partitioned and/or bucketed data source tables using the Dataset API, but not using SQL DDL. This PR implements the following syntax to add partitioning and bucketing support to the SQL DDL:

```
CREATE TABLE <table-name>
USING <provider>
[OPTIONS (<key1> <value1>, <key2> <value2>, ...)]
[PARTITIONED BY (col1, col2, ...)]
[CLUSTERED BY (col1, col2, ...) [SORTED BY (col1, col2, ...)] INTO <n> BUCKETS]
AS SELECT ...
```

Test cases are added in `MetastoreDataSourcesSuite` to check the newly added syntax.

Author: Cheng Lian <lian@databricks.com>
Author: Yin Huai <yhuai@databricks.com>

Closes #12734 from liancheng/spark-14954.
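For illustration, here is a minimal usage sketch of the new CTAS syntax, assuming a `SQLContext` named `sqlContext`; the table, column, and path names are invented for this example and do not come from the patch itself:

```scala
// Hypothetical example of the new CTAS syntax; all identifiers
// (sales, staging_sales, region, customer_id, order_id, /tmp/sales)
// are made up for illustration.
sqlContext.sql(
  """CREATE TABLE sales
    |USING parquet
    |OPTIONS (path '/tmp/sales')
    |PARTITIONED BY (region)
    |CLUSTERED BY (customer_id) SORTED BY (order_id) INTO 8 BUCKETS
    |AS SELECT * FROM staging_sales
  """.stripMargin)
```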
Diffstat (limited to 'sql/core/src')
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala | 12
1 file changed, 10 insertions(+), 2 deletions(-)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
index 79fdf9fb22..e4c837a7ab 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
@@ -289,6 +289,7 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder {
     }
     val options = Option(ctx.tablePropertyList).map(visitTablePropertyList).getOrElse(Map.empty)
     val provider = ctx.tableProvider.qualifiedName.getText
+    val bucketSpec = Option(ctx.bucketSpec()).map(visitBucketSpec)
 
     if (ctx.query != null) {
       // Get the backing query.
@@ -302,9 +303,16 @@
       } else {
         SaveMode.ErrorIfExists
       }
-      CreateTableUsingAsSelect(table, provider, temp, Array.empty, None, mode, options, query)
+
+      val partitionColumnNames =
+        Option(ctx.partitionColumnNames)
+          .map(visitIdentifierList(_).toArray)
+          .getOrElse(Array.empty[String])
+
+      CreateTableUsingAsSelect(
+        table, provider, temp, partitionColumnNames, bucketSpec, mode, options, query)
     } else {
-      val struct = Option(ctx.colTypeList).map(createStructType)
+      val struct = Option(ctx.colTypeList()).map(createStructType)
       CreateTableUsing(table, struct, provider, temp, options, ifNotExists, managedIfNoPath = false)
     }
   }
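For comparison, the commit message refers to the pre-existing Dataset API path for creating such tables. A minimal sketch of that path, assuming the same `SQLContext` and the same invented identifiers as in the example above (none of these names appear in the patch):

```scala
// Equivalent table creation through the DataFrameWriter API, which already
// supported partitioning and bucketing before this patch.
// All identifiers are illustrative only.
sqlContext.table("staging_sales")
  .write
  .format("parquet")
  .option("path", "/tmp/sales")
  .partitionBy("region")        // corresponds to PARTITIONED BY (region)
  .bucketBy(8, "customer_id")   // corresponds to CLUSTERED BY ... INTO 8 BUCKETS
  .sortBy("order_id")           // corresponds to SORTED BY (order_id)
  .saveAsTable("sales")
```

The parser change above maps the new `PARTITIONED BY` and `CLUSTERED BY ... SORTED BY ... INTO <n> BUCKETS` clauses onto the `partitionColumns` and `bucketSpec` arguments of `CreateTableUsingAsSelect`, which previously were hard-coded to `Array.empty` and `None` in the SQL path.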