[SPARK-16033][SQL] insertInto() can't be used together with partitionBy()

## What changes were proposed in this pull request? When inserting into an existing partitioned table, partitioning columns should always be determined by catalog metadata of the existing table to be inserted. Extra `partitionBy()` calls don't make sense, and mess up existing data because newly inserted data may have wrong partitioning directory layout. ## How was this patch tested? New test case added in `InsertIntoHiveTableSuite`. Author: Cheng Lian <lian@databricks.com> Closes #13747 from liancheng/spark-16033-insert-into-without-partition-by.
author: Cheng Lian <lian@databricks.com> 2016-06-17 20:13:04 -0700
committer: Yin Huai <yhuai@databricks.com> 2016-06-17 20:13:04 -0700
commit: 10b671447bc04af250cbd8a7ea86f2769147a78a (patch)
tree: ff5b99ba0ad1e95937a0c55bfc0ef4934d02c3b5 /sql/hive
parent: ebb9a3b6fd834e2c856a192b4455aab83e9c4dc8 (diff)
download: spark-10b671447bc04af250cbd8a7ea86f2769147a78a.tar.gz
spark-10b671447bc04af250cbd8a7ea86f2769147a78a.tar.bz2
spark-10b671447bc04af250cbd8a7ea86f2769147a78a.zip
1 files changed, 37 insertions, 0 deletions
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertIntoHiveTableSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertIntoHiveTableSuite.scala
index 3bf45ced75..b890b4bffd 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertIntoHiveTableSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertIntoHiveTableSuite.scala
@@ -346,6 +346,43 @@ class InsertIntoHiveTableSuite extends QueryTest with TestHiveSingleton with Bef
     }
   }
 
+  private def testPartitionedHiveSerDeTable(testName: String)(f: String => Unit): Unit = {
+    test(s"Hive SerDe table - $testName") {
+      val hiveTable = "hive_table"
+
+      withTable(hiveTable) {
+        withSQLConf("hive.exec.dynamic.partition.mode" -> "nonstrict") {
+          sql(s"CREATE TABLE $hiveTable (a INT) PARTITIONED BY (b INT, c INT) STORED AS TEXTFILE")
+          f(hiveTable)
+        }
+      }
+    }
+  }
+
+  private def testPartitionedDataSourceTable(testName: String)(f: String => Unit): Unit = {
+    test(s"Data source table - $testName") {
+      val dsTable = "ds_table"
+
+      withTable(dsTable) {
+        sql(s"CREATE TABLE $dsTable (a INT, b INT, c INT) USING PARQUET PARTITIONED BY (b, c)")
+        f(dsTable)
+      }
+    }
+  }
+
+  private def testPartitionedTable(testName: String)(f: String => Unit): Unit = {
+    testPartitionedHiveSerDeTable(testName)(f)
+    testPartitionedDataSourceTable(testName)(f)
+  }
+
+  testPartitionedTable("partitionBy() can't be used together with insertInto()") { tableName =>
+    val cause = intercept[AnalysisException] {
+      Seq((1, 2, 3)).toDF("a", "b", "c").write.partitionBy("b", "c").insertInto(tableName)
+    }
+
+    assert(cause.getMessage.contains("insertInto() can't be used together with partitionBy()."))
+  }
+
   test("InsertIntoTable#resolved should include dynamic partitions") {
     withSQLConf(("hive.exec.dynamic.partition.mode", "nonstrict")) {
       sql("CREATE TABLE partitioned (id bigint, data string) PARTITIONED BY (part string)")
author	Cheng Lian <lian@databricks.com>	2016-06-17 20:13:04 -0700
committer	Yin Huai <yhuai@databricks.com>	2016-06-17 20:13:04 -0700
commit	10b671447bc04af250cbd8a7ea86f2769147a78a (patch)
tree	ff5b99ba0ad1e95937a0c55bfc0ef4934d02c3b5 /sql/hive
parent	ebb9a3b6fd834e2c856a192b4455aab83e9c4dc8 (diff)
download	spark-10b671447bc04af250cbd8a7ea86f2769147a78a.tar.gz spark-10b671447bc04af250cbd8a7ea86f2769147a78a.tar.bz2 spark-10b671447bc04af250cbd8a7ea86f2769147a78a.zip