author    Liwei Lin <lwlin7@gmail.com>    2016-06-10 13:01:29 -0700
committer Shixiong Zhu <shixiong@databricks.com>    2016-06-10 13:01:29 -0700
commit    fb219029dd1b8d2783c3e202361401048296595c (patch)
tree      3a1668e08c9204bf30c6c471b0268be691fa7691 /sql
parent    5c16ad0d522e5124a6977533077afb7b38fc42a1 (diff)
[SPARK-15871][SQL] Add `assertNotPartitioned` check in `DataFrameWriter`
## What changes were proposed in this pull request?

It doesn't make sense to specify partitioning parameters when we write data out from Datasets/DataFrames into `jdbc` tables or streaming `ForeachWriter`s. This patch adds an `assertNotPartitioned` check in `DataFrameWriter`.

| operation      | should check not partitioned? |
|----------------|-------------------------------|
| mode           |                               |
| outputMode     |                               |
| trigger        |                               |
| format         |                               |
| option/options |                               |
| partitionBy    |                               |
| bucketBy       |                               |
| sortBy         |                               |
| save           |                               |
| queryName      |                               |
| startStream    |                               |
| foreach        | yes                           |
| insertInto     |                               |
| saveAsTable    |                               |
| jdbc           | yes                           |
| json           |                               |
| parquet        |                               |
| orc            |                               |
| text           |                               |
| csv            |                               |

## How was this patch tested?

New dedicated tests.

Author: Liwei Lin <lwlin7@gmail.com>

Closes #13597 from lw-lin/add-assertNotPartitioned.
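As a quick illustration (not part of the patch): a minimal Scala sketch of the behavior this check enforces, assuming a hypothetical local SparkSession, a made-up JDBC URL, an `events` table name, and illustrative `date`/`value` columns.

```scala
import java.util.Properties

import org.apache.spark.sql.{AnalysisException, SparkSession}

object AssertNotPartitionedSketch {
  def main(args: Array[String]): Unit = {
    // Hypothetical local session and toy data; all names here are illustrative only.
    val spark = SparkSession.builder().master("local[*]").appName("sketch").getOrCreate()
    import spark.implicits._

    val df = Seq(("2016-06-10", 1), ("2016-06-11", 2)).toDF("date", "value")

    try {
      // With this patch, the partitionBy() setting is rejected up front,
      // before any JDBC connection is attempted.
      df.write
        .partitionBy("date")
        .jdbc("jdbc:postgresql://localhost/test", "events", new Properties())
    } catch {
      case e: AnalysisException =>
        // Expected message (per the patch): 'jdbc' does not support partitioning
        println(e.getMessage)
    }

    spark.stop()
  }
}
```

Because the check lives in `DataFrameWriter` itself, the call fails fast with an `AnalysisException`; no JDBC driver or database is needed for the sketch above to reach the error.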
Diffstat (limited to 'sql')
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala                              12
-rw-r--r--  sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataFrameReaderWriterSuite.scala    42
-rw-r--r--  sql/hive/src/test/scala/org/apache/spark/sql/sources/BucketedWriteSuite.scala                    8
3 files changed, 52 insertions, 10 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala
index 6ce59e885a..78b74f948e 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala
@@ -432,6 +432,7 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) {
*/
@Experimental
def foreach(writer: ForeachWriter[T]): ContinuousQuery = {
+ assertNotPartitioned("foreach")
assertNotBucketed("foreach")
assertStreaming(
"foreach() can only be called on streaming Datasets/DataFrames.")
@@ -562,8 +563,13 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) {
private def assertNotBucketed(operation: String): Unit = {
if (numBuckets.isDefined || sortColumnNames.isDefined) {
- throw new IllegalArgumentException(
- s"'$operation' does not support bucketing right now.")
+ throw new AnalysisException(s"'$operation' does not support bucketing right now")
+ }
+ }
+
+ private def assertNotPartitioned(operation: String): Unit = {
+ if (partitioningColumns.isDefined) {
+ throw new AnalysisException(s"'$operation' does not support partitioning")
}
}
@@ -646,6 +652,8 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) {
* @since 1.4.0
*/
def jdbc(url: String, table: String, connectionProperties: Properties): Unit = {
+ assertNotPartitioned("jdbc")
+ assertNotBucketed("jdbc")
assertNotStreaming("jdbc() can only be called on non-continuous queries")
val props = new Properties()
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataFrameReaderWriterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataFrameReaderWriterSuite.scala
index bf6063a4c4..6e0d66ae7f 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataFrameReaderWriterSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataFrameReaderWriterSuite.scala
@@ -455,8 +455,8 @@ class DataFrameReaderWriterSuite extends StreamTest with BeforeAndAfter {
.format("org.apache.spark.sql.streaming.test")
.stream()
val w = df.write
- val e = intercept[IllegalArgumentException](w.bucketBy(1, "text").startStream())
- assert(e.getMessage == "'startStream' does not support bucketing right now.")
+ val e = intercept[AnalysisException](w.bucketBy(1, "text").startStream())
+ assert(e.getMessage == "'startStream' does not support bucketing right now;")
}
test("check sortBy() can only be called on non-continuous queries;") {
@@ -464,8 +464,8 @@ class DataFrameReaderWriterSuite extends StreamTest with BeforeAndAfter {
.format("org.apache.spark.sql.streaming.test")
.stream()
val w = df.write
- val e = intercept[IllegalArgumentException](w.sortBy("text").startStream())
- assert(e.getMessage == "'startStream' does not support bucketing right now.")
+ val e = intercept[AnalysisException](w.sortBy("text").startStream())
+ assert(e.getMessage == "'startStream' does not support bucketing right now;")
}
test("check save(path) can only be called on non-continuous queries") {
@@ -558,6 +558,40 @@ class DataFrameReaderWriterSuite extends StreamTest with BeforeAndAfter {
assert(e.getMessage == "csv() can only be called on non-continuous queries;")
}
+ test("check foreach() does not support partitioning or bucketing") {
+ val df = spark.read
+ .format("org.apache.spark.sql.streaming.test")
+ .stream()
+
+ var w = df.write.partitionBy("value")
+ var e = intercept[AnalysisException](w.foreach(null))
+ Seq("foreach", "partitioning").foreach { s =>
+ assert(e.getMessage.toLowerCase.contains(s.toLowerCase))
+ }
+
+ w = df.write.bucketBy(2, "value")
+ e = intercept[AnalysisException](w.foreach(null))
+ Seq("foreach", "bucketing").foreach { s =>
+ assert(e.getMessage.toLowerCase.contains(s.toLowerCase))
+ }
+ }
+
+ test("check jdbc() does not support partitioning or bucketing") {
+ val df = spark.read.text(newTextInput)
+
+ var w = df.write.partitionBy("value")
+ var e = intercept[AnalysisException](w.jdbc(null, null, null))
+ Seq("jdbc", "partitioning").foreach { s =>
+ assert(e.getMessage.toLowerCase.contains(s.toLowerCase))
+ }
+
+ w = df.write.bucketBy(2, "value")
+ e = intercept[AnalysisException](w.jdbc(null, null, null))
+ Seq("jdbc", "bucketing").foreach { s =>
+ assert(e.getMessage.toLowerCase.contains(s.toLowerCase))
+ }
+ }
+
test("ConsoleSink can be correctly loaded") {
LastOptions.clear()
val df = spark.read
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/sources/BucketedWriteSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/sources/BucketedWriteSuite.scala
index 61a281db85..997445114b 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/sources/BucketedWriteSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/sources/BucketedWriteSuite.scala
@@ -62,19 +62,19 @@ class BucketedWriteSuite extends QueryTest with SQLTestUtils with TestHiveSingle
test("write bucketed data using save()") {
val df = Seq(1 -> "a", 2 -> "b").toDF("i", "j")
- val e = intercept[IllegalArgumentException] {
+ val e = intercept[AnalysisException] {
df.write.bucketBy(2, "i").parquet("/tmp/path")
}
- assert(e.getMessage == "'save' does not support bucketing right now.")
+ assert(e.getMessage == "'save' does not support bucketing right now;")
}
test("write bucketed data using insertInto()") {
val df = Seq(1 -> "a", 2 -> "b").toDF("i", "j")
- val e = intercept[IllegalArgumentException] {
+ val e = intercept[AnalysisException] {
df.write.bucketBy(2, "i").insertInto("tt")
}
- assert(e.getMessage == "'insertInto' does not support bucketing right now.")
+ assert(e.getMessage == "'insertInto' does not support bucketing right now;")
}
private val df = (0 until 50).map(i => (i % 5, i % 13, i.toString)).toDF("i", "j", "k")