| field | value | date |
|---|---|---|
| author | Tejas Patil <tejasp@fb.com> | 2017-02-15 22:45:58 -0800 |
| committer | Wenchen Fan <wenchen@databricks.com> | 2017-02-15 22:45:58 -0800 |
| commit | f041e55eefe1d8a995fed321c66bccbd8b8e5255 (patch) | |
| tree | b82f5402f3a5abf38127322be8f5e5657654f7d7 /sql/core | |
| parent | 8487902a98caf727ba3f9820452b01276d20ede3 (diff) | |
# [SPARK-19618][SQL] Inconsistency wrt max. buckets allowed from Dataframe API vs SQL
## What changes were proposed in this pull request?
Jira: https://issues.apache.org/jira/browse/SPARK-19618
Moved the validation of the number of buckets from `DataFrameWriter` to `BucketSpec` creation, so the DataFrame API and SQL paths enforce the same limits (see the sketch below).
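The `BucketSpec` change itself lives in sql/catalyst, so it does not appear in the sql/core diffstat below. A minimal sketch of what the relocated check looks like, inferred from the error message asserted in the updated test (the exact constructor body is an assumption):

```scala
import org.apache.spark.sql.AnalysisException

// Sketch only: the real BucketSpec lives in
// org.apache.spark.sql.catalyst.catalog and is outside this diff.
case class BucketSpec(
    numBuckets: Int,
    bucketColumnNames: Seq[String],
    sortColumnNames: Seq[String]) {
  // Validation happens on construction, so every caller
  // (DataFrame API or SQL) hits the same check.
  if (numBuckets <= 0 || numBuckets >= 100000) {
    throw new AnalysisException(
      s"Number of buckets should be greater than 0 but less than 100000. Got `$numBuckets`")
  }
}
```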
## How was this patch tested?
- Updated the existing bucket tests to cover both invalid bucket counts (0 and 100000)
Author: Tejas Patil <tejasp@fb.com>
Closes #16948 from tejasapatil/SPARK-19618_max_buckets.
Diffstat (limited to 'sql/core')
| mode | file | lines |
|---|---|---|
| -rw-r--r-- | sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala | 1 |
| -rw-r--r-- | sql/core/src/test/scala/org/apache/spark/sql/sources/CreateTableAsSelectSuite.scala | 28 |

2 files changed, 15 insertions(+), 14 deletions(-)
```diff
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala
index 1d834b1821..cdae8ea458 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala
@@ -275,7 +275,6 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) {
     }
 
     numBuckets.map { n =>
-      require(n > 0 && n < 100000, "Bucket number must be greater than 0 and less than 100000.")
       BucketSpec(n, bucketColumnNames.get, sortColumnNames.getOrElse(Nil))
     }
   }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/CreateTableAsSelectSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/CreateTableAsSelectSuite.scala
index 99da1969fc..4a42f8ea79 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/sources/CreateTableAsSelectSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/CreateTableAsSelectSuite.scala
@@ -206,7 +206,7 @@ class CreateTableAsSelectSuite
     }
   }
 
-  test("create table using as select - with non-zero buckets") {
+  test("create table using as select - with valid number of buckets") {
     val catalog = spark.sessionState.catalog
     withTable("t") {
       sql(
@@ -222,19 +222,21 @@ class CreateTableAsSelectSuite
     }
   }
 
-  test("create table using as select - with zero buckets") {
+  test("create table using as select - with invalid number of buckets") {
     withTable("t") {
-      val e = intercept[AnalysisException] {
-        sql(
-          s"""
-             |CREATE TABLE t USING PARQUET
-             |OPTIONS (PATH '${path.toURI}')
-             |CLUSTERED BY (a) SORTED BY (b) INTO 0 BUCKETS
-             |AS SELECT 1 AS a, 2 AS b
-           """.stripMargin
-        )
-      }.getMessage
-      assert(e.contains("Expected positive number of buckets, but got `0`"))
+      Seq(0, 100000).foreach(numBuckets => {
+        val e = intercept[AnalysisException] {
+          sql(
+            s"""
+               |CREATE TABLE t USING PARQUET
+               |OPTIONS (PATH '${path.toURI}')
+               |CLUSTERED BY (a) SORTED BY (b) INTO $numBuckets BUCKETS
+               |AS SELECT 1 AS a, 2 AS b
+             """.stripMargin
+          )
+        }.getMessage
+        assert(e.contains("Number of buckets should be greater than 0 but less than 100000"))
+      })
     }
   }
 
```
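As an illustration (not part of the patch): with the check in `BucketSpec`, both entry points should now fail with the same `AnalysisException`, whereas the removed `require` made the DataFrame path throw an `IllegalArgumentException` instead. Table and column names here are hypothetical, and a ScalaTest context like the suite above is assumed:

```scala
import org.apache.spark.sql.AnalysisException

val df = spark.range(10).selectExpr("id AS a", "id AS b")

// DataFrame API path: previously rejected by the require() removed above
// (an IllegalArgumentException); now an AnalysisException from BucketSpec.
intercept[AnalysisException] {
  df.write.bucketBy(100000, "a").sortBy("b").saveAsTable("t")
}

// SQL path: hits the same BucketSpec check during analysis.
intercept[AnalysisException] {
  sql("CREATE TABLE t USING PARQUET CLUSTERED BY (a) SORTED BY (b) " +
    "INTO 100000 BUCKETS AS SELECT 1 AS a, 2 AS b")
}
```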