diff options
author | Liang-Chi Hsieh <simonh@tw.ibm.com> | 2016-04-23 21:15:31 -0700 |
---|---|---|
committer | Davies Liu <davies.liu@gmail.com> | 2016-04-23 21:15:31 -0700 |
commit | ba5e0b87a043e46e9599695c82d90e7572185aa5 (patch) | |
tree | 7c13d9c4015caac6b187f858cf07ccac70db28da /sql/core/src | |
parent | 1b7eab74e64f554bbf892c8ef7b7ec00b359d2c0 (diff) | |
download | spark-ba5e0b87a043e46e9599695c82d90e7572185aa5.tar.gz spark-ba5e0b87a043e46e9599695c82d90e7572185aa5.tar.bz2 spark-ba5e0b87a043e46e9599695c82d90e7572185aa5.zip |
[SPARK-14838] [SQL] Set default size for ObjectType to avoid failure when estimating sizeInBytes in ObjectProducer
## What changes were proposed in this pull request?
We have logical plans that produce domain objects whose data type is `ObjectType`. Because the size of an `ObjectType` cannot be estimated, an `UnsupportedOperationException` is thrown whenever we try to estimate it. We should set a default size for `ObjectType` to avoid this failure.
## How was this patch tested?
`DatasetSuite`.
Author: Liang-Chi Hsieh <simonh@tw.ibm.com>
Closes #12599 from viirya/skip-broadcast-objectproducer.
Diffstat (limited to 'sql/core/src')
-rw-r--r-- | sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala | 23 |
1 file changed, 23 insertions, 0 deletions
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala index a6e3bd3a91..eee21acf75 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala @@ -630,6 +630,29 @@ class DatasetSuite extends QueryTest with SharedSQLContext { // Make sure the generated code for this plan can compile and execute. checkDataset(wideDF.map(_.getLong(0)), 0L until 10 : _*) } + + test("SPARK-14838: estimating sizeInBytes in operators with ObjectProducer shouldn't fail") { + val dataset = Seq( + (0, 3, 54f), + (0, 4, 44f), + (0, 5, 42f), + (1, 3, 39f), + (1, 5, 33f), + (1, 4, 26f), + (2, 3, 51f), + (2, 5, 45f), + (2, 4, 30f) + ).toDF("user", "item", "rating") + + val actual = dataset + .select("user", "item") + .as[(Int, Int)] + .groupByKey(_._1) + .mapGroups { case (src, ids) => (src, ids.map(_._2).toArray) } + .toDF("id", "actual") + + dataset.join(actual, dataset("user") === actual("id")).collect() + } } case class OtherTuple(_1: String, _2: Int) |