aboutsummaryrefslogtreecommitdiff
path: root/sql
diff options
context:
space:
mode:
authorYin Huai <yhuai@databricks.com>2015-11-18 11:49:12 -0800
committerYin Huai <yhuai@databricks.com>2015-11-18 11:49:12 -0800
commit6f99522d13d8db9fcc767f7c3189557b9a53d283 (patch)
tree61aa47395a77a6dfb22169d77a80a3f6b820671a /sql
parent90a7519daaa7f4ee3be7c5a9aa244120811ff6eb (diff)
downloadspark-6f99522d13d8db9fcc767f7c3189557b9a53d283.tar.gz
spark-6f99522d13d8db9fcc767f7c3189557b9a53d283.tar.bz2
spark-6f99522d13d8db9fcc767f7c3189557b9a53d283.zip
[SPARK-11792] [SQL] [FOLLOW-UP] Change SizeEstimation to KnownSizeEstimation and make estimatedSize return Long instead of Option[Long]
https://issues.apache.org/jira/browse/SPARK-11792 The main changes include: * Renaming `SizeEstimation` to `KnownSizeEstimation`. Hopefully this new name is more informative. * Making `estimatedSize` return `Long` instead of `Option[Long]`. * In `UnsafeHashedRelation`, `estimatedSize` will delegate the work to `SizeEstimator` if we have not created a `BytesToBytesMap`. Since we will put `UnsafeHashedRelation` into `BlockManager`, it is generally good to let it provide a more accurate size estimation. Also, if we do not put `BytesToBytesMap` directly into `BlockManager`, I feel it is not really necessary to make `BytesToBytesMap` extend `KnownSizeEstimation`. Author: Yin Huai <yhuai@databricks.com> Closes #9813 from yhuai/SPARK-11792-followup.
Diffstat (limited to 'sql')
-rw-r--r--sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashedRelation.scala12
1 files changed, 8 insertions, 4 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashedRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashedRelation.scala
index 49ae09bf53..aebfea5832 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashedRelation.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashedRelation.scala
@@ -30,7 +30,7 @@ import org.apache.spark.sql.execution.metric.{LongSQLMetric, SQLMetrics}
import org.apache.spark.unsafe.Platform
import org.apache.spark.unsafe.map.BytesToBytesMap
import org.apache.spark.unsafe.memory.MemoryLocation
-import org.apache.spark.util.{SizeEstimation, Utils}
+import org.apache.spark.util.{SizeEstimator, KnownSizeEstimation, Utils}
import org.apache.spark.util.collection.CompactBuffer
import org.apache.spark.{SparkConf, SparkEnv}
@@ -190,7 +190,7 @@ private[execution] object HashedRelation {
private[joins] final class UnsafeHashedRelation(
private var hashTable: JavaHashMap[UnsafeRow, CompactBuffer[UnsafeRow]])
extends HashedRelation
- with SizeEstimation
+ with KnownSizeEstimation
with Externalizable {
private[joins] def this() = this(null) // Needed for serialization
@@ -217,8 +217,12 @@ private[joins] final class UnsafeHashedRelation(
}
}
- override def estimatedSize: Option[Long] = {
- Option(binaryMap).map(_.getTotalMemoryConsumption)
+ override def estimatedSize: Long = {
+ if (binaryMap != null) {
+ binaryMap.getTotalMemoryConsumption
+ } else {
+ SizeEstimator.estimate(hashTable)
+ }
}
override def get(key: InternalRow): Seq[InternalRow] = {