aboutsummaryrefslogtreecommitdiff
path: root/sql
diff options
context:
space:
mode:
authorYin Huai <yhuai@databricks.com>2015-11-18 00:42:52 -0800
committerReynold Xin <rxin@databricks.com>2015-11-18 00:42:52 -0800
commit1714350bddd78cd1398e1a816f675ab729001081 (patch)
treef567319f64994f8492c8d69c8aacea83d87b799a /sql
parent5e2b44474c2b838bebeffe5ba5cd72961b0cd31e (diff)
downloadspark-1714350bddd78cd1398e1a816f675ab729001081.tar.gz
spark-1714350bddd78cd1398e1a816f675ab729001081.tar.bz2
spark-1714350bddd78cd1398e1a816f675ab729001081.zip
[SPARK-11792][SQL] SizeEstimator cannot provide a good size estimation of UnsafeHashedRelations
https://issues.apache.org/jira/browse/SPARK-11792 Right now, SizeEstimator will "think" a small UnsafeHashedRelation is several GBs. Author: Yin Huai <yhuai@databricks.com> Closes #9788 from yhuai/SPARK-11792.
Diffstat (limited to 'sql')
-rw-r--r--sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashedRelation.scala10
1 files changed, 8 insertions, 2 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashedRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashedRelation.scala
index cc8abb1ba4..49ae09bf53 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashedRelation.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashedRelation.scala
@@ -30,7 +30,7 @@ import org.apache.spark.sql.execution.metric.{LongSQLMetric, SQLMetrics}
import org.apache.spark.unsafe.Platform
import org.apache.spark.unsafe.map.BytesToBytesMap
import org.apache.spark.unsafe.memory.MemoryLocation
-import org.apache.spark.util.Utils
+import org.apache.spark.util.{SizeEstimation, Utils}
import org.apache.spark.util.collection.CompactBuffer
import org.apache.spark.{SparkConf, SparkEnv}
@@ -189,7 +189,9 @@ private[execution] object HashedRelation {
*/
private[joins] final class UnsafeHashedRelation(
private var hashTable: JavaHashMap[UnsafeRow, CompactBuffer[UnsafeRow]])
- extends HashedRelation with Externalizable {
+ extends HashedRelation
+ with SizeEstimation
+ with Externalizable {
private[joins] def this() = this(null) // Needed for serialization
@@ -215,6 +217,10 @@ private[joins] final class UnsafeHashedRelation(
}
}
+ override def estimatedSize: Option[Long] = {
+ Option(binaryMap).map(_.getTotalMemoryConsumption)
+ }
+
override def get(key: InternalRow): Seq[InternalRow] = {
val unsafeKey = key.asInstanceOf[UnsafeRow]