aboutsummaryrefslogtreecommitdiff
path: root/core
diff options
context:
space:
mode:
authorMatei Zaharia <matei@eecs.berkeley.edu>2011-03-06 11:11:47 -0800
committerMatei Zaharia <matei@eecs.berkeley.edu>2011-03-06 11:11:47 -0800
commite1436f1eaa32e968ae431bee078a54d1c0285535 (patch)
tree2e474d1e3f613c68d1bf2d234d3492ca6d63ef47 /core
parenta789e9aaea2d44273b0089496ea38b764fd95e70 (diff)
parent370b95816f0adbb1e47508b38240eef8f36367bb (diff)
downloadspark-e1436f1eaa32e968ae431bee078a54d1c0285535.tar.gz
spark-e1436f1eaa32e968ae431bee078a54d1c0285535.tar.bz2
spark-e1436f1eaa32e968ae431bee078a54d1c0285535.zip
Merge remote branch 'origin/master' into new-rdds
Diffstat (limited to 'core')
-rw-r--r--core/src/main/scala/spark/SizeEstimator.scala17
1 files changed, 15 insertions, 2 deletions
diff --git a/core/src/main/scala/spark/SizeEstimator.scala b/core/src/main/scala/spark/SizeEstimator.scala
index 12dd19d704..a3774fb055 100644
--- a/core/src/main/scala/spark/SizeEstimator.scala
+++ b/core/src/main/scala/spark/SizeEstimator.scala
@@ -5,6 +5,7 @@ import java.lang.reflect.Modifier
import java.lang.reflect.{Array => JArray}
import java.util.IdentityHashMap
import java.util.concurrent.ConcurrentHashMap
+import java.util.Random
import scala.collection.mutable.ArrayBuffer
@@ -98,8 +99,20 @@ object SizeEstimator {
state.size += length * primitiveSize(elementClass)
} else {
state.size += length * POINTER_SIZE
- for (i <- 0 until length) {
- state.enqueue(JArray.get(array, i))
+ if (length <= 100) {
+ for (i <- 0 until length) {
+ state.enqueue(JArray.get(array, i))
+ }
+ } else {
+ // Estimate the size of a large array by sampling elements.
+ // TODO: Add a config setting for turning this off?
+ var size = 0.0
+ val rand = new Random(42)
+ for (i <- 0 until 100) {
+ val elem = JArray.get(array, rand.nextInt(length))
+ size += SizeEstimator.estimate(elem)
+ }
+ state.size += ((length / 100.0) * size).toLong
}
}
}