diff options
author | Matei Zaharia <matei@eecs.berkeley.edu> | 2011-03-06 11:11:47 -0800 |
---|---|---|
committer | Matei Zaharia <matei@eecs.berkeley.edu> | 2011-03-06 11:11:47 -0800 |
commit | e1436f1eaa32e968ae431bee078a54d1c0285535 (patch) | |
tree | 2e474d1e3f613c68d1bf2d234d3492ca6d63ef47 | |
parent | a789e9aaea2d44273b0089496ea38b764fd95e70 (diff) | |
parent | 370b95816f0adbb1e47508b38240eef8f36367bb (diff) | |
download | spark-e1436f1eaa32e968ae431bee078a54d1c0285535.tar.gz spark-e1436f1eaa32e968ae431bee078a54d1c0285535.tar.bz2 spark-e1436f1eaa32e968ae431bee078a54d1c0285535.zip |
Merge remote branch 'origin/master' into new-rdds
-rw-r--r-- | core/src/main/scala/spark/SizeEstimator.scala | 17 |
1 files changed, 15 insertions, 2 deletions
diff --git a/core/src/main/scala/spark/SizeEstimator.scala b/core/src/main/scala/spark/SizeEstimator.scala index 12dd19d704..a3774fb055 100644 --- a/core/src/main/scala/spark/SizeEstimator.scala +++ b/core/src/main/scala/spark/SizeEstimator.scala @@ -5,6 +5,7 @@ import java.lang.reflect.Modifier import java.lang.reflect.{Array => JArray} import java.util.IdentityHashMap import java.util.concurrent.ConcurrentHashMap +import java.util.Random import scala.collection.mutable.ArrayBuffer @@ -98,8 +99,20 @@ object SizeEstimator { state.size += length * primitiveSize(elementClass) } else { state.size += length * POINTER_SIZE - for (i <- 0 until length) { - state.enqueue(JArray.get(array, i)) + if (length <= 100) { + for (i <- 0 until length) { + state.enqueue(JArray.get(array, i)) + } + } else { + // Estimate the size of a large array by sampling elements. + // TODO: Add a config setting for turning this off? + var size = 0.0 + val rand = new Random(42) + for (i <- 0 until 100) { + val elem = JArray.get(array, rand.nextInt(length)) + size += SizeEstimator.estimate(elem) + } + state.size += ((length / 100.0) * size).toLong } } } |