aboutsummaryrefslogtreecommitdiff
path: root/core
diff options
context:
space:
mode:
authorAli Ghodsi <alig@cs.berkeley.edu>2013-08-14 11:15:39 -0700
committerAli Ghodsi <alig@cs.berkeley.edu>2013-08-20 16:13:36 -0700
commit66edf854aa585d23e47fc0bfb7fdd4e23c0ea592 (patch)
treea82c35d6fdabdf38ea0fe80b240d357ae4a4c19a /core
parent1ede102ba5863f6cee27437b0adbc4d54cedffb3 (diff)
downloadspark-66edf854aa585d23e47fc0bfb7fdd4e23c0ea592.tar.gz
spark-66edf854aa585d23e47fc0bfb7fdd4e23c0ea592.tar.bz2
spark-66edf854aa585d23e47fc0bfb7fdd4e23c0ea592.zip
Bug, should compute slack wrt parent partition size, not number of bins
Diffstat (limited to 'core')
-rw-r--r--core/src/main/scala/spark/rdd/CoalescedRDD.scala4
1 files changed, 2 insertions, 2 deletions
diff --git a/core/src/main/scala/spark/rdd/CoalescedRDD.scala b/core/src/main/scala/spark/rdd/CoalescedRDD.scala
index 09ae9a8fa6..f999e9b0ec 100644
--- a/core/src/main/scala/spark/rdd/CoalescedRDD.scala
+++ b/core/src/main/scala/spark/rdd/CoalescedRDD.scala
@@ -50,7 +50,7 @@ private[spark] case class CoalescedRDDPartition(
class CoalescedRDD[T: ClassManifest](
@transient var prev: RDD[T],
maxPartitions: Int,
- balanceSlack: Double = 0.20 )
+ balanceSlack: Double = 0.10 )
extends RDD[T](prev.context, Nil) { // Nil since we implement getDependencies
override def getPartitions: Array[Partition] = {
@@ -158,7 +158,7 @@ class PartitionCoalescer(maxPartitions: Int, prev: RDD[_], balanceSlack: Double)
// determines the tradeoff between load-balancing the partitions sizes and their locality
// e.g. balanceSlack=0.10 means that it allows up to 10% imbalance in favor of locality
- val slack = (balanceSlack * maxPartitions).toInt
+ val slack = (balanceSlack * prev.partitions.size).toInt
private var noLocality = true // if true if no preferredLocations exists for parent RDD