aboutsummaryrefslogtreecommitdiff
path: root/core
diff options
context:
space:
mode:
authorSandy Ryza <sandy@cloudera.com>2014-07-17 23:57:08 -0700
committerMatei Zaharia <matei@databricks.com>2014-07-17 23:57:08 -0700
commite52b8719cf0603e79ded51cbe1c9f88eea8b56de (patch)
treeb2e57d42583a609a0e74e7575d69af8c4b0898c6 /core
parent29809a6d58bfe3700350ce1988ff7083881c4382 (diff)
downloadspark-e52b8719cf0603e79ded51cbe1c9f88eea8b56de.tar.gz
spark-e52b8719cf0603e79ded51cbe1c9f88eea8b56de.tar.bz2
spark-e52b8719cf0603e79ded51cbe1c9f88eea8b56de.zip
SPARK-2553. CoGroupedRDD unnecessarily allocates a Tuple2 per dependency per key
My humble opinion is that avoiding allocations in this performance-critical section is worth the extra code. Author: Sandy Ryza <sandy@cloudera.com> Closes #1461 from sryza/sandy-spark-2553 and squashes the following commits: 7eaf7f2 [Sandy Ryza] SPARK-2553. CoGroupedRDD unnecessarily allocates a Tuple2 per dependency per key
Diffstat (limited to 'core')
-rw-r--r--core/src/main/scala/org/apache/spark/rdd/CoGroupedRDD.scala6
1 files changed, 5 insertions, 1 deletions
diff --git a/core/src/main/scala/org/apache/spark/rdd/CoGroupedRDD.scala b/core/src/main/scala/org/apache/spark/rdd/CoGroupedRDD.scala
index 5951865e56..b284b636f2 100644
--- a/core/src/main/scala/org/apache/spark/rdd/CoGroupedRDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/CoGroupedRDD.scala
@@ -180,7 +180,11 @@ class CoGroupedRDD[K](@transient var rdds: Seq[RDD[_ <: Product2[K, _]]], part:
}
val mergeCombiners: (CoGroupCombiner, CoGroupCombiner) => CoGroupCombiner =
(combiner1, combiner2) => {
- combiner1.zip(combiner2).map { case (v1, v2) => v1 ++ v2 }
+ // Append each dependency's values from combiner2 onto combiner1 in place,
+ // instead of zipping the two combiners into intermediate tuples.
+ var depNum = 0
+ while (depNum < numRdds) {
+ combiner1(depNum) ++= combiner2(depNum)
+ depNum += 1
+ }
+ // A while loop evaluates to Unit; return the merged combiner so the lambda
+ // matches its declared CoGroupCombiner result type.
+ combiner1
}
new ExternalAppendOnlyMap[K, CoGroupValue, CoGroupCombiner](
createCombiner, mergeValue, mergeCombiners)