diff options
author | Qiping Li <liqiping1991@gmail.com> | 2015-04-29 23:52:16 +0100 |
---|---|---|
committer | Sean Owen <sowen@cloudera.com> | 2015-04-29 23:52:16 +0100 |
commit | 7f4b583733714bbecb43fb0823134bf2ec720a17 (patch) | |
tree | f7e638a2e9c8108fd6b2a85ef6c84b2c7885c4a9 | |
parent | d7dbce8f7da8a7fd01df6633a6043f51161b7d18 (diff) | |
download | spark-7f4b583733714bbecb43fb0823134bf2ec720a17.tar.gz spark-7f4b583733714bbecb43fb0823134bf2ec720a17.tar.bz2 spark-7f4b583733714bbecb43fb0823134bf2ec720a17.zip |
[SPARK-7181] [CORE] fix inifite loop in Externalsorter's mergeWithAggregation
see [SPARK-7181](https://issues.apache.org/jira/browse/SPARK-7181).
Author: Qiping Li <liqiping1991@gmail.com>
Closes #5737 from chouqin/externalsorter and squashes the following commits:
2924b93 [Qiping Li] fix inifite loop in Externalsorter's mergeWithAggregation
-rw-r--r-- | core/src/main/scala/org/apache/spark/util/collection/ExternalSorter.scala | 3 | ||||
-rw-r--r-- | core/src/test/scala/org/apache/spark/util/collection/ExternalSorterSuite.scala | 5 |
2 files changed, 6 insertions, 2 deletions
diff --git a/core/src/main/scala/org/apache/spark/util/collection/ExternalSorter.scala b/core/src/main/scala/org/apache/spark/util/collection/ExternalSorter.scala index ef3cac6225..4ed8a740f9 100644 --- a/core/src/main/scala/org/apache/spark/util/collection/ExternalSorter.scala +++ b/core/src/main/scala/org/apache/spark/util/collection/ExternalSorter.scala @@ -527,7 +527,8 @@ private[spark] class ExternalSorter[K, V, C]( val k = elem._1 var c = elem._2 while (sorted.hasNext && sorted.head._1 == k) { - c = mergeCombiners(c, sorted.head._2) + val pair = sorted.next() + c = mergeCombiners(c, pair._2) } (k, c) } diff --git a/core/src/test/scala/org/apache/spark/util/collection/ExternalSorterSuite.scala b/core/src/test/scala/org/apache/spark/util/collection/ExternalSorterSuite.scala index 9ff067f86a..de26aa351b 100644 --- a/core/src/test/scala/org/apache/spark/util/collection/ExternalSorterSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/collection/ExternalSorterSuite.scala @@ -506,7 +506,10 @@ class ExternalSorterSuite extends FunSuite with LocalSparkContext with PrivateMe val agg = new Aggregator[Int, Int, Int](i => i, (i, j) => i + j, (i, j) => i + j) val ord = implicitly[Ordering[Int]] val sorter = new ExternalSorter(Some(agg), Some(new HashPartitioner(3)), Some(ord), None) - sorter.insertAll((0 until 100000).iterator.map(i => (i / 2, i))) + + // avoid combine before spill + sorter.insertAll((0 until 50000).iterator.map(i => (i , 2 * i))) + sorter.insertAll((0 until 50000).iterator.map(i => (i, 2 * i + 1))) val results = sorter.partitionedIterator.map{case (p, vs) => (p, vs.toSet)}.toSet val expected = (0 until 3).map(p => { (p, (0 until 50000).map(i => (i, i * 4 + 1)).filter(_._1 % 3 == p).toSet) |