aboutsummaryrefslogtreecommitdiff
path: root/mllib
diff options
context:
space:
mode:
authorShuo Xiang <shuoxiangpub@gmail.com>2015-05-07 20:55:08 -0700
committerJoseph K. Bradley <joseph@databricks.com>2015-05-07 20:55:08 -0700
commit92f8f803a68e0c16771e9793098c6d76dfdf99af (patch)
treeed96632ab2141ed4bc54694ee08b179b2c258560 /mllib
parentcd1d4110cfffb413ab585cf1cc8f1264243cb393 (diff)
downloadspark-92f8f803a68e0c16771e9793098c6d76dfdf99af.tar.gz
spark-92f8f803a68e0c16771e9793098c6d76dfdf99af.tar.bz2
spark-92f8f803a68e0c16771e9793098c6d76dfdf99af.zip
[SPARK-7452] [MLLIB] Fix bug in topByKey and update test
The toArray function of BoundedPriorityQueue does not necessarily preserve order, so simply reversing its output does not guarantee a descending result; the array is now sorted explicitly with the reversed ordering. Add a counter-example as the test, which would fail the original implementation. Author: Shuo Xiang <shuoxiangpub@gmail.com> Closes #5990 from coderxiang/topbykey-test and squashes the following commits: 98804c9 [Shuo Xiang] fix bug in topBykey and update test
Diffstat (limited to 'mllib')
-rw-r--r--mllib/src/main/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctions.scala2
-rw-r--r--mllib/src/test/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctionsSuite.scala9
2 files changed, 6 insertions, 5 deletions
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctions.scala b/mllib/src/main/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctions.scala
index 5af55aaf84..1b93e2d764 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctions.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctions.scala
@@ -46,7 +46,7 @@ class MLPairRDDFunctions[K: ClassTag, V: ClassTag](self: RDD[(K, V)]) extends Se
combOp = (queue1, queue2) => {
queue1 ++= queue2
}
- ).mapValues(_.toArray.reverse) // This is an min-heap, so we reverse the order.
+ ).mapValues(_.toArray.sorted(ord.reverse)) // This is an min-heap, so we reverse the order.
}
}
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctionsSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctionsSuite.scala
index cb8fe4dba9..57216e8eb4 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctionsSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctionsSuite.scala
@@ -24,13 +24,14 @@ import org.apache.spark.mllib.rdd.MLPairRDDFunctions._
class MLPairRDDFunctionsSuite extends FunSuite with MLlibTestSparkContext {
test("topByKey") {
- val topMap = sc.parallelize(Array((1, 1), (1, 2), (3, 2), (3, 7), (5, 1), (3, 5)), 2)
- .topByKey(2)
+ val topMap = sc.parallelize(Array((1, 7), (1, 3), (1, 6), (1, 1), (1, 2), (3, 2), (3, 7), (5,
+ 1), (3, 5)), 2)
+ .topByKey(5)
.collectAsMap()
assert(topMap.size === 3)
- assert(topMap(1) === Array(2, 1))
- assert(topMap(3) === Array(7, 5))
+ assert(topMap(1) === Array(7, 6, 3, 2, 1))
+ assert(topMap(3) === Array(7, 5, 2))
assert(topMap(5) === Array(1))
}
}