aboutsummaryrefslogtreecommitdiff
path: root/core/src/test
diff options
context:
space:
mode:
authorMatei Zaharia <matei.zaharia@gmail.com>2013-09-09 13:45:40 -0700
committerMatei Zaharia <matei.zaharia@gmail.com>2013-09-09 13:45:40 -0700
commita85758c20006537e7863cf30bc3c4b6b6efd4981 (patch)
treee0ddc6cac1625313a29f1cabb143ff5f6b90a945 /core/src/test
parent084fc369614766ce9ca3285c784afceddd111d25 (diff)
parent59003d387d399d6280a42b4c79eef18cfdbfe72b (diff)
downloadspark-a85758c20006537e7863cf30bc3c4b6b6efd4981.tar.gz
spark-a85758c20006537e7863cf30bc3c4b6b6efd4981.tar.bz2
spark-a85758c20006537e7863cf30bc3c4b6b6efd4981.zip
Merge pull request #907 from stephenh/document_coalesce_shuffle
Add better docs for coalesce.
Diffstat (limited to 'core/src/test')
-rw-r--r--core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala14
1 files changed, 10 insertions, 4 deletions
diff --git a/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala b/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala
index adc971050e..c1df5e151e 100644
--- a/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala
+++ b/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala
@@ -140,7 +140,7 @@ class RDDSuite extends FunSuite with SharedSparkContext {
assert(rdd.union(emptyKv).collect().size === 2)
}
- test("cogrouped RDDs") {
+ test("coalesced RDDs") {
val data = sc.parallelize(1 to 10, 10)
val coalesced1 = data.coalesce(2)
@@ -175,8 +175,14 @@ class RDDSuite extends FunSuite with SharedSparkContext {
val coalesced5 = data.coalesce(1, shuffle = true)
assert(coalesced5.dependencies.head.rdd.dependencies.head.rdd.asInstanceOf[ShuffledRDD[_, _, _]] !=
null)
+
+ // when shuffling, we can increase the number of partitions
+ val coalesced6 = data.coalesce(20, shuffle = true)
+ assert(coalesced6.partitions.size === 20)
+ assert(coalesced6.collect().toSet === (1 to 10).toSet)
}
- test("cogrouped RDDs with locality") {
+
+ test("coalesced RDDs with locality") {
val data3 = sc.makeRDD(List((1,List("a","c")), (2,List("a","b","c")), (3,List("b"))))
val coal3 = data3.coalesce(3)
val list3 = coal3.partitions.map(p => p.asInstanceOf[CoalescedRDDPartition].preferredLocation)
@@ -197,11 +203,11 @@ class RDDSuite extends FunSuite with SharedSparkContext {
val coalesced4 = data.coalesce(20)
val listOfLists = coalesced4.glom().collect().map(_.toList).toList
val sortedList = listOfLists.sortWith{ (x, y) => !x.isEmpty && (y.isEmpty || (x(0) < y(0))) }
- assert( sortedList === (1 to 9).
+ assert(sortedList === (1 to 9).
map{x => List(x)}.toList, "Tried coalescing 9 partitions to 20 but didn't get 9 back")
}
- test("cogrouped RDDs with locality, large scale (10K partitions)") {
+ test("coalesced RDDs with locality, large scale (10K partitions)") {
// large scale experiment
import collection.mutable
val rnd = scala.util.Random