diff options
author | Stephen Haberman <stephen@exigencecorp.com> | 2013-03-22 08:54:44 -0500 |
---|---|---|
committer | Stephen Haberman <stephen@exigencecorp.com> | 2013-03-22 08:54:44 -0500 |
commit | 1c67c7dfd1660dd39226742eedcb2948ab1445d0 (patch) | |
tree | 6d20b0585e83a60f1a258372d53d12fca01656eb /core/src/test | |
parent | ca4d083ec825aa674fdd7d1dcd52a99ef8dcdf8b (diff) | |
download | spark-1c67c7dfd1660dd39226742eedcb2948ab1445d0.tar.gz spark-1c67c7dfd1660dd39226742eedcb2948ab1445d0.tar.bz2 spark-1c67c7dfd1660dd39226742eedcb2948ab1445d0.zip |
Add a shuffle parameter to coalesce.
This is useful for when you want just 1 output file (part-00000) but
still up the upstream RDD to be computed in parallel.
Diffstat (limited to 'core/src/test')
-rw-r--r-- | core/src/test/scala/spark/RDDSuite.scala | 6 |
1 files changed, 5 insertions, 1 deletions
diff --git a/core/src/test/scala/spark/RDDSuite.scala b/core/src/test/scala/spark/RDDSuite.scala index 9739ba869b..bcbb472f6c 100644 --- a/core/src/test/scala/spark/RDDSuite.scala +++ b/core/src/test/scala/spark/RDDSuite.scala @@ -3,7 +3,7 @@ package spark import scala.collection.mutable.HashMap import org.scalatest.FunSuite import spark.SparkContext._ -import spark.rdd.{CoalescedRDD, PartitionPruningRDD} +import spark.rdd.{CoalescedRDD, PartitionPruningRDD, ShuffledRDD} class RDDSuite extends FunSuite with LocalSparkContext { @@ -154,6 +154,10 @@ class RDDSuite extends FunSuite with LocalSparkContext { assert(coalesced4.collect().toList === (1 to 10).toList) assert(coalesced4.glom().collect().map(_.toList).toList === (1 to 10).map(x => List(x)).toList) + + // we can optionally shuffle to keep the upstream parallel + val coalesced5 = data.coalesce(1, shuffle = true) + assert(coalesced5.dependencies.head.rdd.dependencies.head.rdd.asInstanceOf[ShuffledRDD[_, _]] ne null) } test("zipped RDDs") { |