Always use a shuffle

author: Patrick Wendell <pwendell@gmail.com> 2013-10-24 10:52:25 -0700
committer: Patrick Wendell <pwendell@gmail.com> 2013-10-24 14:31:34 -0700
commit: 2fda84fe3ffa4d887e58b4d1717765a42a30b0f9 (patch)
tree: ed7b401310a80ba7168c7b5a371d2f35d30d5f1f /core
parent: 08c1a42d7d9edef02a24a3bc5045b2dce035a93b (diff)
download: spark-2fda84fe3ffa4d887e58b4d1717765a42a30b0f9.tar.gz
spark-2fda84fe3ffa4d887e58b4d1717765a42a30b0f9.tar.bz2
spark-2fda84fe3ffa4d887e58b4d1717765a42a30b0f9.zip
1 files changed, 7 insertions, 15 deletions
diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala
index e2652f13c4..17bc2515f2 100644
--- a/core/src/main/scala/org/apache/spark/rdd/RDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala
@@ -268,22 +268,14 @@ abstract class RDD[T: ClassManifest](
   /**
    * Return a new RDD that has exactly numPartitions partitions.
    *
-   * Used to increase or decrease the level of parallelism in this RDD. By default, this will use
-   * a shuffle to redistribute data. If you are shrinking the RDD into fewer partitions, you can
-   * set skipShuffle = false to avoid a shuffle. Skipping shuffles is not supported when
-   * increasing the number of partitions.
+   * Used to increase or decrease the level of parallelism in this RDD. This will use
+   * a shuffle to redistribute data.
    *
-   * Similar to `coalesce`, but shuffles by default, allowing you to call this safely even
-   * if you don't know the number of partitions.
-   */
-  def repartition(numPartitions: Int, skipShuffle: Boolean = false): RDD[T] = {
-    if (skipShuffle && numPartitions > this.partitions.size) {
-      val msg = "repartition must grow %s from %s to %s partitions, cannot skip shuffle.".format(
-        this.name, this.partitions.size, numPartitions
-      )
-      throw new IllegalArgumentException(msg)
-    }
-    coalesce(numPartitions, !skipShuffle)
+   * If you are decreasing the number of partitions in this RDD, consider using `coalesce`,
+   * which can avoid performing a shuffle.
+   */
+  def repartition(numPartitions: Int): RDD[T] = {
+    coalesce(numPartitions, true)
   }
 
   /**
author	Patrick Wendell <pwendell@gmail.com>	2013-10-24 10:52:25 -0700
committer	Patrick Wendell <pwendell@gmail.com>	2013-10-24 14:31:34 -0700
commit	2fda84fe3ffa4d887e58b4d1717765a42a30b0f9 (patch)
tree	ed7b401310a80ba7168c7b5a371d2f35d30d5f1f /core
parent	08c1a42d7d9edef02a24a3bc5045b2dce035a93b (diff)
download	spark-2fda84fe3ffa4d887e58b4d1717765a42a30b0f9.tar.gz spark-2fda84fe3ffa4d887e58b4d1717765a42a30b0f9.tar.bz2 spark-2fda84fe3ffa4d887e58b4d1717765a42a30b0f9.zip