aboutsummaryrefslogtreecommitdiff
path: root/core
diff options
context:
space:
mode:
authorPatrick Wendell <pwendell@gmail.com>2013-10-24 10:52:25 -0700
committerPatrick Wendell <pwendell@gmail.com>2013-10-24 14:31:34 -0700
commit2fda84fe3ffa4d887e58b4d1717765a42a30b0f9 (patch)
treeed7b401310a80ba7168c7b5a371d2f35d30d5f1f /core
parent08c1a42d7d9edef02a24a3bc5045b2dce035a93b (diff)
downloadspark-2fda84fe3ffa4d887e58b4d1717765a42a30b0f9.tar.gz
spark-2fda84fe3ffa4d887e58b4d1717765a42a30b0f9.tar.bz2
spark-2fda84fe3ffa4d887e58b4d1717765a42a30b0f9.zip
Always use a shuffle
Diffstat (limited to 'core')
-rw-r--r--core/src/main/scala/org/apache/spark/rdd/RDD.scala22
1 files changed, 7 insertions, 15 deletions
diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala
index e2652f13c4..17bc2515f2 100644
--- a/core/src/main/scala/org/apache/spark/rdd/RDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala
@@ -268,22 +268,14 @@ abstract class RDD[T: ClassManifest](
/**
* Return a new RDD that has exactly numPartitions partitions.
*
- * Used to increase or decrease the level of parallelism in this RDD. By default, this will use
- * a shuffle to redistribute data. If you are shrinking the RDD into fewer partitions, you can
- * set skipShuffle = false to avoid a shuffle. Skipping shuffles is not supported when
- * increasing the number of partitions.
+ * Used to increase or decrease the level of parallelism in this RDD. This will use
+ * a shuffle to redistribute data.
*
- * Similar to `coalesce`, but shuffles by default, allowing you to call this safely even
- * if you don't know the number of partitions.
- */
- def repartition(numPartitions: Int, skipShuffle: Boolean = false): RDD[T] = {
- if (skipShuffle && numPartitions > this.partitions.size) {
- val msg = "repartition must grow %s from %s to %s partitions, cannot skip shuffle.".format(
- this.name, this.partitions.size, numPartitions
- )
- throw new IllegalArgumentException(msg)
- }
- coalesce(numPartitions, !skipShuffle)
+ * If you are decreasing the number of partitions in this RDD, consider using `coalesce`,
+ * which can avoid performing a shuffle.
+ */
+ def repartition(numPartitions: Int): RDD[T] = {
+ coalesce(numPartitions, true)
}
/**