aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--core/src/main/scala/org/apache/spark/rdd/RDD.scala22
1 files changed, 7 insertions, 15 deletions
diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala
index e2652f13c4..17bc2515f2 100644
--- a/core/src/main/scala/org/apache/spark/rdd/RDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala
@@ -268,22 +268,14 @@ abstract class RDD[T: ClassManifest](
/**
* Return a new RDD that has exactly numPartitions partitions.
*
- * Used to increase or decrease the level of parallelism in this RDD. By default, this will use
- * a shuffle to redistribute data. If you are shrinking the RDD into fewer partitions, you can
- * set skipShuffle = false to avoid a shuffle. Skipping shuffles is not supported when
- * increasing the number of partitions.
+ * Used to increase or decrease the level of parallelism in this RDD. This will use
+ * a shuffle to redistribute data.
*
- * Similar to `coalesce`, but shuffles by default, allowing you to call this safely even
- * if you don't know the number of partitions.
- */
- def repartition(numPartitions: Int, skipShuffle: Boolean = false): RDD[T] = {
- if (skipShuffle && numPartitions > this.partitions.size) {
- val msg = "repartition must grow %s from %s to %s partitions, cannot skip shuffle.".format(
- this.name, this.partitions.size, numPartitions
- )
- throw new IllegalArgumentException(msg)
- }
- coalesce(numPartitions, !skipShuffle)
+ * If you are decreasing the number of partitions in this RDD, consider using `coalesce`,
+ * which can avoid performing a shuffle.
+ */
+ def repartition(numPartitions: Int): RDD[T] = {
+ coalesce(numPartitions, true)
}
/**