aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Davies Liu <davies@databricks.com> 2015-12-01 22:41:48 -0800
committer Davies Liu <davies.liu@gmail.com> 2015-12-01 22:41:48 -0800
commit 4375eb3f48fc7ae90caf6c21a0d3ab0b66bf4efa (patch)
tree bb8558193ac5a828fcbc7d3d5397d756ada8269b
parent 0f37d1d7ed7f6e34f98f2a3c274918de29e7a1d7 (diff)
download spark-4375eb3f48fc7ae90caf6c21a0d3ab0b66bf4efa.tar.gz
spark-4375eb3f48fc7ae90caf6c21a0d3ab0b66bf4efa.tar.bz2
spark-4375eb3f48fc7ae90caf6c21a0d3ab0b66bf4efa.zip
[SPARK-12090] [PYSPARK] consider shuffle in coalesce()
Author: Davies Liu <davies@databricks.com>

Closes #10090 from davies/fix_coalesce.
-rw-r--r-- python/pyspark/rdd.py 2
1 file changed, 1 insertion, 1 deletion
diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py
index 4b4d59647b..00bb9a62e9 100644
--- a/python/pyspark/rdd.py
+++ b/python/pyspark/rdd.py
@@ -2015,7 +2015,7 @@ class RDD(object):
>>> sc.parallelize([1, 2, 3, 4, 5], 3).coalesce(1).glom().collect()
[[1, 2, 3, 4, 5]]
"""
- jrdd = self._jrdd.coalesce(numPartitions)
+ jrdd = self._jrdd.coalesce(numPartitions, shuffle)
return RDD(jrdd, self.ctx, self._jrdd_deserializer)
def zip(self, other):