diff options
author | Davies Liu <davies@databricks.com> | 2015-02-18 01:00:54 -0800 |
---|---|---|
committer | Reynold Xin <rxin@databricks.com> | 2015-02-18 01:00:54 -0800 |
commit | c1b6fa9838f9d26d60fab3b05a96649882e3dd5b (patch) | |
tree | 8a43cd7a1278b1c81b9f9340b1b45ffa26e9df50 | |
parent | de0dd6de2476c22be3f41f1bf0b3ef7ffeb60001 (diff) | |
download | spark-c1b6fa9838f9d26d60fab3b05a96649882e3dd5b.tar.gz spark-c1b6fa9838f9d26d60fab3b05a96649882e3dd5b.tar.bz2 spark-c1b6fa9838f9d26d60fab3b05a96649882e3dd5b.zip |
[SPARK-5878] fix DataFrame.repartition() in Python
Also add tests for distinct()
Author: Davies Liu <davies@databricks.com>
Closes #4667 from davies/repartition and squashes the following commits:
79059fd [Davies Liu] add test
cb4915e [Davies Liu] fix repartition
-rw-r--r-- | python/pyspark/sql/dataframe.py | 8 |
1 file changed, 7 insertions, 1 deletion
diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index 388033d385..52bd75bf8a 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -434,12 +434,18 @@ class DataFrame(object):
     def repartition(self, numPartitions):
         """
         Return a new :class:`DataFrame` that has exactly `numPartitions` partitions.
+
+        >>> df.repartition(10).rdd.getNumPartitions()
+        10
         """
-        return DataFrame(self._jdf.repartition(numPartitions, None), self.sql_ctx)
+        return DataFrame(self._jdf.repartition(numPartitions), self.sql_ctx)
 
     def distinct(self):
         """
         Return a new :class:`DataFrame` containing the distinct rows in this DataFrame.
+
+        >>> df.distinct().count()
+        2L
         """
         return DataFrame(self._jdf.distinct(), self.sql_ctx)
 