diff options
author | Reynold Xin <rxin@databricks.com> | 2015-06-29 00:22:44 -0700 |
---|---|---|
committer | Reynold Xin <rxin@databricks.com> | 2015-06-29 00:22:44 -0700 |
commit | 660c6cec75dc165cf5d62cdc1b0951bdb93df365 (patch) | |
tree | d13542b8c07ae207c06da318ee3d9657e60cc696 /python | |
parent | ac2e17b01c0843d928a363d2cc4faf57ec8c8b47 (diff) | |
download | spark-660c6cec75dc165cf5d62cdc1b0951bdb93df365.tar.gz spark-660c6cec75dc165cf5d62cdc1b0951bdb93df365.tar.bz2 spark-660c6cec75dc165cf5d62cdc1b0951bdb93df365.zip |
[SPARK-8698] partitionBy in Python DataFrame reader/writer interface should not default to empty tuple.
Author: Reynold Xin <rxin@databricks.com>
Closes #7079 from rxin/SPARK-8698 and squashes the following commits:
8513e1c [Reynold Xin] [SPARK-8698] partitionBy in Python DataFrame reader/writer interface should not default to empty tuple.
Diffstat (limited to 'python')
-rw-r--r-- | python/pyspark/sql/readwriter.py | 21 |
1 files changed, 13 insertions, 8 deletions
diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py index c4cc62e82a..882a03090e 100644 --- a/python/pyspark/sql/readwriter.py +++ b/python/pyspark/sql/readwriter.py @@ -270,12 +270,11 @@ class DataFrameWriter(object): """ if len(cols) == 1 and isinstance(cols[0], (list, tuple)): cols = cols[0] - if len(cols) > 0: - self._jwrite = self._jwrite.partitionBy(_to_seq(self._sqlContext._sc, cols)) + self._jwrite = self._jwrite.partitionBy(_to_seq(self._sqlContext._sc, cols)) return self @since(1.4) - def save(self, path=None, format=None, mode=None, partitionBy=(), **options): + def save(self, path=None, format=None, mode=None, partitionBy=None, **options): """Saves the contents of the :class:`DataFrame` to a data source. The data source is specified by the ``format`` and a set of ``options``. @@ -295,7 +294,9 @@ class DataFrameWriter(object): >>> df.write.mode('append').parquet(os.path.join(tempfile.mkdtemp(), 'data')) """ - self.partitionBy(partitionBy).mode(mode).options(**options) + self.mode(mode).options(**options) + if partitionBy is not None: + self.partitionBy(partitionBy) if format is not None: self.format(format) if path is None: @@ -315,7 +316,7 @@ class DataFrameWriter(object): self._jwrite.mode("overwrite" if overwrite else "append").insertInto(tableName) @since(1.4) - def saveAsTable(self, name, format=None, mode=None, partitionBy=(), **options): + def saveAsTable(self, name, format=None, mode=None, partitionBy=None, **options): """Saves the content of the :class:`DataFrame` as the specified table. In the case the table already exists, behavior of this function depends on the @@ -334,7 +335,9 @@ class DataFrameWriter(object): :param partitionBy: names of partitioning columns :param options: all other string options """ - self.partitionBy(partitionBy).mode(mode).options(**options) + self.mode(mode).options(**options) + if partitionBy is not None: + self.partitionBy(partitionBy) if format is not None: self.format(format) self._jwrite.saveAsTable(name) @@ -356,7 +359,7 @@ class DataFrameWriter(object): self.mode(mode)._jwrite.json(path) @since(1.4) - def parquet(self, path, mode=None, partitionBy=()): + def parquet(self, path, mode=None, partitionBy=None): """Saves the content of the :class:`DataFrame` in Parquet format at the specified path. :param path: the path in any Hadoop supported file system @@ -370,7 +373,9 @@ class DataFrameWriter(object): >>> df.write.parquet(os.path.join(tempfile.mkdtemp(), 'data')) """ - self.partitionBy(partitionBy).mode(mode) + self.mode(mode) + if partitionBy is not None: + self.partitionBy(partitionBy) self._jwrite.parquet(path) @since(1.4) |