diff options
author | Liang-Chi Hsieh <simonh@tw.ibm.com> | 2016-04-22 09:19:36 -0700 |
---|---|---|
committer | Davies Liu <davies.liu@gmail.com> | 2016-04-22 09:19:36 -0700 |
commit | 056883e070bd258d193fd4d783ab608a19b86c36 (patch) | |
tree | 7072659655b15b2ed60c809efcd7af5c2b002e88 /python/pyspark/sql | |
parent | 5bed13a872dc06d099c810cf4caa15b4f27a1e7c (diff) | |
download | spark-056883e070bd258d193fd4d783ab608a19b86c36.tar.gz spark-056883e070bd258d193fd4d783ab608a19b86c36.tar.bz2 spark-056883e070bd258d193fd4d783ab608a19b86c36.zip |
[SPARK-13266] [SQL] None read/writer options were not translated to "null"
## What changes were proposed in this pull request?
In Python, the `option` and `options` method of `DataFrameReader` and `DataFrameWriter` were sending the string "None" instead of `null` when passed `None`, therefore making it impossible to send an actual `null`. This fixes that problem.
This is based on #11305 from mathieulongtin.
## How was this patch tested?
Added test to readwriter.py.
Author: Liang-Chi Hsieh <simonh@tw.ibm.com>
Author: mathieu longtin <mathieu.longtin@nuance.com>
Closes #12494 from viirya/py-df-none-option.
Diffstat (limited to 'python/pyspark/sql')
-rw-r--r-- | python/pyspark/sql/readwriter.py | 9 | ||||
-rw-r--r-- | python/pyspark/sql/tests.py | 3 |
2 files changed, 9 insertions, 3 deletions
diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py index 6c809d1139..e39cf1ae03 100644 --- a/python/pyspark/sql/readwriter.py +++ b/python/pyspark/sql/readwriter.py @@ -33,10 +33,13 @@ __all__ = ["DataFrameReader", "DataFrameWriter"] def to_str(value): """ - A wrapper over str(), but convert bool values to lower case string + A wrapper over str(), but converts bool values to lower case strings. + If None is given, just returns None, instead of converting it to string "None". """ if isinstance(value, bool): return str(value).lower() + elif value is None: + return value else: return str(value) @@ -398,7 +401,7 @@ class DataFrameWriter(object): def option(self, key, value): """Adds an output option for the underlying data source. """ - self._jwrite = self._jwrite.option(key, value) + self._jwrite = self._jwrite.option(key, to_str(value)) return self @since(1.4) @@ -406,7 +409,7 @@ class DataFrameWriter(object): """Adds output options for the underlying data source. """ for k in options: - self._jwrite = self._jwrite.option(k, options[k]) + self._jwrite = self._jwrite.option(k, to_str(options[k])) return self @since(1.4) diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py index 3b1b2948e9..42e283073f 100644 --- a/python/pyspark/sql/tests.py +++ b/python/pyspark/sql/tests.py @@ -859,6 +859,9 @@ class SQLTests(ReusedPySparkTestCase): self.assertEqual(sorted(df.collect()), sorted(actual.collect())) self.sqlCtx.sql("SET spark.sql.sources.default=" + defaultDataSourceName) + csvpath = os.path.join(tempfile.mkdtemp(), 'data') + df.write.option('quote', None).format('csv').save(csvpath) + shutil.rmtree(tmpPath) def test_save_and_load_builder(self): |