about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Cheolsoo Park <cheolsoop@netflix.com>, 2015-06-29 00:13:39 -0700
committer: Reynold Xin <rxin@databricks.com>, 2015-06-29 00:21:31 -0700
commit: 9d9c4b476d194f8f0102c4bf0fc263de9d1fb3be (patch)
tree: f3172a87730cb47ddb1fc1725ff1432f82f1bc52
parent: e1bbf1a080296e4d3d692379f06b7db0d0629573 (diff)
download: spark-9d9c4b476d194f8f0102c4bf0fc263de9d1fb3be.tar.gz
spark-9d9c4b476d194f8f0102c4bf0fc263de9d1fb3be.tar.bz2
spark-9d9c4b476d194f8f0102c4bf0fc263de9d1fb3be.zip
[SPARK-8355] [SQL] Python DataFrameReader/Writer should mirror Scala
I compared the PySpark DataFrameReader/Writer against the Scala ones. The `option` function is missing from both the reader and the writer, but everything else appears to match. I added `option` to the reader and the writer and updated the `pyspark-sql` test.

Author: Cheolsoo Park <cheolsoop@netflix.com>

Closes #7078 from piaozhexiu/SPARK-8355 and squashes the following commits:

c63d419 [Cheolsoo Park] Fix version
524e0aa [Cheolsoo Park] Add option function to df reader and writer

(cherry picked from commit ac2e17b01c0843d928a363d2cc4faf57ec8c8b47)
Signed-off-by: Reynold Xin <rxin@databricks.com>
-rw-r--r-- python/pyspark/sql/readwriter.py | 14
-rw-r--r-- python/pyspark/sql/tests.py | 1
2 files changed, 15 insertions, 0 deletions
diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py
index 1b7bc0f9a1..c4cc62e82a 100644
--- a/python/pyspark/sql/readwriter.py
+++ b/python/pyspark/sql/readwriter.py
@@ -73,6 +73,13 @@ class DataFrameReader(object):
self._jreader = self._jreader.schema(jschema)
return self
+ @since(1.5)
+ def option(self, key, value):
+ """Adds an input option for the underlying data source.
+ """
+ self._jreader = self._jreader.option(key, value)
+ return self
+
@since(1.4)
def options(self, **options):
"""Adds input options for the underlying data source.
@@ -235,6 +242,13 @@ class DataFrameWriter(object):
self._jwrite = self._jwrite.format(source)
return self
+ @since(1.5)
+ def option(self, key, value):
+ """Adds an output option for the underlying data source.
+ """
+ self._jwrite = self._jwrite.option(key, value)
+ return self
+
@since(1.4)
def options(self, **options):
"""Adds output options for the underlying data source.
diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py
index 5c2589089b..f90277697d 100644
--- a/python/pyspark/sql/tests.py
+++ b/python/pyspark/sql/tests.py
@@ -549,6 +549,7 @@ class SQLTests(ReusedPySparkTestCase):
self.assertEqual(sorted(df.collect()), sorted(actual.collect()))
df.write.mode("overwrite").options(noUse="this options will not be used in save.")\
+ .option("noUse", "this option will not be used in save.")\
.format("json").save(path=tmpPath)
actual =\
self.sqlCtx.read.format("json")\