aboutsummaryrefslogtreecommitdiff
path: root/python/pyspark/sql/readwriter.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/pyspark/sql/readwriter.py')
-rw-r--r-- python/pyspark/sql/readwriter.py 8
1 files changed, 8 insertions, 0 deletions
diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py
index 438662bb15..bae9e69df8 100644
--- a/python/pyspark/sql/readwriter.py
+++ b/python/pyspark/sql/readwriter.py
@@ -162,6 +162,14 @@ class DataFrameReader(object):
(e.g. 00012)
* ``allowBackslashEscapingAnyCharacter`` (default ``false``): allows accepting quoting \
of all characters using the backslash quoting mechanism
+ * ``mode`` (default ``PERMISSIVE``): selects the mode for dealing with corrupt records \
+ during parsing.
+ * ``PERMISSIVE`` : sets other fields to ``null`` when it meets a corrupted \
+ record, and puts the malformed string into a new field configured by \
+ ``spark.sql.columnNameOfCorruptRecord``. When a schema is set by the user, it sets \
+ ``null`` for extra fields.
+ * ``DROPMALFORMED`` : ignores corrupted records entirely.
+ * ``FAILFAST`` : throws an exception when it meets corrupted records.
>>> df1 = sqlContext.read.json('python/test_support/sql/people.json')
>>> df1.dtypes