aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--python/pyspark/sql/readwriter.py6
-rw-r--r--sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala4
-rw-r--r--sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala10
3 files changed, 17 insertions, 3 deletions
diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py
index 9208a527d2..7d1f18611b 100644
--- a/python/pyspark/sql/readwriter.py
+++ b/python/pyspark/sql/readwriter.py
@@ -320,7 +320,8 @@ class DataFrameReader(object):
it uses the default value, ``UTF-8``.
:param quote: sets the single character used for escaping quoted values where the
separator can be part of the value. If None is set, it uses the default
- value, ``"``.
+ value, ``"``. If you would like to turn off quotations, you need to set an
+ empty string.
:param escape: sets the single character used for escaping quotes inside an already
quoted value. If None is set, it uses the default value, ``\``.
:param comment: sets the single character used for skipping lines beginning with this
@@ -804,7 +805,8 @@ class DataFrameWriter(object):
set, it uses the default value, ``,``.
:param quote: sets the single character used for escaping quoted values where the
separator can be part of the value. If None is set, it uses the default
- value, ``"``.
+ value, ``"``. If you would like to turn off quotations, you need to set an
+ empty string.
:param escape: sets the single character used for escaping quotes inside an already
quoted value. If None is set, it uses the default value, ``\``
:param escapeQuotes: A flag indicating whether values containing quotes should always
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala
index b248583d79..bb5fa2b99f 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala
@@ -370,7 +370,9 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging {
* <li>`encoding` (default `UTF-8`): decodes the CSV files by the given encoding
* type.</li>
* <li>`quote` (default `"`): sets the single character used for escaping quoted values where
- * the separator can be part of the value.</li>
+ * the separator can be part of the value. If you would like to turn off quotations, you need to
+ * set not `null` but an empty string. This behaviour is different from
+ * `com.databricks.spark.csv`.</li>
* <li>`escape` (default `\`): sets the single character used for escaping quotes inside
* an already quoted value.</li>
* <li>`comment` (default empty string): sets the single character used for skipping lines
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala
index bc95446387..f170065132 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala
@@ -655,4 +655,14 @@ class CSVSuite extends QueryTest with SharedSQLContext with SQLTestUtils {
assert(msg.contains("CSV data source does not support array<string> data type"))
}
}
+
+ test("SPARK-15585 turn off quotations") {
+ val cars = spark.read
+ .format("csv")
+ .option("header", "true")
+ .option("quote", "")
+ .load(testFile(carsUnbalancedQuotesFile))
+
+ verifyCars(cars, withHeader = true, checkValues = false)
+ }
}