diff options
author | Takeshi YAMAMURO <linguin.m.s@gmail.com> | 2016-06-11 15:12:21 -0700 |
---|---|---|
committer | Reynold Xin <rxin@databricks.com> | 2016-06-11 15:12:21 -0700 |
commit | cb5d933d86ac4afd947874f1f1c31c7154cb8249 (patch) | |
tree | 967e4aaea38134cd9423a63dc22e9846cc74dfaa | |
parent | ad102af169c7344b30d3b84aa16452fcdc22542c (diff) | |
download | spark-cb5d933d86ac4afd947874f1f1c31c7154cb8249.tar.gz spark-cb5d933d86ac4afd947874f1f1c31c7154cb8249.tar.bz2 spark-cb5d933d86ac4afd947874f1f1c31c7154cb8249.zip |
[SPARK-15585][SQL] Add doc for turning off quotations
## What changes were proposed in this pull request?
This PR adds documentation explaining how to turn off quotations (by setting the `quote` option to an empty string), because this behavior is different from `com.databricks.spark.csv`.
## How was this patch tested?
Verified the behavior when an empty string is set for the `quote` CSV option (see the new `SPARK-15585 turn off quotations` test in `CSVSuite`).
Author: Takeshi YAMAMURO <linguin.m.s@gmail.com>
Closes #13616 from maropu/SPARK-15585-2.
3 files changed, 17 insertions, 3 deletions
diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py index 9208a527d2..7d1f18611b 100644 --- a/python/pyspark/sql/readwriter.py +++ b/python/pyspark/sql/readwriter.py @@ -320,7 +320,8 @@ class DataFrameReader(object): it uses the default value, ``UTF-8``. :param quote: sets the single character used for escaping quoted values where the separator can be part of the value. If None is set, it uses the default - value, ``"``. + value, ``"``. If you would like to turn off quotations, you need to set an + empty string. :param escape: sets the single character used for escaping quotes inside an already quoted value. If None is set, it uses the default value, ``\``. :param comment: sets the single character used for skipping lines beginning with this @@ -804,7 +805,8 @@ class DataFrameWriter(object): set, it uses the default value, ``,``. :param quote: sets the single character used for escaping quoted values where the separator can be part of the value. If None is set, it uses the default - value, ``"``. + value, ``"``. If you would like to turn off quotations, you need to set an + empty string. :param escape: sets the single character used for escaping quotes inside an already quoted value. 
If None is set, it uses the default value, ``\`` :param escapeQuotes: A flag indicating whether values containing quotes should always diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala index b248583d79..bb5fa2b99f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala @@ -370,7 +370,9 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging { * <li>`encoding` (default `UTF-8`): decodes the CSV files by the given encoding * type.</li> * <li>`quote` (default `"`): sets the single character used for escaping quoted values where - * the separator can be part of the value.</li> + * the separator can be part of the value. If you would like to turn off quotations, you need to + * set not `null` but an empty string. This behaviour is different from + * `com.databricks.spark.csv`.</li> * <li>`escape` (default `\`): sets the single character used for escaping quotes inside * an already quoted value.</li> * <li>`comment` (default empty string): sets the single character used for skipping lines diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala index bc95446387..f170065132 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala @@ -655,4 +655,14 @@ class CSVSuite extends QueryTest with SharedSQLContext with SQLTestUtils { assert(msg.contains("CSV data source does not support array<string> data type")) } } + + test("SPARK-15585 turn off quotations") { + val cars = spark.read + .format("csv") + .option("header", "true") + .option("quote", "") + .load(testFile(carsUnbalancedQuotesFile)) + + 
verifyCars(cars, withHeader = true, checkValues = false) + } } |