From cb5d933d86ac4afd947874f1f1c31c7154cb8249 Mon Sep 17 00:00:00 2001 From: Takeshi YAMAMURO Date: Sat, 11 Jun 2016 15:12:21 -0700 Subject: [SPARK-15585][SQL] Add doc for turning off quotations ## What changes were proposed in this pull request? This PR adds documentation for turning off quotations, because this behavior is different from `com.databricks.spark.csv`. ## How was this patch tested? Checked the behavior when an empty string is set as the `quote` option in the CSV options. Author: Takeshi YAMAMURO Closes #13616 from maropu/SPARK-15585-2. --- python/pyspark/sql/readwriter.py | 6 ++++-- .../src/main/scala/org/apache/spark/sql/DataFrameReader.scala | 4 +++- .../apache/spark/sql/execution/datasources/csv/CSVSuite.scala | 10 ++++++++++ 3 files changed, 17 insertions(+), 3 deletions(-) diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py index 9208a527d2..7d1f18611b 100644 --- a/python/pyspark/sql/readwriter.py +++ b/python/pyspark/sql/readwriter.py @@ -320,7 +320,8 @@ class DataFrameReader(object): it uses the default value, ``UTF-8``. :param quote: sets the single character used for escaping quoted values where the separator can be part of the value. If None is set, it uses the default - value, ``"``. + value, ``"``. If you would like to turn off quotations, you need to set an + empty string. :param escape: sets the single character used for escaping quotes inside an already quoted value. If None is set, it uses the default value, ``\``. :param comment: sets the single character used for skipping lines beginning with this @@ -804,7 +805,8 @@ class DataFrameWriter(object): set, it uses the default value, ``,``. :param quote: sets the single character used for escaping quoted values where the separator can be part of the value. If None is set, it uses the default - value, ``"``. + value, ``"``. If you would like to turn off quotations, you need to set an + empty string. :param escape: sets the single character used for escaping quotes inside an already quoted value. 
If None is set, it uses the default value, ``\`` :param escapeQuotes: A flag indicating whether values containing quotes should always diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala index b248583d79..bb5fa2b99f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala @@ -370,7 +370,9 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging { *
  • `encoding` (default `UTF-8`): decodes the CSV files by the given encoding * type.
  • *
  • `quote` (default `"`): sets the single character used for escaping quoted values where - * the separator can be part of the value.
  • + * the separator can be part of the value. If you would like to turn off quotations, you need to + * set not `null` but an empty string. This behaviour is different from + * `com.databricks.spark.csv`. *
  • `escape` (default `\`): sets the single character used for escaping quotes inside * an already quoted value.
  • *
  • `comment` (default empty string): sets the single character used for skipping lines diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala index bc95446387..f170065132 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala @@ -655,4 +655,14 @@ class CSVSuite extends QueryTest with SharedSQLContext with SQLTestUtils { assert(msg.contains("CSV data source does not support array data type")) } } + + test("SPARK-15585 turn off quotations") { + val cars = spark.read + .format("csv") + .option("header", "true") + .option("quote", "") + .load(testFile(carsUnbalancedQuotesFile)) + + verifyCars(cars, withHeader = true, checkValues = false) + } } -- cgit v1.2.3