diff options
author | Takeshi YAMAMURO <linguin.m.s@gmail.com> | 2016-06-05 23:35:04 -0700 |
---|---|---|
committer | Reynold Xin <rxin@databricks.com> | 2016-06-05 23:35:04 -0700 |
commit | b7e8d1cb3ce932ba4a784be59744af8a8ef027ce (patch) | |
tree | d0c996546e71b0fa5d46cb9ba32d4acc99eff971 /sql/core | |
parent | 79268aa461abd237bc4f96a7d31457c98e11798c (diff) | |
download | spark-b7e8d1cb3ce932ba4a784be59744af8a8ef027ce.tar.gz spark-b7e8d1cb3ce932ba4a784be59744af8a8ef027ce.tar.bz2 spark-b7e8d1cb3ce932ba4a784be59744af8a8ef027ce.zip |
[SPARK-15585][SQL] Fix NULL handling to match spark-csv behaviour
## What changes were proposed in this pull request?
This PR fixes the behaviour of `format("csv").option("quote", null)` so that it is consistent with the behaviour of spark-csv.
It also explicitly sets default values for the CSV options in Python.
## How was this patch tested?
Added tests in CSVSuite.
Author: Takeshi YAMAMURO <linguin.m.s@gmail.com>
Closes #13372 from maropu/SPARK-15585.
Diffstat (limited to 'sql/core')
-rw-r--r-- | sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVOptions.scala | 11 | ||||
-rw-r--r-- | sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala | 11 |
2 files changed, 16 insertions, 6 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVOptions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVOptions.scala index 9f4ce8358b..044ada2607 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVOptions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVOptions.scala @@ -30,8 +30,7 @@ private[sql] class CSVOptions(@transient private val parameters: Map[String, Str val paramValue = parameters.get(paramName) paramValue match { case None => default - case Some(null) => default - case Some(value) if value.length == 0 => '\u0000' + case Some(value) if value == null || value.length == 0 => '\u0000' case Some(value) if value.length == 1 => value.charAt(0) case _ => throw new RuntimeException(s"$paramName cannot be more than one character") } @@ -52,12 +51,12 @@ private[sql] class CSVOptions(@transient private val parameters: Map[String, Str } private def getBool(paramName: String, default: Boolean = false): Boolean = { - val param = parameters.getOrElse(paramName, default.toString) - if (param == null) { + val paramValue = parameters.getOrElse(paramName, default.toString) + if (paramValue == null) { default - } else if (param.toLowerCase == "true") { + } else if (paramValue.toLowerCase == "true") { true - } else if (param.toLowerCase == "false") { + } else if (paramValue.toLowerCase == "false") { false } else { throw new Exception(s"$paramName flag can be true or false") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala index bc95446387..b26fcea759 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala @@ -655,4 +655,15 @@ class CSVSuite extends QueryTest with 
SharedSQLContext with SQLTestUtils { assert(msg.contains("CSV data source does not support array<string> data type")) } } + + test("SPARK-15585 set null at quote") { + val cars = spark.read + .format("csv") + .option("header", "true") + .option("quote", null) + .load(testFile(carsUnbalancedQuotesFile)) + + verifyCars(cars, withHeader = true, checkValues = false) + } + } |