author     Takeshi YAMAMURO <linguin.m.s@gmail.com>    2016-06-05 23:35:04 -0700
committer  Reynold Xin <rxin@databricks.com>           2016-06-05 23:35:04 -0700
commit     b7e8d1cb3ce932ba4a784be59744af8a8ef027ce (patch)
tree       d0c996546e71b0fa5d46cb9ba32d4acc99eff971 /sql/core
parent     79268aa461abd237bc4f96a7d31457c98e11798c (diff)
[SPARK-15585][SQL] Fix NULL handling to match spark-csv behaviour
## What changes were proposed in this pull request?

This PR fixes the behaviour of `format("csv").option("quote", null)` to match that of spark-csv: a null quote value is now treated the same as an empty string and mapped to `'\u0000'`, instead of silently falling back to the default quote character. It also explicitly sets default values for the CSV options in Python.

## How was this patch tested?

Added tests in CSVSuite.

Author: Takeshi YAMAMURO <linguin.m.s@gmail.com>

Closes #13372 from maropu/SPARK-15585.
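To make the fixed behaviour concrete, here is a minimal usage sketch. It assumes an existing `SparkSession` named `spark` and a hypothetical input file `cars-unbalanced-quotes.csv`; it mirrors the test added in CSVSuite rather than quoting it verbatim.

```scala
// Minimal sketch: passing null for the "quote" option is now treated like an
// empty string and mapped to '\u0000', rather than falling back to the
// default quote character '"'.
val cars = spark.read
  .format("csv")
  .option("header", "true")
  .option("quote", null)                 // previously behaved like option("quote", "\"")
  .load("cars-unbalanced-quotes.csv")    // hypothetical file containing unbalanced quotes

cars.show()
```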
Diffstat (limited to 'sql/core')
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVOptions.scala  11
-rw-r--r--  sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala     11
2 files changed, 16 insertions(+), 6 deletions(-)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVOptions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVOptions.scala
index 9f4ce8358b..044ada2607 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVOptions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVOptions.scala
@@ -30,8 +30,7 @@ private[sql] class CSVOptions(@transient private val parameters: Map[String, Str
val paramValue = parameters.get(paramName)
paramValue match {
case None => default
- case Some(null) => default
- case Some(value) if value.length == 0 => '\u0000'
+ case Some(value) if value == null || value.length == 0 => '\u0000'
case Some(value) if value.length == 1 => value.charAt(0)
case _ => throw new RuntimeException(s"$paramName cannot be more than one character")
}
@@ -52,12 +51,12 @@ private[sql] class CSVOptions(@transient private val parameters: Map[String, Str
}
private def getBool(paramName: String, default: Boolean = false): Boolean = {
- val param = parameters.getOrElse(paramName, default.toString)
- if (param == null) {
+ val paramValue = parameters.getOrElse(paramName, default.toString)
+ if (paramValue == null) {
default
- } else if (param.toLowerCase == "true") {
+ } else if (paramValue.toLowerCase == "true") {
true
- } else if (param.toLowerCase == "false") {
+ } else if (paramValue.toLowerCase == "false") {
false
} else {
throw new Exception(s"$paramName flag can be true or false")
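
For readers who want to trace the new option parsing outside of Spark, below is a self-contained re-creation of the patched lookup. The object name `CsvOptionSketch` and the sample map are invented for this illustration; only the pattern match reflects the diff above.

```scala
// Standalone illustration of the patched null/empty handling for single-character CSV options.
object CsvOptionSketch {
  private def getChar(parameters: Map[String, String], paramName: String, default: Char): Char = {
    parameters.get(paramName) match {
      case None => default                                            // option was never set
      case Some(value) if value == null || value.isEmpty => '\u0000'  // null now behaves like ""
      case Some(value) if value.length == 1 => value.charAt(0)
      case _ => throw new RuntimeException(s"$paramName cannot be more than one character")
    }
  }

  def main(args: Array[String]): Unit = {
    val withNullQuote: Map[String, String] = Map("quote" -> null)
    println(getChar(withNullQuote, "quote", '"').toInt)  // 0, i.e. '\u0000'
    println(getChar(Map.empty, "quote", '"'))            // " (the default, option absent)
  }
}
```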
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala
index bc95446387..b26fcea759 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala
@@ -655,4 +655,15 @@ class CSVSuite extends QueryTest with SharedSQLContext with SQLTestUtils {
assert(msg.contains("CSV data source does not support array<string> data type"))
}
}
+
+ test("SPARK-15585 set null at quote") {
+ val cars = spark.read
+ .format("csv")
+ .option("header", "true")
+ .option("quote", null)
+ .load(testFile(carsUnbalancedQuotesFile))
+
+ verifyCars(cars, withHeader = true, checkValues = false)
+ }
+
}