aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHossein <hossein@databricks.com>2016-02-19 14:46:56 -0800
committerMichael Armbrust <michael@databricks.com>2016-02-19 14:46:56 -0800
commit14844118b596a93dbc28b442a7ea2b58fa4df648 (patch)
tree552a7258b383d14d7e522f30d43c473ad680c611
parentdbb08cdd5ae320082cdbcc9cfb8155f5a9da8b8c (diff)
downloadspark-14844118b596a93dbc28b442a7ea2b58fa4df648.tar.gz
spark-14844118b596a93dbc28b442a7ea2b58fa4df648.tar.bz2
spark-14844118b596a93dbc28b442a7ea2b58fa4df648.zip
[SPARK-13261][SQL] Expose maxCharactersPerColumn as a user configurable option
This patch exposes `maxCharactersPerColumn` (option key `maxCharsPerColumn`) and `maxColumns` to users of the CSV data source. Author: Hossein <hossein@databricks.com> Closes #11147 from falaki/SPARK-13261.
-rw-r--r--sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVOptions.scala17
1 files changed, 15 insertions, 2 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVOptions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVOptions.scala
index 709daccbbe..bea8e97a9a 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVOptions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVOptions.scala
@@ -36,6 +36,19 @@ private[sql] class CSVOptions(
}
}
+ private def getInt(paramName: String, default: Int): Int = {
+ val paramValue = parameters.get(paramName)
+ paramValue match {
+ case None => default
+ case Some(value) => try {
+ value.toInt
+ } catch {
+ case e: NumberFormatException =>
+ throw new RuntimeException(s"$paramName should be an integer. Found $value")
+ }
+ }
+ }
+
private def getBool(paramName: String, default: Boolean = false): Boolean = {
val param = parameters.getOrElse(paramName, default.toString)
if (param.toLowerCase == "true") {
@@ -81,9 +94,9 @@ private[sql] class CSVOptions(
name.map(CompressionCodecs.getCodecClassName)
}
- val maxColumns = 20480
+ val maxColumns = getInt("maxColumns", 20480)
- val maxCharsPerColumn = 100000
+ val maxCharsPerColumn = getInt("maxCharsPerColumn", 1000000)
val inputBufferSize = 128