diff options
author | Hossein <hossein@databricks.com> | 2016-02-19 14:46:56 -0800 |
---|---|---|
committer | Michael Armbrust <michael@databricks.com> | 2016-02-19 14:46:56 -0800 |
commit | 14844118b596a93dbc28b442a7ea2b58fa4df648 (patch) | |
tree | 552a7258b383d14d7e522f30d43c473ad680c611 | |
parent | dbb08cdd5ae320082cdbcc9cfb8155f5a9da8b8c (diff) | |
download | spark-14844118b596a93dbc28b442a7ea2b58fa4df648.tar.gz spark-14844118b596a93dbc28b442a7ea2b58fa4df648.tar.bz2 spark-14844118b596a93dbc28b442a7ea2b58fa4df648.zip |
[SPARK-13261][SQL] Expose maxCharactersPerColumn as a user configurable option
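This patch exposes `maxCharactersPerColumn` (read from the option key `maxCharsPerColumn`) and `maxColumns` as user-configurable options in the CSV data source. Both must be integers; a non-integer value raises an error. When unset, `maxColumns` keeps its previous value of 20480, while the default for `maxCharsPerColumn` changes from 100000 to 1000000.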
Author: Hossein <hossein@databricks.com>
Closes #11147 from falaki/SPARK-13261.
-rw-r--r-- | sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVOptions.scala | 17 |
1 file changed, 15 insertions, 2 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVOptions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVOptions.scala index 709daccbbe..bea8e97a9a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVOptions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVOptions.scala @@ -36,6 +36,19 @@ private[sql] class CSVOptions( } } + private def getInt(paramName: String, default: Int): Int = { + val paramValue = parameters.get(paramName) + paramValue match { + case None => default + case Some(value) => try { + value.toInt + } catch { + case e: NumberFormatException => + throw new RuntimeException(s"$paramName should be an integer. Found $value") + } + } + } + private def getBool(paramName: String, default: Boolean = false): Boolean = { val param = parameters.getOrElse(paramName, default.toString) if (param.toLowerCase == "true") { @@ -81,9 +94,9 @@ private[sql] class CSVOptions( name.map(CompressionCodecs.getCodecClassName) } - val maxColumns = 20480 + val maxColumns = getInt("maxColumns", 20480) - val maxCharsPerColumn = 100000 + val maxCharsPerColumn = getInt("maxCharsPerColumn", 1000000) val inputBufferSize = 128 |