diff options
author | hyukjinkwon <gurwls223@gmail.com> | 2016-05-05 11:26:40 -0700 |
---|---|---|
committer | Reynold Xin <rxin@databricks.com> | 2016-05-05 11:26:40 -0700 |
commit | ac12b35d31ef1d1663511bf6ae826a9cc0278f20 (patch) | |
tree | a357cd59bb59dd43abfc3347372c1b5c1684d861 /sql | |
parent | 55cc1c991a9e39efb14177a948b09b7909e53e25 (diff) | |
download | spark-ac12b35d31ef1d1663511bf6ae826a9cc0278f20.tar.gz spark-ac12b35d31ef1d1663511bf6ae826a9cc0278f20.tar.bz2 spark-ac12b35d31ef1d1663511bf6ae826a9cc0278f20.zip |
[SPARK-15148][SQL] Upgrade Univocity library from 2.0.2 to 2.1.0
## What changes were proposed in this pull request?
https://issues.apache.org/jira/browse/SPARK-15148
Mainly, it improves performance by roughly 30%-40% according to the [release note](https://github.com/uniVocity/univocity-parsers/releases/tag/v2.1.0). The details of the purpose are described in the JIRA.
This PR upgrades Univocity library from 2.0.2 to 2.1.0.
## How was this patch tested?
Existing tests should cover this.
Author: hyukjinkwon <gurwls223@gmail.com>
Closes #12923 from HyukjinKwon/SPARK-15148.
Diffstat (limited to 'sql')
-rw-r--r-- | sql/core/pom.xml | 2 | ||||
-rw-r--r-- | sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVParser.scala | 4 |
2 files changed, 3 insertions, 3 deletions
diff --git a/sql/core/pom.xml b/sql/core/pom.xml index e1071ebfb5..184fa2f6bd 100644 --- a/sql/core/pom.xml +++ b/sql/core/pom.xml @@ -39,7 +39,7 @@ <dependency> <groupId>com.univocity</groupId> <artifactId>univocity-parsers</artifactId> - <version>2.0.2</version> + <version>2.1.0</version> <type>jar</type> </dependency> <dependency> diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVParser.scala index c3d863f547..ae797a1e07 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVParser.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.execution.datasources.csv import java.io.{ByteArrayOutputStream, OutputStreamWriter, StringReader} import java.nio.charset.StandardCharsets -import com.univocity.parsers.csv.{CsvParser, CsvParserSettings, CsvWriter, CsvWriterSettings} +import com.univocity.parsers.csv._ import org.apache.spark.internal.Logging @@ -47,7 +47,7 @@ private[sql] abstract class CsvReader(params: CSVOptions, headers: Seq[String]) settings.setMaxColumns(params.maxColumns) settings.setNullValue(params.nullValue) settings.setMaxCharsPerColumn(params.maxCharsPerColumn) - settings.setParseUnescapedQuotesUntilDelimiter(true) + settings.setUnescapedQuoteHandling(UnescapedQuoteHandling.STOP_AT_DELIMITER) if (headers != null) settings.setHeaders(headers: _*) new CsvParser(settings) |