aboutsummaryrefslogtreecommitdiff
path: root/sql
diff options
context:
space:
mode:
author hyukjinkwon <gurwls223@gmail.com> 2016-05-05 11:26:40 -0700
committer Reynold Xin <rxin@databricks.com> 2016-05-05 11:26:40 -0700
commit ac12b35d31ef1d1663511bf6ae826a9cc0278f20 (patch)
tree a357cd59bb59dd43abfc3347372c1b5c1684d861 /sql
parent 55cc1c991a9e39efb14177a948b09b7909e53e25 (diff)
download spark-ac12b35d31ef1d1663511bf6ae826a9cc0278f20.tar.gz
spark-ac12b35d31ef1d1663511bf6ae826a9cc0278f20.tar.bz2
spark-ac12b35d31ef1d1663511bf6ae826a9cc0278f20.zip
[SPARK-15148][SQL] Upgrade Univocity library from 2.0.2 to 2.1.0
## What changes were proposed in this pull request? https://issues.apache.org/jira/browse/SPARK-15148 Mainly, it improves performance by roughly 30%-40% according to the [release note](https://github.com/uniVocity/univocity-parsers/releases/tag/v2.1.0). The details of the purpose are described in the JIRA. This PR upgrades the Univocity library from 2.0.2 to 2.1.0. ## How was this patch tested? Existing tests should cover this. Author: hyukjinkwon <gurwls223@gmail.com> Closes #12923 from HyukjinKwon/SPARK-15148.
Diffstat (limited to 'sql')
-rw-r--r-- sql/core/pom.xml 2
-rw-r--r-- sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVParser.scala 4
2 files changed, 3 insertions, 3 deletions
diff --git a/sql/core/pom.xml b/sql/core/pom.xml
index e1071ebfb5..184fa2f6bd 100644
--- a/sql/core/pom.xml
+++ b/sql/core/pom.xml
@@ -39,7 +39,7 @@
<dependency>
<groupId>com.univocity</groupId>
<artifactId>univocity-parsers</artifactId>
- <version>2.0.2</version>
+ <version>2.1.0</version>
<type>jar</type>
</dependency>
<dependency>
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVParser.scala
index c3d863f547..ae797a1e07 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVParser.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVParser.scala
@@ -20,7 +20,7 @@ package org.apache.spark.sql.execution.datasources.csv
import java.io.{ByteArrayOutputStream, OutputStreamWriter, StringReader}
import java.nio.charset.StandardCharsets
-import com.univocity.parsers.csv.{CsvParser, CsvParserSettings, CsvWriter, CsvWriterSettings}
+import com.univocity.parsers.csv._
import org.apache.spark.internal.Logging
@@ -47,7 +47,7 @@ private[sql] abstract class CsvReader(params: CSVOptions, headers: Seq[String])
settings.setMaxColumns(params.maxColumns)
settings.setNullValue(params.nullValue)
settings.setMaxCharsPerColumn(params.maxCharsPerColumn)
- settings.setParseUnescapedQuotesUntilDelimiter(true)
+ settings.setUnescapedQuoteHandling(UnescapedQuoteHandling.STOP_AT_DELIMITER)
if (headers != null) settings.setHeaders(headers: _*)
new CsvParser(settings)