diff options
-rw-r--r-- | dev/deps/spark-deps-hadoop-2.2 | 2 | ||||
-rw-r--r-- | dev/deps/spark-deps-hadoop-2.3 | 2 | ||||
-rw-r--r-- | dev/deps/spark-deps-hadoop-2.4 | 2 | ||||
-rw-r--r-- | dev/deps/spark-deps-hadoop-2.6 | 2 | ||||
-rw-r--r-- | dev/deps/spark-deps-hadoop-2.7 | 2 | ||||
-rw-r--r-- | sql/core/pom.xml | 2 | ||||
-rw-r--r-- | sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVParser.scala | 1 | ||||
-rw-r--r-- | sql/core/src/test/resources/unescaped-quotes.csv | 2 | ||||
-rw-r--r-- | sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala | 12 |
9 files changed, 21 insertions, 6 deletions
diff --git a/dev/deps/spark-deps-hadoop-2.2 b/dev/deps/spark-deps-hadoop-2.2 index 2c24366cc3..2794b3d235 100644 --- a/dev/deps/spark-deps-hadoop-2.2 +++ b/dev/deps/spark-deps-hadoop-2.2 @@ -175,7 +175,7 @@ stax-api-1.0.1.jar stream-2.7.0.jar stringtemplate-3.2.1.jar super-csv-2.2.0.jar -univocity-parsers-1.5.6.jar +univocity-parsers-2.0.2.jar xbean-asm5-shaded-4.4.jar xmlenc-0.52.jar xz-1.0.jar diff --git a/dev/deps/spark-deps-hadoop-2.3 b/dev/deps/spark-deps-hadoop-2.3 index e9cb0d8f3e..4906fe9cfa 100644 --- a/dev/deps/spark-deps-hadoop-2.3 +++ b/dev/deps/spark-deps-hadoop-2.3 @@ -166,7 +166,7 @@ stax-api-1.0.1.jar stream-2.7.0.jar stringtemplate-3.2.1.jar super-csv-2.2.0.jar -univocity-parsers-1.5.6.jar +univocity-parsers-2.0.2.jar xbean-asm5-shaded-4.4.jar xmlenc-0.52.jar xz-1.0.jar diff --git a/dev/deps/spark-deps-hadoop-2.4 b/dev/deps/spark-deps-hadoop-2.4 index d8d1840da5..23ff5cfa2e 100644 --- a/dev/deps/spark-deps-hadoop-2.4 +++ b/dev/deps/spark-deps-hadoop-2.4 @@ -167,7 +167,7 @@ stax-api-1.0.1.jar stream-2.7.0.jar stringtemplate-3.2.1.jar super-csv-2.2.0.jar -univocity-parsers-1.5.6.jar +univocity-parsers-2.0.2.jar xbean-asm5-shaded-4.4.jar xmlenc-0.52.jar xz-1.0.jar diff --git a/dev/deps/spark-deps-hadoop-2.6 b/dev/deps/spark-deps-hadoop-2.6 index 8beede1e38..9b5a5643f3 100644 --- a/dev/deps/spark-deps-hadoop-2.6 +++ b/dev/deps/spark-deps-hadoop-2.6 @@ -173,7 +173,7 @@ stax-api-1.0.1.jar stream-2.7.0.jar stringtemplate-3.2.1.jar super-csv-2.2.0.jar -univocity-parsers-1.5.6.jar +univocity-parsers-2.0.2.jar xbean-asm5-shaded-4.4.jar xercesImpl-2.9.1.jar xmlenc-0.52.jar diff --git a/dev/deps/spark-deps-hadoop-2.7 b/dev/deps/spark-deps-hadoop-2.7 index a9d814f944..1dca2fc55a 100644 --- a/dev/deps/spark-deps-hadoop-2.7 +++ b/dev/deps/spark-deps-hadoop-2.7 @@ -174,7 +174,7 @@ stax-api-1.0.1.jar stream-2.7.0.jar stringtemplate-3.2.1.jar super-csv-2.2.0.jar -univocity-parsers-1.5.6.jar +univocity-parsers-2.0.2.jar xbean-asm5-shaded-4.4.jar xercesImpl-2.9.1.jar xmlenc-0.52.jar diff --git a/sql/core/pom.xml b/sql/core/pom.xml index 708670b292..8b1017042c 100644 --- a/sql/core/pom.xml +++ b/sql/core/pom.xml @@ -39,7 +39,7 @@ <dependency> <groupId>com.univocity</groupId> <artifactId>univocity-parsers</artifactId> - <version>1.5.6</version> + <version>2.0.2</version> <type>jar</type> </dependency> <dependency> diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVParser.scala index 5570b2c173..c3d863f547 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVParser.scala @@ -47,6 +47,7 @@ private[sql] abstract class CsvReader(params: CSVOptions, headers: Seq[String]) settings.setMaxColumns(params.maxColumns) settings.setNullValue(params.nullValue) settings.setMaxCharsPerColumn(params.maxCharsPerColumn) + settings.setParseUnescapedQuotesUntilDelimiter(true) if (headers != null) settings.setHeaders(headers: _*) new CsvParser(settings) diff --git a/sql/core/src/test/resources/unescaped-quotes.csv b/sql/core/src/test/resources/unescaped-quotes.csv new file mode 100644 index 0000000000..7c68055575 --- /dev/null +++ b/sql/core/src/test/resources/unescaped-quotes.csv @@ -0,0 +1,2 @@ +"a"b,ccc,ddd +ab,cc"c,ddd" diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala index 58d9d69d9a..9baae80f15 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala @@ -45,6 +45,7 @@ class CSVSuite extends QueryTest with SharedSQLContext with SQLTestUtils { private val disableCommentsFile = "disable_comments.csv" private val boolFile = "bool.csv" private val simpleSparseFile = "simple_sparse.csv" + private val unescapedQuotesFile = "unescaped-quotes.csv" private def testFile(fileName: String): String = { Thread.currentThread().getContextClassLoader.getResource(fileName).toString @@ -140,6 +141,17 @@ class CSVSuite extends QueryTest with SharedSQLContext with SQLTestUtils { verifyCars(cars, withHeader = true) } + test("parse unescaped quotes with maxCharsPerColumn") { + val rows = sqlContext.read + .format("csv") + .option("maxCharsPerColumn", "4") + .load(testFile(unescapedQuotesFile)) + + val expectedRows = Seq(Row("\"a\"b", "ccc", "ddd"), Row("ab", "cc\"c", "ddd\"")) + + checkAnswer(rows, expectedRows) + } + test("bad encoding name") { val exception = intercept[UnsupportedCharsetException] { sqlContext |