From d7982a3a9aa804e7e3a2004335e7f314867a5f8a Mon Sep 17 00:00:00 2001
From: hyukjinkwon
Date: Fri, 1 Apr 2016 22:51:47 -0700
Subject: [MINOR][SQL] Fix comment style and correct several style nits in CSV
 data source

## What changes were proposed in this pull request?

While trying to create a PR (which turned out not to be an issue in the end), I corrected some style nits. So, I removed the changes except for some coding style corrections.

- According to the [scala-style-guide#documentation-style](https://github.com/databricks/scala-style-guide#documentation-style), ScalaDoc-style comments are discouraged.

>```scala
>/** This is a correct one-liner, short description. */
>
>/**
> * This is correct multi-line JavaDoc comment. And
> * this is my second line, and if I keep typing, this would be
> * my third line.
> */
>
>/** In Spark, we don't use the ScalaDoc style so this
> * is not correct.
> */
>```

- Double newlines between consecutive methods were removed. According to [scala-style-guide#blank-lines-vertical-whitespace](https://github.com/databricks/scala-style-guide#blank-lines-vertical-whitespace), a single blank line should appear (see the first sketch appended after the patch)

>Between consecutive members (or initializers) of a class: fields, constructors, methods, nested classes, static initializers, instance initializers.

- Removed useless parentheses in tests.

- Used `mapPartitions` instead of `mapPartitionsWithIndex()`, since the partition index was unused (see the second sketch appended after the patch).

## How was this patch tested?

Existing unit tests were run, along with `dev/run_tests` for the style checks.

Author: hyukjinkwon

Closes #12109 from HyukjinKwon/SPARK-14271.
---
 .../sql/execution/datasources/csv/CSVParser.scala  | 80 +++++++++++-----------
 .../execution/datasources/csv/CSVRelation.scala    |  6 +-
 .../execution/datasources/csv/DefaultSource.scala  |  1 -
 .../execution/datasources/csv/CSVParserSuite.scala | 10 +--
 4 files changed, 48 insertions(+), 49 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVParser.scala
index 7cf1b4c662..5570b2c173 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVParser.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVParser.scala
@@ -25,11 +25,11 @@ import com.univocity.parsers.csv.{CsvParser, CsvParserSettings, CsvWriter, CsvWr
 import org.apache.spark.internal.Logging
 
 /**
-  * Read and parse CSV-like input
-  *
-  * @param params Parameters object
-  * @param headers headers for the columns
-  */
+ * Read and parse CSV-like input
+ *
+ * @param params Parameters object
+ * @param headers headers for the columns
+ */
 private[sql] abstract class CsvReader(params: CSVOptions, headers: Seq[String]) {
 
   protected lazy val parser: CsvParser = {
@@ -54,11 +54,11 @@ private[sql] abstract class CsvReader(params: CSVOptions, headers: Seq[String])
 }
 
 /**
-  * Converts a sequence of string to CSV string
-  *
-  * @param params Parameters object for configuration
-  * @param headers headers for columns
-  */
+ * Converts a sequence of string to CSV string
+ *
+ * @param params Parameters object for configuration
+ * @param headers headers for columns
+ */
 private[sql] class LineCsvWriter(params: CSVOptions, headers: Seq[String]) extends Logging {
   private val writerSettings = new CsvWriterSettings
   private val format = writerSettings.getFormat
@@ -90,18 +90,18 @@ private[sql] class LineCsvWriter(params: CSVOptions, headers: Seq[String]) exten
 }
 
 /**
-  * Parser for parsing a line at a time. Not efficient for bulk data.
-  *
-  * @param params Parameters object
-  */
+ * Parser for parsing a line at a time. Not efficient for bulk data.
+ *
+ * @param params Parameters object
+ */
 private[sql] class LineCsvReader(params: CSVOptions)
   extends CsvReader(params, null) {
   /**
-    * parse a line
-    *
-    * @param line a String with no newline at the end
-    * @return array of strings where each string is a field in the CSV record
-    */
+   * parse a line
+   *
+   * @param line a String with no newline at the end
+   * @return array of strings where each string is a field in the CSV record
+   */
   def parseLine(line: String): Array[String] = {
     parser.beginParsing(new StringReader(line))
     val parsed = parser.parseNext()
@@ -111,12 +111,12 @@ private[sql] class LineCsvReader(params: CSVOptions)
 }
 
 /**
-  * Parser for parsing lines in bulk. Use this when efficiency is desired.
-  *
-  * @param iter iterator over lines in the file
-  * @param params Parameters object
-  * @param headers headers for the columns
-  */
+ * Parser for parsing lines in bulk. Use this when efficiency is desired.
+ *
+ * @param iter iterator over lines in the file
+ * @param params Parameters object
+ * @param headers headers for the columns
+ */
 private[sql] class BulkCsvReader(
     iter: Iterator[String],
     params: CSVOptions,
@@ -128,9 +128,9 @@ private[sql] class BulkCsvReader(
   private var nextRecord = parser.parseNext()
 
   /**
-    * get the next parsed line.
-    * @return array of strings where each string is a field in the CSV record
-    */
+   * get the next parsed line.
+   * @return array of strings where each string is a field in the CSV record
+   */
   override def next(): Array[String] = {
     val curRecord = nextRecord
     if(curRecord != null) {
@@ -146,11 +146,11 @@ private[sql] class BulkCsvReader(
 }
 
 /**
-  * A Reader that "reads" from a sequence of lines. Spark's textFile method removes newlines at
-  * end of each line Univocity parser requires a Reader that provides access to the data to be
-  * parsed and needs the newlines to be present
-  * @param iter iterator over RDD[String]
-  */
+ * A Reader that "reads" from a sequence of lines. Spark's textFile method removes newlines at
+ * end of each line Univocity parser requires a Reader that provides access to the data to be
+ * parsed and needs the newlines to be present
+ * @param iter iterator over RDD[String]
+ */
 private class StringIteratorReader(val iter: Iterator[String]) extends java.io.Reader {
 
   private var next: Long = 0
@@ -159,9 +159,9 @@ private class StringIteratorReader(val iter: Iterator[String]) extends java.io.R
   private var str: String = null // current string from iter
 
   /**
-    * fetch next string from iter, if done with current one
-    * pretend there is a new line at the end of every string we get from from iter
-    */
+   * fetch next string from iter, if done with current one
+   * pretend there is a new line at the end of every string we get from from iter
+   */
   private def refill(): Unit = {
     if (length == next) {
       if (iter.hasNext) {
@@ -175,8 +175,8 @@ private class StringIteratorReader(val iter: Iterator[String]) extends java.io.R
   }
 
   /**
-    * read the next character, if at end of string pretend there is a new line
-    */
+   * read the next character, if at end of string pretend there is a new line
+   */
   override def read(): Int = {
     refill()
     if (next >= length) {
@@ -189,8 +189,8 @@ private class StringIteratorReader(val iter: Iterator[String]) extends java.io.R
   }
 
   /**
-    * read from str into cbuf
-    */
+   * read from str into cbuf
+   */
   override def read(cbuf: Array[Char], off: Int, len: Int): Int = {
     refill()
     var n = 0
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVRelation.scala
index b47328a3dd..54fb03b6d3 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVRelation.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVRelation.scala
@@ -42,12 +42,12 @@ object CSVRelation extends Logging {
       firstLine: String,
       params: CSVOptions): RDD[Array[String]] = {
     // If header is set, make sure firstLine is materialized before sending to executors.
-    file.mapPartitionsWithIndex({
-      case (split, iter) => new BulkCsvReader(
+    file.mapPartitions { iter =>
+      new BulkCsvReader(
         if (params.headerFlag) iter.filterNot(_ == firstLine) else iter,
         params,
         headers = header)
-    }, true)
+    }
   }
 
   def csvParser(
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/DefaultSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/DefaultSource.scala
index 6b6add48cd..c0d6f6fbf7 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/DefaultSource.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/DefaultSource.scala
@@ -164,7 +164,6 @@ class DefaultSource extends FileFormat with DataSourceRegister {
     }
   }
 
-
   private def baseRdd(
       sqlContext: SQLContext,
       options: CSVOptions,
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVParserSuite.scala
index c0c38c6787..dc54883277 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVParserSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVParserSuite.scala
@@ -46,7 +46,7 @@ class CSVParserSuite extends SparkFunSuite {
     var numRead = 0
     var n = 0
     do { // try to fill cbuf
-      var off = 0 
+      var off = 0
       var len = cbuf.length
       n = reader.read(cbuf, off, len)
 
@@ -81,7 +81,7 @@ class CSVParserSuite extends SparkFunSuite {
   test("Regular case") {
     val input = List("This is a string", "This is another string", "Small", "", "\"quoted\"")
     val read = readAll(input.toIterator)
-    assert(read === input.mkString("\n") ++ ("\n"))
+    assert(read === input.mkString("\n") ++ "\n")
   }
 
   test("Empty iter") {
@@ -93,12 +93,12 @@ class CSVParserSuite extends SparkFunSuite {
   test("Embedded new line") {
     val input = List("This is a string", "This is another string", "Small\n", "", "\"quoted\"")
     val read = readAll(input.toIterator)
-    assert(read === input.mkString("\n") ++ ("\n"))
+    assert(read === input.mkString("\n") ++ "\n")
   }
 
   test("Buffer Regular case") {
     val input = List("This is a string", "This is another string", "Small", "", "\"quoted\"")
-    val output = input.mkString("\n") ++ ("\n")
+    val output = input.mkString("\n") ++ "\n"
     for(i <- 1 to output.length + 5) {
       val read = readBufAll(input.toIterator, i)
       assert(read === output)
@@ -116,7 +116,7 @@ class CSVParserSuite extends SparkFunSuite {
 
   test("Buffer Embedded new line") {
     val input = List("This is a string", "This is another string", "Small\n", "", "\"quoted\"")
-    val output = input.mkString("\n") ++ ("\n")
+    val output = input.mkString("\n") ++ "\n"
    for(i <- 1 to output.length + 5) {
       val read = readBufAll(input.toIterator, 1)
       assert(read === output)
-- 
cgit v1.2.3
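
As a minimal sketch of the comment-style and blank-line rules above, using a hypothetical `CsvStyleExample` class that is not part of this patch:

```scala
/** One-liner descriptions use a single-line JavaDoc-style comment. */
class CsvStyleExample(options: Map[String, String]) {

  /**
   * Multi-line comments align continuation asterisks under the first `*`,
   * JavaDoc style; the two-space ScalaDoc indentation is not used in Spark.
   */
  def delimiter: String = options.getOrElse("delimiter", ",")

  // A single blank line separates consecutive members; the double blank
  // line removed from DefaultSource.scala above violated this rule.
  def quote: String = options.getOrElse("quote", "\"")
}
```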
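
And a minimal sketch of the `mapPartitions` change, with a hypothetical helper object and a simple `filterNot` body standing in for the `BulkCsvReader` construction; since an `RDD[String]` produced by `textFile` carries no partitioner, dropping the `preservesPartitioning = true` flag changes nothing:

```scala
import org.apache.spark.rdd.RDD

object MapPartitionsSketch {
  // Before: the partition index `split` was bound but never used.
  def withIndex(file: RDD[String], firstLine: String): RDD[String] =
    file.mapPartitionsWithIndex({
      case (split, iter) => iter.filterNot(_ == firstLine)
    }, true)

  // After: the same per-partition behavior without the unused index.
  def withoutIndex(file: RDD[String], firstLine: String): RDD[String] =
    file.mapPartitions { iter =>
      iter.filterNot(_ == firstLine)
    }
}
```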