author | hyukjinkwon <gurwls223@gmail.com> | 2016-04-01 22:51:47 -0700 |
---|---|---|
committer | Reynold Xin <rxin@databricks.com> | 2016-04-01 22:51:47 -0700 |
commit | d7982a3a9aa804e7e3a2004335e7f314867a5f8a (patch) | |
tree | d9c7604c13525a96c564e34c51b6e70648bc7bdf /sql | |
parent | f414154418c2291448954b9f0890d592b2d823ae (diff) | |
[MINOR][SQL] Fix comment style and correct several style nits in CSV data source
## What changes were proposed in this pull request?
While preparing another PR (which turned out not to be an issue after all), I corrected some style nits along the way.
So, I dropped the original changes and kept only the coding style corrections.
- According to the [scala-style-guide#documentation-style](https://github.com/databricks/scala-style-guide#documentation-style), ScalaDoc-style comments are discouraged in favor of Javadoc style:
>```scala
>/** This is a correct one-liner, short description. */
>
>/**
> * This is correct multi-line JavaDoc comment. And
> * this is my second line, and if I keep typing, this would be
> * my third line.
> */
>
>/** In Spark, we don't use the ScalaDoc style so this
> * is not correct.
> */
>```
- Double newlines between consecutive methods were removed. According to [scala-style-guide#blank-lines-vertical-whitespace](https://github.com/databricks/scala-style-guide#blank-lines-vertical-whitespace), a single blank line should appear:
>Between consecutive members (or initializers) of a class: fields, constructors, methods, nested classes, static initializers, instance initializers.
- Removed useless parentheses in tests.
- Used `mapPartitions()` instead of `mapPartitionsWithIndex()`, since the partition index was never used (see the sketch after this list).
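To illustrate the last point, here is a minimal sketch of the `mapPartitions` change; the object and method names (`MapPartitionsSketch`, `dropHeaderLines`) and the `hasHeader` flag are hypothetical stand-ins, not code from this patch:

```scala
import org.apache.spark.rdd.RDD

object MapPartitionsSketch {
  // Hypothetical helper: the old code called mapPartitionsWithIndex and
  // pattern-matched `case (split, iter)` only to ignore `split`.
  // When the partition index is unused, mapPartitions is the simpler API.
  def dropHeaderLines(file: RDD[String], firstLine: String, hasHeader: Boolean): RDD[String] =
    file.mapPartitions { iter =>
      // Same per-partition logic as before, minus the unused index.
      if (hasHeader) iter.filterNot(_ == firstLine) else iter
    }
}
```

The rewrite also drops the explicit `preservesPartitioning = true` argument; that flag only matters when a pair RDD has a partitioner to preserve, which is not the case for raw text lines.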
## How was this patch tested?
Existing unit tests were run, and `dev/run_tests` was used for the style checks.
Author: hyukjinkwon <gurwls223@gmail.com>
Closes #12109 from HyukjinKwon/SPARK-14271.
Diffstat (limited to 'sql')
4 files changed, 48 insertions, 49 deletions
```diff
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVParser.scala
index 7cf1b4c662..5570b2c173 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVParser.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVParser.scala
@@ -25,11 +25,11 @@ import com.univocity.parsers.csv.{CsvParser, CsvParserSettings, CsvWriter, CsvWr
 import org.apache.spark.internal.Logging
 
 /**
-  * Read and parse CSV-like input
-  *
-  * @param params Parameters object
-  * @param headers headers for the columns
-  */
+ * Read and parse CSV-like input
+ *
+ * @param params Parameters object
+ * @param headers headers for the columns
+ */
 private[sql] abstract class CsvReader(params: CSVOptions, headers: Seq[String]) {
 
   protected lazy val parser: CsvParser = {
@@ -54,11 +54,11 @@ private[sql] abstract class CsvReader(params: CSVOptions, headers: Seq[String])
 }
 
 /**
-  * Converts a sequence of string to CSV string
-  *
-  * @param params Parameters object for configuration
-  * @param headers headers for columns
-  */
+ * Converts a sequence of string to CSV string
+ *
+ * @param params Parameters object for configuration
+ * @param headers headers for columns
+ */
 private[sql] class LineCsvWriter(params: CSVOptions, headers: Seq[String]) extends Logging {
   private val writerSettings = new CsvWriterSettings
   private val format = writerSettings.getFormat
@@ -90,18 +90,18 @@ private[sql] class LineCsvWriter(params: CSVOptions, headers: Seq[String]) exten
 }
 
 /**
-  * Parser for parsing a line at a time. Not efficient for bulk data.
-  *
-  * @param params Parameters object
-  */
+ * Parser for parsing a line at a time. Not efficient for bulk data.
+ *
+ * @param params Parameters object
+ */
 private[sql] class LineCsvReader(params: CSVOptions)
   extends CsvReader(params, null) {
   /**
-    * parse a line
-    *
-    * @param line a String with no newline at the end
-    * @return array of strings where each string is a field in the CSV record
-    */
+   * parse a line
+   *
+   * @param line a String with no newline at the end
+   * @return array of strings where each string is a field in the CSV record
+   */
   def parseLine(line: String): Array[String] = {
     parser.beginParsing(new StringReader(line))
     val parsed = parser.parseNext()
@@ -111,12 +111,12 @@ private[sql] class LineCsvReader(params: CSVOptions)
 }
 
 /**
-  * Parser for parsing lines in bulk. Use this when efficiency is desired.
-  *
-  * @param iter iterator over lines in the file
-  * @param params Parameters object
-  * @param headers headers for the columns
-  */
+ * Parser for parsing lines in bulk. Use this when efficiency is desired.
+ *
+ * @param iter iterator over lines in the file
+ * @param params Parameters object
+ * @param headers headers for the columns
+ */
 private[sql] class BulkCsvReader(
     iter: Iterator[String],
     params: CSVOptions,
@@ -128,9 +128,9 @@ private[sql] class BulkCsvReader(
   private var nextRecord = parser.parseNext()
 
   /**
-    * get the next parsed line.
-    * @return array of strings where each string is a field in the CSV record
-    */
+   * get the next parsed line.
+   * @return array of strings where each string is a field in the CSV record
+   */
   override def next(): Array[String] = {
     val curRecord = nextRecord
     if(curRecord != null) {
@@ -146,11 +146,11 @@ private[sql] class BulkCsvReader(
 }
 
 /**
-  * A Reader that "reads" from a sequence of lines. Spark's textFile method removes newlines at
-  * end of each line Univocity parser requires a Reader that provides access to the data to be
-  * parsed and needs the newlines to be present
-  * @param iter iterator over RDD[String]
-  */
+ * A Reader that "reads" from a sequence of lines. Spark's textFile method removes newlines at
+ * end of each line Univocity parser requires a Reader that provides access to the data to be
+ * parsed and needs the newlines to be present
+ * @param iter iterator over RDD[String]
+ */
 private class StringIteratorReader(val iter: Iterator[String]) extends java.io.Reader {
 
   private var next: Long = 0
@@ -159,9 +159,9 @@ private class StringIteratorReader(val iter: Iterator[String]) extends java.io.R
   private var str: String = null   // current string from iter
 
   /**
-    * fetch next string from iter, if done with current one
-    * pretend there is a new line at the end of every string we get from from iter
-    */
+   * fetch next string from iter, if done with current one
+   * pretend there is a new line at the end of every string we get from from iter
+   */
   private def refill(): Unit = {
     if (length == next) {
       if (iter.hasNext) {
@@ -175,8 +175,8 @@ private class StringIteratorReader(val iter: Iterator[String]) extends java.io.R
   }
 
   /**
-    * read the next character, if at end of string pretend there is a new line
-    */
+   * read the next character, if at end of string pretend there is a new line
+   */
   override def read(): Int = {
     refill()
     if (next >= length) {
@@ -189,8 +189,8 @@ private class StringIteratorReader(val iter: Iterator[String]) extends java.io.R
   }
 
   /**
-    * read from str into cbuf
-    */
+   * read from str into cbuf
+   */
   override def read(cbuf: Array[Char], off: Int, len: Int): Int = {
     refill()
     var n = 0
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVRelation.scala
index b47328a3dd..54fb03b6d3 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVRelation.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVRelation.scala
@@ -42,12 +42,12 @@ object CSVRelation extends Logging {
       firstLine: String,
       params: CSVOptions): RDD[Array[String]] = {
     // If header is set, make sure firstLine is materialized before sending to executors.
-    file.mapPartitionsWithIndex({
-      case (split, iter) => new BulkCsvReader(
+    file.mapPartitions { iter =>
+      new BulkCsvReader(
         if (params.headerFlag) iter.filterNot(_ == firstLine) else iter,
         params, headers = header)
-    }, true)
+    }
   }
 
   def csvParser(
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/DefaultSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/DefaultSource.scala
index 6b6add48cd..c0d6f6fbf7 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/DefaultSource.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/DefaultSource.scala
@@ -164,7 +164,6 @@ class DefaultSource extends FileFormat with DataSourceRegister {
     }
   }
 
-
   private def baseRdd(
       sqlContext: SQLContext,
       options: CSVOptions,
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVParserSuite.scala
index c0c38c6787..dc54883277 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVParserSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVParserSuite.scala
@@ -46,7 +46,7 @@ class CSVParserSuite extends SparkFunSuite {
     var numRead = 0
     var n = 0
     do { // try to fill cbuf
-      var off = 0
+      var off = 0
       var len = cbuf.length
 
       n = reader.read(cbuf, off, len)
@@ -81,7 +81,7 @@ class CSVParserSuite extends SparkFunSuite {
   test("Regular case") {
     val input = List("This is a string", "This is another string", "Small", "", "\"quoted\"")
     val read = readAll(input.toIterator)
-    assert(read === input.mkString("\n") ++ ("\n"))
+    assert(read === input.mkString("\n") ++ "\n")
   }
 
   test("Empty iter") {
@@ -93,12 +93,12 @@ class CSVParserSuite extends SparkFunSuite {
   test("Embedded new line") {
     val input = List("This is a string", "This is another string", "Small\n", "", "\"quoted\"")
     val read = readAll(input.toIterator)
-    assert(read === input.mkString("\n") ++ ("\n"))
+    assert(read === input.mkString("\n") ++ "\n")
  }
 
   test("Buffer Regular case") {
     val input = List("This is a string", "This is another string", "Small", "", "\"quoted\"")
-    val output = input.mkString("\n") ++ ("\n")
+    val output = input.mkString("\n") ++ "\n"
     for(i <- 1 to output.length + 5) {
       val read = readBufAll(input.toIterator, i)
       assert(read === output)
@@ -116,7 +116,7 @@ class CSVParserSuite extends SparkFunSuite {
   test("Buffer Embedded new line") {
     val input = List("This is a string", "This is another string", "Small\n", "", "\"quoted\"")
-    val output = input.mkString("\n") ++ ("\n")
+    val output = input.mkString("\n") ++ "\n"
     for(i <- 1 to output.length + 5) {
       val read = readBufAll(input.toIterator, 1)
       assert(read === output)
```
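As context for the `StringIteratorReader` comment in the diff above: `textFile` strips the trailing newline from each line, while Univocity's parser reads from a `java.io.Reader` and needs those newlines back. Below is a minimal standalone sketch of that idea; `LinesReader` is an illustrative name, not the actual Spark class (which additionally tracks offsets with `Long` counters):

```scala
import java.io.Reader

// Illustrative sketch: a Reader over an Iterator[String] that pretends
// each string ends with '\n', since Spark's textFile strips newlines.
class LinesReader(iter: Iterator[String]) extends Reader {
  private var current: String = ""  // current line, with '\n' re-appended
  private var pos: Int = 0          // read position within `current`

  // Advance to the next line (plus a virtual newline) once the
  // current one is exhausted.
  private def refill(): Unit = {
    while (pos >= current.length && iter.hasNext) {
      current = iter.next() + "\n"
      pos = 0
    }
  }

  override def read(): Int = {
    refill()
    if (pos >= current.length) -1
    else { val c = current.charAt(pos); pos += 1; c }
  }

  override def read(cbuf: Array[Char], off: Int, len: Int): Int = {
    refill()
    if (pos >= current.length) {
      -1  // iterator exhausted
    } else {
      val n = math.min(len, current.length - pos)
      current.getChars(pos, pos + n, cbuf, off)
      pos += n
      n
    }
  }

  override def close(): Unit = ()
}
```

Feeding such a Reader to `CsvParser.beginParsing` lets the parser consume a whole partition as if it were reading the original file, which is what `BulkCsvReader` relies on.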