author | hyukjinkwon <gurwls223@gmail.com> | 2016-04-01 22:51:47 -0700 |
---|---|---|
committer | Reynold Xin <rxin@databricks.com> | 2016-04-01 22:51:47 -0700 |
commit | d7982a3a9aa804e7e3a2004335e7f314867a5f8a (patch) | |
tree | d9c7604c13525a96c564e34c51b6e70648bc7bdf /sql | |
parent | f414154418c2291448954b9f0890d592b2d823ae (diff) | |
[MINOR][SQL] Fix comment style and correct several style nits in CSV data source
## What changes were proposed in this pull request?
While preparing another PR (which turned out not to be an issue after all), I corrected some style nits along the way.
So, I dropped the original changes and kept only the coding style corrections.
- According to the [scala-style-guide#documentation-style](https://github.com/databricks/scala-style-guide#documentation-style), ScalaDoc-style comments are discouraged in favor of Javadoc style:
>```scala
>/** This is a correct one-liner, short description. */
>
>/**
> * This is correct multi-line JavaDoc comment. And
> * this is my second line, and if I keep typing, this would be
> * my third line.
> */
>
>/** In Spark, we don't use the ScalaDoc style so this
> * is not correct.
> */
>```
- Double newlines between consecutive methods were removed. According to [scala-style-guide#blank-lines-vertical-whitespace](https://github.com/databricks/scala-style-guide#blank-lines-vertical-whitespace), a single blank line should appear:
>Between consecutive members (or initializers) of a class: fields, constructors, methods, nested classes, static initializers, instance initializers.
- Removed useless parentheses in tests.
- Used `mapPartitions()` instead of `mapPartitionsWithIndex()`, since the partition index was never used (see the sketch after this list).
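To illustrate the last point, here is a minimal sketch of the `mapPartitions` change; the object and method names (`MapPartitionsSketch`, `dropHeaderLines`) and the `hasHeader` flag are hypothetical stand-ins, not code from this patch:

```scala
import org.apache.spark.rdd.RDD

object MapPartitionsSketch {
  // Hypothetical helper: the old code called mapPartitionsWithIndex and
  // pattern-matched `case (split, iter)` only to ignore `split`.
  // When the partition index is unused, mapPartitions is the simpler API.
  def dropHeaderLines(file: RDD[String], firstLine: String, hasHeader: Boolean): RDD[String] =
    file.mapPartitions { iter =>
      // Same per-partition logic as before, minus the unused index.
      if (hasHeader) iter.filterNot(_ == firstLine) else iter
    }
}
```

The rewrite also drops the explicit `preservesPartitioning = true` argument; that flag only matters when a pair RDD has a partitioner to preserve, which is not the case for raw text lines.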
## How was this patch tested?
Existing unit tests were run, and `dev/run_tests` was used for the style checks.
Author: hyukjinkwon <gurwls223@gmail.com>
Closes #12109 from HyukjinKwon/SPARK-14271.
Diffstat (limited to 'sql')
4 files changed, 48 insertions, 49 deletions
```diff
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVParser.scala
index 7cf1b4c662..5570b2c173 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVParser.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVParser.scala
@@ -25,11 +25,11 @@ import com.univocity.parsers.csv.{CsvParser, CsvParserSettings, CsvWriter, CsvWr
 import org.apache.spark.internal.Logging
 
 /**
-  * Read and parse CSV-like input
-  *
-  * @param params Parameters object
-  * @param headers headers for the columns
-  */
+ * Read and parse CSV-like input
+ *
+ * @param params Parameters object
+ * @param headers headers for the columns
+ */
 private[sql] abstract class CsvReader(params: CSVOptions, headers: Seq[String]) {
 
   protected lazy val parser: CsvParser = {
@@ -54,11 +54,11 @@ private[sql] abstract class CsvReader(params: CSVOptions, headers: Seq[String])
 }
 
 /**
-  * Converts a sequence of string to CSV string
-  *
-  * @param params Parameters object for configuration
-  * @param headers headers for columns
-  */
+ * Converts a sequence of string to CSV string
+ *
+ * @param params Parameters object for configuration
+ * @param headers headers for columns
+ */
 private[sql] class LineCsvWriter(params: CSVOptions, headers: Seq[String]) extends Logging {
   private val writerSettings = new CsvWriterSettings
   private val format = writerSettings.getFormat
@@ -90,18 +90,18 @@ private[sql] class LineCsvWriter(params: CSVOptions, headers: Seq[String]) exten
 }
 
 /**
-  * Parser for parsing a line at a time. Not efficient for bulk data.
-  *
-  * @param params Parameters object
-  */
+ * Parser for parsing a line at a time. Not efficient for bulk data.
+ *
+ * @param params Parameters object
+ */
 private[sql] class LineCsvReader(params: CSVOptions)
   extends CsvReader(params, null) {
   /**
-    * parse a line
-    *
-    * @param line a String with no newline at the end
-    * @return array of strings where each string is a field in the CSV record
-    */
+   * parse a line
+   *
+   * @param line a String with no newline at the end
+   * @return array of strings where each string is a field in the CSV record
+   */
   def parseLine(line: String): Array[String] = {
     parser.beginParsing(new StringReader(line))
     val parsed = parser.parseNext()
@@ -111,12 +111,12 @@ private[sql] class LineCsvReader(params: CSVOptions)
 }
 
 /**
-  * Parser for parsing lines in bulk. Use this when efficiency is desired.
-  *
-  * @param iter iterator over lines in the file
-  * @param params Parameters object
-  * @param headers headers for the columns
-  */
+ * Parser for parsing lines in bulk. Use this when efficiency is desired.
+ *
+ * @param iter iterator over lines in the file
+ * @param params Parameters object
+ * @param headers headers for the columns
+ */
 private[sql] class BulkCsvReader(
     iter: Iterator[String],
     params: CSVOptions,
@@ -128,9 +128,9 @@ private[sql] class BulkCsvReader(
   private var nextRecord = parser.parseNext()
 
   /**
-    * get the next parsed line.
-    * @return array of strings where each string is a field in the CSV record
-    */
+   * get the next parsed line.
+   * @return array of strings where each string is a field in the CSV record
+   */
   override def next(): Array[String] = {
     val curRecord = nextRecord
     if(curRecord != null) {
@@ -146,11 +146,11 @@ private[sql] class BulkCsvReader(
 }
 
 /**
-  * A Reader that "reads" from a sequence of lines. Spark's textFile method removes newlines at
-  * end of each line Univocity parser requires a Reader that provides access to the data to be
-  * parsed and needs the newlines to be present
-  * @param iter iterator over RDD[String]
-  */
+ * A Reader that "reads" from a sequence of lines. Spark's textFile method removes newlines at
+ * end of each line Univocity parser requires a Reader that provides access to the data to be
+ * parsed and needs the newlines to be present
+ * @param iter iterator over RDD[String]
+ */
 private class StringIteratorReader(val iter: Iterator[String]) extends java.io.Reader {
 
   private var next: Long = 0
@@ -159,9 +159,9 @@ private class StringIteratorReader(val iter: Iterator[String]) extends java.io.R
   private var str: String = null   // current string from iter
 
   /**
-    * fetch next string from iter, if done with current one
-    * pretend there is a new line at the end of every string we get from from iter
-    */
+   * fetch next string from iter, if done with current one
+   * pretend there is a new line at the end of every string we get from from iter
+   */
   private def refill(): Unit = {
     if (length == next) {
       if (iter.hasNext) {
@@ -175,8 +175,8 @@ private class StringIteratorReader(val iter: Iterator[String]) extends java.io.R
   }
 
   /**
-    * read the next character, if at end of string pretend there is a new line
-    */
+   * read the next character, if at end of string pretend there is a new line
+   */
   override def read(): Int = {
     refill()
     if (next >= length) {
@@ -189,8 +189,8 @@ private class StringIteratorReader(val iter: Iterator[String]) extends java.io.R
   }
 
   /**
-    * read from str into cbuf
-    */
+   * read from str into cbuf
+   */
   override def read(cbuf: Array[Char], off: Int, len: Int): Int = {
     refill()
     var n = 0
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVRelation.scala
index b47328a3dd..54fb03b6d3 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVRelation.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVRelation.scala
@@ -42,12 +42,12 @@ object CSVRelation extends Logging {
       firstLine: String,
       params: CSVOptions): RDD[Array[String]] = {
     // If header is set, make sure firstLine is materialized before sending to executors.
-    file.mapPartitionsWithIndex({
-      case (split, iter) => new BulkCsvReader(
+    file.mapPartitions { iter =>
+      new BulkCsvReader(
         if (params.headerFlag) iter.filterNot(_ == firstLine) else iter,
         params, headers = header)
-    }, true)
+    }
   }
 
   def csvParser(
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/DefaultSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/DefaultSource.scala
index 6b6add48cd..c0d6f6fbf7 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/DefaultSource.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/DefaultSource.scala
@@ -164,7 +164,6 @@ class DefaultSource extends FileFormat with DataSourceRegister {
     }
   }
 
-
   private def baseRdd(
       sqlContext: SQLContext,
       options: CSVOptions,
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVParserSuite.scala
index c0c38c6787..dc54883277 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVParserSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVParserSuite.scala
@@ -46,7 +46,7 @@ class CSVParserSuite extends SparkFunSuite {
     var numRead = 0
     var n = 0
     do { // try to fill cbuf
-      var off = 0
+      var off = 0
       var len = cbuf.length
 
       n = reader.read(cbuf, off, len)
@@ -81,7 +81,7 @@ class CSVParserSuite extends SparkFunSuite {
   test("Regular case") {
     val input = List("This is a string", "This is another string", "Small", "", "\"quoted\"")
     val read = readAll(input.toIterator)
-    assert(read === input.mkString("\n") ++ ("\n"))
+    assert(read === input.mkString("\n") ++ "\n")
   }
 
   test("Empty iter") {
@@ -93,12 +93,12 @@ class CSVParserSuite extends SparkFunSuite {
   test("Embedded new line") {
     val input = List("This is a string", "This is another string", "Small\n", "", "\"quoted\"")
     val read = readAll(input.toIterator)
-    assert(read === input.mkString("\n") ++ ("\n"))
+    assert(read === input.mkString("\n") ++ "\n")
  }
 
   test("Buffer Regular case") {
     val input = List("This is a string", "This is another string", "Small", "", "\"quoted\"")
-    val output = input.mkString("\n") ++ ("\n")
+    val output = input.mkString("\n") ++ "\n"
     for(i <- 1 to output.length + 5) {
       val read = readBufAll(input.toIterator, i)
       assert(read === output)
@@ -116,7 +116,7 @@ class CSVParserSuite extends SparkFunSuite {
   test("Buffer Embedded new line") {
     val input = List("This is a string", "This is another string", "Small\n", "", "\"quoted\"")
-    val output = input.mkString("\n") ++ ("\n")
+    val output = input.mkString("\n") ++ "\n"
     for(i <- 1 to output.length + 5) {
       val read = readBufAll(input.toIterator, 1)
       assert(read === output)
```
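As context for the `StringIteratorReader` comment in the diff above: `textFile` strips the trailing newline from each line, while Univocity's parser reads from a `java.io.Reader` and needs those newlines back. Below is a minimal standalone sketch of that idea; `LinesReader` is an illustrative name, not the actual Spark class (which additionally tracks offsets with `Long` counters):

```scala
import java.io.Reader

// Illustrative sketch: a Reader over an Iterator[String] that pretends
// each string ends with '\n', since Spark's textFile strips newlines.
class LinesReader(iter: Iterator[String]) extends Reader {
  private var current: String = ""  // current line, with '\n' re-appended
  private var pos: Int = 0          // read position within `current`

  // Advance to the next line (plus a virtual newline) once the
  // current one is exhausted.
  private def refill(): Unit = {
    while (pos >= current.length && iter.hasNext) {
      current = iter.next() + "\n"
      pos = 0
    }
  }

  override def read(): Int = {
    refill()
    if (pos >= current.length) -1
    else { val c = current.charAt(pos); pos += 1; c }
  }

  override def read(cbuf: Array[Char], off: Int, len: Int): Int = {
    refill()
    if (pos >= current.length) {
      -1  // iterator exhausted
    } else {
      val n = math.min(len, current.length - pos)
      current.getChars(pos, pos + n, cbuf, off)
      pos += n
      n
    }
  }

  override def close(): Unit = ()
}
```

Feeding such a Reader to `CsvParser.beginParsing` lets the parser consume a whole partition as if it were reading the original file, which is what `BulkCsvReader` relies on.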