author     hyukjinkwon <gurwls223@gmail.com>    2016-04-01 22:51:47 -0700
committer  Reynold Xin <rxin@databricks.com>    2016-04-01 22:51:47 -0700
commit     d7982a3a9aa804e7e3a2004335e7f314867a5f8a (patch)
tree       d9c7604c13525a96c564e34c51b6e70648bc7bdf
parent     f414154418c2291448954b9f0890d592b2d823ae (diff)
[MINOR][SQL] Fix comment style and correct several styles and nits in CSV data source

## What changes were proposed in this pull request?

While trying to create a PR (which turned out not to be an issue in the end), I corrected some style nits, so I removed the changes except for the coding style corrections.

- According to the [scala-style-guide#documentation-style](https://github.com/databricks/scala-style-guide#documentation-style), ScalaDoc-style comments are discouraged.

  >```scala
  >/** This is a correct one-liner, short description. */
  >
  >/**
  > * This is a correct multi-line JavaDoc comment. And
  > * this is my second line, and if I keep typing, this would be
  > * my third line.
  > */
  >
  >/** In Spark, we don't use the ScalaDoc style so this
  > * is not correct.
  > */
  >```

- Double newlines between consecutive methods were removed. According to the [scala-style-guide#blank-lines-vertical-whitespace](https://github.com/databricks/scala-style-guide#blank-lines-vertical-whitespace), a single blank line appears:

  > Between consecutive members (or initializers) of a class: fields, constructors, methods, nested classes, static initializers, instance initializers.

- Removed useless parentheses in tests.
- Used `mapPartitions` instead of `mapPartitionsWithIndex()` (a minimal sketch follows this message).

## How was this patch tested?

Unit tests were used, and `dev/run_tests` for style checks.

Author: hyukjinkwon <gurwls223@gmail.com>

Closes #12109 from HyukjinKwon/SPARK-14271.
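For reference, here is a minimal sketch (not part of the patch) of the `mapPartitionsWithIndex` → `mapPartitions` simplification mentioned above: when the partition index is unused, `mapPartitions` expresses the same transformation without the ignored argument. The names `lines`, `firstLine`, and `dropHeader` are placeholders for illustration, not identifiers from the patch.

```scala
import org.apache.spark.rdd.RDD

object MapPartitionsSketch {
  // Hypothetical helper mirroring the shape of the change in CSVRelation:
  // drop the header line from each partition; the partition index is never needed.
  def withoutHeader(lines: RDD[String], firstLine: String, dropHeader: Boolean): RDD[String] = {
    // Before: lines.mapPartitionsWithIndex({ case (split, iter) => ... }, true) ignored `split`.
    // After: mapPartitions receives only the iterator the closure actually uses.
    lines.mapPartitions { iter =>
      if (dropHeader) iter.filterNot(_ == firstLine) else iter
    }
  }
}
```

Note that dropping the second argument (`preservesPartitioning = true`) falls back to the default of `false`, which should be harmless for a line RDD read from text files, as it has no partitioner to preserve.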
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVParser.scala      | 80
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVRelation.scala    |  6
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/DefaultSource.scala  |  1
-rw-r--r--  sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVParserSuite.scala | 10
4 files changed, 48 insertions(+), 49 deletions(-)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVParser.scala
index 7cf1b4c662..5570b2c173 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVParser.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVParser.scala
@@ -25,11 +25,11 @@ import com.univocity.parsers.csv.{CsvParser, CsvParserSettings, CsvWriter, CsvWr
import org.apache.spark.internal.Logging
/**
- * Read and parse CSV-like input
- *
- * @param params Parameters object
- * @param headers headers for the columns
- */
+ * Read and parse CSV-like input
+ *
+ * @param params Parameters object
+ * @param headers headers for the columns
+ */
private[sql] abstract class CsvReader(params: CSVOptions, headers: Seq[String]) {
protected lazy val parser: CsvParser = {
@@ -54,11 +54,11 @@ private[sql] abstract class CsvReader(params: CSVOptions, headers: Seq[String])
}
/**
- * Converts a sequence of string to CSV string
- *
- * @param params Parameters object for configuration
- * @param headers headers for columns
- */
+ * Converts a sequence of string to CSV string
+ *
+ * @param params Parameters object for configuration
+ * @param headers headers for columns
+ */
private[sql] class LineCsvWriter(params: CSVOptions, headers: Seq[String]) extends Logging {
private val writerSettings = new CsvWriterSettings
private val format = writerSettings.getFormat
@@ -90,18 +90,18 @@ private[sql] class LineCsvWriter(params: CSVOptions, headers: Seq[String]) exten
}
/**
- * Parser for parsing a line at a time. Not efficient for bulk data.
- *
- * @param params Parameters object
- */
+ * Parser for parsing a line at a time. Not efficient for bulk data.
+ *
+ * @param params Parameters object
+ */
private[sql] class LineCsvReader(params: CSVOptions)
extends CsvReader(params, null) {
/**
- * parse a line
- *
- * @param line a String with no newline at the end
- * @return array of strings where each string is a field in the CSV record
- */
+ * parse a line
+ *
+ * @param line a String with no newline at the end
+ * @return array of strings where each string is a field in the CSV record
+ */
def parseLine(line: String): Array[String] = {
parser.beginParsing(new StringReader(line))
val parsed = parser.parseNext()
@@ -111,12 +111,12 @@ private[sql] class LineCsvReader(params: CSVOptions)
}
/**
- * Parser for parsing lines in bulk. Use this when efficiency is desired.
- *
- * @param iter iterator over lines in the file
- * @param params Parameters object
- * @param headers headers for the columns
- */
+ * Parser for parsing lines in bulk. Use this when efficiency is desired.
+ *
+ * @param iter iterator over lines in the file
+ * @param params Parameters object
+ * @param headers headers for the columns
+ */
private[sql] class BulkCsvReader(
iter: Iterator[String],
params: CSVOptions,
@@ -128,9 +128,9 @@ private[sql] class BulkCsvReader(
private var nextRecord = parser.parseNext()
/**
- * get the next parsed line.
- * @return array of strings where each string is a field in the CSV record
- */
+ * get the next parsed line.
+ * @return array of strings where each string is a field in the CSV record
+ */
override def next(): Array[String] = {
val curRecord = nextRecord
if(curRecord != null) {
@@ -146,11 +146,11 @@ private[sql] class BulkCsvReader(
}
/**
- * A Reader that "reads" from a sequence of lines. Spark's textFile method removes newlines at
- * end of each line Univocity parser requires a Reader that provides access to the data to be
- * parsed and needs the newlines to be present
- * @param iter iterator over RDD[String]
- */
+ * A Reader that "reads" from a sequence of lines. Spark's textFile method removes newlines at
+ * end of each line Univocity parser requires a Reader that provides access to the data to be
+ * parsed and needs the newlines to be present
+ * @param iter iterator over RDD[String]
+ */
private class StringIteratorReader(val iter: Iterator[String]) extends java.io.Reader {
private var next: Long = 0
@@ -159,9 +159,9 @@ private class StringIteratorReader(val iter: Iterator[String]) extends java.io.R
private var str: String = null // current string from iter
/**
- * fetch next string from iter, if done with current one
- * pretend there is a new line at the end of every string we get from from iter
- */
+ * fetch next string from iter, if done with current one
+ * pretend there is a new line at the end of every string we get from from iter
+ */
private def refill(): Unit = {
if (length == next) {
if (iter.hasNext) {
@@ -175,8 +175,8 @@ private class StringIteratorReader(val iter: Iterator[String]) extends java.io.R
}
/**
- * read the next character, if at end of string pretend there is a new line
- */
+ * read the next character, if at end of string pretend there is a new line
+ */
override def read(): Int = {
refill()
if (next >= length) {
@@ -189,8 +189,8 @@ private class StringIteratorReader(val iter: Iterator[String]) extends java.io.R
}
/**
- * read from str into cbuf
- */
+ * read from str into cbuf
+ */
override def read(cbuf: Array[Char], off: Int, len: Int): Int = {
refill()
var n = 0
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVRelation.scala
index b47328a3dd..54fb03b6d3 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVRelation.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVRelation.scala
@@ -42,12 +42,12 @@ object CSVRelation extends Logging {
firstLine: String,
params: CSVOptions): RDD[Array[String]] = {
// If header is set, make sure firstLine is materialized before sending to executors.
- file.mapPartitionsWithIndex({
- case (split, iter) => new BulkCsvReader(
+ file.mapPartitions { iter =>
+ new BulkCsvReader(
if (params.headerFlag) iter.filterNot(_ == firstLine) else iter,
params,
headers = header)
- }, true)
+ }
}
def csvParser(
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/DefaultSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/DefaultSource.scala
index 6b6add48cd..c0d6f6fbf7 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/DefaultSource.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/DefaultSource.scala
@@ -164,7 +164,6 @@ class DefaultSource extends FileFormat with DataSourceRegister {
}
}
-
private def baseRdd(
sqlContext: SQLContext,
options: CSVOptions,
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVParserSuite.scala
index c0c38c6787..dc54883277 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVParserSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVParserSuite.scala
@@ -46,7 +46,7 @@ class CSVParserSuite extends SparkFunSuite {
var numRead = 0
var n = 0
do { // try to fill cbuf
- var off = 0
+ var off = 0
var len = cbuf.length
n = reader.read(cbuf, off, len)
@@ -81,7 +81,7 @@ class CSVParserSuite extends SparkFunSuite {
test("Regular case") {
val input = List("This is a string", "This is another string", "Small", "", "\"quoted\"")
val read = readAll(input.toIterator)
- assert(read === input.mkString("\n") ++ ("\n"))
+ assert(read === input.mkString("\n") ++ "\n")
}
test("Empty iter") {
@@ -93,12 +93,12 @@ class CSVParserSuite extends SparkFunSuite {
test("Embedded new line") {
val input = List("This is a string", "This is another string", "Small\n", "", "\"quoted\"")
val read = readAll(input.toIterator)
- assert(read === input.mkString("\n") ++ ("\n"))
+ assert(read === input.mkString("\n") ++ "\n")
}
test("Buffer Regular case") {
val input = List("This is a string", "This is another string", "Small", "", "\"quoted\"")
- val output = input.mkString("\n") ++ ("\n")
+ val output = input.mkString("\n") ++ "\n"
for(i <- 1 to output.length + 5) {
val read = readBufAll(input.toIterator, i)
assert(read === output)
@@ -116,7 +116,7 @@ class CSVParserSuite extends SparkFunSuite {
test("Buffer Embedded new line") {
val input = List("This is a string", "This is another string", "Small\n", "", "\"quoted\"")
- val output = input.mkString("\n") ++ ("\n")
+ val output = input.mkString("\n") ++ "\n"
for(i <- 1 to output.length + 5) {
val read = readBufAll(input.toIterator, 1)
assert(read === output)