diff options
author | Reynold Xin <rxin@databricks.com> | 2016-01-26 00:51:08 -0800 |
---|---|---|
committer | Reynold Xin <rxin@databricks.com> | 2016-01-26 00:51:08 -0800 |
commit | d54cfed5a6953a9ce2b9de2f31ee2d673cb5cc62 (patch) | |
tree | fdf4950185475a1d07ab0fe0dd0d637c1eceab04 | |
parent | 27c910f7f29087d1ac216d4933d641d6515fd6ad (diff) | |
download | spark-d54cfed5a6953a9ce2b9de2f31ee2d673cb5cc62.tar.gz spark-d54cfed5a6953a9ce2b9de2f31ee2d673cb5cc62.tar.bz2 spark-d54cfed5a6953a9ce2b9de2f31ee2d673cb5cc62.zip |
[SQL][MINOR] A few minor tweaks to CSV reader.
This pull request simply fixes a few minor coding style issues in csv, as I was reviewing the change post-hoc.
Author: Reynold Xin <rxin@databricks.com>
Closes #10919 from rxin/csv-minor.
2 files changed, 9 insertions, 14 deletions
```diff
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchema.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchema.scala
index 0aa4539e60..ace8cd7ad8 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchema.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchema.scala
@@ -30,16 +30,15 @@ import org.apache.spark.sql.catalyst.analysis.HiveTypeCoercion
 import org.apache.spark.sql.types._
 
 
-private[sql] object CSVInferSchema {
+private[csv] object CSVInferSchema {
 
   /**
    * Similar to the JSON schema inference
    * 1. Infer type of each row
    * 2. Merge row types to find common type
    * 3. Replace any null types with string type
-   * TODO(hossein): Can we reuse JSON schema inference? [SPARK-12670]
    */
-  def apply(
+  def infer(
       tokenRdd: RDD[Array[String]],
       header: Array[String],
       nullValue: String = ""): StructType = {
@@ -65,10 +64,7 @@ private[sql] object CSVInferSchema {
     rowSoFar
   }
 
-  private[csv] def mergeRowTypes(
-      first: Array[DataType],
-      second: Array[DataType]): Array[DataType] = {
-
+  def mergeRowTypes(first: Array[DataType], second: Array[DataType]): Array[DataType] = {
     first.zipAll(second, NullType, NullType).map { case ((a, b)) =>
       val tpe = findTightestCommonType(a, b).getOrElse(StringType)
       tpe match {
@@ -82,8 +78,7 @@ private[sql] object CSVInferSchema {
    * Infer type of string field. Given known type Double, and a string "1", there is no
    * point checking if it is an Int, as the final type must be Double or higher.
    */
-  private[csv] def inferField(
-      typeSoFar: DataType, field: String, nullValue: String = ""): DataType = {
+  def inferField(typeSoFar: DataType, field: String, nullValue: String = ""): DataType = {
     if (field == null || field.isEmpty || field == nullValue) {
       typeSoFar
     } else {
@@ -155,7 +150,8 @@ private[sql] object CSVInferSchema {
   }
 }
 
-object CSVTypeCast {
+
+private[csv] object CSVTypeCast {
 
   /**
    * Casts given string datum to specified type.
@@ -167,7 +163,7 @@ object CSVTypeCast {
    * @param datum string value
    * @param castType SparkSQL type
    */
-  private[csv] def castTo(
+  def castTo(
       datum: String,
       castType: DataType,
       nullable: Boolean = true,
@@ -201,10 +197,9 @@ object CSVTypeCast {
    * Helper method that converts string representation of a character to actual character.
    * It handles some Java escaped strings and throws exception if given string is longer than one
    * character.
-   *
    */
   @throws[IllegalArgumentException]
-  private[csv] def toChar(str: String): Char = {
+  def toChar(str: String): Char = {
     if (str.charAt(0) == '\\') {
       str.charAt(1)
       match {
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVRelation.scala
index 5959f7cc50..dc449fea95 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVRelation.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVRelation.scala
@@ -139,7 +139,7 @@ private[csv] class CSVRelation(
     val parsedRdd = tokenRdd(header, paths)
 
     if (params.inferSchemaFlag) {
-      CSVInferSchema(parsedRdd, header, params.nullValue)
+      CSVInferSchema.infer(parsedRdd, header, params.nullValue)
     } else {
       // By default fields are assumed to be StringType
       val schemaFields = header.map { fieldName =>
```