1 files changed, 36 insertions, 32 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVDataSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVDataSource.scala
index 73e6abc6da..47567032b0 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVDataSource.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVDataSource.scala
@@ -133,20 +133,24 @@ object TextInputCSVDataSource extends CSVDataSource {
       sparkSession: SparkSession,
       inputPaths: Seq[FileStatus],
       parsedOptions: CSVOptions): Option[StructType] = {
-    val csv: Dataset[String] = createBaseDataset(sparkSession, inputPaths, parsedOptions)
-    val firstLine: String = CSVUtils.filterCommentAndEmpty(csv, parsedOptions).first()
-    val firstRow = new CsvParser(parsedOptions.asParserSettings).parseLine(firstLine)
-    val caseSensitive = sparkSession.sessionState.conf.caseSensitiveAnalysis
-    val header = makeSafeHeader(firstRow, caseSensitive, parsedOptions)
-    val tokenRDD = csv.rdd.mapPartitions { iter =>
-      val filteredLines = CSVUtils.filterCommentAndEmpty(iter, parsedOptions)
-      val linesWithoutHeader =
-        CSVUtils.filterHeaderLine(filteredLines, firstLine, parsedOptions)
-      val parser = new CsvParser(parsedOptions.asParserSettings)
-      linesWithoutHeader.map(parser.parseLine)
+    val csv = createBaseDataset(sparkSession, inputPaths, parsedOptions)
+    CSVUtils.filterCommentAndEmpty(csv, parsedOptions).take(1).headOption match {
+      case Some(firstLine) =>
+        val firstRow = new CsvParser(parsedOptions.asParserSettings).parseLine(firstLine)
+        val caseSensitive = sparkSession.sessionState.conf.caseSensitiveAnalysis
+        val header = makeSafeHeader(firstRow, caseSensitive, parsedOptions)
+        val tokenRDD = csv.rdd.mapPartitions { iter =>
+          val filteredLines = CSVUtils.filterCommentAndEmpty(iter, parsedOptions)
+          val linesWithoutHeader =
+            CSVUtils.filterHeaderLine(filteredLines, firstLine, parsedOptions)
+          val parser = new CsvParser(parsedOptions.asParserSettings)
+          linesWithoutHeader.map(parser.parseLine)
+        }
+        Some(CSVInferSchema.infer(tokenRDD, header, parsedOptions))
+      case None =>
+        // If no line remains after filtering comments and empty lines, return the empty schema.
+        Some(StructType(Nil))
     }
-
-    Some(CSVInferSchema.infer(tokenRDD, header, parsedOptions))
   }
 
   private def createBaseDataset(
@@ -190,28 +194,28 @@ object WholeFileCSVDataSource extends CSVDataSource {
       sparkSession: SparkSession,
       inputPaths: Seq[FileStatus],
       parsedOptions: CSVOptions): Option[StructType] = {
-    val csv: RDD[PortableDataStream] = createBaseRdd(sparkSession, inputPaths, parsedOptions)
-    val maybeFirstRow: Option[Array[String]] = csv.flatMap { lines =>
+    val csv = createBaseRdd(sparkSession, inputPaths, parsedOptions)
+    csv.flatMap { lines =>
       UnivocityParser.tokenizeStream(
         CodecStreams.createInputStreamWithCloseResource(lines.getConfiguration, lines.getPath()),
-        false,
+        shouldDropHeader = false,
         new CsvParser(parsedOptions.asParserSettings))
-    }.take(1).headOption
-
-    if (maybeFirstRow.isDefined) {
-      val firstRow = maybeFirstRow.get
-      val caseSensitive = sparkSession.sessionState.conf.caseSensitiveAnalysis
-      val header = makeSafeHeader(firstRow, caseSensitive, parsedOptions)
-      val tokenRDD = csv.flatMap { lines =>
-        UnivocityParser.tokenizeStream(
-          CodecStreams.createInputStreamWithCloseResource(lines.getConfiguration, lines.getPath()),
-          parsedOptions.headerFlag,
-          new CsvParser(parsedOptions.asParserSettings))
-      }
-      Some(CSVInferSchema.infer(tokenRDD, header, parsedOptions))
-    } else {
-      // If the first row could not be read, just return the empty schema.
-      Some(StructType(Nil))
+    }.take(1).headOption match {
+      case Some(firstRow) =>
+        val caseSensitive = sparkSession.sessionState.conf.caseSensitiveAnalysis
+        val header = makeSafeHeader(firstRow, caseSensitive, parsedOptions)
+        val tokenRDD = csv.flatMap { lines =>
+          UnivocityParser.tokenizeStream(
+            CodecStreams.createInputStreamWithCloseResource(
+              lines.getConfiguration,
+              lines.getPath()),
+            parsedOptions.headerFlag,
+            new CsvParser(parsedOptions.asParserSettings))
+        }
+        Some(CSVInferSchema.infer(tokenRDD, header, parsedOptions))
+      case None =>
+        // If tokenizing the input yielded no rows at all, just return the empty schema.
+        Some(StructType(Nil))
     }
   }