From ae226283e19ce396216c73b0ae2470efa122b65b Mon Sep 17 00:00:00 2001
From: Wenchen Fan
Date: Wed, 3 Aug 2016 08:23:26 +0800
Subject: [SQL][MINOR] use stricter type parameter to make it clear that
 parquet reader returns UnsafeRow

## What changes were proposed in this pull request?

A small code style change: it's better to make the type parameter more accurate.

## How was this patch tested?

N/A

Author: Wenchen Fan

Closes #14458 from cloud-fan/parquet.
---
 .../sql/execution/datasources/parquet/ParquetFileFormat.scala      |  4 ++--
 .../sql/execution/datasources/parquet/ParquetReadSupport.scala     | 10 +++++-----
 .../datasources/parquet/ParquetRecordMaterializer.scala            |  6 +++---
 3 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala
index 772e031ea7..c3e75f1934 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala
@@ -370,11 +370,11 @@ private[sql] class ParquetFileFormat
         logDebug(s"Falling back to parquet-mr")
         val reader = pushed match {
           case Some(filter) =>
-            new ParquetRecordReader[InternalRow](
+            new ParquetRecordReader[UnsafeRow](
               new ParquetReadSupport,
               FilterCompat.get(filter, null))
           case _ =>
-            new ParquetRecordReader[InternalRow](new ParquetReadSupport)
+            new ParquetRecordReader[UnsafeRow](new ParquetReadSupport)
         }
         reader.initialize(split, hadoopAttemptContext)
         reader
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetReadSupport.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetReadSupport.scala
index 8a2e0d7995..f1a35dd8a6 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetReadSupport.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetReadSupport.scala
@@ -29,12 +29,12 @@ import org.apache.parquet.schema._
 import org.apache.parquet.schema.Type.Repetition
 
 import org.apache.spark.internal.Logging
-import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.catalyst.expressions.UnsafeRow
 import org.apache.spark.sql.types._
 
 /**
  * A Parquet [[ReadSupport]] implementation for reading Parquet records as Catalyst
- * [[InternalRow]]s.
+ * [[UnsafeRow]]s.
  *
  * The API interface of [[ReadSupport]] is a little bit over complicated because of historical
  * reasons. In older versions of parquet-mr (say 1.6.0rc3 and prior), [[ReadSupport]] need to be
@@ -48,7 +48,7 @@ import org.apache.spark.sql.types._
  * Due to this reason, we no longer rely on [[ReadContext]] to pass requested schema from [[init()]]
  * to [[prepareForRead()]], but use a private `var` for simplicity.
  */
-private[parquet] class ParquetReadSupport extends ReadSupport[InternalRow] with Logging {
+private[parquet] class ParquetReadSupport extends ReadSupport[UnsafeRow] with Logging {
   private var catalystRequestedSchema: StructType = _
 
   /**
@@ -72,13 +72,13 @@ private[parquet] class ParquetReadSupport extends ReadSupport[InternalRow] with
 
   /**
    * Called on executor side after [[init()]], before instantiating actual Parquet record readers.
    * Responsible for instantiating [[RecordMaterializer]], which is used for converting Parquet
-   * records to Catalyst [[InternalRow]]s.
+   * records to Catalyst [[UnsafeRow]]s.
    */
   override def prepareForRead(
       conf: Configuration,
       keyValueMetaData: JMap[String, String],
       fileSchema: MessageType,
-      readContext: ReadContext): RecordMaterializer[InternalRow] = {
+      readContext: ReadContext): RecordMaterializer[UnsafeRow] = {
     log.debug(s"Preparing for read Parquet file with message type: $fileSchema")
     val parquetRequestedSchema = readContext.getRequestedSchema
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRecordMaterializer.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRecordMaterializer.scala
index d12e780528..4e49a0dac9 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRecordMaterializer.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRecordMaterializer.scala
@@ -20,7 +20,7 @@ package org.apache.spark.sql.execution.datasources.parquet
 import org.apache.parquet.io.api.{GroupConverter, RecordMaterializer}
 import org.apache.parquet.schema.MessageType
 
-import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.catalyst.expressions.UnsafeRow
 import org.apache.spark.sql.types.StructType
 
 /**
@@ -32,12 +32,12 @@ import org.apache.spark.sql.types.StructType
  */
 private[parquet] class ParquetRecordMaterializer(
     parquetSchema: MessageType, catalystSchema: StructType, schemaConverter: ParquetSchemaConverter)
-  extends RecordMaterializer[InternalRow] {
+  extends RecordMaterializer[UnsafeRow] {
 
   private val rootConverter =
     new ParquetRowConverter(schemaConverter, parquetSchema, catalystSchema, NoopUpdater)
 
-  override def getCurrentRecord: InternalRow = rootConverter.currentRecord
+  override def getCurrentRecord: UnsafeRow = rootConverter.currentRecord
 
   override def getRootConverter: GroupConverter = rootConverter
 }
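
The change is safe because `UnsafeRow` is a concrete subclass of `InternalRow`: every record the non-vectorized parquet reader produces is already an `UnsafeRow`, so the stricter type parameter only narrows what the API promises without affecting callers that consume the rows as `InternalRow`. Below is a minimal sketch of that relationship; the classes are simplified stand-ins, not the real Spark or parquet-mr types, and `getSizeInBytes` is included purely for illustration.

```scala
// Simplified stand-ins (NOT the real Spark / parquet-mr classes) that mirror
// the subtyping relationship the patch relies on.
abstract class InternalRow               // stand-in for Spark's abstract row type
class UnsafeRow extends InternalRow {    // stand-in for the binary row implementation
  def getSizeInBytes: Int = 0            // an UnsafeRow-specific member, for illustration
}

// Stand-ins for parquet-mr's RecordMaterializer / ParquetRecordReader pair.
trait RecordMaterializer[T] {
  def getCurrentRecord: T
}
class ParquetRecordReader[T](materializer: RecordMaterializer[T]) {
  def getCurrentValue: T = materializer.getCurrentRecord
}

object TypeParameterDemo extends App {
  // The materializer actually produces UnsafeRows, as ParquetRowConverter does.
  val materializer = new RecordMaterializer[UnsafeRow] {
    override def getCurrentRecord: UnsafeRow = new UnsafeRow
  }

  // Before the patch the reader was declared as ParquetRecordReader[InternalRow],
  // hiding the fact that each record is really an UnsafeRow.
  val reader = new ParquetRecordReader[UnsafeRow](materializer)

  // The stricter type exposes UnsafeRow-specific members directly...
  val unsafe: UnsafeRow = reader.getCurrentValue
  println(unsafe.getSizeInBytes)

  // ...while code written against InternalRow keeps compiling unchanged,
  // because UnsafeRow <: InternalRow.
  val row: InternalRow = reader.getCurrentValue
}
```

Note that Scala (and Java) generics are invariant, so a `ParquetRecordReader[UnsafeRow]` is not itself a substitute for a `ParquetRecordReader[InternalRow]`; the compatibility holds at the level of the returned records, which is all the touched call sites rely on.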