diff options
author | Takeshi Yamamuro <yamamuro@apache.org> | 2017-03-14 18:51:05 +0100 |
---|---|---|
committer | Herman van Hovell <hvanhovell@databricks.com> | 2017-03-14 18:51:05 +0100 |
commit | 6325a2f82a95a63bee020122620bc4f5fd25d059 (patch) | |
tree | 329af4d0b02ce6457fb7ac2536254873ab68618c /sql | |
parent | a02a0b1703dafab541c9b57939e3ed37e412d0f8 (diff) | |
download | spark-6325a2f82a95a63bee020122620bc4f5fd25d059.tar.gz spark-6325a2f82a95a63bee020122620bc4f5fd25d059.tar.bz2 spark-6325a2f82a95a63bee020122620bc4f5fd25d059.zip |
[SPARK-19923][SQL] Remove unnecessary type conversions per call in Hive
## What changes were proposed in this pull request?
This PR removes unnecessary type conversions per call in Hive: https://github.com/apache/spark/blob/master/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala#L116
## How was this patch tested?
Existing tests
Author: Takeshi Yamamuro <yamamuro@apache.org>
Closes #17264 from maropu/SPARK-19923.
Diffstat (limited to 'sql')
-rw-r--r-- | sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala | 3 | ||||
-rw-r--r-- | sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala | 13 |
2 files changed, 11 insertions, 5 deletions
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala index 506949cb68..51c814cf32 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala @@ -108,12 +108,13 @@ private[hive] case class HiveSimpleUDF( private[hive] class DeferredObjectAdapter(oi: ObjectInspector, dataType: DataType) extends DeferredObject with HiveInspectors { + private val wrapper = wrapperFor(oi, dataType) private var func: () => Any = _ def set(func: () => Any): Unit = { this.func = func } override def prepare(i: Int): Unit = {} - override def get(): AnyRef = wrap(func(), oi, dataType) + override def get(): AnyRef = wrapper(func()).asInstanceOf[AnyRef] } private[hive] case class HiveGenericUDF( diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala index f496c01ce9..3a34ec55c8 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala @@ -20,6 +20,8 @@ package org.apache.spark.sql.hive.orc import java.net.URI import java.util.Properties +import scala.collection.JavaConverters._ + import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileStatus, Path} import org.apache.hadoop.hive.conf.HiveConf.ConfVars @@ -196,6 +198,11 @@ private[orc] class OrcSerializer(dataSchema: StructType, conf: Configuration) private[this] val cachedOrcStruct = structOI.create().asInstanceOf[OrcStruct] + // Wrapper functions used to wrap Spark SQL input arguments into Hive specific format + private[this] val wrappers = dataSchema.zip(structOI.getAllStructFieldRefs().asScala.toSeq).map { + case (f, i) => wrapperFor(i.getFieldObjectInspector, f.dataType) + } + private[this] def wrapOrcStruct( struct: OrcStruct, 
oi: SettableStructObjectInspector, @@ -208,10 +215,8 @@ private[orc] class OrcSerializer(dataSchema: StructType, conf: Configuration) oi.setStructFieldData( struct, fieldRefs.get(i), - wrap( - row.get(i, dataSchema(i).dataType), - fieldRefs.get(i).getFieldObjectInspector, - dataSchema(i).dataType)) + wrappers(i)(row.get(i, dataSchema(i).dataType)) + ) i += 1 } } |