diff options
author | Cheng Hao <hao.cheng@intel.com> | 2014-11-07 12:15:53 -0800 |
---|---|---|
committer | Michael Armbrust <michael@databricks.com> | 2014-11-07 12:15:53 -0800 |
commit | 60ab80f501b8384ddf48a9ac0ba0c2b9eb548b28 (patch) | |
tree | 85e3f1dd8348647d8d0668c7c1c1cef266163e7a /sql/hive | |
parent | 14c54f1876fcf91b5c10e80be2df5421c7328557 (diff) | |
download | spark-60ab80f501b8384ddf48a9ac0ba0c2b9eb548b28.tar.gz spark-60ab80f501b8384ddf48a9ac0ba0c2b9eb548b28.tar.bz2 spark-60ab80f501b8384ddf48a9ac0ba0c2b9eb548b28.zip |
[SPARK-4272] [SQL] Add more unwrapper functions for primitive type in TableReader
Currently, the data "unwrap" logic only supports a couple of primitive types, not all of them. This does not cause exceptions, but adding dedicated unwrappers may improve table-scanning performance for types such as binary, date, timestamp, decimal, etc.
Author: Cheng Hao <hao.cheng@intel.com>
Closes #3136 from chenghao-intel/table_reader and squashes the following commits:
fffb729 [Cheng Hao] fix bug for retrieving the timestamp object
e9c97a4 [Cheng Hao] Add more unwrapper functions for primitive type in TableReader
Diffstat (limited to 'sql/hive')
-rw-r--r-- | sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala | 4 | ||||
-rw-r--r-- | sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala | 15 |
2 files changed, 15 insertions, 4 deletions
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala index 58815daa82..bdc7e1dac1 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala @@ -119,10 +119,6 @@ private[hive] trait HiveInspectors { * Wraps with Hive types based on object inspector. * TODO: Consolidate all hive OI/data interface code. */ - /** - * Wraps with Hive types based on object inspector. - * TODO: Consolidate all hive OI/data interface code. - */ protected def wrapperFor(oi: ObjectInspector): Any => Any = oi match { case _: JavaHiveVarcharObjectInspector => (o: Any) => new HiveVarchar(o.asInstanceOf[String], o.asInstanceOf[String].size) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala index e49f0957d1..f60bc3788e 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala @@ -290,6 +290,21 @@ private[hive] object HadoopTableReader extends HiveInspectors { (value: Any, row: MutableRow, ordinal: Int) => row.setFloat(ordinal, oi.get(value)) case oi: DoubleObjectInspector => (value: Any, row: MutableRow, ordinal: Int) => row.setDouble(ordinal, oi.get(value)) + case oi: HiveVarcharObjectInspector => + (value: Any, row: MutableRow, ordinal: Int) => + row.setString(ordinal, oi.getPrimitiveJavaObject(value).getValue) + case oi: HiveDecimalObjectInspector => + (value: Any, row: MutableRow, ordinal: Int) => + row.update(ordinal, HiveShim.toCatalystDecimal(oi, value)) + case oi: TimestampObjectInspector => + (value: Any, row: MutableRow, ordinal: Int) => + row.update(ordinal, oi.getPrimitiveJavaObject(value).clone()) + case oi: DateObjectInspector => + (value: Any, row: MutableRow, ordinal: Int) => + 
row.update(ordinal, oi.getPrimitiveJavaObject(value)) + case oi: BinaryObjectInspector => + (value: Any, row: MutableRow, ordinal: Int) => + row.update(ordinal, oi.getPrimitiveJavaObject(value)) case oi => (value: Any, row: MutableRow, ordinal: Int) => row(ordinal) = unwrap(value, oi) } |