diff options
author | Cheng Hao <hao.cheng@intel.com> | 2014-10-28 19:11:57 -0700 |
---|---|---|
committer | Michael Armbrust <michael@databricks.com> | 2014-10-28 19:11:57 -0700 |
commit | b5e79bf889700159d490cdac1f6322dff424b1d9 (patch) | |
tree | c5befc6a89689ec7f4c70f0cee73a19d45819578 /sql/hive/v0.12.0 | |
parent | 1536d70331e9a4f5b5ea9dabfd72592ca1fc8e35 (diff) | |
download | spark-b5e79bf889700159d490cdac1f6322dff424b1d9.tar.gz spark-b5e79bf889700159d490cdac1f6322dff424b1d9.tar.bz2 spark-b5e79bf889700159d490cdac1f6322dff424b1d9.zip |
[SPARK-3904] [SQL] add constant objectinspector support for udfs
In HQL, we convert all of the data type into normal `ObjectInspector`s for UDFs, most of cases it works, however, some of the UDF actually requires its children `ObjectInspector` to be the `ConstantObjectInspector`, which will cause exception.
e.g.
select named_struct("x", "str") from src limit 1;
I updated the method `wrap` by adding the one more parameter `ObjectInspector`(to describe what it expects to wrap to, for example: java.lang.Integer or IntWritable).
As well as the `unwrap` method by providing the input `ObjectInspector`.
Author: Cheng Hao <hao.cheng@intel.com>
Closes #2762 from chenghao-intel/udf_coi and squashes the following commits:
bcacfd7 [Cheng Hao] Shim for both Hive 0.12 & 0.13.1
2416e5d [Cheng Hao] revert to hive 0.12
5793c01 [Cheng Hao] add space before while
4e56e1b [Cheng Hao] style issue
683d3fd [Cheng Hao] Add golden files
fe591e4 [Cheng Hao] update HiveGenericUdf for set the ObjectInspector while constructing the DeferredObject
f6740fe [Cheng Hao] Support Constant ObjectInspector for Map & List
8814c3a [Cheng Hao] Passing ContantObjectInspector(when necessary) for UDF initializing
Diffstat (limited to 'sql/hive/v0.12.0')
-rw-r--r-- | sql/hive/v0.12.0/src/main/scala/org/apache/spark/sql/hive/Shim12.scala | 57 |
1 files changed, 57 insertions, 0 deletions
diff --git a/sql/hive/v0.12.0/src/main/scala/org/apache/spark/sql/hive/Shim12.scala b/sql/hive/v0.12.0/src/main/scala/org/apache/spark/sql/hive/Shim12.scala index 8cb81db8a9..afc252ac27 100644 --- a/sql/hive/v0.12.0/src/main/scala/org/apache/spark/sql/hive/Shim12.scala +++ b/sql/hive/v0.12.0/src/main/scala/org/apache/spark/sql/hive/Shim12.scala @@ -29,7 +29,11 @@ import org.apache.hadoop.hive.ql.metadata.{Hive, Partition, Table} import org.apache.hadoop.hive.ql.plan.{CreateTableDesc, FileSinkDesc, TableDesc} import org.apache.hadoop.hive.ql.processors._ import org.apache.hadoop.hive.ql.stats.StatsSetupConst +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector import org.apache.hadoop.hive.serde2.{Deserializer, ColumnProjectionUtils} +import org.apache.hadoop.hive.serde2.{io => hiveIo} import org.apache.hadoop.{io => hadoopIo} import org.apache.hadoop.mapred.InputFormat import scala.collection.JavaConversions._ @@ -50,6 +54,59 @@ private[hive] object HiveShim { new TableDesc(serdeClass, inputFormatClass, outputFormatClass, properties) } + def getPrimitiveWritableConstantObjectInspector(value: String): ObjectInspector = + PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( + PrimitiveCategory.STRING, new hadoopIo.Text(value)) + + def getPrimitiveWritableConstantObjectInspector(value: Int): ObjectInspector = + PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( + PrimitiveCategory.INT, new hadoopIo.IntWritable(value)) + + def getPrimitiveWritableConstantObjectInspector(value: Double): ObjectInspector = + PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( + PrimitiveCategory.DOUBLE, new hiveIo.DoubleWritable(value)) + + def getPrimitiveWritableConstantObjectInspector(value: Boolean): ObjectInspector = + PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( + PrimitiveCategory.BOOLEAN, new hadoopIo.BooleanWritable(value)) + + def getPrimitiveWritableConstantObjectInspector(value: Long): ObjectInspector = + PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( + PrimitiveCategory.LONG, new hadoopIo.LongWritable(value)) + + def getPrimitiveWritableConstantObjectInspector(value: Float): ObjectInspector = + PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( + PrimitiveCategory.FLOAT, new hadoopIo.FloatWritable(value)) + + def getPrimitiveWritableConstantObjectInspector(value: Short): ObjectInspector = + PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( + PrimitiveCategory.SHORT, new hiveIo.ShortWritable(value)) + + def getPrimitiveWritableConstantObjectInspector(value: Byte): ObjectInspector = + PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( + PrimitiveCategory.BYTE, new hiveIo.ByteWritable(value)) + + def getPrimitiveWritableConstantObjectInspector(value: Array[Byte]): ObjectInspector = + PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( + PrimitiveCategory.BINARY, new hadoopIo.BytesWritable(value)) + + def getPrimitiveWritableConstantObjectInspector(value: java.sql.Date): ObjectInspector = + PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( + PrimitiveCategory.DATE, new hiveIo.DateWritable(value)) + + def getPrimitiveWritableConstantObjectInspector(value: java.sql.Timestamp): ObjectInspector = + PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( + PrimitiveCategory.TIMESTAMP, new hiveIo.TimestampWritable(value)) + + def getPrimitiveWritableConstantObjectInspector(value: BigDecimal): ObjectInspector = + PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( + PrimitiveCategory.DECIMAL, + new hiveIo.HiveDecimalWritable(HiveShim.createDecimal(value.underlying()))) + + def getPrimitiveNullWritableConstantObjectInspector: ObjectInspector = + PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( + PrimitiveCategory.VOID, null) + def createDriverResultsArray = new JArrayList[String] def processResults(results: JArrayList[String]) = results |