author     Cheng Hao <hao.cheng@intel.com>    2014-10-28 19:11:57 -0700
committer  Michael Armbrust <michael@databricks.com>    2014-10-28 19:11:57 -0700
commit     b5e79bf889700159d490cdac1f6322dff424b1d9 (patch)
tree       c5befc6a89689ec7f4c70f0cee73a19d45819578 /sql/hive/v0.13.1
parent     1536d70331e9a4f5b5ea9dabfd72592ca1fc8e35 (diff)
[SPARK-3904] [SQL] add constant objectinspector support for udfs
In HQL, we convert all of the data types into plain `ObjectInspector`s for UDFs. In most cases this works, but some UDFs actually require their children's `ObjectInspector`s to be `ConstantObjectInspector`s, and passing a plain one raises an exception, e.g.:

    select named_struct("x", "str") from src limit 1;

I updated the `wrap` method by adding one more `ObjectInspector` parameter (describing what it is expected to wrap to, for example `java.lang.Integer` or `IntWritable`), as well as the `unwrap` method, by providing the input `ObjectInspector`.

Author: Cheng Hao <hao.cheng@intel.com>

Closes #2762 from chenghao-intel/udf_coi and squashes the following commits:

bcacfd7 [Cheng Hao] Shim for both Hive 0.12 & 0.13.1
2416e5d [Cheng Hao] revert to hive 0.12
5793c01 [Cheng Hao] add space before while
4e56e1b [Cheng Hao] style issue
683d3fd [Cheng Hao] Add golden files
fe591e4 [Cheng Hao] update HiveGenericUdf to set the ObjectInspector while constructing the DeferredObject
8814c3a [Cheng Hao] Passing ConstantObjectInspector (when necessary) for UDF initializing
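As a minimal sketch of the failure mode (not part of this patch; the helper name `requireConstant` is illustrative), a Hive UDF such as `named_struct` inspects its arguments in `initialize()` and needs the field-name arguments as constants, so a plain `ObjectInspector` is rejected before any row is evaluated:

    import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException
    import org.apache.hadoop.hive.serde2.objectinspector.{ConstantObjectInspector, ObjectInspector}

    // Illustrative version of the check such UDFs perform at initialize() time:
    // the constant value (e.g. a struct field name) is needed up front, so a
    // non-constant inspector fails immediately -- the pre-patch exception.
    def requireConstant(oi: ObjectInspector, pos: Int): Object = oi match {
      case c: ConstantObjectInspector => c.getWritableConstantValue
      case _ => throw new UDFArgumentTypeException(pos, "constant argument expected")
    }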
Diffstat (limited to 'sql/hive/v0.13.1')
-rw-r--r--  sql/hive/v0.13.1/src/main/scala/org/apache/spark/sql/hive/Shim13.scala | 64
1 file changed, 62 insertions(+), 2 deletions(-)
diff --git a/sql/hive/v0.13.1/src/main/scala/org/apache/spark/sql/hive/Shim13.scala b/sql/hive/v0.13.1/src/main/scala/org/apache/spark/sql/hive/Shim13.scala
index b9a742cc6e..42cd65b251 100644
--- a/sql/hive/v0.13.1/src/main/scala/org/apache/spark/sql/hive/Shim13.scala
+++ b/sql/hive/v0.13.1/src/main/scala/org/apache/spark/sql/hive/Shim13.scala
@@ -21,6 +21,7 @@ import java.util.{ArrayList => JArrayList}
import java.util.Properties
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
+import org.apache.hadoop.mapred.InputFormat
import org.apache.hadoop.hive.common.StatsSetupConst
import org.apache.hadoop.hive.common.`type`.{HiveDecimal}
import org.apache.hadoop.hive.conf.HiveConf
@@ -28,10 +29,16 @@ import org.apache.hadoop.hive.ql.Context
import org.apache.hadoop.hive.ql.metadata.{Table, Hive, Partition}
import org.apache.hadoop.hive.ql.plan.{CreateTableDesc, FileSinkDesc, TableDesc}
import org.apache.hadoop.hive.ql.processors.CommandProcessorFactory
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory
import org.apache.hadoop.hive.serde2.{ColumnProjectionUtils, Deserializer}
-import org.apache.hadoop.mapred.InputFormat
-import org.apache.spark.Logging
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector
+import org.apache.hadoop.hive.serde2.{Deserializer, ColumnProjectionUtils}
+import org.apache.hadoop.hive.serde2.{io => hiveIo}
import org.apache.hadoop.{io => hadoopIo}
+import org.apache.spark.Logging
+
import scala.collection.JavaConversions._
import scala.language.implicitConversions
@@ -54,6 +61,59 @@ private[hive] object HiveShim {
new TableDesc(inputFormatClass, outputFormatClass, properties)
}
+ def getPrimitiveWritableConstantObjectInspector(value: String): ObjectInspector =
+ PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector(
+ TypeInfoFactory.stringTypeInfo, new hadoopIo.Text(value))
+
+ def getPrimitiveWritableConstantObjectInspector(value: Int): ObjectInspector =
+ PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector(
+ TypeInfoFactory.intTypeInfo, new hadoopIo.IntWritable(value))
+
+ def getPrimitiveWritableConstantObjectInspector(value: Double): ObjectInspector =
+ PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector(
+ TypeInfoFactory.doubleTypeInfo, new hiveIo.DoubleWritable(value))
+
+ def getPrimitiveWritableConstantObjectInspector(value: Boolean): ObjectInspector =
+ PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector(
+ TypeInfoFactory.booleanTypeInfo, new hadoopIo.BooleanWritable(value))
+
+ def getPrimitiveWritableConstantObjectInspector(value: Long): ObjectInspector =
+ PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector(
+ TypeInfoFactory.longTypeInfo, new hadoopIo.LongWritable(value))
+
+ def getPrimitiveWritableConstantObjectInspector(value: Float): ObjectInspector =
+ PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector(
+ TypeInfoFactory.floatTypeInfo, new hadoopIo.FloatWritable(value))
+
+ def getPrimitiveWritableConstantObjectInspector(value: Short): ObjectInspector =
+ PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector(
+ TypeInfoFactory.shortTypeInfo, new hiveIo.ShortWritable(value))
+
+ def getPrimitiveWritableConstantObjectInspector(value: Byte): ObjectInspector =
+ PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector(
+ TypeInfoFactory.byteTypeInfo, new hiveIo.ByteWritable(value))
+
+ def getPrimitiveWritableConstantObjectInspector(value: Array[Byte]): ObjectInspector =
+ PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector(
+ TypeInfoFactory.binaryTypeInfo, new hadoopIo.BytesWritable(value))
+
+ def getPrimitiveWritableConstantObjectInspector(value: java.sql.Date): ObjectInspector =
+ PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector(
+ TypeInfoFactory.dateTypeInfo, new hiveIo.DateWritable(value))
+
+ def getPrimitiveWritableConstantObjectInspector(value: java.sql.Timestamp): ObjectInspector =
+ PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector(
+ TypeInfoFactory.timestampTypeInfo, new hiveIo.TimestampWritable(value))
+
+ def getPrimitiveWritableConstantObjectInspector(value: BigDecimal): ObjectInspector =
+ PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector(
+ TypeInfoFactory.decimalTypeInfo,
+ new hiveIo.HiveDecimalWritable(HiveShim.createDecimal(value.underlying())))
+
+ def getPrimitiveNullWritableConstantObjectInspector: ObjectInspector =
+ PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector(
+ TypeInfoFactory.voidTypeInfo, null)
+
def createDriverResultsArray = new JArrayList[Object]
def processResults(results: JArrayList[Object]) = {
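The overloads added above are selected by the literal's Scala type. A hypothetical dispatcher (illustrative only; `toWritableConstantOI` is not the patch's actual call site, and it is sketched inside the `org.apache.spark.sql.hive` package so the `private[hive]` shim object is visible) might look like:

    package org.apache.spark.sql.hive

    import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector

    // Hypothetical dispatcher: each case resolves statically to one of the
    // getPrimitiveWritableConstantObjectInspector overloads added by this patch.
    object ConstantOISketch {
      def toWritableConstantOI(value: Any): ObjectInspector = value match {
        case s: String => HiveShim.getPrimitiveWritableConstantObjectInspector(s)
        case i: Int    => HiveShim.getPrimitiveWritableConstantObjectInspector(i)
        case l: Long   => HiveShim.getPrimitiveWritableConstantObjectInspector(l)
        case null      => HiveShim.getPrimitiveNullWritableConstantObjectInspector
        // ...remaining primitive types (Double, Boolean, etc.) follow the same pattern
      }
    }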