author    Xiu Guo <xguo27@gmail.com>    2015-11-23 08:53:40 -0800
committer Yin Huai <yhuai@databricks.com>    2015-11-23 08:53:40 -0800
commit    94ce65dfcbba1fe3a1fc9d8002c37d9cd1a11336 (patch)
tree      65d3262c5538a085058d873fcbce72b39b55b910 /sql/hive
parent    4be360d4ee6cdb4d06306feca38ddef5212608cf (diff)
[SPARK-11628][SQL] support column datatype of char(x) to recognize HiveChar
Can someone review my code to make sure I'm not missing anything? Thanks!

Author: Xiu Guo <xguo27@gmail.com>
Author: Xiu Guo <guoxi@us.ibm.com>

Closes #9612 from xguo27/SPARK-11628.
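As a quick illustration of what this patch enables (a hedged sketch, not part of the commit: the table names and the `hiveContext` handle are hypothetical, assuming a Spark 1.6-era HiveContext):

    // Sketch only: assumes a HiveContext named hiveContext with Hive support built in.
    hiveContext.sql("CREATE TABLE char_test (c CHAR(10))")
    hiveContext.sql("INSERT INTO TABLE char_test SELECT 'hello' FROM existing_table LIMIT 1")
    // With this change, char(10) values coming back from Hive are
    // recognized as HiveChar and surfaced as a Catalyst string column.
    hiveContext.sql("SELECT c FROM char_test").show()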
Diffstat (limited to 'sql/hive')
-rw-r--r--  sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala | 25
-rw-r--r--  sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala | 3
-rw-r--r--  sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala | 3
3 files changed, 27 insertions(+), 4 deletions(-)
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala
index 36f0708f9d..95b57d6ad1 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala
@@ -19,7 +19,7 @@ package org.apache.spark.sql.hive
import scala.collection.JavaConverters._
-import org.apache.hadoop.hive.common.`type`.{HiveDecimal, HiveVarchar}
+import org.apache.hadoop.hive.common.`type`.{HiveChar, HiveDecimal, HiveVarchar}
import org.apache.hadoop.hive.serde2.objectinspector.primitive._
import org.apache.hadoop.hive.serde2.objectinspector.{StructField => HiveStructField, _}
import org.apache.hadoop.hive.serde2.typeinfo.{DecimalTypeInfo, TypeInfoFactory}
@@ -61,6 +61,7 @@ import org.apache.spark.unsafe.types.UTF8String
* Primitive Type
* Java Boxed Primitives:
* org.apache.hadoop.hive.common.type.HiveVarchar
+ * org.apache.hadoop.hive.common.type.HiveChar
* java.lang.String
* java.lang.Integer
* java.lang.Boolean
@@ -75,6 +76,7 @@ import org.apache.spark.unsafe.types.UTF8String
* java.sql.Timestamp
* Writables:
* org.apache.hadoop.hive.serde2.io.HiveVarcharWritable
+ * org.apache.hadoop.hive.serde2.io.HiveCharWritable
* org.apache.hadoop.io.Text
* org.apache.hadoop.io.IntWritable
* org.apache.hadoop.hive.serde2.io.DoubleWritable
@@ -93,7 +95,8 @@ import org.apache.spark.unsafe.types.UTF8String
* Struct: Object[] / java.util.List / java POJO
* Union: class StandardUnion { byte tag; Object object }
*
- * NOTICE: HiveVarchar is not supported by catalyst, it will be simply considered as String type.
+ * NOTICE: HiveVarchar/HiveChar is not supported by catalyst, it will be simply considered as
+ * String type.
*
*
* 2. Hive ObjectInspector is a group of flexible APIs to inspect value in different data
@@ -137,6 +140,7 @@ import org.apache.spark.unsafe.types.UTF8String
* Primitive Object Inspectors:
* WritableConstantStringObjectInspector
* WritableConstantHiveVarcharObjectInspector
+ * WritableConstantHiveCharObjectInspector
* WritableConstantHiveDecimalObjectInspector
* WritableConstantTimestampObjectInspector
* WritableConstantIntObjectInspector
@@ -259,6 +263,8 @@ private[hive] trait HiveInspectors {
UTF8String.fromString(poi.getWritableConstantValue.toString)
case poi: WritableConstantHiveVarcharObjectInspector =>
UTF8String.fromString(poi.getWritableConstantValue.getHiveVarchar.getValue)
+ case poi: WritableConstantHiveCharObjectInspector =>
+ UTF8String.fromString(poi.getWritableConstantValue.getHiveChar.getValue)
case poi: WritableConstantHiveDecimalObjectInspector =>
HiveShim.toCatalystDecimal(
PrimitiveObjectInspectorFactory.javaHiveDecimalObjectInspector,
@@ -303,11 +309,15 @@ private[hive] trait HiveInspectors {
case _ if data == null => null
case poi: VoidObjectInspector => null // always be null for void object inspector
case pi: PrimitiveObjectInspector => pi match {
- // We think HiveVarchar is also a String
+ // We think HiveVarchar/HiveChar is also a String
case hvoi: HiveVarcharObjectInspector if hvoi.preferWritable() =>
UTF8String.fromString(hvoi.getPrimitiveWritableObject(data).getHiveVarchar.getValue)
case hvoi: HiveVarcharObjectInspector =>
UTF8String.fromString(hvoi.getPrimitiveJavaObject(data).getValue)
+ case hvoi: HiveCharObjectInspector if hvoi.preferWritable() =>
+ UTF8String.fromString(hvoi.getPrimitiveWritableObject(data).getHiveChar.getValue)
+ case hvoi: HiveCharObjectInspector =>
+ UTF8String.fromString(hvoi.getPrimitiveJavaObject(data).getValue)
case x: StringObjectInspector if x.preferWritable() =>
UTF8String.fromString(x.getPrimitiveWritableObject(data).toString)
case x: StringObjectInspector =>
@@ -377,6 +387,15 @@ private[hive] trait HiveInspectors {
null
}
+ case _: JavaHiveCharObjectInspector =>
+ (o: Any) =>
+ if (o != null) {
+ val s = o.asInstanceOf[UTF8String].toString
+ new HiveChar(s, s.size)
+ } else {
+ null
+ }
+
case _: JavaHiveDecimalObjectInspector =>
(o: Any) =>
if (o != null) {
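For context on the HiveChar handling in this file, here is a minimal round-trip sketch (an illustration, not part of the patch) built only from calls that appear in the diff above:

    import org.apache.hadoop.hive.common.`type`.HiveChar
    import org.apache.spark.unsafe.types.UTF8String

    // Catalyst -> Hive: same shape as the JavaHiveCharObjectInspector wrapper
    // above, sizing the HiveChar to the string itself.
    val s = UTF8String.fromString("spark").toString
    val hc = new HiveChar(s, s.size)

    // Hive -> Catalyst: same shape as the HiveCharObjectInspector unwrap cases above.
    val back = UTF8String.fromString(hc.getValue)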
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala
index 69f481c49a..70ee02823e 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala
@@ -382,6 +382,9 @@ private[hive] object HadoopTableReader extends HiveInspectors with Logging {
case oi: HiveVarcharObjectInspector =>
(value: Any, row: MutableRow, ordinal: Int) =>
row.update(ordinal, UTF8String.fromString(oi.getPrimitiveJavaObject(value).getValue))
+ case oi: HiveCharObjectInspector =>
+ (value: Any, row: MutableRow, ordinal: Int) =>
+ row.update(ordinal, UTF8String.fromString(oi.getPrimitiveJavaObject(value).getValue))
case oi: HiveDecimalObjectInspector =>
(value: Any, row: MutableRow, ordinal: Int) =>
row.update(ordinal, HiveShim.toCatalystDecimal(oi, value))
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala
index 48bbb21e6c..346840079b 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala
@@ -321,7 +321,8 @@ private[client] class Shim_v0_13 extends Shim_v0_12 {
def convertFilters(table: Table, filters: Seq[Expression]): String = {
// hive varchar is treated as catalyst string, but hive varchar can't be pushed down.
val varcharKeys = table.getPartitionKeys.asScala
- .filter(col => col.getType.startsWith(serdeConstants.VARCHAR_TYPE_NAME))
+ .filter(col => col.getType.startsWith(serdeConstants.VARCHAR_TYPE_NAME) ||
+ col.getType.startsWith(serdeConstants.CHAR_TYPE_NAME))
.map(col => col.getName).toSet
filters.collect {
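For context on this last hunk: the set built here collects the partition keys whose filters cannot be pushed down to the metastore, and the change adds char columns alongside varchar ones. A hedged sketch of the extended predicate (the helper name is hypothetical; serdeConstants.VARCHAR_TYPE_NAME and serdeConstants.CHAR_TYPE_NAME are "varchar" and "char"):

    import org.apache.hadoop.hive.serde.serdeConstants

    // "varchar(20)" and "char(10)" both match; plain "string" does not,
    // so only filters on other key types keep metastore pushdown.
    def isUnpushableKeyType(colType: String): Boolean =
      colType.startsWith(serdeConstants.VARCHAR_TYPE_NAME) ||
        colType.startsWith(serdeConstants.CHAR_TYPE_NAME)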