author     Matei Zaharia <matei@databricks.com>       2014-11-01 19:29:14 -0700
committer  Michael Armbrust <michael@databricks.com>  2014-11-01 19:29:14 -0700
commit     23f966f47523f85ba440b4080eee665271f53b5e (patch)
tree       d796351567f8b187511b9049199cbf99c5826fb3 /sql/hive
parent     56f2c61cde3f5d906c2a58e9af1a661222f2c679 (diff)
[SPARK-3930] [SPARK-3933] Support fixed-precision decimal in SQL, and some optimizations
- Adds optional precision and scale to Spark SQL's decimal type, which behave similarly to those in Hive 13 (https://cwiki.apache.org/confluence/download/attachments/27362075/Hive_Decimal_Precision_Scale_Support.pdf)
- Replaces our internal representation of decimals with a Decimal class that can store small values in a mutable Long, saving memory in this situation and letting some operations happen directly on Longs

This is still marked WIP because there are a few TODOs, but I'll remove that tag when done.

Author: Matei Zaharia <matei@databricks.com>

Closes #2983 from mateiz/decimal-1 and squashes the following commits:

35e6b02 [Matei Zaharia] Fix issues after merge
227f24a [Matei Zaharia] Review comments
31f915e [Matei Zaharia] Implement Davies's suggestions in Python
eb84820 [Matei Zaharia] Support reading/writing decimals as fixed-length binary in Parquet
4dc6bae [Matei Zaharia] Fix decimal support in PySpark
d1d9d68 [Matei Zaharia] Fix compile error and test issues after rebase
b28933d [Matei Zaharia] Support decimal precision/scale in Hive metastore
2118c0d [Matei Zaharia] Some test and bug fixes
81db9cb [Matei Zaharia] Added mutable Decimal that will be more efficient for small precisions
7af0c3b [Matei Zaharia] Add optional precision and scale to DecimalType, but use Unlimited for now
ec0a947 [Matei Zaharia] Make the result of AVG on Decimals be Decimal, not Double
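
As a rough illustration of the Decimal representation described above (names here are invented; the real class is org.apache.spark.sql.catalyst.types.decimal.Decimal and additionally handles overflow and precision checks), small values live in an unscaled Long and only spill to BigDecimal when they must:

// Minimal sketch of the small-value optimization; names are hypothetical.
final class SimpleDecimal {
  private var longVal: Long = 0L          // unscaled value, used while it fits in a Long
  private var bigVal: BigDecimal = null   // fallback for values that do not fit
  private var _scale: Int = 0

  def set(unscaled: Long, scale: Int): SimpleDecimal = {
    longVal = unscaled; bigVal = null; _scale = scale; this
  }

  def set(value: BigDecimal): SimpleDecimal = { bigVal = value; this }

  // Same-scale addition can stay on Longs; anything else falls back to BigDecimal.
  def +(that: SimpleDecimal): SimpleDecimal =
    if (bigVal == null && that.bigVal == null && _scale == that._scale)
      new SimpleDecimal().set(longVal + that.longVal, _scale)   // overflow check elided
    else
      new SimpleDecimal().set(this.toBigDecimal + that.toBigDecimal)

  def toBigDecimal: BigDecimal =
    if (bigVal != null) bigVal else BigDecimal(longVal, _scale)
}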
Diffstat (limited to 'sql/hive')
-rw-r--r--  sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala                   |  9
-rw-r--r--  sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala                | 24
-rw-r--r--  sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala          | 14
-rw-r--r--  sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala                        | 15
-rw-r--r--  sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala |  3
-rw-r--r--  sql/hive/v0.12.0/src/main/scala/org/apache/spark/sql/hive/Shim12.scala                | 22
-rw-r--r--  sql/hive/v0.13.1/src/main/scala/org/apache/spark/sql/hive/Shim13.scala                | 39
7 files changed, 95 insertions, 31 deletions
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
index ff8fa44194..2e27817d60 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
@@ -21,6 +21,10 @@ import java.io.{BufferedReader, File, InputStreamReader, PrintStream}
import java.sql.{Date, Timestamp}
import java.util.{ArrayList => JArrayList}
+import org.apache.hadoop.hive.common.`type`.HiveDecimal
+import org.apache.spark.sql.catalyst.types.DecimalType
+import org.apache.spark.sql.catalyst.types.decimal.Decimal
+
import scala.collection.JavaConversions._
import scala.language.implicitConversions
import scala.reflect.runtime.universe.{TypeTag, typeTag}
@@ -370,7 +374,7 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
protected val primitiveTypes =
Seq(StringType, IntegerType, LongType, DoubleType, FloatType, BooleanType, ByteType,
- ShortType, DecimalType, DateType, TimestampType, BinaryType)
+ ShortType, DateType, TimestampType, BinaryType)
protected[sql] def toHiveString(a: (Any, DataType)): String = a match {
case (struct: Row, StructType(fields)) =>
@@ -388,6 +392,8 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
case (d: Date, DateType) => new DateWritable(d).toString
case (t: Timestamp, TimestampType) => new TimestampWritable(t).toString
case (bin: Array[Byte], BinaryType) => new String(bin, "UTF-8")
+ case (decimal: Decimal, DecimalType()) => // Hive strips trailing zeros so use its toString
+ HiveShim.createDecimal(decimal.toBigDecimal.underlying()).toString
case (other, tpe) if primitiveTypes contains tpe => other.toString
}
@@ -406,6 +412,7 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
}.toSeq.sorted.mkString("{", ",", "}")
case (null, _) => "null"
case (s: String, StringType) => "\"" + s + "\""
+ case (decimal, DecimalType()) => decimal.toString
case (other, tpe) if primitiveTypes contains tpe => other.toString
}
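
The "Hive strips trailing zeros" comment above is why toHiveString routes decimals through HiveShim.createDecimal instead of printing the Catalyst value directly. java.math.BigDecimal illustrates the difference (standalone example, not the patched code):

// BigDecimal preserves trailing zeros; stripping them matches Hive's output.
val d = new java.math.BigDecimal("1.2300")
println(d.toPlainString)                     // 1.2300
println(d.stripTrailingZeros.toPlainString)  // 1.23, the form Hive prints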
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala
index 0439ab97d8..1e2bf5cc4b 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala
@@ -28,6 +28,7 @@ import org.apache.hadoop.{io => hadoopIo}
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.types
import org.apache.spark.sql.catalyst.types._
+import org.apache.spark.sql.catalyst.types.decimal.Decimal
/* Implicit conversions */
import scala.collection.JavaConversions._
@@ -38,7 +39,7 @@ private[hive] trait HiveInspectors {
// writable
case c: Class[_] if c == classOf[hadoopIo.DoubleWritable] => DoubleType
case c: Class[_] if c == classOf[hiveIo.DoubleWritable] => DoubleType
- case c: Class[_] if c == classOf[hiveIo.HiveDecimalWritable] => DecimalType
+ case c: Class[_] if c == classOf[hiveIo.HiveDecimalWritable] => DecimalType.Unlimited
case c: Class[_] if c == classOf[hiveIo.ByteWritable] => ByteType
case c: Class[_] if c == classOf[hiveIo.ShortWritable] => ShortType
case c: Class[_] if c == classOf[hiveIo.DateWritable] => DateType
@@ -54,8 +55,8 @@ private[hive] trait HiveInspectors {
case c: Class[_] if c == classOf[java.lang.String] => StringType
case c: Class[_] if c == classOf[java.sql.Date] => DateType
case c: Class[_] if c == classOf[java.sql.Timestamp] => TimestampType
- case c: Class[_] if c == classOf[HiveDecimal] => DecimalType
- case c: Class[_] if c == classOf[java.math.BigDecimal] => DecimalType
+ case c: Class[_] if c == classOf[HiveDecimal] => DecimalType.Unlimited
+ case c: Class[_] if c == classOf[java.math.BigDecimal] => DecimalType.Unlimited
case c: Class[_] if c == classOf[Array[Byte]] => BinaryType
case c: Class[_] if c == classOf[java.lang.Short] => ShortType
case c: Class[_] if c == classOf[java.lang.Integer] => IntegerType
@@ -90,7 +91,7 @@ private[hive] trait HiveInspectors {
case hvoi: HiveVarcharObjectInspector =>
if (data == null) null else hvoi.getPrimitiveJavaObject(data).getValue
case hdoi: HiveDecimalObjectInspector =>
- if (data == null) null else BigDecimal(hdoi.getPrimitiveJavaObject(data).bigDecimalValue())
+ if (data == null) null else HiveShim.toCatalystDecimal(hdoi, data)
// org.apache.hadoop.hive.serde2.io.TimestampWritable.set will reset current time object
// if next timestamp is null, so Timestamp object is cloned
case ti: TimestampObjectInspector => ti.getPrimitiveJavaObject(data).clone()
@@ -137,8 +138,9 @@ private[hive] trait HiveInspectors {
case l: Short => l: java.lang.Short
case l: Byte => l: java.lang.Byte
case b: BigDecimal => HiveShim.createDecimal(b.underlying())
+ case d: Decimal => HiveShim.createDecimal(d.toBigDecimal.underlying())
case b: Array[Byte] => b
- case d: java.sql.Date => d
+ case d: java.sql.Date => d
case t: java.sql.Timestamp => t
}
case x: StructObjectInspector =>
@@ -200,7 +202,7 @@ private[hive] trait HiveInspectors {
case BinaryType => PrimitiveObjectInspectorFactory.javaByteArrayObjectInspector
case DateType => PrimitiveObjectInspectorFactory.javaDateObjectInspector
case TimestampType => PrimitiveObjectInspectorFactory.javaTimestampObjectInspector
- case DecimalType => PrimitiveObjectInspectorFactory.javaHiveDecimalObjectInspector
+ case DecimalType() => PrimitiveObjectInspectorFactory.javaHiveDecimalObjectInspector
case StructType(fields) =>
ObjectInspectorFactory.getStandardStructObjectInspector(
fields.map(f => f.name), fields.map(f => toInspector(f.dataType)))
@@ -229,8 +231,10 @@ private[hive] trait HiveInspectors {
HiveShim.getPrimitiveWritableConstantObjectInspector(value)
case Literal(value: java.sql.Timestamp, TimestampType) =>
HiveShim.getPrimitiveWritableConstantObjectInspector(value)
- case Literal(value: BigDecimal, DecimalType) =>
+ case Literal(value: BigDecimal, DecimalType()) =>
HiveShim.getPrimitiveWritableConstantObjectInspector(value)
+ case Literal(value: Decimal, DecimalType()) =>
+ HiveShim.getPrimitiveWritableConstantObjectInspector(value.toBigDecimal)
case Literal(_, NullType) =>
HiveShim.getPrimitiveNullWritableConstantObjectInspector
case Literal(value: Seq[_], ArrayType(dt, _)) =>
@@ -277,8 +281,8 @@ private[hive] trait HiveInspectors {
case _: JavaFloatObjectInspector => FloatType
case _: WritableBinaryObjectInspector => BinaryType
case _: JavaBinaryObjectInspector => BinaryType
- case _: WritableHiveDecimalObjectInspector => DecimalType
- case _: JavaHiveDecimalObjectInspector => DecimalType
+ case w: WritableHiveDecimalObjectInspector => HiveShim.decimalTypeInfoToCatalyst(w)
+ case j: JavaHiveDecimalObjectInspector => HiveShim.decimalTypeInfoToCatalyst(j)
case _: WritableDateObjectInspector => DateType
case _: JavaDateObjectInspector => DateType
case _: WritableTimestampObjectInspector => TimestampType
@@ -307,7 +311,7 @@ private[hive] trait HiveInspectors {
case LongType => longTypeInfo
case ShortType => shortTypeInfo
case StringType => stringTypeInfo
- case DecimalType => decimalTypeInfo
+ case d: DecimalType => HiveShim.decimalTypeInfo(d)
case DateType => dateTypeInfo
case TimestampType => timestampTypeInfo
case NullType => voidTypeInfo
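
Several hunks above replace "case DecimalType =>" with "case DecimalType() =>", which only works once DecimalType stops being a singleton: carrying optional precision/scale, it needs a boolean extractor that matches any decimal. A standalone model of that pattern (simplified, with hypothetical field names):

sealed trait DataType
class DecimalType(val precisionInfo: Option[(Int, Int)]) extends DataType

object DecimalType {
  val Unlimited = new DecimalType(None)
  def apply(precision: Int, scale: Int) = new DecimalType(Some((precision, scale)))
  // Boolean extractor: "case DecimalType() =>" matches any decimal, fixed or unlimited.
  def unapply(t: DataType): Boolean = t.isInstanceOf[DecimalType]
  object Fixed {
    def unapply(t: DecimalType): Option[(Int, Int)] = t.precisionInfo
  }
}

object Demo {
  def describe(t: DataType): String = t match {
    case DecimalType.Fixed(p, s) => s"decimal($p,$s)"  // precision and scale known
    case DecimalType()           => "decimal"          // unlimited
    case _                       => "other"
  }
}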
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
index 2dd2c882a8..096b4a07aa 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
@@ -20,6 +20,7 @@ package org.apache.spark.sql.hive
import java.io.IOException
import java.util.{List => JList}
+import scala.util.matching.Regex
import scala.util.parsing.combinator.RegexParsers
import org.apache.hadoop.util.ReflectionUtils
@@ -321,11 +322,18 @@ object HiveMetastoreTypes extends RegexParsers {
"bigint" ^^^ LongType |
"binary" ^^^ BinaryType |
"boolean" ^^^ BooleanType |
- HiveShim.metastoreDecimal ^^^ DecimalType |
+ fixedDecimalType | // Hive 0.13+ decimal with precision/scale
+ "decimal" ^^^ DecimalType.Unlimited | // Hive 0.12 decimal with no precision/scale
"date" ^^^ DateType |
"timestamp" ^^^ TimestampType |
"varchar\\((\\d+)\\)".r ^^^ StringType
+ protected lazy val fixedDecimalType: Parser[DataType] =
+ ("decimal" ~> "(" ~> "\\d+".r) ~ ("," ~> "\\d+".r <~ ")") ^^ {
+ case precision ~ scale =>
+ DecimalType(precision.toInt, scale.toInt)
+ }
+
protected lazy val arrayType: Parser[DataType] =
"array" ~> "<" ~> dataType <~ ">" ^^ {
case tpe => ArrayType(tpe)
@@ -373,7 +381,7 @@ object HiveMetastoreTypes extends RegexParsers {
case BinaryType => "binary"
case BooleanType => "boolean"
case DateType => "date"
- case DecimalType => "decimal"
+ case d: DecimalType => HiveShim.decimalMetastoreString(d)
case TimestampType => "timestamp"
case NullType => "void"
}
@@ -441,7 +449,7 @@ private[hive] case class MetastoreRelation
val partitionKeys = hiveQlTable.getPartitionKeys.map(_.toAttribute)
/** Non-partitionKey attributes */
- val attributes = hiveQlTable.getCols.map(_.toAttribute)
+ val attributes = hiveQlTable.getCols.map(_.toAttribute)
val output = attributes ++ partitionKeys
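
fixedDecimalType above is a small scala.util.parsing.combinator grammar. A self-contained sketch of the same shape (result types invented for the example; like the parser above, the precise form must be tried before the bare keyword):

import scala.util.parsing.combinator.RegexParsers

object DecimalTypeParser extends RegexParsers {
  sealed trait Result
  case class Fixed(precision: Int, scale: Int) extends Result
  case object Unlimited extends Result

  // "decimal(p,s)" -> Fixed(p, s)
  lazy val fixedDecimal: Parser[Result] =
    ("decimal" ~> "(" ~> "\\d+".r) ~ ("," ~> "\\d+".r <~ ")") ^^ {
      case precision ~ scale => Fixed(precision.toInt, scale.toInt)
    }

  // Try the precise form first; otherwise "decimal" would match and leave "(10,2)" unread.
  lazy val decimal: Parser[Result] = fixedDecimal | "decimal" ^^^ Unlimited

  def parse(s: String): Result = parseAll(decimal, s).get
}

// DecimalTypeParser.parse("decimal(10,2)") == Fixed(10, 2)
// DecimalTypeParser.parse("decimal")       == Unlimited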
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
index a3573e6502..74f68d0f95 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
@@ -31,6 +31,7 @@ import org.apache.spark.sql.catalyst.plans._
import org.apache.spark.sql.catalyst.plans.logical
import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.catalyst.types._
+import org.apache.spark.sql.catalyst.types.decimal.Decimal
/* Implicit conversions */
import scala.collection.JavaConversions._
@@ -325,7 +326,11 @@ private[hive] object HiveQl {
}
protected def nodeToDataType(node: Node): DataType = node match {
- case Token("TOK_DECIMAL", Nil) => DecimalType
+ case Token("TOK_DECIMAL", precision :: scale :: Nil) =>
+ DecimalType(precision.getText.toInt, scale.getText.toInt)
+ case Token("TOK_DECIMAL", precision :: Nil) =>
+ DecimalType(precision.getText.toInt, 0)
+ case Token("TOK_DECIMAL", Nil) => DecimalType.Unlimited
case Token("TOK_BIGINT", Nil) => LongType
case Token("TOK_INT", Nil) => IntegerType
case Token("TOK_TINYINT", Nil) => ByteType
@@ -942,8 +947,12 @@ private[hive] object HiveQl {
Cast(nodeToExpr(arg), BinaryType)
case Token("TOK_FUNCTION", Token("TOK_BOOLEAN", Nil) :: arg :: Nil) =>
Cast(nodeToExpr(arg), BooleanType)
+ case Token("TOK_FUNCTION", Token("TOK_DECIMAL", precision :: scale :: nil) :: arg :: Nil) =>
+ Cast(nodeToExpr(arg), DecimalType(precision.getText.toInt, scale.getText.toInt))
+ case Token("TOK_FUNCTION", Token("TOK_DECIMAL", precision :: Nil) :: arg :: Nil) =>
+ Cast(nodeToExpr(arg), DecimalType(precision.getText.toInt, 0))
case Token("TOK_FUNCTION", Token("TOK_DECIMAL", Nil) :: arg :: Nil) =>
- Cast(nodeToExpr(arg), DecimalType)
+ Cast(nodeToExpr(arg), DecimalType.Unlimited)
case Token("TOK_FUNCTION", Token("TOK_TIMESTAMP", Nil) :: arg :: Nil) =>
Cast(nodeToExpr(arg), TimestampType)
case Token("TOK_FUNCTION", Token("TOK_DATE", Nil) :: arg :: Nil) =>
@@ -1063,7 +1072,7 @@ private[hive] object HiveQl {
} else if (ast.getText.endsWith("BD") || ast.getText.endsWith("D")) {
// Literal decimal
val strVal = ast.getText.stripSuffix("D").stripSuffix("B")
- v = Literal(BigDecimal(strVal))
+ v = Literal(Decimal(strVal))
} else {
v = Literal(ast.getText.toDouble, DoubleType)
v = Literal(ast.getText.toLong, LongType)
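
nodeToDataType and the cast rules above handle three arities of TOK_DECIMAL, with scale defaulting to 0 when only a precision is given. A compact model (Token here is a stand-in; the real code walks Hive ASTNodes). Note the list terminator must be Nil: a lowercase nil in a pattern is a fresh variable that silently matches any tail.

case class Token(name: String, children: List[String])

object DecimalArity {
  def describe(t: Token): String = t match {
    case Token("TOK_DECIMAL", p :: s :: Nil) => s"DecimalType($p, $s)"   // decimal(p,s)
    case Token("TOK_DECIMAL", p :: Nil)      => s"DecimalType($p, 0)"    // decimal(p), scale defaults to 0
    case Token("TOK_DECIMAL", Nil)           => "DecimalType.Unlimited"  // bare decimal
    case _                                   => "not a decimal"
  }
}

// DecimalArity.describe(Token("TOK_DECIMAL", List("10", "2"))) == "DecimalType(10, 2)"
// DecimalArity.describe(Token("TOK_DECIMAL", List("10")))      == "DecimalType(10, 0)"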
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala
index 79234f8a66..92bc1c6625 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala
@@ -35,6 +35,7 @@ import org.apache.hadoop.mapred.{FileOutputCommitter, FileOutputFormat, JobConf}
import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.catalyst.expressions.Row
+import org.apache.spark.sql.catalyst.types.decimal.Decimal
import org.apache.spark.sql.execution.{Command, SparkPlan, UnaryNode}
import org.apache.spark.sql.hive._
import org.apache.spark.sql.hive.{ ShimFileSinkDesc => FileSinkDesc}
@@ -76,7 +77,7 @@ case class InsertIntoHiveTable(
(o: Any) => new HiveVarchar(o.asInstanceOf[String], o.asInstanceOf[String].size)
case _: JavaHiveDecimalObjectInspector =>
- (o: Any) => HiveShim.createDecimal(o.asInstanceOf[BigDecimal].underlying())
+ (o: Any) => HiveShim.createDecimal(o.asInstanceOf[Decimal].toBigDecimal.underlying())
case soi: StandardStructObjectInspector =>
val wrappers = soi.getAllStructFieldRefs.map(ref => wrapperFor(ref.getFieldObjectInspector))
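
wrapperFor above resolves the Catalyst-to-Hive conversion once per field inspector rather than re-dispatching on type for every row value. A minimal sketch of that pattern with invented inspector types:

object Wrappers {
  sealed trait Inspector
  case object StringInspector  extends Inspector
  case object DecimalInspector extends Inspector

  // Chosen once per column; the returned function is then applied to each row value.
  def wrapperFor(inspector: Inspector): Any => Any = inspector match {
    case StringInspector  => (o: Any) => o.toString
    case DecimalInspector => (o: Any) => o.asInstanceOf[BigDecimal].underlying() // to java.math.BigDecimal
  }
}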
diff --git a/sql/hive/v0.12.0/src/main/scala/org/apache/spark/sql/hive/Shim12.scala b/sql/hive/v0.12.0/src/main/scala/org/apache/spark/sql/hive/Shim12.scala
index afc252ac27..8e946b7e82 100644
--- a/sql/hive/v0.12.0/src/main/scala/org/apache/spark/sql/hive/Shim12.scala
+++ b/sql/hive/v0.12.0/src/main/scala/org/apache/spark/sql/hive/Shim12.scala
@@ -30,21 +30,24 @@ import org.apache.hadoop.hive.ql.plan.{CreateTableDesc, FileSinkDesc, TableDesc}
import org.apache.hadoop.hive.ql.processors._
import org.apache.hadoop.hive.ql.stats.StatsSetupConst
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.{HiveDecimalObjectInspector, PrimitiveObjectInspectorFactory}
+import org.apache.hadoop.hive.serde2.objectinspector.{PrimitiveObjectInspector, ObjectInspector}
+import org.apache.hadoop.hive.serde2.typeinfo.{TypeInfo, TypeInfoFactory}
import org.apache.hadoop.hive.serde2.{Deserializer, ColumnProjectionUtils}
import org.apache.hadoop.hive.serde2.{io => hiveIo}
import org.apache.hadoop.{io => hadoopIo}
import org.apache.hadoop.mapred.InputFormat
+import org.apache.spark.sql.catalyst.types.decimal.Decimal
import scala.collection.JavaConversions._
import scala.language.implicitConversions
+import org.apache.spark.sql.catalyst.types.DecimalType
+
/**
* A compatibility layer for interacting with Hive version 0.12.0.
*/
private[hive] object HiveShim {
val version = "0.12.0"
- val metastoreDecimal = "decimal"
def getTableDesc(
serdeClass: Class[_ <: Deserializer],
@@ -149,6 +152,19 @@ private[hive] object HiveShim {
def setLocation(tbl: Table, crtTbl: CreateTableDesc): Unit = {
tbl.setDataLocation(new Path(crtTbl.getLocation()).toUri())
}
+
+ def decimalMetastoreString(decimalType: DecimalType): String = "decimal"
+
+ def decimalTypeInfo(decimalType: DecimalType): TypeInfo =
+ TypeInfoFactory.decimalTypeInfo
+
+ def decimalTypeInfoToCatalyst(inspector: PrimitiveObjectInspector): DecimalType = {
+ DecimalType.Unlimited
+ }
+
+ def toCatalystDecimal(hdoi: HiveDecimalObjectInspector, data: Any): Decimal = {
+ Decimal(hdoi.getPrimitiveJavaObject(data).bigDecimalValue())
+ }
}
class ShimFileSinkDesc(var dir: String, var tableInfo: TableDesc, var compressed: Boolean)
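
The 0.12 shim erases precision and scale entirely: Hive 0.12 has a single, scale-free decimal type, so there is no metadata to write or read back. Sketched behind a common seam (a hypothetical trait for illustration; the real HiveShim objects are selected by build profile and source directory, not by a trait):

trait DecimalShim {
  def decimalMetastoreString(precisionInfo: Option[(Int, Int)]): String
}

object Shim12Decimal extends DecimalShim {
  // Hive 0.12: one decimal type, nothing to record.
  def decimalMetastoreString(precisionInfo: Option[(Int, Int)]): String = "decimal"
}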
diff --git a/sql/hive/v0.13.1/src/main/scala/org/apache/spark/sql/hive/Shim13.scala b/sql/hive/v0.13.1/src/main/scala/org/apache/spark/sql/hive/Shim13.scala
index 42cd65b251..0bc330cdbe 100644
--- a/sql/hive/v0.13.1/src/main/scala/org/apache/spark/sql/hive/Shim13.scala
+++ b/sql/hive/v0.13.1/src/main/scala/org/apache/spark/sql/hive/Shim13.scala
@@ -29,15 +29,15 @@ import org.apache.hadoop.hive.ql.Context
import org.apache.hadoop.hive.ql.metadata.{Table, Hive, Partition}
import org.apache.hadoop.hive.ql.plan.{CreateTableDesc, FileSinkDesc, TableDesc}
import org.apache.hadoop.hive.ql.processors.CommandProcessorFactory
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory
-import org.apache.hadoop.hive.serde2.{ColumnProjectionUtils, Deserializer}
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector
+import org.apache.hadoop.hive.serde2.typeinfo.{TypeInfo, DecimalTypeInfo, TypeInfoFactory}
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.{HiveDecimalObjectInspector, PrimitiveObjectInspectorFactory}
+import org.apache.hadoop.hive.serde2.objectinspector.{PrimitiveObjectInspector, ObjectInspector}
import org.apache.hadoop.hive.serde2.{Deserializer, ColumnProjectionUtils}
import org.apache.hadoop.hive.serde2.{io => hiveIo}
import org.apache.hadoop.{io => hadoopIo}
import org.apache.spark.Logging
+import org.apache.spark.sql.catalyst.types.DecimalType
+import org.apache.spark.sql.catalyst.types.decimal.Decimal
import scala.collection.JavaConversions._
import scala.language.implicitConversions
@@ -47,11 +47,6 @@ import scala.language.implicitConversions
*/
private[hive] object HiveShim {
val version = "0.13.1"
- /*
- * TODO: hive-0.13 support DECIMAL(precision, scale), DECIMAL in hive-0.12 is actually DECIMAL(38,unbounded)
- * Full support of new decimal feature need to be fixed in seperate PR.
- */
- val metastoreDecimal = "decimal\\((\\d+),(\\d+)\\)".r
def getTableDesc(
serdeClass: Class[_ <: Deserializer],
@@ -197,6 +192,30 @@ private[hive] object HiveShim {
f.setDestTableId(w.destTableId)
f
}
+
+ // Precision and scale to pass for unlimited decimals; these are the same as the precision and
+ // scale Hive 0.13 infers for BigDecimals from sources that don't specify them (e.g. UDFs)
+ private val UNLIMITED_DECIMAL_PRECISION = 38
+ private val UNLIMITED_DECIMAL_SCALE = 18
+
+ def decimalMetastoreString(decimalType: DecimalType): String = decimalType match {
+ case DecimalType.Fixed(precision, scale) => s"decimal($precision,$scale)"
+ case _ => s"decimal($UNLIMITED_DECIMAL_PRECISION,$UNLIMITED_DECIMAL_SCALE)"
+ }
+
+ def decimalTypeInfo(decimalType: DecimalType): TypeInfo = decimalType match {
+ case DecimalType.Fixed(precision, scale) => new DecimalTypeInfo(precision, scale)
+ case _ => new DecimalTypeInfo(UNLIMITED_DECIMAL_PRECISION, UNLIMITED_DECIMAL_SCALE)
+ }
+
+ def decimalTypeInfoToCatalyst(inspector: PrimitiveObjectInspector): DecimalType = {
+ val info = inspector.getTypeInfo.asInstanceOf[DecimalTypeInfo]
+ DecimalType(info.precision(), info.scale())
+ }
+
+ def toCatalystDecimal(hdoi: HiveDecimalObjectInspector, data: Any): Decimal = {
+ Decimal(hdoi.getPrimitiveJavaObject(data).bigDecimalValue(), hdoi.precision(), hdoi.scale())
+ }
}
/*
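
The 0.13 counterpart of the sketch above preserves precision and scale, falling back to the (38,18) defaults the diff adopts for unlimited decimals (the same values Hive 0.13 infers for BigDecimals from sources that declare none):

object Shim13Decimal {
  private val UnlimitedPrecision = 38
  private val UnlimitedScale = 18

  def decimalMetastoreString(precisionInfo: Option[(Int, Int)]): String = precisionInfo match {
    case Some((precision, scale)) => s"decimal($precision,$scale)"
    case None                     => s"decimal($UnlimitedPrecision,$UnlimitedScale)"
  }
}

// Shim13Decimal.decimalMetastoreString(Some((10, 2))) == "decimal(10,2)"
// Shim13Decimal.decimalMetastoreString(None)          == "decimal(38,18)"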