aboutsummaryrefslogtreecommitdiff
path: root/sql/catalyst/src/main
diff options
context:
space:
mode:
authorWenchen Fan <wenchen@databricks.com>2017-01-12 22:52:34 -0800
committergatorsmile <gatorsmile@gmail.com>2017-01-12 22:52:34 -0800
commit6b34e745bb8bdcf5a8bb78359fa39bbe8c6563cc (patch)
treeec818366a8134bbe1a62e0770e712694410d9860 /sql/catalyst/src/main
parent7f24a0b6c32c56a38cf879d953bbd523922ab9c9 (diff)
downloadspark-6b34e745bb8bdcf5a8bb78359fa39bbe8c6563cc.tar.gz
spark-6b34e745bb8bdcf5a8bb78359fa39bbe8c6563cc.tar.bz2
spark-6b34e745bb8bdcf5a8bb78359fa39bbe8c6563cc.zip
[SPARK-19178][SQL] convert string of large numbers to int should return null
## What changes were proposed in this pull request? When we convert a string to an integral type, we first convert the string to `decimal(20, 0)`, so that a string in decimal format can be truncated to an integral value, e.g. `CAST('1.2' AS int)` returns `1`. However, this causes problems when we convert a string containing a large number to an integral type, e.g. `CAST('1234567890123' AS int)` returns `1912276171`, while Hive returns null, as expected. This is a long-standing bug (it seems to have been there since the first day Spark SQL was created). This PR fixes it by adding native support for converting `UTF8String` to integral types. ## How was this patch tested? New regression tests. Author: Wenchen Fan <wenchen@databricks.com> Closes #16550 from cloud-fan/string-to-int.
Diffstat (limited to 'sql/catalyst/src/main')
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala16
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala18
2 files changed, 9 insertions, 25 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
index cd73f9c897..5f72fa8536 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
@@ -51,7 +51,6 @@ object TypeCoercion {
PromoteStrings ::
DecimalPrecision ::
BooleanEquality ::
- StringToIntegralCasts ::
FunctionArgumentConversion ::
CaseWhenCoercion ::
IfCoercion ::
@@ -429,21 +428,6 @@ object TypeCoercion {
}
/**
- * When encountering a cast from a string representing a valid fractional number to an integral
- * type the jvm will throw a `java.lang.NumberFormatException`. Hive, in contrast, returns the
- * truncated version of this number.
- */
- object StringToIntegralCasts extends Rule[LogicalPlan] {
- def apply(plan: LogicalPlan): LogicalPlan = plan resolveExpressions {
- // Skip nodes who's children have not been resolved yet.
- case e if !e.childrenResolved => e
-
- case Cast(e @ StringType(), t: IntegralType) =>
- Cast(Cast(e, DecimalType.forType(LongType)), t)
- }
- }
-
- /**
* This ensure that the types for various functions are as expected.
*/
object FunctionArgumentConversion extends Rule[LogicalPlan] {
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
index 741730e3e0..14e275bf88 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
@@ -247,7 +247,7 @@ case class Cast(child: Expression, dataType: DataType) extends UnaryExpression w
// LongConverter
private[this] def castToLong(from: DataType): Any => Any = from match {
case StringType =>
- buildCast[UTF8String](_, s => try s.toString.toLong catch {
+ buildCast[UTF8String](_, s => try s.toLong catch {
case _: NumberFormatException => null
})
case BooleanType =>
@@ -263,7 +263,7 @@ case class Cast(child: Expression, dataType: DataType) extends UnaryExpression w
// IntConverter
private[this] def castToInt(from: DataType): Any => Any = from match {
case StringType =>
- buildCast[UTF8String](_, s => try s.toString.toInt catch {
+ buildCast[UTF8String](_, s => try s.toInt catch {
case _: NumberFormatException => null
})
case BooleanType =>
@@ -279,7 +279,7 @@ case class Cast(child: Expression, dataType: DataType) extends UnaryExpression w
// ShortConverter
private[this] def castToShort(from: DataType): Any => Any = from match {
case StringType =>
- buildCast[UTF8String](_, s => try s.toString.toShort catch {
+ buildCast[UTF8String](_, s => try s.toShort catch {
case _: NumberFormatException => null
})
case BooleanType =>
@@ -295,7 +295,7 @@ case class Cast(child: Expression, dataType: DataType) extends UnaryExpression w
// ByteConverter
private[this] def castToByte(from: DataType): Any => Any = from match {
case StringType =>
- buildCast[UTF8String](_, s => try s.toString.toByte catch {
+ buildCast[UTF8String](_, s => try s.toByte catch {
case _: NumberFormatException => null
})
case BooleanType =>
@@ -498,7 +498,7 @@ case class Cast(child: Expression, dataType: DataType) extends UnaryExpression w
s"""
boolean $resultNull = $childNull;
${ctx.javaType(resultType)} $resultPrim = ${ctx.defaultValue(resultType)};
- if (!${childNull}) {
+ if (!$childNull) {
${cast(childPrim, resultPrim, resultNull)}
}
"""
@@ -705,7 +705,7 @@ case class Cast(child: Expression, dataType: DataType) extends UnaryExpression w
(c, evPrim, evNull) =>
s"""
try {
- $evPrim = Byte.valueOf($c.toString());
+ $evPrim = $c.toByte();
} catch (java.lang.NumberFormatException e) {
$evNull = true;
}
@@ -727,7 +727,7 @@ case class Cast(child: Expression, dataType: DataType) extends UnaryExpression w
(c, evPrim, evNull) =>
s"""
try {
- $evPrim = Short.valueOf($c.toString());
+ $evPrim = $c.toShort();
} catch (java.lang.NumberFormatException e) {
$evNull = true;
}
@@ -749,7 +749,7 @@ case class Cast(child: Expression, dataType: DataType) extends UnaryExpression w
(c, evPrim, evNull) =>
s"""
try {
- $evPrim = Integer.valueOf($c.toString());
+ $evPrim = $c.toInt();
} catch (java.lang.NumberFormatException e) {
$evNull = true;
}
@@ -771,7 +771,7 @@ case class Cast(child: Expression, dataType: DataType) extends UnaryExpression w
(c, evPrim, evNull) =>
s"""
try {
- $evPrim = Long.valueOf($c.toString());
+ $evPrim = $c.toLong();
} catch (java.lang.NumberFormatException e) {
$evNull = true;
}