path: root/sql/core
author     Liang-Chi Hsieh <viirya@gmail.com>    2015-05-18 01:10:55 -0700
committer  Reynold Xin <rxin@databricks.com>     2015-05-18 01:10:55 -0700
commit     e32c0f69f38ad729e25c2d5f90eb73b4453f8279 (patch)
tree       3e2ba4e52f80713de2979316a9063186cb7dc6b6 /sql/core
parent     775e6f9909d4495cbc11c377508b43482d782742 (diff)
download   spark-e32c0f69f38ad729e25c2d5f90eb73b4453f8279.tar.gz
           spark-e32c0f69f38ad729e25c2d5f90eb73b4453f8279.tar.bz2
           spark-e32c0f69f38ad729e25c2d5f90eb73b4453f8279.zip
[SPARK-7299][SQL] Set precision and scale for Decimal according to JDBC metadata instead of returned BigDecimal
JIRA: https://issues.apache.org/jira/browse/SPARK-7299

When connecting to an Oracle DB through JDBC, the precision and scale of the `BigDecimal` object returned by `ResultSet.getBigDecimal` do not always match the table schema reported by `ResultSetMetaData.getPrecision` and `ResultSetMetaData.getScale`. For example, if you insert a value such as `19999` into a column of type `NUMBER(12, 2)`, you get back a `BigDecimal` object with scale 0, even though the DataFrame schema has the correct type `DecimalType(12, 2)`. As a result, after saving the DataFrame to a Parquet file and reading it back, you get the wrong result `199.99`.

Because this problem has only been reported on JDBC connections to an Oracle DB, it is difficult to add a test case for it. According to the user's test on JIRA, however, this change solves the problem.

Author: Liang-Chi Hsieh <viirya@gmail.com>

Closes #5833 from viirya/jdbc_decimal_precision and squashes the following commits:

69bc2b5 [Liang-Chi Hsieh] Merge remote-tracking branch 'upstream/master' into jdbc_decimal_precision
928f864 [Liang-Chi Hsieh] Add comments.
5f9da94 [Liang-Chi Hsieh] Set up Decimal's precision and scale according to table schema instead of returned BigDecimal.
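The mismatch described above is easy to reproduce with `java.math.BigDecimal` alone. The following is a minimal sketch of the failure mode, not part of the patch; it uses the example values from the commit message, and plain `setScale` stands in for the rescaling that the patch performs via `Decimal(decimalVal, p, s)`:

```scala
import java.math.BigDecimal

// What the Oracle JDBC driver returns for 19999 stored in a NUMBER(12, 2) column:
val fromJdbc = new BigDecimal("19999")                  // scale 0, unscaled value 19999

// Parquet persists decimals as unscaled values against the schema's scale, so
// reading this back under DecimalType(12, 2) reinterprets it as 19999 * 10^-2:
val reread = new BigDecimal(fromJdbc.unscaledValue, 2)
println(reread)                                         // 199.99 -- the wrong result

// Rescaling to the schema's scale before writing preserves the value:
val rescaled = fromJdbc.setScale(2)
println(rescaled)                                       // 19999.00
```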
Diffstat (limited to 'sql/core')
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala | 23 +++++++++++++++++++----
1 file changed, 19 insertions(+), 4 deletions(-)
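For context before the diff itself, the same defensive rescaling can be sketched with plain JDBC outside Spark. This is a hypothetical example: the connection URL, table, and column names are invented; the key point, mirroring the patch, is to take the scale from `ResultSetMetaData` rather than from the returned `BigDecimal`:

```scala
import java.sql.DriverManager

object DecimalScaleDemo extends App {
  // Hypothetical Oracle connection; any JDBC source with a NUMBER/DECIMAL column will do.
  val conn = DriverManager.getConnection("jdbc:oracle:thin:@//dbhost:1521/orcl", "user", "pass")
  try {
    val rs = conn.createStatement().executeQuery("SELECT amount FROM payments")
    // Trust the column metadata, not the scale of whatever BigDecimal comes back.
    val scale = rs.getMetaData.getScale(1)
    while (rs.next()) {
      val raw = rs.getBigDecimal(1)        // Oracle may return this with scale 0
      val amount = if (raw == null) null else raw.setScale(scale)
      println(amount)
    }
  } finally {
    conn.close()
  }
}
```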
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala
index 95935ba874..4189dfcf95 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala
@@ -300,7 +300,7 @@ private[sql] class JDBCRDD(
abstract class JDBCConversion
case object BooleanConversion extends JDBCConversion
case object DateConversion extends JDBCConversion
- case object DecimalConversion extends JDBCConversion
+ case class DecimalConversion(precisionInfo: Option[(Int, Int)]) extends JDBCConversion
case object DoubleConversion extends JDBCConversion
case object FloatConversion extends JDBCConversion
case object IntegerConversion extends JDBCConversion
@@ -317,8 +317,8 @@ private[sql] class JDBCRDD(
schema.fields.map(sf => sf.dataType match {
case BooleanType => BooleanConversion
case DateType => DateConversion
- case DecimalType.Unlimited => DecimalConversion
- case DecimalType.Fixed(d) => DecimalConversion
+ case DecimalType.Unlimited => DecimalConversion(None)
+ case DecimalType.Fixed(d) => DecimalConversion(Some(d))
case DoubleType => DoubleConversion
case FloatType => FloatConversion
case IntegerType => IntegerConversion
@@ -375,7 +375,22 @@ private[sql] class JDBCRDD(
} else {
mutableRow.update(i, null)
}
- case DecimalConversion =>
+ // When connecting to an Oracle DB through JDBC, the precision and scale of the
+ // BigDecimal object returned by ResultSet.getBigDecimal do not always match the
+ // table schema reported by ResultSetMetaData.getPrecision and
+ // ResultSetMetaData.getScale. If a value like 19999 is inserted into a column
+ // with NUMBER(12, 2) type, a BigDecimal object with scale 0 comes back, while
+ // the DataFrame schema has the correct type DecimalType(12, 2). Thus, after
+ // saving the DataFrame into a Parquet file and reading it back, the wrong
+ // result 199.99 is returned.
+ // So the precision and scale for Decimal need to be set based on JDBC metadata.
+ case DecimalConversion(Some((p, s))) =>
+ val decimalVal = rs.getBigDecimal(pos)
+ if (decimalVal == null) {
+ mutableRow.update(i, null)
+ } else {
+ mutableRow.update(i, Decimal(decimalVal, p, s))
+ }
+ case DecimalConversion(None) =>
val decimalVal = rs.getBigDecimal(pos)
if (decimalVal == null) {
mutableRow.update(i, null)