From 39a2b2ea74d420caa37019e3684f65b3a6fcb388 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Fri, 5 Aug 2016 16:11:54 +0100 Subject: [SPARK-16625][SQL] General data types to be mapped to Oracle ## What changes were proposed in this pull request? Spark will convert **BooleanType** to **BIT(1)**, **LongType** to **BIGINT**, **ByteType** to **BYTE** when saving DataFrame to Oracle, but Oracle does not support BIT, BIGINT and BYTE types. This PR is convert following _Spark Types_ to _Oracle types_ refer to [Oracle Developer's Guide](https://docs.oracle.com/cd/E19501-01/819-3659/gcmaz/) Spark Type | Oracle ----|---- BooleanType | NUMBER(1) IntegerType | NUMBER(10) LongType | NUMBER(19) FloatType | NUMBER(19, 4) DoubleType | NUMBER(19, 4) ByteType | NUMBER(3) ShortType | NUMBER(5) ## How was this patch tested? Add new tests in [JDBCSuite.scala](https://github.com/wangyum/spark/commit/22b0c2a4228cb8b5098ad741ddf4d1904e745ff6#diff-dc4b58851b084b274df6fe6b189db84d) and [OracleDialect.scala](https://github.com/wangyum/spark/commit/22b0c2a4228cb8b5098ad741ddf4d1904e745ff6#diff-5e0cadf526662f9281aa26315b3750ad) Author: Yuming Wang Closes #14377 from wangyum/SPARK-16625. --- .../org/apache/spark/sql/jdbc/OracleDialect.scala | 46 ++++++++++++++-------- .../org/apache/spark/sql/jdbc/JDBCSuite.scala | 21 ++++++++++ 2 files changed, 51 insertions(+), 16 deletions(-) (limited to 'sql/core/src') diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/OracleDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/OracleDialect.scala index ce8731efd1..f541996b65 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/OracleDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/OracleDialect.scala @@ -28,28 +28,42 @@ private case object OracleDialect extends JdbcDialect { override def getCatalystType( sqlType: Int, typeName: String, size: Int, md: MetadataBuilder): Option[DataType] = { - // Handle NUMBER fields that have no precision/scale in special way - // because JDBC ResultSetMetaData converts this to 0 precision and -127 scale - // For more details, please see - // https://github.com/apache/spark/pull/8780#issuecomment-145598968 - // and - // https://github.com/apache/spark/pull/8780#issuecomment-144541760 - if (sqlType == Types.NUMERIC && size == 0) { - // This is sub-optimal as we have to pick a precision/scale in advance whereas the data - // in Oracle is allowed to have different precision/scale for each value. - Option(DecimalType(DecimalType.MAX_PRECISION, 10)) - } else if (sqlType == Types.NUMERIC && md.build().getLong("scale") == -127) { - // Handle FLOAT fields in a special way because JDBC ResultSetMetaData converts - // this to NUMERIC with -127 scale - // Not sure if there is a more robust way to identify the field as a float (or other - // numeric types that do not specify a scale. - Option(DecimalType(DecimalType.MAX_PRECISION, 10)) + if (sqlType == Types.NUMERIC) { + val scale = if (null != md) md.build().getLong("scale") else 0L + size match { + // Handle NUMBER fields that have no precision/scale in special way + // because JDBC ResultSetMetaData converts this to 0 precision and -127 scale + // For more details, please see + // https://github.com/apache/spark/pull/8780#issuecomment-145598968 + // and + // https://github.com/apache/spark/pull/8780#issuecomment-144541760 + case 0 => Option(DecimalType(DecimalType.MAX_PRECISION, 10)) + // Handle FLOAT fields in a special way because JDBC ResultSetMetaData converts + // this to NUMERIC with -127 scale + // Not sure if there is a more robust way to identify the field as a float (or other + // numeric types that do not specify a scale. + case _ if scale == -127L => Option(DecimalType(DecimalType.MAX_PRECISION, 10)) + case 1 => Option(BooleanType) + case 3 | 5 | 10 => Option(IntegerType) + case 19 if scale == 0L => Option(LongType) + case 19 if scale == 4L => Option(FloatType) + case _ => None + } } else { None } } override def getJDBCType(dt: DataType): Option[JdbcType] = dt match { + // For more details, please see + // https://docs.oracle.com/cd/E19501-01/819-3659/gcmaz/ + case BooleanType => Some(JdbcType("NUMBER(1)", java.sql.Types.BOOLEAN)) + case IntegerType => Some(JdbcType("NUMBER(10)", java.sql.Types.INTEGER)) + case LongType => Some(JdbcType("NUMBER(19)", java.sql.Types.BIGINT)) + case FloatType => Some(JdbcType("NUMBER(19, 4)", java.sql.Types.FLOAT)) + case DoubleType => Some(JdbcType("NUMBER(19, 4)", java.sql.Types.DOUBLE)) + case ByteType => Some(JdbcType("NUMBER(3)", java.sql.Types.SMALLINT)) + case ShortType => Some(JdbcType("NUMBER(5)", java.sql.Types.SMALLINT)) case StringType => Some(JdbcType("VARCHAR2(255)", java.sql.Types.VARCHAR)) case _ => None } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala index 995b1200a2..2d8ee338a9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala @@ -739,6 +739,27 @@ class JDBCSuite extends SparkFunSuite map(_.databaseTypeDefinition).get == "VARCHAR2(255)") } + test("SPARK-16625: General data types to be mapped to Oracle") { + + def getJdbcType(dialect: JdbcDialect, dt: DataType): String = { + dialect.getJDBCType(dt).orElse(JdbcUtils.getCommonJDBCType(dt)). + map(_.databaseTypeDefinition).get + } + + val oracleDialect = JdbcDialects.get("jdbc:oracle://127.0.0.1/db") + assert(getJdbcType(oracleDialect, BooleanType) == "NUMBER(1)") + assert(getJdbcType(oracleDialect, IntegerType) == "NUMBER(10)") + assert(getJdbcType(oracleDialect, LongType) == "NUMBER(19)") + assert(getJdbcType(oracleDialect, FloatType) == "NUMBER(19, 4)") + assert(getJdbcType(oracleDialect, DoubleType) == "NUMBER(19, 4)") + assert(getJdbcType(oracleDialect, ByteType) == "NUMBER(3)") + assert(getJdbcType(oracleDialect, ShortType) == "NUMBER(5)") + assert(getJdbcType(oracleDialect, StringType) == "VARCHAR2(255)") + assert(getJdbcType(oracleDialect, BinaryType) == "BLOB") + assert(getJdbcType(oracleDialect, DateType) == "DATE") + assert(getJdbcType(oracleDialect, TimestampType) == "TIMESTAMP") + } + private def assertEmptyQuery(sqlString: String): Unit = { assert(sql(sqlString).collect().isEmpty) } -- cgit v1.2.3