author    Yuming Wang <wgyumg@gmail.com>    2016-08-05 16:11:54 +0100
committer Sean Owen <sowen@cloudera.com>    2016-08-05 16:11:54 +0100
commit    39a2b2ea74d420caa37019e3684f65b3a6fcb388 (patch)
tree      7d98547f7167129d7fd7b9b4843786de68026bc2 /external
parent    e026064143367e4614cb866e321cc521fdde3170 (diff)
[SPARK-16625][SQL] General data types to be mapped to Oracle
## What changes were proposed in this pull request?

Spark converts **BooleanType** to **BIT(1)**, **LongType** to **BIGINT**, and **ByteType** to **BYTE** when saving a DataFrame to Oracle, but Oracle supports none of the BIT, BIGINT, and BYTE types. This PR converts the following _Spark types_ to _Oracle types_, per the [Oracle Developer's Guide](https://docs.oracle.com/cd/E19501-01/819-3659/gcmaz/):

Spark Type | Oracle
----|----
BooleanType | NUMBER(1)
IntegerType | NUMBER(10)
LongType | NUMBER(19)
FloatType | NUMBER(19, 4)
DoubleType | NUMBER(19, 4)
ByteType | NUMBER(3)
ShortType | NUMBER(5)

## How was this patch tested?

New tests were added in [JDBCSuite.scala](https://github.com/wangyum/spark/commit/22b0c2a4228cb8b5098ad741ddf4d1904e745ff6#diff-dc4b58851b084b274df6fe6b189db84d) and [OracleDialect.scala](https://github.com/wangyum/spark/commit/22b0c2a4228cb8b5098ad741ddf4d1904e745ff6#diff-5e0cadf526662f9281aa26315b3750ad)

Author: Yuming Wang <wgyumg@gmail.com>

Closes #14377 from wangyum/SPARK-16625.
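The dialect change itself lives outside `external/` and so does not appear in the diff below. A minimal sketch of the mapping described above, assuming Spark's `JdbcDialect`/`JdbcType` extension points (the actual `OracleDialect` code in the PR may differ in detail):

```scala
import java.sql.Types

import org.apache.spark.sql.jdbc.{JdbcDialect, JdbcType}
import org.apache.spark.sql.types._

// Sketch only: the real OracleDialect ships with Spark and may be structured
// differently; this just demonstrates the type mapping from the table above.
object OracleDialectSketch extends JdbcDialect {

  override def canHandle(url: String): Boolean = url.startsWith("jdbc:oracle")

  // Oracle has no BIT/BIGINT/BYTE column types, so map to NUMBER(p[, s]).
  override def getJDBCType(dt: DataType): Option[JdbcType] = dt match {
    case BooleanType => Some(JdbcType("NUMBER(1)", Types.BOOLEAN))
    case IntegerType => Some(JdbcType("NUMBER(10)", Types.INTEGER))
    case LongType => Some(JdbcType("NUMBER(19)", Types.BIGINT))
    case FloatType => Some(JdbcType("NUMBER(19, 4)", Types.FLOAT))
    case DoubleType => Some(JdbcType("NUMBER(19, 4)", Types.DOUBLE))
    case ByteType => Some(JdbcType("NUMBER(3)", Types.SMALLINT))
    case ShortType => Some(JdbcType("NUMBER(5)", Types.SMALLINT))
    case _ => None // fall back to Spark's default JDBC mappings
  }
}
```

A custom dialect like this could be activated with `JdbcDialects.registerDialect`; the PR instead extends the built-in `OracleDialect`, so the mapping applies out of the box.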
Diffstat (limited to 'external')
-rw-r--r--  external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala  74
1 file changed, 73 insertions(+), 1 deletion(-)
diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala
index c5e1f8607b..8c880f3ee5 100644
--- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala
+++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala
@@ -17,10 +17,12 @@
package org.apache.spark.sql.jdbc
-import java.sql.Connection
+import java.sql.{Connection, Date, Timestamp}
import java.util.Properties
+import org.apache.spark.sql.Row
import org.apache.spark.sql.test.SharedSQLContext
+import org.apache.spark.sql.types._
import org.apache.spark.tags.DockerTest
/**
@@ -77,4 +79,74 @@ class OracleIntegrationSuite extends DockerJDBCIntegrationSuite with SharedSQLCo
    // verify the value was inserted correctly
    assert(rows(0).getString(0).equals("foo"))
  }
+
+  test("SPARK-16625: General data types to be mapped to Oracle") {
+    val props = new Properties()
+    props.put("oracle.jdbc.mapDateToTimestamp", "false")
+
+    val schema = StructType(Seq(
+      StructField("boolean_type", BooleanType, true),
+      StructField("integer_type", IntegerType, true),
+      StructField("long_type", LongType, true),
+      StructField("float_type", FloatType, true),
+      StructField("double_type", DoubleType, true),
+      StructField("byte_type", ByteType, true),
+      StructField("short_type", ShortType, true),
+      StructField("string_type", StringType, true),
+      StructField("binary_type", BinaryType, true),
+      StructField("date_type", DateType, true),
+      StructField("timestamp_type", TimestampType, true)
+    ))
+
+    val tableName = "test_oracle_general_types"
+    val booleanVal = true
+    val integerVal = 1
+    val longVal = 2L
+    val floatVal = 3.0f
+    val doubleVal = 4.0
+    val byteVal = 2.toByte
+    val shortVal = 5.toShort
+    val stringVal = "string"
+    val binaryVal = Array[Byte](6, 7, 8)
+    val dateVal = Date.valueOf("2016-07-26")
+    val timestampVal = Timestamp.valueOf("2016-07-26 11:49:45")
+
+    val data = spark.sparkContext.parallelize(Seq(
+      Row(
+        booleanVal, integerVal, longVal, floatVal, doubleVal, byteVal, shortVal, stringVal,
+        binaryVal, dateVal, timestampVal
+      )))
+
+    val dfWrite = spark.createDataFrame(data, schema)
+    dfWrite.write.jdbc(jdbcUrl, tableName, props)
+
+    val dfRead = spark.read.jdbc(jdbcUrl, tableName, props)
+    val rows = dfRead.collect()
+    // verify the JDBC types of the values read back from Oracle
+    val types = rows(0).toSeq.map(x => x.getClass.toString)
+    assert(types(0).equals("class java.lang.Boolean"))
+    assert(types(1).equals("class java.lang.Integer"))
+    assert(types(2).equals("class java.lang.Long"))
+    assert(types(3).equals("class java.lang.Float"))
+    assert(types(4).equals("class java.lang.Float"))
+    assert(types(5).equals("class java.lang.Integer"))
+    assert(types(6).equals("class java.lang.Integer"))
+    assert(types(7).equals("class java.lang.String"))
+    assert(types(8).equals("class [B"))
+    assert(types(9).equals("class java.sql.Date"))
+    assert(types(10).equals("class java.sql.Timestamp"))
+    // verify the values were inserted correctly
+    val values = rows(0)
+    assert(values.getBoolean(0).equals(booleanVal))
+    assert(values.getInt(1).equals(integerVal))
+    assert(values.getLong(2).equals(longVal))
+    assert(values.getFloat(3).equals(floatVal))
+    assert(values.getFloat(4).equals(doubleVal.toFloat))
+    assert(values.getInt(5).equals(byteVal.toInt))
+    assert(values.getInt(6).equals(shortVal.toInt))
+    assert(values.getString(7).equals(stringVal))
+    assert(values.getAs[Array[Byte]](8).mkString.equals("678"))
+    assert(values.getDate(9).equals(dateVal))
+    assert(values.getTimestamp(10).equals(timestampVal))
+  }
}
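For context, a hypothetical end-to-end write that exercises the new mappings; the connection URL, credentials, and table name here are placeholders (not taken from the commit), and a reachable Oracle instance plus the Oracle JDBC driver on the classpath are assumed:

```scala
import java.util.Properties

import org.apache.spark.sql.SparkSession

object OracleWriteExample {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("SPARK-16625-example").getOrCreate()
    import spark.implicits._

    // Placeholder URL; any reachable Oracle instance works the same way.
    val jdbcUrl = "jdbc:oracle:thin:system/oracle@//localhost:1521/xe"
    val props = new Properties()

    val df = Seq((true, 1, 2L, 3.toByte)).toDF("b", "i", "l", "y")

    // Before this PR, Spark emitted BIT(1)/BIGINT/BYTE in the generated
    // CREATE TABLE statement and Oracle rejected it; with the new mappings
    // the columns become NUMBER(1), NUMBER(10), NUMBER(19) and NUMBER(3).
    df.write.jdbc(jdbcUrl, "spark_16625_example", props)

    spark.stop()
  }
}
```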