aboutsummaryrefslogtreecommitdiff
path: root/sql/core/src
diff options
context:
space:
mode:
authorsureshthalamati <suresh.thalamati@gmail.com>2017-02-14 15:34:12 -0800
committerWenchen Fan <wenchen@databricks.com>2017-02-14 15:34:12 -0800
commitf48c5a57d6488d5598534ca5834e008504f464fe (patch)
tree3a27c7bbc6a8401aa2a3a0e8dfcc3ad7b203f811 /sql/core/src
parentda7aef7a0ea921a2d8ee37b0e3939245e4168355 (diff)
downloadspark-f48c5a57d6488d5598534ca5834e008504f464fe.tar.gz
spark-f48c5a57d6488d5598534ca5834e008504f464fe.tar.bz2
spark-f48c5a57d6488d5598534ca5834e008504f464fe.zip
[SPARK-19318][SQL] Fix to treat JDBC connection properties specified by the user in case-sensitive manner.
## What changes were proposed in this pull request? The reason for test failure is that the property “oracle.jdbc.mapDateToTimestamp” set by the test was getting converted into all lower case. Oracle database expects this property in case-sensitive manner. This test was passing in previous releases because connection properties were sent as user specified for the test case scenario. Fixes to handle all option uniformly in case-insensitive manner, converted the JDBC connection properties also to lower case. This PR enhances CaseInsensitiveMap to keep track of input case-sensitive keys , and uses those when creating connection properties that are passed to the JDBC connection. Alternative approach PR https://github.com/apache/spark/pull/16847 is to pass original input keys to JDBC data source by adding check in the Data source class and handle case-insensitivity in the JDBC source code. ## How was this patch tested? Added new test cases to JdbcSuite , and OracleIntegrationSuite. Ran docker integration tests passed on my laptop, all tests passed successfully. Author: sureshthalamati <suresh.thalamati@gmail.com> Closes #16891 from sureshthalamati/jdbc_case_senstivity_props_fix-SPARK-19318.
Diffstat (limited to 'sql/core/src')
-rw-r--r--sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala4
-rw-r--r--sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVOptions.scala6
-rw-r--r--sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCOptions.scala10
-rw-r--r--sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetOptions.scala4
-rw-r--r--sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/text/TextOptions.scala4
-rw-r--r--sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamOptions.scala4
-rw-r--r--sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala50
-rw-r--r--sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCWriteSuite.scala13
8 files changed, 78 insertions, 17 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala
index ecfcafe69c..d510581f90 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala
@@ -85,7 +85,7 @@ case class DataSource(
lazy val providingClass: Class[_] = DataSource.lookupDataSource(className)
lazy val sourceInfo: SourceInfo = sourceSchema()
- private val caseInsensitiveOptions = new CaseInsensitiveMap(options)
+ private val caseInsensitiveOptions = CaseInsensitiveMap(options)
/**
* Get the schema of the given FileFormat, if provided by `userSpecifiedSchema`, or try to infer
@@ -610,7 +610,7 @@ object DataSource {
* [[CatalogStorageFormat]]. Note that, the `path` option is removed from options after this.
*/
def buildStorageFormatFromOptions(options: Map[String, String]): CatalogStorageFormat = {
- val path = new CaseInsensitiveMap(options).get("path")
+ val path = CaseInsensitiveMap(options).get("path")
val optionsWithoutPath = options.filterKeys(_.toLowerCase != "path")
CatalogStorageFormat.empty.copy(locationUri = path, properties = optionsWithoutPath)
}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVOptions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVOptions.scala
index af456c8d71..9d79ea6ed1 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVOptions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVOptions.scala
@@ -26,10 +26,10 @@ import org.apache.commons.lang3.time.FastDateFormat
import org.apache.spark.internal.Logging
import org.apache.spark.sql.catalyst.util.{CaseInsensitiveMap, CompressionCodecs, ParseModes}
-private[csv] class CSVOptions(@transient private val parameters: CaseInsensitiveMap)
+private[csv] class CSVOptions(@transient private val parameters: CaseInsensitiveMap[String])
extends Logging with Serializable {
- def this(parameters: Map[String, String]) = this(new CaseInsensitiveMap(parameters))
+ def this(parameters: Map[String, String]) = this(CaseInsensitiveMap(parameters))
private def getChar(paramName: String, default: Char): Char = {
val paramValue = parameters.get(paramName)
@@ -164,7 +164,7 @@ private[csv] class CSVOptions(@transient private val parameters: CaseInsensitive
object CSVOptions {
- def apply(): CSVOptions = new CSVOptions(new CaseInsensitiveMap(Map.empty))
+ def apply(): CSVOptions = new CSVOptions(CaseInsensitiveMap(Map.empty))
def apply(paramName: String, paramValue: String): CSVOptions = {
new CSVOptions(Map(paramName -> paramValue))
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCOptions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCOptions.scala
index 6fd2e0d241..d4d3464654 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCOptions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCOptions.scala
@@ -26,15 +26,15 @@ import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap
* Options for the JDBC data source.
*/
class JDBCOptions(
- @transient private val parameters: CaseInsensitiveMap)
+ @transient private val parameters: CaseInsensitiveMap[String])
extends Serializable {
import JDBCOptions._
- def this(parameters: Map[String, String]) = this(new CaseInsensitiveMap(parameters))
+ def this(parameters: Map[String, String]) = this(CaseInsensitiveMap(parameters))
def this(url: String, table: String, parameters: Map[String, String]) = {
- this(new CaseInsensitiveMap(parameters ++ Map(
+ this(CaseInsensitiveMap(parameters ++ Map(
JDBCOptions.JDBC_URL -> url,
JDBCOptions.JDBC_TABLE_NAME -> table)))
}
@@ -44,7 +44,7 @@ class JDBCOptions(
*/
val asProperties: Properties = {
val properties = new Properties()
- parameters.foreach { case (k, v) => properties.setProperty(k, v) }
+ parameters.originalMap.foreach { case (k, v) => properties.setProperty(k, v) }
properties
}
@@ -55,7 +55,7 @@ class JDBCOptions(
*/
val asConnectionProperties: Properties = {
val properties = new Properties()
- parameters.filterKeys(key => !jdbcOptionNames(key.toLowerCase))
+ parameters.originalMap.filterKeys(key => !jdbcOptionNames(key.toLowerCase))
.foreach { case (k, v) => properties.setProperty(k, v) }
properties
}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetOptions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetOptions.scala
index a81a95d510..bdda299a62 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetOptions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetOptions.scala
@@ -26,14 +26,14 @@ import org.apache.spark.sql.internal.SQLConf
* Options for the Parquet data source.
*/
private[parquet] class ParquetOptions(
- @transient private val parameters: CaseInsensitiveMap,
+ @transient private val parameters: CaseInsensitiveMap[String],
@transient private val sqlConf: SQLConf)
extends Serializable {
import ParquetOptions._
def this(parameters: Map[String, String], sqlConf: SQLConf) =
- this(new CaseInsensitiveMap(parameters), sqlConf)
+ this(CaseInsensitiveMap(parameters), sqlConf)
/**
* Compression codec to use. By default use the value specified in SQLConf.
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/text/TextOptions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/text/TextOptions.scala
index 8cad984e33..49bd7382f9 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/text/TextOptions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/text/TextOptions.scala
@@ -22,12 +22,12 @@ import org.apache.spark.sql.catalyst.util.{CaseInsensitiveMap, CompressionCodecs
/**
* Options for the Text data source.
*/
-private[text] class TextOptions(@transient private val parameters: CaseInsensitiveMap)
+private[text] class TextOptions(@transient private val parameters: CaseInsensitiveMap[String])
extends Serializable {
import TextOptions._
- def this(parameters: Map[String, String]) = this(new CaseInsensitiveMap(parameters))
+ def this(parameters: Map[String, String]) = this(CaseInsensitiveMap(parameters))
/**
* Compression codec to use.
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamOptions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamOptions.scala
index 25ebe1797b..2f802d782f 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamOptions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamOptions.scala
@@ -26,9 +26,9 @@ import org.apache.spark.util.Utils
/**
* User specified options for file streams.
*/
-class FileStreamOptions(parameters: CaseInsensitiveMap) extends Logging {
+class FileStreamOptions(parameters: CaseInsensitiveMap[String]) extends Logging {
- def this(parameters: Map[String, String]) = this(new CaseInsensitiveMap(parameters))
+ def this(parameters: Map[String, String]) = this(CaseInsensitiveMap(parameters))
val maxFilesPerTrigger: Option[Int] = parameters.get("maxFilesPerTrigger").map { str =>
Try(str.toInt).toOption.filter(_ > 0).getOrElse {
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala
index 1cca15542d..92d3e9519f 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala
@@ -899,7 +899,7 @@ class JDBCSuite extends SparkFunSuite
"dbtable" -> "t1",
"numPartitions" -> "10")
assert(new JDBCOptions(parameters).asConnectionProperties.isEmpty)
- assert(new JDBCOptions(new CaseInsensitiveMap(parameters)).asConnectionProperties.isEmpty)
+ assert(new JDBCOptions(CaseInsensitiveMap(parameters)).asConnectionProperties.isEmpty)
}
test("SPARK-16848: jdbc API throws an exception for user specified schema") {
@@ -925,4 +925,52 @@ class JDBCSuite extends SparkFunSuite
assert(res.generatedRows.isEmpty)
assert(res.outputRows === foobarCnt :: Nil)
}
+
+ test("SPARK-19318: Connection properties keys should be case-sensitive.") {
+ def testJdbcOptions(options: JDBCOptions): Unit = {
+ // Spark JDBC data source options are case-insensitive
+ assert(options.table == "t1")
+ // When we convert it to properties, it should be case-sensitive.
+ assert(options.asProperties.size == 3)
+ assert(options.asProperties.get("customkey") == null)
+ assert(options.asProperties.get("customKey") == "a-value")
+ assert(options.asConnectionProperties.size == 1)
+ assert(options.asConnectionProperties.get("customkey") == null)
+ assert(options.asConnectionProperties.get("customKey") == "a-value")
+ }
+
+ val parameters = Map("url" -> url, "dbTAblE" -> "t1", "customKey" -> "a-value")
+ testJdbcOptions(new JDBCOptions(parameters))
+ testJdbcOptions(new JDBCOptions(CaseInsensitiveMap(parameters)))
+ // test add/remove key-value from the case-insensitive map
+ var modifiedParameters = CaseInsensitiveMap(Map.empty) ++ parameters
+ testJdbcOptions(new JDBCOptions(modifiedParameters))
+ modifiedParameters -= "dbtable"
+ assert(modifiedParameters.get("dbTAblE").isEmpty)
+ modifiedParameters -= "customkey"
+ assert(modifiedParameters.get("customKey").isEmpty)
+ modifiedParameters += ("customKey" -> "a-value")
+ modifiedParameters += ("dbTable" -> "t1")
+ testJdbcOptions(new JDBCOptions(modifiedParameters))
+ assert ((modifiedParameters -- parameters.keys).size == 0)
+ }
+
+ test("SPARK-19318: jdbc data source options should be treated case-insensitive.") {
+ val df = spark.read.format("jdbc")
+ .option("Url", urlWithUserAndPass)
+ .option("DbTaBle", "TEST.PEOPLE")
+ .load()
+ assert(df.count() == 3)
+
+ withTempView("people_view") {
+ sql(
+ s"""
+ |CREATE TEMPORARY VIEW people_view
+ |USING org.apache.spark.sql.jdbc
+ |OPTIONS (uRl '$url', DbTaBlE 'TEST.PEOPLE', User 'testUser', PassWord 'testPass')
+ """.stripMargin.replaceAll("\n", " "))
+
+ assert(sql("select * from people_view").count() == 3)
+ }
+ }
}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCWriteSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCWriteSuite.scala
index 354af29d42..ec7b19e666 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCWriteSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCWriteSuite.scala
@@ -349,4 +349,17 @@ class JDBCWriteSuite extends SharedSQLContext with BeforeAndAfter {
assert(e.contains("Invalid value `0` for parameter `numPartitions` in table writing " +
"via JDBC. The minimum value is 1."))
}
+
+ test("SPARK-19318 temporary view data source option keys should be case-insensitive") {
+ withTempView("people_view") {
+ sql(
+ s"""
+ |CREATE TEMPORARY VIEW people_view
+ |USING org.apache.spark.sql.jdbc
+ |OPTIONS (uRl '$url1', DbTaBlE 'TEST.PEOPLE1', User 'testUser', PassWord 'testPass')
+ """.stripMargin.replaceAll("\n", " "))
+ sql("INSERT OVERWRITE TABLE PEOPLE_VIEW SELECT * FROM PEOPLE")
+ assert(sql("select * from people_view").count() == 2)
+ }
+ }
}