diff options
author | Dongjoon Hyun <dongjoon@apache.org> | 2016-11-16 17:12:18 +0800 |
---|---|---|
committer | Wenchen Fan <wenchen@databricks.com> | 2016-11-16 17:12:18 +0800 |
commit | 74f5c2176d8449e41f520febd38109edaf3f4172 (patch) | |
tree | 6c21dd0924f8c83ee4b12df9bb92a5e822f2f5c0 /sql/hive/src | |
parent | 95eb06bd7d0f7110ef62c8d1cb6337c72b10d99f (diff) | |
download | spark-74f5c2176d8449e41f520febd38109edaf3f4172.tar.gz spark-74f5c2176d8449e41f520febd38109edaf3f4172.tar.bz2 spark-74f5c2176d8449e41f520febd38109edaf3f4172.zip |
[SPARK-18433][SQL] Improve DataSource option keys to be more case-insensitive
## What changes were proposed in this pull request?
This PR aims to improve DataSource option keys to be more case-insensitive
DataSource uses CaseInsensitiveMap only in some code paths. For example, the following fails to find the `url` option.
```scala
val df = spark.createDataFrame(sparkContext.parallelize(arr2x2), schema2)
df.write.format("jdbc")
.option("UrL", url1)
.option("dbtable", "TEST.SAVETEST")
.options(properties.asScala)
.save()
```
This PR makes DataSource options use CaseInsensitiveMap internally and also makes DataSource use CaseInsensitiveMap generally, except for `InMemoryFileIndex` and `InsertIntoHadoopFsRelationCommand`. We cannot pass them CaseInsensitiveMap because they create new case-sensitive HadoopConfs by calling newHadoopConfWithOptions(options) internally.
## How was this patch tested?
Pass the Jenkins test with newly added test cases.
Author: Dongjoon Hyun <dongjoon@apache.org>
Closes #15884 from dongjoon-hyun/SPARK-18433.
Diffstat (limited to 'sql/hive/src')
4 files changed, 11 insertions, 2 deletions
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala index 42ce1a88a2..cbd00da81c 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala @@ -35,8 +35,8 @@ import org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.logical.{ColumnStat, Statistics} +import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap import org.apache.spark.sql.execution.command.{ColumnStatStruct, DDLUtils} -import org.apache.spark.sql.execution.datasources.CaseInsensitiveMap import org.apache.spark.sql.hive.client.HiveClient import org.apache.spark.sql.internal.HiveSerDe import org.apache.spark.sql.internal.StaticSQLConf._ diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcOptions.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcOptions.scala index c2a126d3bf..ac587ab99a 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcOptions.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcOptions.scala @@ -17,14 +17,18 @@ package org.apache.spark.sql.hive.orc +import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap + /** * Options for the ORC data source. */ -private[orc] class OrcOptions(@transient private val parameters: Map[String, String]) +private[orc] class OrcOptions(@transient private val parameters: CaseInsensitiveMap) extends Serializable { import OrcOptions._ + def this(parameters: Map[String, String]) = this(new CaseInsensitiveMap(parameters)) + /** * Compression codec to use. By default snappy compression. * Acceptable values are defined in [[shortOrcCompressionCodecNames]]. 
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcSourceSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcSourceSuite.scala index 0f37cd7bf3..12f948041a 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcSourceSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcSourceSuite.scala @@ -146,6 +146,10 @@ abstract class OrcSuite extends QueryTest with TestHiveSingleton with BeforeAndA sql("DROP TABLE IF EXISTS orcNullValues") } + + test("SPARK-18433: Improve DataSource option keys to be more case-insensitive") { + assert(new OrcOptions(Map("Orc.Compress" -> "NONE")).compressionCodec == "NONE") + } } class OrcSourceSuite extends OrcSuite { diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala index 3644ff952e..2ce60fe589 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala @@ -24,6 +24,7 @@ import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.execution.DataSourceScanExec import org.apache.spark.sql.execution.command.ExecutedCommandExec import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, InsertIntoDataSourceCommand, InsertIntoHadoopFsRelationCommand, LogicalRelation} +import org.apache.spark.sql.execution.datasources.parquet.ParquetOptions import org.apache.spark.sql.hive.execution.HiveTableScanExec import org.apache.spark.sql.hive.test.TestHiveSingleton import org.apache.spark.sql.internal.SQLConf |