diff options
author | Dongjoon Hyun <dongjoon@apache.org> | 2016-11-16 17:12:18 +0800 |
---|---|---|
committer | Wenchen Fan <wenchen@databricks.com> | 2016-11-16 17:12:18 +0800 |
commit | 74f5c2176d8449e41f520febd38109edaf3f4172 (patch) | |
tree | 6c21dd0924f8c83ee4b12df9bb92a5e822f2f5c0 /sql/hive/src | |
parent | 95eb06bd7d0f7110ef62c8d1cb6337c72b10d99f (diff) | |
download | spark-74f5c2176d8449e41f520febd38109edaf3f4172.tar.gz spark-74f5c2176d8449e41f520febd38109edaf3f4172.tar.bz2 spark-74f5c2176d8449e41f520febd38109edaf3f4172.zip |
[SPARK-18433][SQL] Improve DataSource option keys to be more case-insensitive
## What changes were proposed in this pull request?
This PR aims to improve DataSource option keys to be more case-insensitive
DataSource uses CaseInsensitiveMap only in some code paths. For example, the following fails to find the `url` option.
```scala
val df = spark.createDataFrame(sparkContext.parallelize(arr2x2), schema2)
df.write.format("jdbc")
.option("UrL", url1)
.option("dbtable", "TEST.SAVETEST")
.options(properties.asScala)
.save()
```
This PR makes DataSource options use CaseInsensitiveMap internally and also makes DataSource use CaseInsensitiveMap generally, except for `InMemoryFileIndex` and `InsertIntoHadoopFsRelationCommand`. We cannot pass them CaseInsensitiveMap because they create new case-sensitive HadoopConfs by calling newHadoopConfWithOptions(options) internally.
## How was this patch tested?
Pass the Jenkins test with newly added test cases.
Author: Dongjoon Hyun <dongjoon@apache.org>
Closes #15884 from dongjoon-hyun/SPARK-18433.
Diffstat (limited to 'sql/hive/src')
4 files changed, 11 insertions, 2 deletions
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala index 42ce1a88a2..cbd00da81c 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala @@ -35,8 +35,8 @@ import org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.logical.{ColumnStat, Statistics} +import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap import org.apache.spark.sql.execution.command.{ColumnStatStruct, DDLUtils} -import org.apache.spark.sql.execution.datasources.CaseInsensitiveMap import org.apache.spark.sql.hive.client.HiveClient import org.apache.spark.sql.internal.HiveSerDe import org.apache.spark.sql.internal.StaticSQLConf._ diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcOptions.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcOptions.scala index c2a126d3bf..ac587ab99a 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcOptions.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcOptions.scala @@ -17,14 +17,18 @@ package org.apache.spark.sql.hive.orc +import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap + /** * Options for the ORC data source. */ -private[orc] class OrcOptions(@transient private val parameters: Map[String, String]) +private[orc] class OrcOptions(@transient private val parameters: CaseInsensitiveMap) extends Serializable { import OrcOptions._ + def this(parameters: Map[String, String]) = this(new CaseInsensitiveMap(parameters)) + /** * Compression codec to use. By default snappy compression. * Acceptable values are defined in [[shortOrcCompressionCodecNames]]. 
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcSourceSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcSourceSuite.scala index 0f37cd7bf3..12f948041a 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcSourceSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcSourceSuite.scala @@ -146,6 +146,10 @@ abstract class OrcSuite extends QueryTest with TestHiveSingleton with BeforeAndA sql("DROP TABLE IF EXISTS orcNullValues") } + + test("SPARK-18433: Improve DataSource option keys to be more case-insensitive") { + assert(new OrcOptions(Map("Orc.Compress" -> "NONE")).compressionCodec == "NONE") + } } class OrcSourceSuite extends OrcSuite { diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala index 3644ff952e..2ce60fe589 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala @@ -24,6 +24,7 @@ import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.execution.DataSourceScanExec import org.apache.spark.sql.execution.command.ExecutedCommandExec import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, InsertIntoDataSourceCommand, InsertIntoHadoopFsRelationCommand, LogicalRelation} +import org.apache.spark.sql.execution.datasources.parquet.ParquetOptions import org.apache.spark.sql.hive.execution.HiveTableScanExec import org.apache.spark.sql.hive.test.TestHiveSingleton import org.apache.spark.sql.internal.SQLConf |