path: root/sql/hive
author     Jacky Li <jacky.likun@huawei.com>    2015-05-08 15:25:54 -0700
committer  Michael Armbrust <michael@databricks.com>    2015-05-08 15:25:54 -0700
commit     6dad76e5eba3c2925bfc9d142f31f7c2dc649886 (patch)
tree       264c07242b5a7b555a7f58b14c2361f2df49db1d /sql/hive
parent     90527f560462cc2d693176bd961b02767e460e06 (diff)
[SPARK-4699] [SQL] Make caseSensitive configurable in spark sql analyzer
based on #3558

Author: Jacky Li <jacky.likun@huawei.com>
Author: wangfei <wangfei1@huawei.com>
Author: scwf <wangfei1@huawei.com>

Closes #5806 from scwf/case and squashes the following commits:

cd51712 [wangfei] fix compile
d4b724f [wangfei] address michael's comment
af512c7 [wangfei] fix conflicts
4ef1be7 [wangfei] fix conflicts
269cf21 [scwf] fix conflicts
b73df6c [scwf] style issue
9e11752 [scwf] improve SimpleCatalystConf
b35529e [scwf] minor style
a3f7659 [scwf] remove unsed imports
2a56515 [scwf] fix conflicts
6db4bf5 [scwf] also fix for HiveContext
7fc4a98 [scwf] fix test case
d5a9933 [wangfei] fix style
eee75ba [wangfei] fix EmptyConf
6ef31cf [wangfei] revert pom changes
5d7c456 [wangfei] set CASE_SENSITIVE false in TestHive
966e719 [wangfei] set CASE_SENSITIVE false in hivecontext
fd30e25 [wangfei] added override
69b3b70 [wangfei] fix AnalysisSuite
5472b08 [wangfei] fix compile issue
56034ca [wangfei] fix conflicts and improve for catalystconf
664d1e9 [Jacky Li] Merge branch 'master' of https://github.com/apache/spark into case
12eca9a [Jacky Li] solve conflict with master
39e369c [Jacky Li] fix confilct after DataFrame PR
dee56e9 [Jacky Li] fix test case failure
05b09a3 [Jacky Li] fix conflict base on the latest master branch
73c16b1 [Jacky Li] fix bug in sql/hive
9bf4cc7 [Jacky Li] fix bug in catalyst
005c56d [Jacky Li] make SQLContext caseSensitivity configurable
6332e0f [Jacky Li] fix bug
fcbf0d9 [Jacky Li] fix scalastyle check
e7bca31 [Jacky Li] make caseSensitive configuration in Analyzer and Catalog
91b1b96 [Jacky Li] make caseSensitive configurable in Analyzer
f57f15c [Jacky Li] add testcase
578d167 [Jacky Li] make caseSensitive configurable
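For context, a minimal sketch of what this change enables from user code. It assumes the key behind SQLConf.CASE_SENSITIVE is "spark.sql.caseSensitive" and that a Hive table src(key, value) exists; neither is shown in this diff.

    import org.apache.spark.SparkContext
    import org.apache.spark.sql.hive.HiveContext

    val sc = new SparkContext("local", "case-sensitivity-demo")
    val hiveContext = new HiveContext(sc)

    // HiveContext now defaults to case-insensitive analysis, so KEY resolves to key.
    hiveContext.sql("SELECT KEY FROM src").show()

    // Flipping the new flag makes attribute resolution case sensitive again.
    hiveContext.setConf("spark.sql.caseSensitive", "true")
    // hiveContext.sql("SELECT KEY FROM src")  // would now fail to resolve KEY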
Diffstat (limited to 'sql/hive')
-rw-r--r--  sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala              |  5
-rw-r--r--  sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala     | 32
-rw-r--r--  sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala            |  8
-rw-r--r--  sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala |  1
-rw-r--r--  sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala  |  6
-rw-r--r--  sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala            |  4
6 files changed, 28 insertions, 28 deletions
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
index 538c6c7f0a..3bab648e31 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
@@ -46,6 +46,7 @@ import org.apache.spark.sql.execution.{ExecutedCommand, ExtractPythonUdfs, Query
import org.apache.spark.sql.hive.client._
import org.apache.spark.sql.hive.execution.{DescribeHiveTableCommand, HiveNativeCommand}
import org.apache.spark.sql.sources.{DDLParser, DataSourceStrategy}
+import org.apache.spark.sql.catalyst.CatalystConf
import org.apache.spark.sql.types._
import org.apache.spark.util.Utils
@@ -329,7 +330,7 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
/* An analyzer that uses the Hive metastore. */
@transient
override protected[sql] lazy val analyzer =
- new Analyzer(catalog, functionRegistry, caseSensitive = false) {
+ new Analyzer(catalog, functionRegistry, conf) {
override val extendedResolutionRules =
catalog.ParquetConversions ::
catalog.CreateTables ::
@@ -350,6 +351,8 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
protected[hive] class SQLSession extends super.SQLSession {
protected[sql] override lazy val conf: SQLConf = new SQLConf {
override def dialect: String = getConf(SQLConf.DIALECT, "hiveql")
+ override def caseSensitiveAnalysis: Boolean =
+ getConf(SQLConf.CASE_SENSITIVE, "false").toBoolean
}
/**
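The constructor change above replaces the hard-coded `caseSensitive = false` with the whole conf. A simplified, self-contained sketch of the new wiring; CatalystConf, caseSensitiveAnalysis, and SimpleCatalystConf appear in this patch, while the resolver logic is an illustrative assumption, not the real catalyst classes:

    // Conf-driven analyzer wiring (sketch).
    trait CatalystConf {
      def caseSensitiveAnalysis: Boolean
    }

    case class SimpleCatalystConf(caseSensitiveAnalysis: Boolean) extends CatalystConf

    class Analyzer(conf: CatalystConf) {
      // Attribute-name comparison is driven by the conf instead of a constructor flag.
      val resolver: (String, String) => Boolean =
        if (conf.caseSensitiveAnalysis) (a, b) => a == b
        else (a, b) => a.equalsIgnoreCase(b)
    }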
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
index 8fcdf3d0ab..f5398605bc 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
@@ -17,26 +17,18 @@
package org.apache.spark.sql.hive
-import java.io.IOException
-import java.util.{List => JList}
-
import com.google.common.base.Objects
import com.google.common.cache.{CacheBuilder, CacheLoader, LoadingCache}
import org.apache.hadoop.fs.Path
-import org.apache.hadoop.hive.metastore.api.{FieldSchema, Partition => TPartition, Table => TTable}
-import org.apache.hadoop.hive.metastore.{TableType, Warehouse}
+import org.apache.hadoop.hive.metastore.api.FieldSchema
+import org.apache.hadoop.hive.metastore.Warehouse
import org.apache.hadoop.hive.ql.metadata._
-import org.apache.hadoop.hive.ql.plan.CreateTableDesc
-import org.apache.hadoop.hive.serde.serdeConstants
-import org.apache.hadoop.hive.serde2.`lazy`.LazySimpleSerDe
-import org.apache.hadoop.hive.serde2.{Deserializer, SerDeException}
-import org.apache.hadoop.util.ReflectionUtils
+import org.apache.hadoop.hive.serde2.Deserializer
import org.apache.spark.Logging
-import org.apache.spark.sql.hive.client.IsolatedClientLoader
import org.apache.spark.sql.{SaveMode, AnalysisException, SQLContext}
-import org.apache.spark.sql.catalyst.analysis.{MultiInstanceRelation, NoSuchTableException, Catalog, OverrideCatalog}
+import org.apache.spark.sql.catalyst.analysis.{MultiInstanceRelation, Catalog, OverrideCatalog}
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.planning.PhysicalOperation
import org.apache.spark.sql.catalyst.plans.logical
@@ -44,7 +36,7 @@ import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.catalyst.rules._
import org.apache.spark.sql.hive.client._
import org.apache.spark.sql.parquet.{ParquetRelation2, Partition => ParquetPartition, PartitionSpec}
-import org.apache.spark.sql.sources.{CreateTableUsingAsSelect, DDLParser, LogicalRelation, ResolvedDataSource}
+import org.apache.spark.sql.sources.{CreateTableUsingAsSelect, LogicalRelation, ResolvedDataSource}
import org.apache.spark.sql.types._
import org.apache.spark.util.Utils
@@ -54,7 +46,7 @@ import scala.collection.JavaConversions._
private[hive] class HiveMetastoreCatalog(val client: ClientInterface, hive: HiveContext)
extends Catalog with Logging {
- import org.apache.spark.sql.hive.HiveMetastoreTypes._
+ val conf = hive.conf
/** Usages should lock on `this`. */
protected[hive] lazy val hiveWarehouse = new Warehouse(hive.hiveconf)
@@ -148,7 +140,7 @@ private[hive] class HiveMetastoreCatalog(val client: ClientInterface, hive: Hive
val tableProperties = new scala.collection.mutable.HashMap[String, String]
tableProperties.put("spark.sql.sources.provider", provider)
if (userSpecifiedSchema.isDefined) {
- val threshold = hive.conf.schemaStringLengthThreshold
+ val threshold = conf.schemaStringLengthThreshold
val schemaJsonString = userSpecifiedSchema.get.json
// Split the JSON string.
val parts = schemaJsonString.grouped(threshold).toSeq
@@ -355,7 +347,7 @@ private[hive] class HiveMetastoreCatalog(val client: ClientInterface, hive: Hive
// Inserting into partitioned table is not supported in Parquet data source (yet).
if !relation.hiveQlTable.isPartitioned &&
hive.convertMetastoreParquet &&
- hive.conf.parquetUseDataSourceApi &&
+ conf.parquetUseDataSourceApi &&
relation.tableDesc.getSerdeClassName.toLowerCase.contains("parquet") =>
val parquetRelation = convertToParquetRelation(relation)
val attributedRewrites = relation.output.zip(parquetRelation.output)
@@ -366,7 +358,7 @@ private[hive] class HiveMetastoreCatalog(val client: ClientInterface, hive: Hive
// Inserting into partitioned table is not supported in Parquet data source (yet).
if !relation.hiveQlTable.isPartitioned &&
hive.convertMetastoreParquet &&
- hive.conf.parquetUseDataSourceApi &&
+ conf.parquetUseDataSourceApi &&
relation.tableDesc.getSerdeClassName.toLowerCase.contains("parquet") =>
val parquetRelation = convertToParquetRelation(relation)
val attributedRewrites = relation.output.zip(parquetRelation.output)
@@ -375,7 +367,7 @@ private[hive] class HiveMetastoreCatalog(val client: ClientInterface, hive: Hive
// Read path
case p @ PhysicalOperation(_, _, relation: MetastoreRelation)
if hive.convertMetastoreParquet &&
- hive.conf.parquetUseDataSourceApi &&
+ conf.parquetUseDataSourceApi &&
relation.tableDesc.getSerdeClassName.toLowerCase.contains("parquet") =>
val parquetRelation = convertToParquetRelation(relation)
val attributedRewrites = relation.output.zip(parquetRelation.output)
@@ -435,7 +427,7 @@ private[hive] class HiveMetastoreCatalog(val client: ClientInterface, hive: Hive
val mode = if (allowExisting) SaveMode.Ignore else SaveMode.ErrorIfExists
CreateTableUsingAsSelect(
desc.name,
- hive.conf.defaultDataSourceName,
+ conf.defaultDataSourceName,
temporary = false,
mode,
options = Map.empty[String, String],
@@ -464,7 +456,7 @@ private[hive] class HiveMetastoreCatalog(val client: ClientInterface, hive: Hive
val mode = if (allowExisting) SaveMode.Ignore else SaveMode.ErrorIfExists
CreateTableUsingAsSelect(
tblName,
- hive.conf.defaultDataSourceName,
+ conf.defaultDataSourceName,
temporary = false,
mode,
options = Map.empty[String, String],
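The edits above all follow one pattern: `hive.conf` is cached once as `val conf` and every `hive.conf.x` lookup goes through it, which also gives the Catalog a conf to consult. A hedged sketch of why a catalog wants that handle, reusing the CatalystConf trait sketched earlier; the normalization helper is hypothetical and not part of this diff:

    class MetastoreCatalogSketch(val conf: CatalystConf) {
      // Lower-case identifiers only when analysis is case insensitive.
      def normalizeTableName(name: String): String =
        if (conf.caseSensitiveAnalysis) name else name.toLowerCase
    }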
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
index 1f40a5340c..1598d4bd47 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
@@ -26,15 +26,16 @@ import org.apache.hadoop.hive.ql.io.avro.{AvroContainerInputFormat, AvroContaine
import org.apache.hadoop.hive.ql.metadata.Table
import org.apache.hadoop.hive.ql.parse.VariableSubstitution
import org.apache.hadoop.hive.ql.processors._
-import org.apache.hadoop.hive.serde2.RegexSerDe
import org.apache.hadoop.hive.serde2.`lazy`.LazySimpleSerDe
import org.apache.hadoop.hive.serde2.avro.AvroSerDe
-import org.apache.spark.sql.SQLConf
+
+import org.apache.spark.sql.catalyst.CatalystConf
import org.apache.spark.sql.catalyst.analysis._
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.execution.CacheTableCommand
import org.apache.spark.sql.hive._
import org.apache.spark.sql.hive.execution.HiveNativeCommand
+import org.apache.spark.sql.SQLConf
import org.apache.spark.util.Utils
import org.apache.spark.{SparkConf, SparkContext}
@@ -103,10 +104,11 @@ class TestHiveContext(sc: SparkContext) extends HiveContext(sc) {
/** Fewer partitions to speed up testing. */
protected[sql] override lazy val conf: SQLConf = new SQLConf {
override def numShufflePartitions: Int = getConf(SQLConf.SHUFFLE_PARTITIONS, "5").toInt
-
// TODO as in unit test, conf.clear() probably be called, all of the value will be cleared.
// The super.getConf(SQLConf.DIALECT) is "sql" by default, we need to set it as "hiveql"
override def dialect: String = super.getConf(SQLConf.DIALECT, "hiveql")
+ override def caseSensitiveAnalysis: Boolean =
+ getConf(SQLConf.CASE_SENSITIVE, "false").toBoolean
}
}
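TestHive pins caseSensitiveAnalysis to false so the existing Hive compatibility tests keep resolving mixed-case attributes. A test that needs the opposite setting could flip the conf locally and restore it afterwards; a sketch, again assuming the key behind SQLConf.CASE_SENSITIVE is "spark.sql.caseSensitive":

    TestHive.setConf("spark.sql.caseSensitive", "true")
    try {
      // ... assertions that rely on case-sensitive resolution ...
    } finally {
      TestHive.setConf("spark.sql.caseSensitive", "false")
    }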
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
index 7d728fe87b..2c9c08a9f3 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
@@ -29,7 +29,6 @@ import org.apache.hadoop.hive.conf.HiveConf.ConfVars
import org.apache.spark.{SparkFiles, SparkException}
import org.apache.spark.sql.{AnalysisException, DataFrame, Row}
import org.apache.spark.sql.catalyst.plans.logical.Project
-import org.apache.spark.sql.functions._
import org.apache.spark.sql.hive._
import org.apache.spark.sql.hive.test.TestHive
import org.apache.spark.sql.hive.test.TestHive._
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
index c605f10175..1353802604 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
@@ -493,6 +493,12 @@ class SQLQuerySuite extends QueryTest {
}
}
+ test("SPARK-4699 HiveContext should be case insensitive by default") {
+ checkAnswer(
+ sql("SELECT KEY FROM Src ORDER BY value"),
+ sql("SELECT key FROM src ORDER BY value").collect().toSeq)
+ }
+
test("SPARK-5284 Insert into Hive throws NPE when a inner complex type field has a null value") {
val schema = StructType(
StructField("s",
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala
index d5dd0bf58e..bf1121ddf0 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala
@@ -21,14 +21,12 @@ import java.io.File
import org.scalatest.BeforeAndAfterAll
-import org.apache.spark.sql.{QueryTest, SQLConf, SaveMode}
+import org.apache.spark.sql.{QueryTest, SQLConf}
import org.apache.spark.sql.catalyst.expressions.Row
import org.apache.spark.sql.execution.{ExecutedCommand, PhysicalRDD}
import org.apache.spark.sql.hive.execution.HiveTableScan
-import org.apache.spark.sql.hive.test.TestHive
import org.apache.spark.sql.hive.test.TestHive._
import org.apache.spark.sql.hive.test.TestHive.implicits._
-import org.apache.spark.sql.json.JSONRelation
import org.apache.spark.sql.sources.{InsertIntoDataSource, LogicalRelation}
import org.apache.spark.sql.parquet.{ParquetRelation2, ParquetTableScan}
import org.apache.spark.sql.SaveMode