[SPARK-4699] [SQL] Make caseSensitive configurable in spark sql analyzer

based on #3558 Author: Jacky Li <jacky.likun@huawei.com> Author: wangfei <wangfei1@huawei.com> Author: scwf <wangfei1@huawei.com> Closes #5806 from scwf/case and squashes the following commits: cd51712 [wangfei] fix compile d4b724f [wangfei] address michael's comment af512c7 [wangfei] fix conflicts 4ef1be7 [wangfei] fix conflicts 269cf21 [scwf] fix conflicts b73df6c [scwf] style issue 9e11752 [scwf] improve SimpleCatalystConf b35529e [scwf] minor style a3f7659 [scwf] remove unsed imports 2a56515 [scwf] fix conflicts 6db4bf5 [scwf] also fix for HiveContext 7fc4a98 [scwf] fix test case d5a9933 [wangfei] fix style eee75ba [wangfei] fix EmptyConf 6ef31cf [wangfei] revert pom changes 5d7c456 [wangfei] set CASE_SENSITIVE false in TestHive 966e719 [wangfei] set CASE_SENSITIVE false in hivecontext fd30e25 [wangfei] added override 69b3b70 [wangfei] fix AnalysisSuite 5472b08 [wangfei] fix compile issue 56034ca [wangfei] fix conflicts and improve for catalystconf 664d1e9 [Jacky Li] Merge branch 'master' of https://github.com/apache/spark into case 12eca9a [Jacky Li] solve conflict with master 39e369c [Jacky Li] fix confilct after DataFrame PR dee56e9 [Jacky Li] fix test case failure 05b09a3 [Jacky Li] fix conflict base on the latest master branch 73c16b1 [Jacky Li] fix bug in sql/hive 9bf4cc7 [Jacky Li] fix bug in catalyst 005c56d [Jacky Li] make SQLContext caseSensitivity configurable 6332e0f [Jacky Li] fix bug fcbf0d9 [Jacky Li] fix scalastyle check e7bca31 [Jacky Li] make caseSensitive configuration in Analyzer and Catalog 91b1b96 [Jacky Li] make caseSensitive configurable in Analyzer f57f15c [Jacky Li] add testcase 578d167 [Jacky Li] make caseSensitive configurable
author: Jacky Li <jacky.likun@huawei.com> 2015-05-08 15:25:54 -0700
committer: Michael Armbrust <michael@databricks.com> 2015-05-08 15:25:54 -0700
commit: 6dad76e5eba3c2925bfc9d142f31f7c2dc649886 (patch)
tree: 264c07242b5a7b555a7f58b14c2361f2df49db1d /sql/hive/src/main
parent: 90527f560462cc2d693176bd961b02767e460e06 (diff)
download: spark-6dad76e5eba3c2925bfc9d142f31f7c2dc649886.tar.gz
spark-6dad76e5eba3c2925bfc9d142f31f7c2dc649886.tar.bz2
spark-6dad76e5eba3c2925bfc9d142f31f7c2dc649886.zip
3 files changed, 21 insertions, 24 deletions
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
index 538c6c7f0a..3bab648e31 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
@@ -46,6 +46,7 @@ import org.apache.spark.sql.execution.{ExecutedCommand, ExtractPythonUdfs, Query
 import org.apache.spark.sql.hive.client._
 import org.apache.spark.sql.hive.execution.{DescribeHiveTableCommand, HiveNativeCommand}
 import org.apache.spark.sql.sources.{DDLParser, DataSourceStrategy}
+import org.apache.spark.sql.catalyst.CatalystConf
 import org.apache.spark.sql.types._
 import org.apache.spark.util.Utils
 
@@ -329,7 +330,7 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
   /* An analyzer that uses the Hive metastore. */
   @transient
   override protected[sql] lazy val analyzer =
-    new Analyzer(catalog, functionRegistry, caseSensitive = false) {
+    new Analyzer(catalog, functionRegistry, conf) {
       override val extendedResolutionRules =
         catalog.ParquetConversions ::
         catalog.CreateTables ::
@@ -350,6 +351,8 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
   protected[hive] class SQLSession extends super.SQLSession {
     protected[sql] override lazy val conf: SQLConf = new SQLConf {
       override def dialect: String = getConf(SQLConf.DIALECT, "hiveql")
+      override def caseSensitiveAnalysis: Boolean =
+        getConf(SQLConf.CASE_SENSITIVE, "false").toBoolean
     }
 
     /**
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
index 8fcdf3d0ab..f5398605bc 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
@@ -17,26 +17,18 @@
 
 package org.apache.spark.sql.hive
 
-import java.io.IOException
-import java.util.{List => JList}
-
 import com.google.common.base.Objects
 import com.google.common.cache.{CacheBuilder, CacheLoader, LoadingCache}
 
 import org.apache.hadoop.fs.Path
-import org.apache.hadoop.hive.metastore.api.{FieldSchema, Partition => TPartition, Table => TTable}
-import org.apache.hadoop.hive.metastore.{TableType, Warehouse}
+import org.apache.hadoop.hive.metastore.api.FieldSchema
+import org.apache.hadoop.hive.metastore.Warehouse
 import org.apache.hadoop.hive.ql.metadata._
-import org.apache.hadoop.hive.ql.plan.CreateTableDesc
-import org.apache.hadoop.hive.serde.serdeConstants
-import org.apache.hadoop.hive.serde2.`lazy`.LazySimpleSerDe
-import org.apache.hadoop.hive.serde2.{Deserializer, SerDeException}
-import org.apache.hadoop.util.ReflectionUtils
+import org.apache.hadoop.hive.serde2.Deserializer
 
 import org.apache.spark.Logging
-import org.apache.spark.sql.hive.client.IsolatedClientLoader
 import org.apache.spark.sql.{SaveMode, AnalysisException, SQLContext}
-import org.apache.spark.sql.catalyst.analysis.{MultiInstanceRelation, NoSuchTableException, Catalog, OverrideCatalog}
+import org.apache.spark.sql.catalyst.analysis.{MultiInstanceRelation, Catalog, OverrideCatalog}
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.planning.PhysicalOperation
 import org.apache.spark.sql.catalyst.plans.logical
@@ -44,7 +36,7 @@ import org.apache.spark.sql.catalyst.plans.logical._
 import org.apache.spark.sql.catalyst.rules._
 import org.apache.spark.sql.hive.client._
 import org.apache.spark.sql.parquet.{ParquetRelation2, Partition => ParquetPartition, PartitionSpec}
-import org.apache.spark.sql.sources.{CreateTableUsingAsSelect, DDLParser, LogicalRelation, ResolvedDataSource}
+import org.apache.spark.sql.sources.{CreateTableUsingAsSelect, LogicalRelation, ResolvedDataSource}
 import org.apache.spark.sql.types._
 import org.apache.spark.util.Utils
 
@@ -54,7 +46,7 @@ import scala.collection.JavaConversions._
 private[hive] class HiveMetastoreCatalog(val client: ClientInterface, hive: HiveContext)
   extends Catalog with Logging {
 
-  import org.apache.spark.sql.hive.HiveMetastoreTypes._
+  val conf = hive.conf
 
   /** Usages should lock on `this`. */
   protected[hive] lazy val hiveWarehouse = new Warehouse(hive.hiveconf)
@@ -148,7 +140,7 @@ private[hive] class HiveMetastoreCatalog(val client: ClientInterface, hive: Hive
     val tableProperties = new scala.collection.mutable.HashMap[String, String]
     tableProperties.put("spark.sql.sources.provider", provider)
     if (userSpecifiedSchema.isDefined) {
-      val threshold = hive.conf.schemaStringLengthThreshold
+      val threshold = conf.schemaStringLengthThreshold
       val schemaJsonString = userSpecifiedSchema.get.json
       // Split the JSON string.
       val parts = schemaJsonString.grouped(threshold).toSeq
@@ -355,7 +347,7 @@ private[hive] class HiveMetastoreCatalog(val client: ClientInterface, hive: Hive
             // Inserting into partitioned table is not supported in Parquet data source (yet).
             if !relation.hiveQlTable.isPartitioned &&
               hive.convertMetastoreParquet &&
-              hive.conf.parquetUseDataSourceApi &&
+              conf.parquetUseDataSourceApi &&
               relation.tableDesc.getSerdeClassName.toLowerCase.contains("parquet") =>
           val parquetRelation = convertToParquetRelation(relation)
           val attributedRewrites = relation.output.zip(parquetRelation.output)
@@ -366,7 +358,7 @@ private[hive] class HiveMetastoreCatalog(val client: ClientInterface, hive: Hive
           // Inserting into partitioned table is not supported in Parquet data source (yet).
           if !relation.hiveQlTable.isPartitioned &&
             hive.convertMetastoreParquet &&
-            hive.conf.parquetUseDataSourceApi &&
+            conf.parquetUseDataSourceApi &&
             relation.tableDesc.getSerdeClassName.toLowerCase.contains("parquet") =>
           val parquetRelation = convertToParquetRelation(relation)
           val attributedRewrites = relation.output.zip(parquetRelation.output)
@@ -375,7 +367,7 @@ private[hive] class HiveMetastoreCatalog(val client: ClientInterface, hive: Hive
         // Read path
         case p @ PhysicalOperation(_, _, relation: MetastoreRelation)
             if hive.convertMetastoreParquet &&
-              hive.conf.parquetUseDataSourceApi &&
+              conf.parquetUseDataSourceApi &&
               relation.tableDesc.getSerdeClassName.toLowerCase.contains("parquet") =>
           val parquetRelation = convertToParquetRelation(relation)
           val attributedRewrites = relation.output.zip(parquetRelation.output)
@@ -435,7 +427,7 @@ private[hive] class HiveMetastoreCatalog(val client: ClientInterface, hive: Hive
           val mode = if (allowExisting) SaveMode.Ignore else SaveMode.ErrorIfExists
           CreateTableUsingAsSelect(
             desc.name,
-            hive.conf.defaultDataSourceName,
+            conf.defaultDataSourceName,
             temporary = false,
             mode,
             options = Map.empty[String, String],
@@ -464,7 +456,7 @@ private[hive] class HiveMetastoreCatalog(val client: ClientInterface, hive: Hive
           val mode = if (allowExisting) SaveMode.Ignore else SaveMode.ErrorIfExists
           CreateTableUsingAsSelect(
             tblName,
-            hive.conf.defaultDataSourceName,
+            conf.defaultDataSourceName,
             temporary = false,
             mode,
             options = Map.empty[String, String],
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
index 1f40a5340c..1598d4bd47 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
@@ -26,15 +26,16 @@ import org.apache.hadoop.hive.ql.io.avro.{AvroContainerInputFormat, AvroContaine
 import org.apache.hadoop.hive.ql.metadata.Table
 import org.apache.hadoop.hive.ql.parse.VariableSubstitution
 import org.apache.hadoop.hive.ql.processors._
-import org.apache.hadoop.hive.serde2.RegexSerDe
 import org.apache.hadoop.hive.serde2.`lazy`.LazySimpleSerDe
 import org.apache.hadoop.hive.serde2.avro.AvroSerDe
-import org.apache.spark.sql.SQLConf
+
+import org.apache.spark.sql.catalyst.CatalystConf
 import org.apache.spark.sql.catalyst.analysis._
 import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
 import org.apache.spark.sql.execution.CacheTableCommand
 import org.apache.spark.sql.hive._
 import org.apache.spark.sql.hive.execution.HiveNativeCommand
+import org.apache.spark.sql.SQLConf
 import org.apache.spark.util.Utils
 import org.apache.spark.{SparkConf, SparkContext}
 
@@ -103,10 +104,11 @@ class TestHiveContext(sc: SparkContext) extends HiveContext(sc) {
     /** Fewer partitions to speed up testing. */
     protected[sql] override lazy val conf: SQLConf = new SQLConf {
       override def numShufflePartitions: Int = getConf(SQLConf.SHUFFLE_PARTITIONS, "5").toInt
-
       // TODO as in unit test, conf.clear() probably be called, all of the value will be cleared.
       // The super.getConf(SQLConf.DIALECT) is "sql" by default, we need to set it as "hiveql"
       override def dialect: String = super.getConf(SQLConf.DIALECT, "hiveql")
+      override def caseSensitiveAnalysis: Boolean =
+        getConf(SQLConf.CASE_SENSITIVE, "false").toBoolean
     }
   }
author	Jacky Li <jacky.likun@huawei.com>	2015-05-08 15:25:54 -0700
committer	Michael Armbrust <michael@databricks.com>	2015-05-08 15:25:54 -0700
commit	6dad76e5eba3c2925bfc9d142f31f7c2dc649886 (patch)
tree	264c07242b5a7b555a7f58b14c2361f2df49db1d /sql/hive/src/main
parent	90527f560462cc2d693176bd961b02767e460e06 (diff)
download	spark-6dad76e5eba3c2925bfc9d142f31f7c2dc649886.tar.gz spark-6dad76e5eba3c2925bfc9d142f31f7c2dc649886.tar.bz2 spark-6dad76e5eba3c2925bfc9d142f31f7c2dc649886.zip