diff options
author | Wenchen Fan <wenchen@databricks.com> | 2016-09-08 19:41:49 +0800 |
---|---|---|
committer | Wenchen Fan <wenchen@databricks.com> | 2016-09-08 19:41:49 +0800 |
commit | 3ced39df32e52170d6954a2464f84e0c9f307423 (patch) | |
tree | 94c1a6de4ed76cd3384a65293a9d0de6e8d6269c /sql | |
parent | b230fb92a53375b648fa0f9e1d852270156d79e5 (diff) | |
download | spark-3ced39df32e52170d6954a2464f84e0c9f307423.tar.gz spark-3ced39df32e52170d6954a2464f84e0c9f307423.tar.bz2 spark-3ced39df32e52170d6954a2464f84e0c9f307423.zip |
[SPARK-17432][SQL] PreprocessDDL should respect case sensitivity when checking duplicated columns
## What changes were proposed in this pull request?
In `PreprocessDDL` we will check if table columns are duplicated. However, this check ignores the case-sensitivity config (it's always case-sensitive) and leads to different results between `HiveExternalCatalog` and `InMemoryCatalog`. `HiveExternalCatalog` will throw an exception because the Hive metastore is always case-insensitive, while `InMemoryCatalog` is fine.
This PR fixes it.
## How was this patch tested?
a new test in DDLSuite
Author: Wenchen Fan <wenchen@databricks.com>
Closes #14994 from cloud-fan/check-dup.
Diffstat (limited to 'sql')
-rw-r--r-- | sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala | 7 | ||||
-rw-r--r-- | sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala | 7 |
2 files changed, 13 insertions, 1 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala index 5b96206ba8..fbf4063ff6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala @@ -97,7 +97,12 @@ case class PreprocessDDL(conf: SQLConf) extends Rule[LogicalPlan] { // * sort columns' type must be orderable. case c @ CreateTable(tableDesc, mode, query) if c.childrenResolved => val schema = if (query.isDefined) query.get.schema else tableDesc.schema - checkDuplication(schema.map(_.name), "table definition of " + tableDesc.identifier) + val columnNames = if (conf.caseSensitiveAnalysis) { + schema.map(_.name) + } else { + schema.map(_.name.toLowerCase) + } + checkDuplication(columnNames, "table definition of " + tableDesc.identifier) val partitionColsChecked = checkPartitionColumns(schema, tableDesc) val bucketColsChecked = checkBucketColumns(schema, partitionColsChecked) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala index fd35c987ca..05f826a11b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala @@ -371,6 +371,13 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach { sql("CREATE TABLE tbl(a int, a string) USING json") } assert(e.message == "Found duplicate column(s) in table definition of `tbl`: a") + + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") { + val e2 = intercept[AnalysisException] { + sql("CREATE TABLE tbl(a int, A string) USING json") + } + assert(e2.message == "Found duplicate column(s) in table definition of `tbl`: a") + } } test("create table - partition column names not in table 
definition") { |