diff options
author | Wenchen Fan <wenchen@databricks.com> | 2016-09-08 19:41:49 +0800 |
---|---|---|
committer | Wenchen Fan <wenchen@databricks.com> | 2016-09-08 19:41:49 +0800 |
commit | 3ced39df32e52170d6954a2464f84e0c9f307423 (patch) | |
tree | 94c1a6de4ed76cd3384a65293a9d0de6e8d6269c /sql | |
parent | b230fb92a53375b648fa0f9e1d852270156d79e5 (diff) | |
download | spark-3ced39df32e52170d6954a2464f84e0c9f307423.tar.gz spark-3ced39df32e52170d6954a2464f84e0c9f307423.tar.bz2 spark-3ced39df32e52170d6954a2464f84e0c9f307423.zip |
[SPARK-17432][SQL] PreprocessDDL should respect case sensitivity when checking duplicated columns
## What changes were proposed in this pull request?
In `PreprocessDDL` we will check if table columns are duplicated. However, this check ignores the case-sensitivity config (it's always case-sensitive) and leads to different results between `HiveExternalCatalog` and `InMemoryCatalog`. `HiveExternalCatalog` will throw an exception because the Hive metastore is always case-insensitive, while `InMemoryCatalog` is fine.
This PR fixes it.
## How was this patch tested?
a new test in DDLSuite
Author: Wenchen Fan <wenchen@databricks.com>
Closes #14994 from cloud-fan/check-dup.
Diffstat (limited to 'sql')
-rw-r--r-- | sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala | 7 | ||||
-rw-r--r-- | sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala | 7 |
2 files changed, 13 insertions, 1 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala index 5b96206ba8..fbf4063ff6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala @@ -97,7 +97,12 @@ case class PreprocessDDL(conf: SQLConf) extends Rule[LogicalPlan] { // * sort columns' type must be orderable. case c @ CreateTable(tableDesc, mode, query) if c.childrenResolved => val schema = if (query.isDefined) query.get.schema else tableDesc.schema - checkDuplication(schema.map(_.name), "table definition of " + tableDesc.identifier) + val columnNames = if (conf.caseSensitiveAnalysis) { + schema.map(_.name) + } else { + schema.map(_.name.toLowerCase) + } + checkDuplication(columnNames, "table definition of " + tableDesc.identifier) val partitionColsChecked = checkPartitionColumns(schema, tableDesc) val bucketColsChecked = checkBucketColumns(schema, partitionColsChecked) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala index fd35c987ca..05f826a11b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala @@ -371,6 +371,13 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach { sql("CREATE TABLE tbl(a int, a string) USING json") } assert(e.message == "Found duplicate column(s) in table definition of `tbl`: a") + + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") { + val e2 = intercept[AnalysisException] { + sql("CREATE TABLE tbl(a int, A string) USING json") + } + assert(e2.message == "Found duplicate column(s) in table definition of `tbl`: a") + } } test("create table - partition column names not in table 
definition") { |