author     Wenchen Fan <wenchen@databricks.com>  2016-09-08 19:41:49 +0800
committer  Wenchen Fan <wenchen@databricks.com>  2016-09-08 19:41:49 +0800
commit     3ced39df32e52170d6954a2464f84e0c9f307423 (patch)
tree       94c1a6de4ed76cd3384a65293a9d0de6e8d6269c /sql
parent     b230fb92a53375b648fa0f9e1d852270156d79e5 (diff)
[SPARK-17432][SQL] PreprocessDDL should respect case sensitivity when checking duplicated columns
## What changes were proposed in this pull request?

In `PreprocessDDL` we check whether table columns are duplicated. However, this check ignores the case sensitivity config (it is always case-sensitive), which leads to different results between `HiveExternalCatalog` and `InMemoryCatalog`: `HiveExternalCatalog` throws an exception because the Hive metastore is always case-insensitive, while `InMemoryCatalog` is fine. This PR fixes it.

## How was this patch tested?

A new test in DDLSuite.

Author: Wenchen Fan <wenchen@databricks.com>

Closes #14994 from cloud-fan/check-dup.
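A minimal sketch of the idea described above, outside of Spark's internals: normalize the column names before the duplicate check unless analysis is case-sensitive, so that names differing only in case collide the same way a case-insensitive Hive metastore would treat them. `normalizedNames` is a hypothetical helper used only for illustration, not the actual code in `PreprocessDDL`.

```scala
object DuplicateColumnCheckSketch {
  // Hypothetical helper for illustration only; not the actual Spark code.
  def normalizedNames(columnNames: Seq[String], caseSensitive: Boolean): Seq[String] =
    if (caseSensitive) columnNames else columnNames.map(_.toLowerCase)

  def main(args: Array[String]): Unit = {
    // Under case-insensitive analysis, "a" and "A" collapse to the same name,
    // so a duplicate check over the normalized names can flag them; under
    // case-sensitive analysis they remain distinct.
    assert(normalizedNames(Seq("a", "A"), caseSensitive = true).distinct.size == 2)
    assert(normalizedNames(Seq("a", "A"), caseSensitive = false).distinct.size == 1)
  }
}
```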
Diffstat (limited to 'sql')
 -rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala    7
 -rw-r--r--  sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala     7
2 files changed, 13 insertions(+), 1 deletion(-)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala
index 5b96206ba8..fbf4063ff6 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala
@@ -97,7 +97,12 @@ case class PreprocessDDL(conf: SQLConf) extends Rule[LogicalPlan] {
// * sort columns' type must be orderable.
case c @ CreateTable(tableDesc, mode, query) if c.childrenResolved =>
val schema = if (query.isDefined) query.get.schema else tableDesc.schema
- checkDuplication(schema.map(_.name), "table definition of " + tableDesc.identifier)
+ val columnNames = if (conf.caseSensitiveAnalysis) {
+ schema.map(_.name)
+ } else {
+ schema.map(_.name.toLowerCase)
+ }
+ checkDuplication(columnNames, "table definition of " + tableDesc.identifier)
val partitionColsChecked = checkPartitionColumns(schema, tableDesc)
val bucketColsChecked = checkBucketColumns(schema, partitionColsChecked)
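
For context, a minimal sketch of the kind of duplicate check the (possibly lowercased) column names feed into; `checkDuplication` below is an illustrative stand-in, not the exact helper in rules.scala, which may differ in signature and in the exception type it raises.

```scala
import scala.util.Try

object CheckDuplicationSketch {
  // Illustrative stand-in for the duplicate-name check; groups the names and
  // reports any that occur more than once.
  def checkDuplication(columnNames: Seq[String], context: String): Unit = {
    val duplicates = columnNames.groupBy(identity).collect {
      case (name, occurrences) if occurrences.size > 1 => name
    }
    if (duplicates.nonEmpty) {
      throw new IllegalArgumentException(
        s"Found duplicate column(s) in $context: ${duplicates.mkString(", ")}")
    }
  }

  def main(args: Array[String]): Unit = {
    // Case-sensitive: "a" and "A" are distinct, so no error is raised.
    println(Try(checkDuplication(Seq("a", "A"), "table definition of `tbl`")))
    // Case-insensitive: names are lowercased first, so the check fails.
    println(Try(checkDuplication(Seq("a", "A").map(_.toLowerCase), "table definition of `tbl`")))
  }
}
```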
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
index fd35c987ca..05f826a11b 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
@@ -371,6 +371,13 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach {
sql("CREATE TABLE tbl(a int, a string) USING json")
}
assert(e.message == "Found duplicate column(s) in table definition of `tbl`: a")
+
+ withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") {
+ val e2 = intercept[AnalysisException] {
+ sql("CREATE TABLE tbl(a int, A string) USING json")
+ }
+ assert(e2.message == "Found duplicate column(s) in table definition of `tbl`: a")
+ }
}
test("create table - partition column names not in table definition") {