aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorgatorsmile <gatorsmile@gmail.com>2016-05-21 23:56:10 -0700
committerWenchen Fan <wenchen@databricks.com>2016-05-21 23:56:10 -0700
commita11175eecacd4a57325dab29fff9ebfad819f22f (patch)
treedf3ccb200654ef7a29e59d7af9c542ceb19f8e5a
parent6d0bfb9601b02749d673bbceea5c36e5bc1c3825 (diff)
downloadspark-a11175eecacd4a57325dab29fff9ebfad819f22f.tar.gz
spark-a11175eecacd4a57325dab29fff9ebfad819f22f.tar.bz2
spark-a11175eecacd4a57325dab29fff9ebfad819f22f.zip
[SPARK-15312][SQL] Detect Duplicate Key in Partition Spec and Table Properties
#### What changes were proposed in this pull request? When there are duplicate keys in the partition specs or table properties, we always use the last value and ignore all the previous values. This is caused by the function call `toMap`. Partition specs and table properties are widely used in multiple DDL statements. This PR is to detect the duplicates and issue an exception if any are found. #### How was this patch tested? Added test cases in DDLSuite Author: gatorsmile <gatorsmile@gmail.com> Closes #13095 from gatorsmile/detectDuplicate.
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala15
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala7
-rw-r--r--sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala2
-rw-r--r--sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala7
-rw-r--r--sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala15
5 files changed, 34 insertions, 12 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
index 2d7d0f9032..cace026701 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -125,14 +125,8 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging {
val namedQuery = visitNamedQuery(nCtx)
(namedQuery.alias, namedQuery)
}
-
// Check for duplicate names.
- ctes.groupBy(_._1).filter(_._2.size > 1).foreach {
- case (name, _) =>
- throw new ParseException(
- s"Name '$name' is used for multiple common table expressions", ctx)
- }
-
+ checkDuplicateKeys(ctes, ctx)
With(query, ctes.toMap)
}
}
@@ -220,11 +214,14 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging {
*/
override def visitPartitionSpec(
ctx: PartitionSpecContext): Map[String, Option[String]] = withOrigin(ctx) {
- ctx.partitionVal.asScala.map { pVal =>
+ val parts = ctx.partitionVal.asScala.map { pVal =>
val name = pVal.identifier.getText.toLowerCase
val value = Option(pVal.constant).map(visitStringConstant)
name -> value
- }.toMap
+ }
+ // Check for duplicate partition columns in one spec.
+ checkDuplicateKeys(parts, ctx)
+ parts.toMap
}
/**
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala
index 58e2bdb6e2..9619884ede 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala
@@ -43,6 +43,13 @@ object ParserUtils {
new ParseException(s"Operation not allowed: $message", ctx)
}
+ /** Check if duplicate keys exist in a set of key-value pairs. */
+ def checkDuplicateKeys[T](keyPairs: Seq[(String, T)], ctx: ParserRuleContext): Unit = {
+ keyPairs.groupBy(_._1).filter(_._2.size > 1).foreach { case (key, _) =>
+ throw new ParseException(s"Found duplicate keys '$key'.", ctx)
+ }
+ }
+
/** Get the code that creates the given node. */
def source(ctx: ParserRuleContext): String = {
val stream = ctx.getStart.getInputStream
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
index 25d87d93be..5811d32cd9 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
@@ -108,7 +108,7 @@ class PlanParserSuite extends PlanTest {
"cte2" -> table("cte1").select(star())))
intercept(
"with cte1 (select 1), cte1 as (select 1 from cte1) select * from cte1",
- "Name 'cte1' is used for multiple common table expressions")
+ "Found duplicate keys 'cte1'")
}
test("simple select query") {
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
index 2e3ac9706d..c517b8b55f 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
@@ -387,11 +387,14 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder {
*/
override def visitTablePropertyList(
ctx: TablePropertyListContext): Map[String, String] = withOrigin(ctx) {
- ctx.tableProperty.asScala.map { property =>
+ val properties = ctx.tableProperty.asScala.map { property =>
val key = visitTablePropertyKey(property.key)
val value = Option(property.value).map(string).orNull
key -> value
- }.toMap
+ }
+ // Check for duplicate property names.
+ checkDuplicateKeys(properties, ctx)
+ properties.toMap
}
/**
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala
index 708b878c84..54f98a6232 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala
@@ -665,6 +665,21 @@ class DDLCommandSuite extends PlanTest {
assert(parsed.isInstanceOf[Project])
}
+ test("duplicate keys in table properties") {
+ val e = intercept[ParseException] {
+ parser.parsePlan("ALTER TABLE dbx.tab1 SET TBLPROPERTIES ('key1' = '1', 'key1' = '2')")
+ }.getMessage
+ assert(e.contains("Found duplicate keys 'key1'"))
+ }
+
+ test("duplicate columns in partition specs") {
+ val e = intercept[ParseException] {
+ parser.parsePlan(
+ "ALTER TABLE dbx.tab1 PARTITION (a='1', a='2') RENAME TO PARTITION (a='100', a='200')")
+ }.getMessage
+ assert(e.contains("Found duplicate keys 'a'"))
+ }
+
test("drop table") {
val tableName1 = "db.tab"
val tableName2 = "tab"