[SPARK-17832][SQL] TableIdentifier.quotedString creates un-parseable names when name contains a backtick

## What changes were proposed in this pull request? The `quotedString` method in `TableIdentifier` and `FunctionIdentifier` produces an illegal (un-parseable) name when the name contains a backtick. For example: ``` import org.apache.spark.sql.catalyst.parser.CatalystSqlParser._ import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute val complexName = TableIdentifier("`weird`table`name", Some("`d`b`1")) parseTableIdentifier(complexName.unquotedString) // Does not work parseTableIdentifier(complexName.quotedString) // Does not work parseExpression(complexName.unquotedString) // Does not work parseExpression(complexName.quotedString) // Does not work ``` We should handle the backtick properly to make `quotedString` parseable. ## How was this patch tested? Added new test cases in `TableIdentifierParserSuite` and `ExpressionParserSuite`. Author: jiangxingbo <jiangxb1987@gmail.com> Closes #15403 from jiangxb1987/backtick.
author: jiangxingbo <jiangxb1987@gmail.com> 2016-10-09 21:52:46 -0700
committer: Herman van Hovell <hvanhovell@databricks.com> 2016-10-09 21:52:46 -0700
commit: 26fbca480604ba258f97b9590cfd6dda1ecd31db (patch)
tree: 8580656205ca7a2b2533f7b695e36973dd6ef059
parent: 8a6bbe095b6a9aa33989c0deaa5ed0128d70320f (diff)
download: spark-26fbca480604ba258f97b9590cfd6dda1ecd31db.tar.gz
spark-26fbca480604ba258f97b9590cfd6dda1ecd31db.tar.bz2
spark-26fbca480604ba258f97b9590cfd6dda1ecd31db.zip
3 files changed, 29 insertions, 3 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/identifiers.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/identifiers.scala
index d7b48ceca5..834897b850 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/identifiers.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/identifiers.scala
@@ -17,7 +17,6 @@
 
 package org.apache.spark.sql.catalyst
 
-
 /**
  * An identifier that optionally specifies a database.
  *
@@ -29,8 +28,16 @@ sealed trait IdentifierWithDatabase {
 
   def database: Option[String]
 
+  /*
+   * Escapes back-ticks within the identifier name with double-back-ticks.
+   */
+  private def quoteIdentifier(name: String): String = name.replace("`", "``")
+
   def quotedString: String = {
-    if (database.isDefined) s"`${database.get}`.`$identifier`" else s"`$identifier`"
+    val replacedId = quoteIdentifier(identifier)
+    val replacedDb = database.map(quoteIdentifier(_))
+
+    if (replacedDb.isDefined) s"`${replacedDb.get}`.`$replacedId`" else s"`$replacedId`"
   }
 
   def unquotedString: String = {
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala
index 0fb1138478..17cfc81588 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala
@@ -18,7 +18,7 @@ package org.apache.spark.sql.catalyst.parser
 
 import java.sql.{Date, Timestamp}
 
-import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier}
+import org.apache.spark.sql.catalyst.FunctionIdentifier
 import org.apache.spark.sql.catalyst.analysis.{UnresolvedAttribute, _}
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.plans.PlanTest
@@ -535,4 +535,13 @@ class ExpressionParserSuite extends PlanTest {
     // ".123BD" should not be treated as token of type BIGDECIMAL_LITERAL
     assertEqual("a.123BD_column", UnresolvedAttribute("a.123BD_column"))
   }
+
+  test("SPARK-17832 function identifier contains backtick") {
+    val complexName = FunctionIdentifier("`ba`r", Some("`fo`o"))
+    assertEqual(complexName.quotedString, UnresolvedAttribute("`fo`o.`ba`r"))
+    intercept(complexName.unquotedString, "mismatched input")
+    // Function identifiers containing consecutive backticks should be handled correctly.
+    val complexName2 = FunctionIdentifier("ba``r", Some("fo``o"))
+    assertEqual(complexName2.quotedString, UnresolvedAttribute("fo``o.ba``r"))
+  }
 }
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala
index 793be8953d..7d46011b41 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala
@@ -104,4 +104,14 @@ class TableIdentifierParserSuite extends SparkFunSuite {
     // ".123BD" should not be treated as token of type BIGDECIMAL_LITERAL
     assert(parseTableIdentifier("a.123BD_LIST") == TableIdentifier("123BD_LIST", Some("a")))
   }
+
+  test("SPARK-17832 table identifier - contains backtick") {
+    val complexName = TableIdentifier("`weird`table`name", Some("`d`b`1"))
+    assert(complexName === parseTableIdentifier("```d``b``1`.```weird``table``name`"))
+    assert(complexName === parseTableIdentifier(complexName.quotedString))
+    intercept[ParseException](parseTableIdentifier(complexName.unquotedString))
+    // Table identifiers containing consecutive backticks should be handled correctly.
+    val complexName2 = TableIdentifier("x``y", Some("d``b"))
+    assert(complexName2 === parseTableIdentifier(complexName2.quotedString))
+  }
 }
author	jiangxingbo <jiangxb1987@gmail.com>	2016-10-09 21:52:46 -0700
committer	Herman van Hovell <hvanhovell@databricks.com>	2016-10-09 21:52:46 -0700
commit	26fbca480604ba258f97b9590cfd6dda1ecd31db (patch)
tree	8580656205ca7a2b2533f7b695e36973dd6ef059
parent	8a6bbe095b6a9aa33989c0deaa5ed0128d70320f (diff)
download	spark-26fbca480604ba258f97b9590cfd6dda1ecd31db.tar.gz spark-26fbca480604ba258f97b9590cfd6dda1ecd31db.tar.bz2 spark-26fbca480604ba258f97b9590cfd6dda1ecd31db.zip