Author:    Cheng Hao <hao.cheng@intel.com>  2015-01-21 13:05:56 -0800
Committer: Michael Armbrust <michael@databricks.com>  2015-01-21 13:05:56 -0800
commit 8361078efae7d79742d6be94cf5a15637ec860dd (patch)
tree   8725739ba1d9015e1276ba3ddeae3c9c89d0f207 /sql/catalyst
parent 812d3679f5f97df7b667cbc3365a49866ebc02d5 (diff)
[SPARK-5009] [SQL] Long keyword support in SQL Parsers
* `SqlLexical.allCaseVersions` causes a `StackOverflowException` if the keyword is too long; this patch fixes that by normalizing all of the keywords in `SqlLexical`.
* It also makes a unified SparkSQLParser for sharing the common code.

Author: Cheng Hao <hao.cheng@intel.com>

Closes #3926 from chenghao-intel/long_keyword and squashes the following commits:

686660f [Cheng Hao] Support Long Keyword and Refactor the SQLParsers
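For context, a minimal sketch of the failure mode and the fix. `allCaseVersions` and `KeywordNormalizer` below are lifted from the diff itself; the driver lines and the results shown in comments are illustrative assumptions, not output from the patch's tests:

    // Old approach (removed by this patch): enumerate every case variant.
    def allCaseVersions(s: String, prefix: String = ""): Stream[String] =
      if (s.isEmpty) Stream(prefix)
      else allCaseVersions(s.tail, prefix + s.head.toLower) #:::
           allCaseVersions(s.tail, prefix + s.head.toUpper)

    allCaseVersions("ab").toList  // List(ab, aB, Ab, AB)
    // An n-character keyword yields 2^n variants, so forcing the Stream for a
    // long keyword like "THISISASUPERLONGKEYWORDTEST" blows the stack.

    // New approach: normalize once, on both sides of the comparison.
    object KeywordNormalizer { def apply(str: String): String = str.toLowerCase() }
    KeywordNormalizer("SELECT") == KeywordNormalizer("select")  // true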
Diffstat (limited to 'sql/catalyst')
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/AbstractSparkSQLParser.scala | 59
-rwxr-xr-x  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala | 15
-rw-r--r--  sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/SqlParserSuite.scala | 61
3 files changed, 106 insertions(+), 29 deletions(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/AbstractSparkSQLParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/AbstractSparkSQLParser.scala
index 93d74adbcc..366be00473 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/AbstractSparkSQLParser.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/AbstractSparkSQLParser.scala
@@ -25,15 +25,42 @@ import scala.util.parsing.input.CharArrayReader.EofCh
import org.apache.spark.sql.catalyst.plans.logical._
+private[sql] object KeywordNormalizer {
+ def apply(str: String) = str.toLowerCase()
+}
+
private[sql] abstract class AbstractSparkSQLParser
extends StandardTokenParsers with PackratParsers {
- def apply(input: String): LogicalPlan = phrase(start)(new lexical.Scanner(input)) match {
- case Success(plan, _) => plan
- case failureOrError => sys.error(failureOrError.toString)
+ def apply(input: String): LogicalPlan = {
+ // Initialize the Keywords.
+ lexical.initialize(reservedWords)
+ phrase(start)(new lexical.Scanner(input)) match {
+ case Success(plan, _) => plan
+ case failureOrError => sys.error(failureOrError.toString)
+ }
}
- protected case class Keyword(str: String)
+ protected case class Keyword(str: String) {
+ def normalize = KeywordNormalizer(str)
+ def parser: Parser[String] = normalize
+ }
+
+ protected implicit def asParser(k: Keyword): Parser[String] = k.parser
+
+ // By default, use reflection to find the reserved words defined in the subclass.
+ // NOTE: since the Keyword properties are defined by the subclass, we cannot call
+ // this method while the parent class is being instantiated, because the subclass
+ // instance has not been created yet.
+ protected lazy val reservedWords: Seq[String] =
+ this
+ .getClass
+ .getMethods
+ .filter(_.getReturnType == classOf[Keyword])
+ .map(_.invoke(this).asInstanceOf[Keyword].normalize)
+
+ // The keywords start out empty here; they are initialized lazily in apply().
+ override val lexical = new SqlLexical
protected def start: Parser[LogicalPlan]
@@ -52,18 +79,27 @@ private[sql] abstract class AbstractSparkSQLParser
}
}
-class SqlLexical(val keywords: Seq[String]) extends StdLexical {
+class SqlLexical extends StdLexical {
case class FloatLit(chars: String) extends Token {
override def toString = chars
}
- reserved ++= keywords.flatMap(w => allCaseVersions(w))
+ /* This is a workaround to support initializing the keywords lazily */
+ def initialize(keywords: Seq[String]): Unit = {
+ reserved.clear()
+ reserved ++= keywords
+ }
delimiters += (
"@", "*", "+", "-", "<", "=", "<>", "!=", "<=", ">=", ">", "/", "(", ")",
",", ";", "%", "{", "}", ":", "[", "]", ".", "&", "|", "^", "~", "<=>"
)
+ protected override def processIdent(name: String) = {
+ val token = KeywordNormalizer(name)
+ if (reserved contains token) Keyword(token) else Identifier(name)
+ }
+
override lazy val token: Parser[Token] =
( identChar ~ (identChar | digit).* ^^
{ case first ~ rest => processIdent((first :: rest).mkString) }
@@ -94,14 +130,5 @@ class SqlLexical(val keywords: Seq[String]) extends StdLexical {
| '-' ~ '-' ~ chrExcept(EofCh, '\n').*
| '/' ~ '*' ~ failure("unclosed comment")
).*
-
- /** Generate all variations of upper and lower case of a given string */
- def allCaseVersions(s: String, prefix: String = ""): Stream[String] = {
- if (s.isEmpty) {
- Stream(prefix)
- } else {
- allCaseVersions(s.tail, prefix + s.head.toLower) #:::
- allCaseVersions(s.tail, prefix + s.head.toUpper)
- }
- }
}
+
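A side note on why `reservedWords` above is declared `lazy`: Scala runs the parent class's constructor before the subclass's fields are assigned, so an eager `val` would reflect over null Keyword fields. Here is a self-contained sketch of the pattern, with hypothetical `Base`/`MyParser` names rather than Spark code:

    case class Keyword(str: String)

    abstract class Base {
      // Deliberately lazy: evaluated only after the subclass is fully constructed,
      // at which point its Keyword vals (exposed as getter methods) are non-null.
      lazy val reservedWords: Seq[String] =
        this.getClass.getMethods
          .filter(_.getReturnType == classOf[Keyword])
          .map(_.invoke(this).asInstanceOf[Keyword].str)
          .toSeq
    }

    class MyParser extends Base {
      val SELECT = Keyword("select")
      val WHERE  = Keyword("where")
    }

    // new MyParser().reservedWords contains "select" and "where"
    // (getMethods gives no ordering guarantee).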
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala
index 388e2f74a0..4ca4e05edd 100755
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala
@@ -36,9 +36,8 @@ import org.apache.spark.sql.types._
* for a SQL like language should checkout the HiveQL support in the sql/hive sub-project.
*/
class SqlParser extends AbstractSparkSQLParser {
- protected implicit def asParser(k: Keyword): Parser[String] =
- lexical.allCaseVersions(k.str).map(x => x : Parser[String]).reduce(_ | _)
-
+ // Keyword is a convention shared with AbstractSparkSQLParser, which scans all of the
+ // `Keyword` properties of this class via reflection at runtime to construct the SqlLexical object
protected val ABS = Keyword("ABS")
protected val ALL = Keyword("ALL")
protected val AND = Keyword("AND")
@@ -108,16 +107,6 @@ class SqlParser extends AbstractSparkSQLParser {
protected val WHEN = Keyword("WHEN")
protected val WHERE = Keyword("WHERE")
- // Use reflection to find the reserved words defined in this class.
- protected val reservedWords =
- this
- .getClass
- .getMethods
- .filter(_.getReturnType == classOf[Keyword])
- .map(_.invoke(this).asInstanceOf[Keyword].str)
-
- override val lexical = new SqlLexical(reservedWords)
-
protected def assignAliases(exprs: Seq[Expression]): Seq[NamedExpression] = {
exprs.zipWithIndex.map {
case (ne: NamedExpression, _) => ne
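The case-insensitive behavior that the new tests below exercise comes from `processIdent` in AbstractSparkSQLParser.scala above: both the reserved-word table and each scanned identifier pass through the same normalizer. A toy sketch of that check outside the lexer machinery (the `classify` helper is hypothetical):

    object KeywordNormalizer { def apply(str: String): String = str.toLowerCase() }

    val reserved = collection.mutable.HashSet(KeywordNormalizer("EXECUTE"))

    // Mirrors processIdent: match on the normalized token, but keep the
    // original text for identifiers.
    def classify(name: String): String = {
      val token = KeywordNormalizer(name)
      if (reserved contains token) s"Keyword($token)" else s"Identifier($name)"
    }

    classify("EXECUTE")  // Keyword(execute)
    classify("exEcute")  // Keyword(execute)
    classify("myTable")  // Identifier(myTable)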
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/SqlParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/SqlParserSuite.scala
new file mode 100644
index 0000000000..1a0a0e6154
--- /dev/null
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/SqlParserSuite.scala
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst
+
+import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
+import org.apache.spark.sql.catalyst.plans.logical.Command
+import org.scalatest.FunSuite
+
+private[sql] case class TestCommand(cmd: String) extends Command
+
+private[sql] class SuperLongKeywordTestParser extends AbstractSparkSQLParser {
+ protected val EXECUTE = Keyword("THISISASUPERLONGKEYWORDTEST")
+
+ override protected lazy val start: Parser[LogicalPlan] = set
+
+ private lazy val set: Parser[LogicalPlan] =
+ EXECUTE ~> ident ^^ {
+ case fileName => TestCommand(fileName)
+ }
+}
+
+private[sql] class CaseInsensitiveTestParser extends AbstractSparkSQLParser {
+ protected val EXECUTE = Keyword("EXECUTE")
+
+ override protected lazy val start: Parser[LogicalPlan] = set
+
+ private lazy val set: Parser[LogicalPlan] =
+ EXECUTE ~> ident ^^ {
+ case fileName => TestCommand(fileName)
+ }
+}
+
+class SqlParserSuite extends FunSuite {
+
+ test("test long keyword") {
+ val parser = new SuperLongKeywordTestParser
+ assert(TestCommand("NotRealCommand") === parser("ThisIsASuperLongKeyWordTest NotRealCommand"))
+ }
+
+ test("test case insensitive") {
+ val parser = new CaseInsensitiveTestParser
+ assert(TestCommand("NotRealCommand") === parser("EXECUTE NotRealCommand"))
+ assert(TestCommand("NotRealCommand") === parser("execute NotRealCommand"))
+ assert(TestCommand("NotRealCommand") === parser("exEcute NotRealCommand"))
+ }
+}