author     ravipesala <ravindra.pesala@huawei.com>    2014-10-02 20:04:33 -0700
committer  Michael Armbrust <michael@databricks.com>  2014-10-02 20:04:33 -0700
commit     1c90347a4bba12df7b76d282a7dbac8e555e049f (patch)
tree       2f752a98d80c375fffc861ba7b11f07792d5dcd0
parent     7de4e50a01e90bcf88e0b721b2b15a5162373d56 (diff)
[SPARK-3654][SQL] Implement all extended HiveQL statements/commands with a separate parser combinator
Created a separate parser for HiveQL. It pre-parses commands like CACHE, UNCACHE, ADD JAR, etc., and then parses the remainder with HiveQl.

Author: ravipesala <ravindra.pesala@huawei.com>

Closes #2590 from ravipesala/SPARK-3654 and squashes the following commits:

bbca7dd [ravipesala] Fixed code as per admin comments.
ae9290a [ravipesala] Fixed style issues as per admin comments
898ed81 [ravipesala] Removed spaces
fb24edf [ravipesala] Updated the code as per admin comments
8947d37 [ravipesala] Removed duplicate code
ba26cd1 [ravipesala] Created separate parser for HiveQL. It pre-parses commands like cache, uncache, add jar, etc., and then parses with HiveQl
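As a rough sketch of what the new parser accepts, the hypothetical calls below (invented inputs; ExtendedHiveQlParser is private[hive], so this assumes code sitting in the org.apache.spark.sql.hive package) map each extension statement to the command node it produces:

    val parser = new ExtendedHiveQlParser              // from the new file below
    parser("CACHE TABLE src")                          // CacheCommand("src", true)
    parser("CACHE TABLE t AS SELECT * FROM src")       // CacheTableAsSelectCommand("t", <plan>)
    parser("UNCACHE TABLE src")                        // CacheCommand("src", false)
    parser("ADD JAR /tmp/my-udfs.jar")                 // AddJar("/tmp/my-udfs.jar")
    parser("set hive.exec.dynamic.partition=true")     // SetCommand(Some(...), Some("true"))
    parser("SELECT key FROM src")                      // falls through to HiveQl.createPlan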
-rw-r--r--  sql/hive/src/main/scala/org/apache/spark/sql/hive/ExtendedHiveQlParser.scala  135
-rw-r--r--  sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala                 57
-rw-r--r--  sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala        6
3 files changed, 154 insertions(+), 44 deletions(-)
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/ExtendedHiveQlParser.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/ExtendedHiveQlParser.scala
new file mode 100644
index 0000000000..e7e1cb980c
--- /dev/null
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/ExtendedHiveQlParser.scala
@@ -0,0 +1,135 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.hive
+
+import scala.language.implicitConversions
+import scala.util.parsing.combinator.syntactical.StandardTokenParsers
+import scala.util.parsing.combinator.PackratParsers
+import org.apache.spark.sql.catalyst.plans.logical._
+import org.apache.spark.sql.catalyst.SqlLexical
+
+/**
+ * A parser that recognizes all HiveQL constructs together with several Spark SQL specific
+ * extensions like CACHE TABLE and UNCACHE TABLE.
+ */
+private[hive] class ExtendedHiveQlParser extends StandardTokenParsers with PackratParsers {
+
+ def apply(input: String): LogicalPlan = {
+ // Special-case out set commands since the value fields can be
+ // complex to handle without RegexParsers. Also this approach
+ // is clearer for the several possible cases of set commands.
+ if (input.trim.toLowerCase.startsWith("set")) {
+ input.trim.drop(3).split("=", 2).map(_.trim) match {
+ case Array("") => // "set"
+ SetCommand(None, None)
+ case Array(key) => // "set key"
+ SetCommand(Some(key), None)
+ case Array(key, value) => // "set key=value"
+ SetCommand(Some(key), Some(value))
+ }
+ } else if (input.trim.startsWith("!")) {
+ ShellCommand(input.drop(1))
+ } else {
+ phrase(query)(new lexical.Scanner(input)) match {
+ case Success(r, x) => r
+ case x => sys.error(x.toString)
+ }
+ }
+ }
+
+ protected case class Keyword(str: String)
+
+ protected val CACHE = Keyword("CACHE")
+ protected val SET = Keyword("SET")
+ protected val ADD = Keyword("ADD")
+ protected val JAR = Keyword("JAR")
+ protected val TABLE = Keyword("TABLE")
+ protected val AS = Keyword("AS")
+ protected val UNCACHE = Keyword("UNCACHE")
+ protected val FILE = Keyword("FILE")
+ protected val DFS = Keyword("DFS")
+ protected val SOURCE = Keyword("SOURCE")
+
+ protected implicit def asParser(k: Keyword): Parser[String] =
+ lexical.allCaseVersions(k.str).map(x => x : Parser[String]).reduce(_ | _)
+
+ protected def allCaseConverse(k: String): Parser[String] =
+ lexical.allCaseVersions(k).map(x => x : Parser[String]).reduce(_ | _)
+
+ protected val reservedWords =
+ this.getClass
+ .getMethods
+ .filter(_.getReturnType == classOf[Keyword])
+ .map(_.invoke(this).asInstanceOf[Keyword].str)
+
+ override val lexical = new SqlLexical(reservedWords)
+
+ protected lazy val query: Parser[LogicalPlan] =
+ cache | uncache | addJar | addFile | dfs | source | hiveQl
+
+ protected lazy val hiveQl: Parser[LogicalPlan] =
+ remainingQuery ^^ {
+ case r => HiveQl.createPlan(r.trim())
+ }
+
+ /** Returns the unparsed remainder of the input query. */
+ protected lazy val remainingQuery: Parser[String] = new Parser[String] {
+ def apply(in: Input) =
+ Success(
+ in.source.subSequence(in.offset, in.source.length).toString,
+ in.drop(in.source.length()))
+ }
+
+ /** Returns the entire input query. */
+ protected lazy val allQuery: Parser[String] = new Parser[String] {
+ def apply(in: Input) =
+ Success(in.source.toString, in.drop(in.source.length()))
+ }
+
+ protected lazy val cache: Parser[LogicalPlan] =
+ CACHE ~ TABLE ~> ident ~ opt(AS ~> hiveQl) ^^ {
+ case tableName ~ None => CacheCommand(tableName, true)
+ case tableName ~ Some(plan) =>
+ CacheTableAsSelectCommand(tableName, plan)
+ }
+
+ protected lazy val uncache: Parser[LogicalPlan] =
+ UNCACHE ~ TABLE ~> ident ^^ {
+ case tableName => CacheCommand(tableName, false)
+ }
+
+ protected lazy val addJar: Parser[LogicalPlan] =
+ ADD ~ JAR ~> remainingQuery ^^ {
+ case rq => AddJar(rq.trim())
+ }
+
+ protected lazy val addFile: Parser[LogicalPlan] =
+ ADD ~ FILE ~> remainingQuery ^^ {
+ case rq => AddFile(rq.trim())
+ }
+
+ protected lazy val dfs: Parser[LogicalPlan] =
+ DFS ~> allQuery ^^ {
+ case aq => NativeCommand(aq.trim())
+ }
+
+ protected lazy val source: Parser[LogicalPlan] =
+ SOURCE ~> remainingQuery ^^ {
+ case rq => SourceCommand(rq.trim())
+ }
+}
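The remainingQuery combinator above is the trick that makes the hybrid approach work: once an extension keyword has matched, everything left in the input is captured as one raw string and handed to Hive's own parser, rather than being consumed token by token. A minimal self-contained sketch of the same idea (not the Spark code; the PREFIX keyword and object name are invented for illustration):

    import scala.util.parsing.combinator.syntactical.StandardTokenParsers

    object RestOfInputSketch extends StandardTokenParsers {
      lexical.reserved += "PREFIX"

      // Captures every remaining character of the input, then advances the
      // reader to the end so that phrase() sees a complete match.
      val rest: Parser[String] = new Parser[String] {
        def apply(in: Input) =
          Success(
            in.source.subSequence(in.offset, in.source.length).toString,
            in.drop(in.source.length()))
      }

      val cmd: Parser[String] = "PREFIX" ~> rest

      def main(args: Array[String]): Unit = {
        val scanner = new lexical.Scanner("PREFIX select key from src")
        println(phrase(cmd)(scanner).get.trim) // prints: select key from src
      }
    }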
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
index 4f3f808c93..6bb42eeb05 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
@@ -126,6 +126,9 @@ private[hive] object HiveQl {
"TOK_CREATETABLE",
"TOK_DESCTABLE"
) ++ nativeCommands
+
+ // Parses a HiveQL query along with several Spark SQL specific extensions.
+ protected val hiveSqlParser = new ExtendedHiveQlParser
/**
* A set of implicit transformations that allow Hive ASTNodes to be rewritten by transformations
@@ -215,40 +218,19 @@ private[hive] object HiveQl {
def getAst(sql: String): ASTNode = ParseUtils.findRootNonNullToken((new ParseDriver).parse(sql))
/** Returns a LogicalPlan for a given HiveQL string. */
- def parseSql(sql: String): LogicalPlan = {
+ def parseSql(sql: String): LogicalPlan = hiveSqlParser(sql)
+
+ /** Creates a LogicalPlan for a given HiveQL string. */
+ def createPlan(sql: String) = {
try {
- if (sql.trim.toLowerCase.startsWith("set")) {
- // Split in two parts since we treat the part before the first "="
- // as key, and the part after as value, which may contain other "=" signs.
- sql.trim.drop(3).split("=", 2).map(_.trim) match {
- case Array("") => // "set"
- SetCommand(None, None)
- case Array(key) => // "set key"
- SetCommand(Some(key), None)
- case Array(key, value) => // "set key=value"
- SetCommand(Some(key), Some(value))
- }
- } else if (sql.trim.toLowerCase.startsWith("cache table")) {
- sql.trim.drop(12).trim.split(" ").toSeq match {
- case Seq(tableName) =>
- CacheCommand(tableName, true)
- case Seq(tableName, _, select @ _*) =>
- CacheTableAsSelectCommand(tableName, createPlan(select.mkString(" ").trim))
- }
- } else if (sql.trim.toLowerCase.startsWith("uncache table")) {
- CacheCommand(sql.trim.drop(14).trim, false)
- } else if (sql.trim.toLowerCase.startsWith("add jar")) {
- AddJar(sql.trim.drop(8).trim)
- } else if (sql.trim.toLowerCase.startsWith("add file")) {
- AddFile(sql.trim.drop(9))
- } else if (sql.trim.toLowerCase.startsWith("dfs")) {
+ val tree = getAst(sql)
+ if (nativeCommands contains tree.getText) {
NativeCommand(sql)
- } else if (sql.trim.startsWith("source")) {
- SourceCommand(sql.split(" ").toSeq match { case Seq("source", filePath) => filePath })
- } else if (sql.trim.startsWith("!")) {
- ShellCommand(sql.drop(1))
} else {
- createPlan(sql)
+ nodeToPlan(tree) match {
+ case NativePlaceholder => NativeCommand(sql)
+ case other => other
+ }
}
} catch {
case e: Exception => throw new ParseException(sql, e)
@@ -259,19 +241,6 @@ private[hive] object HiveQl {
""".stripMargin)
}
}
-
- /** Creates LogicalPlan for a given HiveQL string. */
- def createPlan(sql: String) = {
- val tree = getAst(sql)
- if (nativeCommands contains tree.getText) {
- NativeCommand(sql)
- } else {
- nodeToPlan(tree) match {
- case NativePlaceholder => NativeCommand(sql)
- case other => other
- }
- }
- }
def parseDdl(ddl: String): Seq[Attribute] = {
val tree =
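With the hunks above applied, parseSql is a thin delegate into the combinator parser, which in turn calls back into createPlan for anything that is plain HiveQL. A hedged trace for an ordinary query (method names from this diff; the input is illustrative):

    HiveQl.parseSql("SELECT key FROM src")
    //  -> hiveSqlParser("SELECT key FROM src")    ExtendedHiveQlParser.apply
    //  -> no extension keyword matches; the hiveQl rule captures the input
    //  -> HiveQl.createPlan("SELECT key FROM src")
    //  -> getAst(...) then nodeToPlan(...), with native commands
    //     falling back to NativeCommand(sql) as before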
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala
index 188579edd7..b3057cd618 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala
@@ -88,4 +88,10 @@ class CachedTableSuite extends HiveComparisonTest {
}
assert(!TestHive.isCached("src"), "Table 'src' should not be cached")
}
+
+ test("'CACHE TABLE tableName AS SELECT ..'") {
+ TestHive.sql("CACHE TABLE testCacheTable AS SELECT * FROM src")
+ assert(TestHive.isCached("testCacheTable"), "Table 'testCacheTable' should be cached")
+ TestHive.uncacheTable("testCacheTable")
+ }
}
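For completeness, a hedged sketch of a companion round-trip check (same TestHive API as the test above; the table name is invented) that would also exercise the new UNCACHE TABLE path through the parser:

    TestHive.sql("CACHE TABLE demoTable AS SELECT key FROM src")
    assert(TestHive.isCached("demoTable"), "Table 'demoTable' should be cached")
    TestHive.sql("UNCACHE TABLE demoTable")
    assert(!TestHive.isCached("demoTable"), "Table 'demoTable' should not be cached")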