about | summary | refs | log | tree | commit | diff
path: root/sql/hive
diff options
context:
space:
mode:
author: Herman van Hovell <hvanhovell@questtec.nl> — 2016-01-27 13:45:00 -0800
committer: Reynold Xin <rxin@databricks.com> — 2016-01-27 13:45:00 -0800
commitef96cd3c521c175878c38a1ed6eeeab0ed8346b5 (patch)
treead7553255f8ae6620bfdca80ba41538dcab7f310 /sql/hive
parent680afabe78b77e4e63e793236453d69567d24290 (diff)
downloadspark-ef96cd3c521c175878c38a1ed6eeeab0ed8346b5.tar.gz
spark-ef96cd3c521c175878c38a1ed6eeeab0ed8346b5.tar.bz2
spark-ef96cd3c521c175878c38a1ed6eeeab0ed8346b5.zip
[SPARK-12865][SPARK-12866][SQL] Migrate SparkSQLParser/ExtendedHiveQlParser commands to new Parser
This PR moves all the functionality provided by the SparkSQLParser/ExtendedHiveQlParser to the new Parser hierarchy (SparkQl/HiveQl). This also improves the current SET command parsing: the current implementation swallows ```set role ...``` and ```set autocommit ...``` commands, this PR respects these commands (and passes them on to Hive). This PR and https://github.com/apache/spark/pull/10723 end the use of Parser-Combinator parsers for SQL parsing. As a result we can also remove the ```AbstractSQLParser``` in Catalyst. The PR is marked WIP as long as it doesn't pass all tests. cc rxin viirya winningsix (this touches https://github.com/apache/spark/pull/10144) Author: Herman van Hovell <hvanhovell@questtec.nl> Closes #10905 from hvanhovell/SPARK-12866.
Diffstat (limited to 'sql/hive')
-rw-r--r--  sql/hive/src/main/scala/org/apache/spark/sql/hive/ExtendedHiveQlParser.scala | 70
-rw-r--r--  sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala | 8
-rw-r--r--  sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala | 18
-rw-r--r--  sql/hive/src/test/resources/golden/show_functions-1-4a6f611305f58bdbafb2fd89ec62d797 | 4
-rw-r--r--  sql/hive/src/test/resources/golden/show_functions-2-97cbada21ad9efda7ce9de5891deca7c | 1
-rw-r--r--  sql/hive/src/test/resources/golden/show_functions-4-4deaa213aff83575bbaf859f79bfdd48 | 2
-rw-r--r--  sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala | 2
7 files changed, 25 insertions, 80 deletions
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/ExtendedHiveQlParser.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/ExtendedHiveQlParser.scala
deleted file mode 100644
index 313ba18f6a..0000000000
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/ExtendedHiveQlParser.scala
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.hive
-
-import scala.language.implicitConversions
-
-import org.apache.spark.sql.catalyst.{AbstractSparkSQLParser, TableIdentifier}
-import org.apache.spark.sql.catalyst.expressions.Expression
-import org.apache.spark.sql.catalyst.plans.logical._
-import org.apache.spark.sql.hive.execution.{AddFile, AddJar, HiveNativeCommand}
-
-/**
- * A parser that recognizes all HiveQL constructs together with Spark SQL specific extensions.
- */
-private[hive] class ExtendedHiveQlParser(sqlContext: HiveContext) extends AbstractSparkSQLParser {
-
- val parser = new HiveQl(sqlContext.conf)
-
- override def parseExpression(sql: String): Expression = parser.parseExpression(sql)
-
- override def parseTableIdentifier(sql: String): TableIdentifier =
- parser.parseTableIdentifier(sql)
-
- // Keyword is a convention with AbstractSparkSQLParser, which will scan all of the `Keyword`
- // properties via reflection the class in runtime for constructing the SqlLexical object
- protected val ADD = Keyword("ADD")
- protected val DFS = Keyword("DFS")
- protected val FILE = Keyword("FILE")
- protected val JAR = Keyword("JAR")
-
- protected lazy val start: Parser[LogicalPlan] = dfs | addJar | addFile | hiveQl
-
- protected lazy val hiveQl: Parser[LogicalPlan] =
- restInput ^^ {
- case statement =>
- sqlContext.executionHive.withHiveState {
- parser.parsePlan(statement.trim)
- }
- }
-
- protected lazy val dfs: Parser[LogicalPlan] =
- DFS ~> wholeInput ^^ {
- case command => HiveNativeCommand(command.trim)
- }
-
- private lazy val addFile: Parser[LogicalPlan] =
- ADD ~ FILE ~> restInput ^^ {
- case input => AddFile(input.trim)
- }
-
- private lazy val addJar: Parser[LogicalPlan] =
- ADD ~ JAR ~> restInput ^^ {
- case input => AddJar(input.trim)
- }
-}
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
index eaca3c9269..1797ea54f2 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
@@ -316,7 +316,9 @@ class HiveContext private[hive](
}
protected[sql] override def parseSql(sql: String): LogicalPlan = {
- super.parseSql(substitutor.substitute(hiveconf, sql))
+ executionHive.withHiveState {
+ super.parseSql(substitutor.substitute(hiveconf, sql))
+ }
}
override protected[sql] def executePlan(plan: LogicalPlan): this.QueryExecution =
@@ -546,9 +548,7 @@ class HiveContext private[hive](
}
@transient
- protected[sql] override val sqlParser: ParserInterface = {
- new SparkSQLParser(new ExtendedHiveQlParser(this))
- }
+ protected[sql] override val sqlParser: ParserInterface = new HiveQl(conf)
@transient
private val hivePlanner = new SparkPlanner(this) with HiveStrategies {
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
index 46246f8191..22841ed211 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
@@ -35,11 +35,12 @@ import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.parser._
import org.apache.spark.sql.catalyst.parser.ParseUtils._
+import org.apache.spark.sql.catalyst.plans._
import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.execution.SparkQl
import org.apache.spark.sql.hive.HiveShim.HiveFunctionWrapper
import org.apache.spark.sql.hive.client._
-import org.apache.spark.sql.hive.execution.{AnalyzeTable, DropTable, HiveNativeCommand, HiveScriptIOSchema}
+import org.apache.spark.sql.hive.execution._
import org.apache.spark.sql.types._
import org.apache.spark.sql.AnalysisException
@@ -113,7 +114,6 @@ private[hive] class HiveQl(conf: ParserConf) extends SparkQl(conf) with Logging
"TOK_CREATEROLE",
"TOK_DESCDATABASE",
- "TOK_DESCFUNCTION",
"TOK_DROPDATABASE",
"TOK_DROPFUNCTION",
@@ -151,7 +151,6 @@ private[hive] class HiveQl(conf: ParserConf) extends SparkQl(conf) with Logging
"TOK_SHOW_TRANSACTIONS",
"TOK_SHOWCOLUMNS",
"TOK_SHOWDATABASES",
- "TOK_SHOWFUNCTIONS",
"TOK_SHOWINDEXES",
"TOK_SHOWLOCKS",
"TOK_SHOWPARTITIONS",
@@ -244,6 +243,15 @@ private[hive] class HiveQl(conf: ParserConf) extends SparkQl(conf) with Logging
protected override def nodeToPlan(node: ASTNode): LogicalPlan = {
node match {
+ case Token("TOK_DFS", Nil) =>
+ HiveNativeCommand(node.source + " " + node.remainder)
+
+ case Token("TOK_ADDFILE", Nil) =>
+ AddFile(node.remainder)
+
+ case Token("TOK_ADDJAR", Nil) =>
+ AddJar(node.remainder)
+
// Special drop table that also uncaches.
case Token("TOK_DROPTABLE", Token("TOK_TABNAME", tableNameParts) :: ifExists) =>
val tableName = tableNameParts.map { case Token(p, Nil) => p }.mkString(".")
@@ -558,7 +566,7 @@ private[hive] class HiveQl(conf: ParserConf) extends SparkQl(conf) with Logging
protected override def nodeToTransformation(
node: ASTNode,
- child: LogicalPlan): Option[ScriptTransformation] = node match {
+ child: LogicalPlan): Option[logical.ScriptTransformation] = node match {
case Token("TOK_SELEXPR",
Token("TOK_TRANSFORM",
Token("TOK_EXPLIST", inputExprs) ::
@@ -651,7 +659,7 @@ private[hive] class HiveQl(conf: ParserConf) extends SparkQl(conf) with Logging
schemaLess)
Some(
- ScriptTransformation(
+ logical.ScriptTransformation(
inputExprs.map(nodeToExpr),
unescapedScript,
output,
diff --git a/sql/hive/src/test/resources/golden/show_functions-1-4a6f611305f58bdbafb2fd89ec62d797 b/sql/hive/src/test/resources/golden/show_functions-1-4a6f611305f58bdbafb2fd89ec62d797
index 175795534f..f400819b67 100644
--- a/sql/hive/src/test/resources/golden/show_functions-1-4a6f611305f58bdbafb2fd89ec62d797
+++ b/sql/hive/src/test/resources/golden/show_functions-1-4a6f611305f58bdbafb2fd89ec62d797
@@ -1,4 +1,5 @@
case
+cbrt
ceil
ceiling
coalesce
@@ -17,3 +18,6 @@ covar_samp
create_union
cume_dist
current_database
+current_date
+current_timestamp
+current_user
diff --git a/sql/hive/src/test/resources/golden/show_functions-2-97cbada21ad9efda7ce9de5891deca7c b/sql/hive/src/test/resources/golden/show_functions-2-97cbada21ad9efda7ce9de5891deca7c
index 3c25d656bd..19458fc86e 100644
--- a/sql/hive/src/test/resources/golden/show_functions-2-97cbada21ad9efda7ce9de5891deca7c
+++ b/sql/hive/src/test/resources/golden/show_functions-2-97cbada21ad9efda7ce9de5891deca7c
@@ -2,6 +2,7 @@ assert_true
case
coalesce
current_database
+current_date
decode
e
encode
diff --git a/sql/hive/src/test/resources/golden/show_functions-4-4deaa213aff83575bbaf859f79bfdd48 b/sql/hive/src/test/resources/golden/show_functions-4-4deaa213aff83575bbaf859f79bfdd48
index cd2e58d04a..1d05f843a7 100644
--- a/sql/hive/src/test/resources/golden/show_functions-4-4deaa213aff83575bbaf859f79bfdd48
+++ b/sql/hive/src/test/resources/golden/show_functions-4-4deaa213aff83575bbaf859f79bfdd48
@@ -1,4 +1,6 @@
+current_date
date_add
+date_format
date_sub
datediff
to_date
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
index 9e53d8a81e..0d62d799c8 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
@@ -28,7 +28,7 @@ import org.apache.spark.sql.catalyst.parser.ParserConf
import org.apache.spark.sql.execution.SparkQl
import org.apache.spark.sql.execution.datasources.LogicalRelation
import org.apache.spark.sql.execution.datasources.parquet.ParquetRelation
-import org.apache.spark.sql.hive.{ExtendedHiveQlParser, HiveContext, HiveQl, MetastoreRelation}
+import org.apache.spark.sql.hive.{HiveContext, HiveQl, MetastoreRelation}
import org.apache.spark.sql.hive.test.TestHiveSingleton
import org.apache.spark.sql.test.SQLTestUtils
import org.apache.spark.sql.types._