about | summary | refs | log | tree | commit | diff
path: root/sql/hive
diff options
context:
space:
mode:
author: Herman van Hovell <hvanhovell@questtec.nl> — 2016-01-27 13:45:00 -0800
committer: Reynold Xin <rxin@databricks.com> — 2016-01-27 13:45:00 -0800
commitef96cd3c521c175878c38a1ed6eeeab0ed8346b5 (patch)
treead7553255f8ae6620bfdca80ba41538dcab7f310 /sql/hive
parent680afabe78b77e4e63e793236453d69567d24290 (diff)
downloadspark-ef96cd3c521c175878c38a1ed6eeeab0ed8346b5.tar.gz
spark-ef96cd3c521c175878c38a1ed6eeeab0ed8346b5.tar.bz2
spark-ef96cd3c521c175878c38a1ed6eeeab0ed8346b5.zip
[SPARK-12865][SPARK-12866][SQL] Migrate SparkSQLParser/ExtendedHiveQlParser commands to new Parser
This PR moves all the functionality provided by the SparkSQLParser/ExtendedHiveQlParser to the new Parser hierarchy (SparkQl/HiveQl). This also improves the current SET command parsing: the current implementation swallows ```set role ...``` and ```set autocommit ...``` commands, this PR respects these commands (and passes them on to Hive). This PR and https://github.com/apache/spark/pull/10723 end the use of Parser-Combinator parsers for SQL parsing. As a result we can also remove the ```AbstractSQLParser``` in Catalyst. The PR is marked WIP as long as it doesn't pass all tests. cc rxin viirya winningsix (this touches https://github.com/apache/spark/pull/10144) Author: Herman van Hovell <hvanhovell@questtec.nl> Closes #10905 from hvanhovell/SPARK-12866.
Diffstat (limited to 'sql/hive')
-rw-r--r--  sql/hive/src/main/scala/org/apache/spark/sql/hive/ExtendedHiveQlParser.scala | 70
-rw-r--r--  sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala | 8
-rw-r--r--  sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala | 18
-rw-r--r--  sql/hive/src/test/resources/golden/show_functions-1-4a6f611305f58bdbafb2fd89ec62d797 | 4
-rw-r--r--  sql/hive/src/test/resources/golden/show_functions-2-97cbada21ad9efda7ce9de5891deca7c | 1
-rw-r--r--  sql/hive/src/test/resources/golden/show_functions-4-4deaa213aff83575bbaf859f79bfdd48 | 2
-rw-r--r--  sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala | 2
7 files changed, 25 insertions, 80 deletions
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/ExtendedHiveQlParser.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/ExtendedHiveQlParser.scala
deleted file mode 100644
index 313ba18f6a..0000000000
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/ExtendedHiveQlParser.scala
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.hive
-
-import scala.language.implicitConversions
-
-import org.apache.spark.sql.catalyst.{AbstractSparkSQLParser, TableIdentifier}
-import org.apache.spark.sql.catalyst.expressions.Expression
-import org.apache.spark.sql.catalyst.plans.logical._
-import org.apache.spark.sql.hive.execution.{AddFile, AddJar, HiveNativeCommand}
-
-/**
- * A parser that recognizes all HiveQL constructs together with Spark SQL specific extensions.
- */
-private[hive] class ExtendedHiveQlParser(sqlContext: HiveContext) extends AbstractSparkSQLParser {
-
- val parser = new HiveQl(sqlContext.conf)
-
- override def parseExpression(sql: String): Expression = parser.parseExpression(sql)
-
- override def parseTableIdentifier(sql: String): TableIdentifier =
- parser.parseTableIdentifier(sql)
-
- // Keyword is a convention with AbstractSparkSQLParser, which will scan all of the `Keyword`
- // properties via reflection the class in runtime for constructing the SqlLexical object
- protected val ADD = Keyword("ADD")
- protected val DFS = Keyword("DFS")
- protected val FILE = Keyword("FILE")
- protected val JAR = Keyword("JAR")
-
- protected lazy val start: Parser[LogicalPlan] = dfs | addJar | addFile | hiveQl
-
- protected lazy val hiveQl: Parser[LogicalPlan] =
- restInput ^^ {
- case statement =>
- sqlContext.executionHive.withHiveState {
- parser.parsePlan(statement.trim)
- }
- }
-
- protected lazy val dfs: Parser[LogicalPlan] =
- DFS ~> wholeInput ^^ {
- case command => HiveNativeCommand(command.trim)
- }
-
- private lazy val addFile: Parser[LogicalPlan] =
- ADD ~ FILE ~> restInput ^^ {
- case input => AddFile(input.trim)
- }
-
- private lazy val addJar: Parser[LogicalPlan] =
- ADD ~ JAR ~> restInput ^^ {
- case input => AddJar(input.trim)
- }
-}
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
index eaca3c9269..1797ea54f2 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
@@ -316,7 +316,9 @@ class HiveContext private[hive](
}
protected[sql] override def parseSql(sql: String): LogicalPlan = {
- super.parseSql(substitutor.substitute(hiveconf, sql))
+ executionHive.withHiveState {
+ super.parseSql(substitutor.substitute(hiveconf, sql))
+ }
}
override protected[sql] def executePlan(plan: LogicalPlan): this.QueryExecution =
@@ -546,9 +548,7 @@ class HiveContext private[hive](
}
@transient
- protected[sql] override val sqlParser: ParserInterface = {
- new SparkSQLParser(new ExtendedHiveQlParser(this))
- }
+ protected[sql] override val sqlParser: ParserInterface = new HiveQl(conf)
@transient
private val hivePlanner = new SparkPlanner(this) with HiveStrategies {
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
index 46246f8191..22841ed211 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
@@ -35,11 +35,12 @@ import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.parser._
import org.apache.spark.sql.catalyst.parser.ParseUtils._
+import org.apache.spark.sql.catalyst.plans._
import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.execution.SparkQl
import org.apache.spark.sql.hive.HiveShim.HiveFunctionWrapper
import org.apache.spark.sql.hive.client._
-import org.apache.spark.sql.hive.execution.{AnalyzeTable, DropTable, HiveNativeCommand, HiveScriptIOSchema}
+import org.apache.spark.sql.hive.execution._
import org.apache.spark.sql.types._
import org.apache.spark.sql.AnalysisException
@@ -113,7 +114,6 @@ private[hive] class HiveQl(conf: ParserConf) extends SparkQl(conf) with Logging
"TOK_CREATEROLE",
"TOK_DESCDATABASE",
- "TOK_DESCFUNCTION",
"TOK_DROPDATABASE",
"TOK_DROPFUNCTION",
@@ -151,7 +151,6 @@ private[hive] class HiveQl(conf: ParserConf) extends SparkQl(conf) with Logging
"TOK_SHOW_TRANSACTIONS",
"TOK_SHOWCOLUMNS",
"TOK_SHOWDATABASES",
- "TOK_SHOWFUNCTIONS",
"TOK_SHOWINDEXES",
"TOK_SHOWLOCKS",
"TOK_SHOWPARTITIONS",
@@ -244,6 +243,15 @@ private[hive] class HiveQl(conf: ParserConf) extends SparkQl(conf) with Logging
protected override def nodeToPlan(node: ASTNode): LogicalPlan = {
node match {
+ case Token("TOK_DFS", Nil) =>
+ HiveNativeCommand(node.source + " " + node.remainder)
+
+ case Token("TOK_ADDFILE", Nil) =>
+ AddFile(node.remainder)
+
+ case Token("TOK_ADDJAR", Nil) =>
+ AddJar(node.remainder)
+
// Special drop table that also uncaches.
case Token("TOK_DROPTABLE", Token("TOK_TABNAME", tableNameParts) :: ifExists) =>
val tableName = tableNameParts.map { case Token(p, Nil) => p }.mkString(".")
@@ -558,7 +566,7 @@ private[hive] class HiveQl(conf: ParserConf) extends SparkQl(conf) with Logging
protected override def nodeToTransformation(
node: ASTNode,
- child: LogicalPlan): Option[ScriptTransformation] = node match {
+ child: LogicalPlan): Option[logical.ScriptTransformation] = node match {
case Token("TOK_SELEXPR",
Token("TOK_TRANSFORM",
Token("TOK_EXPLIST", inputExprs) ::
@@ -651,7 +659,7 @@ private[hive] class HiveQl(conf: ParserConf) extends SparkQl(conf) with Logging
schemaLess)
Some(
- ScriptTransformation(
+ logical.ScriptTransformation(
inputExprs.map(nodeToExpr),
unescapedScript,
output,
diff --git a/sql/hive/src/test/resources/golden/show_functions-1-4a6f611305f58bdbafb2fd89ec62d797 b/sql/hive/src/test/resources/golden/show_functions-1-4a6f611305f58bdbafb2fd89ec62d797
index 175795534f..f400819b67 100644
--- a/sql/hive/src/test/resources/golden/show_functions-1-4a6f611305f58bdbafb2fd89ec62d797
+++ b/sql/hive/src/test/resources/golden/show_functions-1-4a6f611305f58bdbafb2fd89ec62d797
@@ -1,4 +1,5 @@
case
+cbrt
ceil
ceiling
coalesce
@@ -17,3 +18,6 @@ covar_samp
create_union
cume_dist
current_database
+current_date
+current_timestamp
+current_user
diff --git a/sql/hive/src/test/resources/golden/show_functions-2-97cbada21ad9efda7ce9de5891deca7c b/sql/hive/src/test/resources/golden/show_functions-2-97cbada21ad9efda7ce9de5891deca7c
index 3c25d656bd..19458fc86e 100644
--- a/sql/hive/src/test/resources/golden/show_functions-2-97cbada21ad9efda7ce9de5891deca7c
+++ b/sql/hive/src/test/resources/golden/show_functions-2-97cbada21ad9efda7ce9de5891deca7c
@@ -2,6 +2,7 @@ assert_true
case
coalesce
current_database
+current_date
decode
e
encode
diff --git a/sql/hive/src/test/resources/golden/show_functions-4-4deaa213aff83575bbaf859f79bfdd48 b/sql/hive/src/test/resources/golden/show_functions-4-4deaa213aff83575bbaf859f79bfdd48
index cd2e58d04a..1d05f843a7 100644
--- a/sql/hive/src/test/resources/golden/show_functions-4-4deaa213aff83575bbaf859f79bfdd48
+++ b/sql/hive/src/test/resources/golden/show_functions-4-4deaa213aff83575bbaf859f79bfdd48
@@ -1,4 +1,6 @@
+current_date
date_add
+date_format
date_sub
datediff
to_date
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
index 9e53d8a81e..0d62d799c8 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
@@ -28,7 +28,7 @@ import org.apache.spark.sql.catalyst.parser.ParserConf
import org.apache.spark.sql.execution.SparkQl
import org.apache.spark.sql.execution.datasources.LogicalRelation
import org.apache.spark.sql.execution.datasources.parquet.ParquetRelation
-import org.apache.spark.sql.hive.{ExtendedHiveQlParser, HiveContext, HiveQl, MetastoreRelation}
+import org.apache.spark.sql.hive.{HiveContext, HiveQl, MetastoreRelation}
import org.apache.spark.sql.hive.test.TestHiveSingleton
import org.apache.spark.sql.test.SQLTestUtils
import org.apache.spark.sql.types._