diff options
author | Herman van Hovell <hvanhovell@questtec.nl> | 2016-05-02 18:12:31 -0700 |
---|---|---|
committer | Reynold Xin <rxin@databricks.com> | 2016-05-02 18:12:31 -0700 |
commit | 1c19c2769edecaefabc2cd67b3b32f901feb3f59 (patch) | |
tree | 1d561c79d936f5b4cd3e05924abbcb7cc0417f99 /sql | |
parent | d37c7f7f042f7943b5b684e53cf4284c601fb347 (diff) | |
download | spark-1c19c2769edecaefabc2cd67b3b32f901feb3f59.tar.gz spark-1c19c2769edecaefabc2cd67b3b32f901feb3f59.tar.bz2 spark-1c19c2769edecaefabc2cd67b3b32f901feb3f59.zip |
[SPARK-15047][SQL] Cleanup SQL Parser
## What changes were proposed in this pull request?
This PR addresses a few minor issues in the SQL parser:
- Removes some unused rules and keywords in the grammar.
- Removes the code path for fallback SQL parsing (which was needed for Hive native parsing).
- Uses `UnresolvedGenerator` instead of hard-coding `Explode` & `JsonTuple`.
- Adds a more generic way of creating error messages for unsupported Hive features.
- Uses `visitFunctionName` as much as possible.
- Interprets a `CatalogColumn`'s `DataType` directly instead of parsing it again.
## How was this patch tested?
Existing tests.
Author: Herman van Hovell <hvanhovell@questtec.nl>
Closes #12826 from hvanhovell/SPARK-15047.
Diffstat (limited to 'sql')
6 files changed, 31 insertions, 77 deletions
diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index 4d5d125ecd..cc4e5c853e 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -121,17 +121,13 @@ statement | UNCACHE TABLE identifier #uncacheTable | CLEAR CACHE #clearCache | LOAD DATA LOCAL? INPATH path=STRING OVERWRITE? INTO TABLE - tableIdentifier partitionSpec? #loadData + tableIdentifier partitionSpec? #loadData + | TRUNCATE TABLE tableIdentifier partitionSpec? + (COLUMNS identifierList)? #truncateTable | ADD identifier .*? #addResource | SET ROLE .*? #failNativeCommand | SET .*? #setConfiguration - | kws=unsupportedHiveNativeCommands .*? #failNativeCommand - | hiveNativeCommands #executeNativeCommand - ; - -hiveNativeCommands - : TRUNCATE TABLE tableIdentifier partitionSpec? - (COLUMNS identifierList)? + | unsupportedHiveNativeCommands .*? #failNativeCommand ; unsupportedHiveNativeCommands @@ -267,14 +263,6 @@ nestedConstantList : '(' constantList (',' constantList)* ')' ; -skewedLocation - : (constant | constantList) EQ STRING - ; - -skewedLocationList - : '(' skewedLocation (',' skewedLocation)* ')' - ; - createFileFormat : STORED AS fileFormat | STORED BY storageHandler @@ -609,11 +597,6 @@ explainOption : LOGICAL | FORMATTED | EXTENDED | CODEGEN ; -transactionMode - : ISOLATION LEVEL SNAPSHOT #isolationLevel - | READ accessMode=(ONLY | WRITE) #transactionAccessMode - ; - qualifiedName : identifier ('.' 
identifier)* ; @@ -661,8 +644,7 @@ nonReserved | VIEW | REPLACE | IF | NO | DATA - | START | TRANSACTION | COMMIT | ROLLBACK | WORK | ISOLATION | LEVEL - | SNAPSHOT | READ | WRITE | ONLY + | START | TRANSACTION | COMMIT | ROLLBACK | SORT | CLUSTER | DISTRIBUTE | UNSET | TBLPROPERTIES | SKEWED | STORED | DIRECTORIES | LOCATION | EXCHANGE | ARCHIVE | UNARCHIVE | FILEFORMAT | TOUCH | COMPACT | CONCATENATE | CHANGE | FIRST | AFTER | CASCADE | RESTRICT | BUCKETS | CLUSTERED | SORTED | PURGE | INPUTFORMAT | OUTPUTFORMAT @@ -778,13 +760,6 @@ START: 'START'; TRANSACTION: 'TRANSACTION'; COMMIT: 'COMMIT'; ROLLBACK: 'ROLLBACK'; -WORK: 'WORK'; -ISOLATION: 'ISOLATION'; -LEVEL: 'LEVEL'; -SNAPSHOT: 'SNAPSHOT'; -READ: 'READ'; -WRITE: 'WRITE'; -ONLY: 'ONLY'; MACRO: 'MACRO'; IF: 'IF'; diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 1f923f47dd..c3974625aa 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -82,25 +82,13 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging { protected def plan(tree: ParserRuleContext): LogicalPlan = typedVisit(tree) /** - * Make sure we do not try to create a plan for a native command. - */ - override def visitExecuteNativeCommand(ctx: ExecuteNativeCommandContext): LogicalPlan = null - - /** * Create a plan for a SHOW FUNCTIONS command. 
*/ override def visitShowFunctions(ctx: ShowFunctionsContext): LogicalPlan = withOrigin(ctx) { import ctx._ if (qualifiedName != null) { - val names = qualifiedName().identifier().asScala.map(_.getText).toList - names match { - case db :: name :: Nil => - ShowFunctions(Some(db), Some(name)) - case name :: Nil => - ShowFunctions(None, Some(name)) - case _ => - throw new ParseException("SHOW FUNCTIONS unsupported name", ctx) - } + val name = visitFunctionName(qualifiedName) + ShowFunctions(name.database, Some(name.funcName)) } else if (pattern != null) { ShowFunctions(None, Some(string(pattern))) } else { @@ -117,7 +105,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging { if (describeFuncName.STRING() != null) { string(describeFuncName.STRING()) } else if (describeFuncName.qualifiedName() != null) { - describeFuncName.qualifiedName().identifier().asScala.map(_.getText).mkString(".") + visitFunctionName(describeFuncName.qualifiedName).unquotedString } else { describeFuncName.getText } @@ -554,19 +542,8 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging { query: LogicalPlan, ctx: LateralViewContext): LogicalPlan = withOrigin(ctx) { val expressions = expressionList(ctx.expression) - - // Create the generator. 
- val generator = ctx.qualifiedName.getText.toLowerCase match { - case "explode" if expressions.size == 1 => - Explode(expressions.head) - case "json_tuple" => - JsonTuple(expressions) - case name => - UnresolvedGenerator(visitFunctionName(ctx.qualifiedName), expressions) - } - Generate( - generator, + UnresolvedGenerator(visitFunctionName(ctx.qualifiedName), expressions), join = true, outer = ctx.OUTER != null, Some(ctx.tblName.getText.toLowerCase), diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala index d0132529f1..d042e191a9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala @@ -53,19 +53,15 @@ abstract class AbstractSqlParser extends ParserInterface with Logging { override def parsePlan(sqlText: String): LogicalPlan = parse(sqlText) { parser => astBuilder.visitSingleStatement(parser.singleStatement()) match { case plan: LogicalPlan => plan - case _ => nativeCommand(sqlText) + case _ => + val position = Origin(None, None) + throw new ParseException(Option(sqlText), "Unsupported SQL statement", position, position) } } /** Get the builder (visitor) which converts a ParseTree into a AST. */ protected def astBuilder: AstBuilder - /** Create a native command, or fail when this is not supported. 
*/ - protected def nativeCommand(sqlText: String): LogicalPlan = { - val position = Origin(None, None) - throw new ParseException(Option(sqlText), "Unsupported SQL statement", position, position) - } - protected def parse[T](command: String)(toResult: SqlBaseParser => T): T = { logInfo(s"Parsing command: $command") diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala index 5e896a33bd..b7af2ceda6 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala @@ -53,7 +53,7 @@ class PlanParserSuite extends PlanTest { assertEqual("show functions foo", ShowFunctions(None, Some("foo"))) assertEqual("show functions foo.bar", ShowFunctions(Some("foo"), Some("bar"))) assertEqual("show functions 'foo\\\\.*'", ShowFunctions(None, Some("foo\\.*"))) - intercept("show functions foo.bar.baz", "SHOW FUNCTIONS unsupported name") + intercept("show functions foo.bar.baz", "Unsupported function name") } test("describe function") { @@ -263,11 +263,14 @@ class PlanParserSuite extends PlanTest { } test("lateral view") { + val explode = UnresolvedGenerator(FunctionIdentifier("explode"), Seq('x)) + val jsonTuple = UnresolvedGenerator(FunctionIdentifier("json_tuple"), Seq('x, 'y)) + // Single lateral view assertEqual( "select * from t lateral view explode(x) expl as x", table("t") - .generate(Explode('x), join = true, outer = false, Some("expl"), Seq("x")) + .generate(explode, join = true, outer = false, Some("expl"), Seq("x")) .select(star())) // Multiple lateral views @@ -277,12 +280,12 @@ class PlanParserSuite extends PlanTest { |lateral view explode(x) expl |lateral view outer json_tuple(x, y) jtup q, z""".stripMargin, table("t") - .generate(Explode('x), join = true, outer = false, Some("expl"), Seq.empty) - 
.generate(JsonTuple(Seq('x, 'y)), join = true, outer = true, Some("jtup"), Seq("q", "z")) + .generate(explode, join = true, outer = false, Some("expl"), Seq.empty) + .generate(jsonTuple, join = true, outer = true, Some("jtup"), Seq("q", "z")) .select(star())) // Multi-Insert lateral views. - val from = table("t1").generate(Explode('x), join = true, outer = false, Some("expl"), Seq("x")) + val from = table("t1").generate(explode, join = true, outer = false, Some("expl"), Seq("x")) assertEqual( """from t1 |lateral view explode(x) expl as x @@ -294,7 +297,7 @@ class PlanParserSuite extends PlanTest { |where s < 10 """.stripMargin, Union(from - .generate(JsonTuple(Seq('x, 'y)), join = true, outer = false, Some("jtup"), Seq("q", "z")) + .generate(jsonTuple, join = true, outer = false, Some("jtup"), Seq("q", "z")) .select(star()) .insertInto("t2"), from.where('s < 10).select(star()).insertInto("t3"))) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala index 8128a6efe3..dfc56a7d98 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala @@ -21,6 +21,7 @@ import scala.collection.JavaConverters._ import scala.util.Try import org.antlr.v4.runtime.{ParserRuleContext, Token} +import org.antlr.v4.runtime.tree.TerminalNode import org.apache.spark.sql.SaveMode import org.apache.spark.sql.catalyst.TableIdentifier @@ -31,7 +32,7 @@ import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, OneRowRelation, import org.apache.spark.sql.execution.command._ import org.apache.spark.sql.execution.datasources._ import org.apache.spark.sql.internal.{HiveSerDe, SQLConf, VariableSubstitution} - +import org.apache.spark.sql.types.DataType /** * Concrete parser for Spark SQL statements. 
@@ -780,9 +781,10 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder { */ override def visitFailNativeCommand( ctx: FailNativeCommandContext): LogicalPlan = withOrigin(ctx) { - val keywords = if (ctx.kws != null) { - Seq(ctx.kws.kw1, ctx.kws.kw2, ctx.kws.kw3, ctx.kws.kw4, ctx.kws.kw5, ctx.kws.kw6) - .filter(_ != null).map(_.getText).mkString(" ") + val keywords = if (ctx.unsupportedHiveNativeCommands != null) { + ctx.unsupportedHiveNativeCommands.children.asScala.collect { + case n: TerminalNode => n.getText + }.mkString(" ") } else { // SET ROLE is the exception to the rule, because we handle this before other SET commands. "SET ROLE" @@ -1109,7 +1111,7 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder { // just convert the whole type string to lower case, otherwise the struct field names // will no longer be case sensitive. Instead, we rely on our parser to get the proper // case before passing it to Hive. - CatalystSqlParser.parseDataType(col.dataType.getText).catalogString, + typedVisit[DataType](col.dataType).catalogString, nullable = true, Option(col.STRING).map(string)) } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala index c97c28c40c..8dc3c64353 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala @@ -254,12 +254,13 @@ class HiveDDLCommandSuite extends PlanTest { } test("use native json_tuple instead of hive's UDTF in LATERAL VIEW") { - val plan = parser.parsePlan( + val analyzer = TestHive.sparkSession.sessionState.analyzer + val plan = analyzer.execute(parser.parsePlan( """ |SELECT * |FROM (SELECT '{"f1": "value1", "f2": 12}' json) test |LATERAL VIEW json_tuple(json, 'f1', 'f2') jt AS a, b - """.stripMargin) + """.stripMargin)) 
assert(plan.children.head.asInstanceOf[Generate].generator.isInstanceOf[JsonTuple]) } |