From 600c0b69cab4767e8e5a6f4284777d8b9d4bd40e Mon Sep 17 00:00:00 2001 From: Herman van Hovell Date: Mon, 28 Mar 2016 12:31:12 -0700 Subject: [SPARK-13713][SQL] Migrate parser from ANTLR3 to ANTLR4 ### What changes were proposed in this pull request? The current ANTLR3 parser is quite complex to maintain and suffers from code blow-ups. This PR introduces a new parser that is based on ANTLR4. This parser is based on [Presto's SQL parser](https://github.com/facebook/presto/blob/master/presto-parser/src/main/antlr4/com/facebook/presto/sql/parser/SqlBase.g4). The current implementation can parse and create Catalyst and SQL plans. Large parts of the HiveQl DDL and some of the DML functionality are currently missing; the plan is to add these in follow-up PRs. This PR is a work in progress, and work needs to be done in the following areas: - [x] Error handling should be improved. - [x] Documentation should be improved. - [x] Multi-Insert needs to be tested. - [ ] Naming and package locations. ### How was this patch tested? Catalyst and SQL unit tests. Author: Herman van Hovell Closes #11557 from hvanhovell/ngParser. 
--- project/SparkBuild.scala | 8 ++++++-- project/plugins.sbt | 6 ++++++ 2 files changed, 12 insertions(+), 2 deletions(-) (limited to 'project') diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index fb229b979d..39a9e16f7e 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -25,6 +25,7 @@ import sbt._ import sbt.Classpaths.publishTask import sbt.Keys._ import sbtunidoc.Plugin.UnidocKeys.unidocGenjavadocVersion +import com.simplytyped.Antlr4Plugin._ import com.typesafe.sbt.pom.{PomBuild, SbtPomKeys} import com.typesafe.tools.mima.plugin.MimaKeys @@ -401,7 +402,10 @@ object OldDeps { } object Catalyst { - lazy val settings = Seq( + lazy val settings = antlr4Settings ++ Seq( + antlr4PackageName in Antlr4 := Some("org.apache.spark.sql.catalyst.parser.ng"), + antlr4GenListener in Antlr4 := true, + antlr4GenVisitor in Antlr4 := true, // ANTLR code-generation step. // // This has been heavily inspired by com.github.stefri.sbt-antlr (0.5.3). It fixes a number of @@ -414,7 +418,7 @@ object Catalyst { "SparkSqlLexer.g", "SparkSqlParser.g") val sourceDir = (sourceDirectory in Compile).value / "antlr3" - val targetDir = (sourceManaged in Compile).value + val targetDir = (sourceManaged in Compile).value / "antlr3" // Create default ANTLR Tool. val antlr = new org.antlr.Tool diff --git a/project/plugins.sbt b/project/plugins.sbt index eeca94a47c..d9ed7962bf 100644 --- a/project/plugins.sbt +++ b/project/plugins.sbt @@ -23,3 +23,9 @@ libraryDependencies += "org.ow2.asm" % "asm" % "5.0.3" libraryDependencies += "org.ow2.asm" % "asm-commons" % "5.0.3" libraryDependencies += "org.antlr" % "antlr" % "3.5.2" + + +// TODO I am not sure we want such a dep. +resolvers += "simplytyped" at "http://simplytyped.github.io/repo/releases" + +addSbtPlugin("com.simplytyped" % "sbt-antlr4" % "0.7.10") -- cgit v1.2.3