diff options
author | Herman van Hovell <hvanhovell@questtec.nl> | 2016-01-01 23:22:50 -0800 |
---|---|---|
committer | Reynold Xin <rxin@databricks.com> | 2016-01-01 23:22:50 -0800 |
commit | 970635a9f8d7f64708a9fcae0b231c570f3f2c51 (patch) | |
tree | cfdb9289f0ddb1cf0c1811f5695b12a14821f0a2 /project | |
parent | 44ee920fd49d35b421ae562ea99bcc8f2b98ced6 (diff) | |
download | spark-970635a9f8d7f64708a9fcae0b231c570f3f2c51.tar.gz spark-970635a9f8d7f64708a9fcae0b231c570f3f2c51.tar.bz2 spark-970635a9f8d7f64708a9fcae0b231c570f3f2c51.zip |
[SPARK-12362][SQL][WIP] Inline Hive Parser
This PR inlines the Hive SQL parser in Spark SQL.
The previous (merged) incarnation of this PR passed all tests, but had and still has problems with the build. These problems are caused by the fact that - for some reason - in some cases the ANTLR-generated code is not included in the compilation phase.
This PR is a WIP and should not be merged until we have sorted out the build issues.
Author: Herman van Hovell <hvanhovell@questtec.nl>
Author: Nong Li <nong@databricks.com>
Author: Nong Li <nongli@gmail.com>
Closes #10525 from hvanhovell/SPARK-12362.
Diffstat (limited to 'project')
-rw-r--r-- | project/SparkBuild.scala | 46 | ||||
-rw-r--r-- | project/plugins.sbt | 2 |
2 files changed, 46 insertions, 2 deletions
diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index c3d53f835f..588e97f64e 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -414,9 +414,51 @@ object Hive { // Some of our log4j jars make it impossible to submit jobs from this JVM to Hive Map/Reduce // in order to generate golden files. This is only required for developers who are adding new // new query tests. - fullClasspath in Test := (fullClasspath in Test).value.filterNot { f => f.toString.contains("jcl-over") } - ) + fullClasspath in Test := (fullClasspath in Test).value.filterNot { f => f.toString.contains("jcl-over") }, + // ANTLR code-generation step. + // + // This has been heavily inspired by com.github.stefri.sbt-antlr (0.5.3). It fixes a number of + // build errors in the current plugin. + // Create Parser from ANTLR grammar files. + sourceGenerators in Compile += Def.task { + val log = streams.value.log + + val grammarFileNames = Seq( + "SparkSqlLexer.g", + "SparkSqlParser.g") + val sourceDir = (sourceDirectory in Compile).value / "antlr3" + val targetDir = (sourceManaged in Compile).value + + // Create default ANTLR Tool. + val antlr = new org.antlr.Tool + + // Setup input and output directories. + antlr.setInputDirectory(sourceDir.getPath) + antlr.setOutputDirectory(targetDir.getPath) + antlr.setForceRelativeOutput(true) + antlr.setMake(true) + + // Add grammar files. + grammarFileNames.flatMap(gFileName => (sourceDir ** gFileName).get).foreach { gFilePath => + val relGFilePath = (gFilePath relativeTo sourceDir).get.getPath + log.info("ANTLR: Grammar file '%s' detected.".format(relGFilePath)) + antlr.addGrammarFile(relGFilePath) + } + // Generate the parser. + antlr.process + if (antlr.getNumErrors > 0) { + log.error("ANTLR: Caught %d build errors.".format(antlr.getNumErrors)) + } + + // Return all generated java files. + (targetDir ** "*.java").get.toSeq + }.taskValue, + // Include ANTLR tokens files. 
+ resourceGenerators in Compile += Def.task { + ((sourceManaged in Compile).value ** "*.tokens").get.toSeq + }.taskValue + ) } object Assembly { diff --git a/project/plugins.sbt b/project/plugins.sbt index 5e23224cf8..15ba3a36d5 100644 --- a/project/plugins.sbt +++ b/project/plugins.sbt @@ -27,3 +27,5 @@ addSbtPlugin("io.spray" % "sbt-revolver" % "0.7.2") libraryDependencies += "org.ow2.asm" % "asm" % "5.0.3" libraryDependencies += "org.ow2.asm" % "asm-commons" % "5.0.3" + +libraryDependencies += "org.antlr" % "antlr" % "3.5.2" |