From 970635a9f8d7f64708a9fcae0b231c570f3f2c51 Mon Sep 17 00:00:00 2001 From: Herman van Hovell Date: Fri, 1 Jan 2016 23:22:50 -0800 Subject: [SPARK-12362][SQL][WIP] Inline Hive Parser This PR inlines the Hive SQL parser in Spark SQL. The previous (merged) incarnation of this PR passed all tests, but had and still has problems with the build. These problems are caused by the fact that - for some reason - in some cases the ANTLR generated code is not included in the compilation phase. This PR is a WIP and should not be merged until we have sorted out the build issues. Author: Herman van Hovell Author: Nong Li Author: Nong Li Closes #10525 from hvanhovell/SPARK-12362. --- project/SparkBuild.scala | 46 ++++++++++++++++++++++++++++++++++++++++++++-- project/plugins.sbt | 2 ++ 2 files changed, 46 insertions(+), 2 deletions(-) (limited to 'project') diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index c3d53f835f..588e97f64e 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -414,9 +414,51 @@ object Hive { // Some of our log4j jars make it impossible to submit jobs from this JVM to Hive Map/Reduce // in order to generate golden files. This is only required for developers who are adding new // new query tests. - fullClasspath in Test := (fullClasspath in Test).value.filterNot { f => f.toString.contains("jcl-over") } - ) + fullClasspath in Test := (fullClasspath in Test).value.filterNot { f => f.toString.contains("jcl-over") }, + // ANTLR code-generation step. + // + // This has been heavily inspired by com.github.stefri.sbt-antlr (0.5.3). It fixes a number of + // build errors in the current plugin. + // Create Parser from ANTLR grammar files. 
+ sourceGenerators in Compile += Def.task { + val log = streams.value.log + + val grammarFileNames = Seq( + "SparkSqlLexer.g", + "SparkSqlParser.g") + val sourceDir = (sourceDirectory in Compile).value / "antlr3" + val targetDir = (sourceManaged in Compile).value + + // Create default ANTLR Tool. + val antlr = new org.antlr.Tool + + // Setup input and output directories. + antlr.setInputDirectory(sourceDir.getPath) + antlr.setOutputDirectory(targetDir.getPath) + antlr.setForceRelativeOutput(true) + antlr.setMake(true) + + // Add grammar files. + grammarFileNames.flatMap(gFileName => (sourceDir ** gFileName).get).foreach { gFilePath => + val relGFilePath = (gFilePath relativeTo sourceDir).get.getPath + log.info("ANTLR: Grammar file '%s' detected.".format(relGFilePath)) + antlr.addGrammarFile(relGFilePath) + } + // Generate the parser. + antlr.process + if (antlr.getNumErrors > 0) { + log.error("ANTLR: Caught %d build errors.".format(antlr.getNumErrors)) + } + + // Return all generated java files. + (targetDir ** "*.java").get.toSeq + }.taskValue, + // Include ANTLR tokens files. + resourceGenerators in Compile += Def.task { + ((sourceManaged in Compile).value ** "*.tokens").get.toSeq + }.taskValue + ) } object Assembly { diff --git a/project/plugins.sbt b/project/plugins.sbt index 5e23224cf8..15ba3a36d5 100644 --- a/project/plugins.sbt +++ b/project/plugins.sbt @@ -27,3 +27,5 @@ addSbtPlugin("io.spray" % "sbt-revolver" % "0.7.2") libraryDependencies += "org.ow2.asm" % "asm" % "5.0.3" libraryDependencies += "org.ow2.asm" % "asm-commons" % "5.0.3" + +libraryDependencies += "org.antlr" % "antlr" % "3.5.2" -- cgit v1.2.3