diff options
Diffstat (limited to 'project/SparkBuild.scala')
-rw-r--r-- | project/SparkBuild.scala | 76 |
1 files changed, 69 insertions, 7 deletions
diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index aff191c98b..e4ad659125 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -52,7 +52,7 @@ object SparkBuild extends Build { lazy val core = Project("core", file("core"), settings = coreSettings) lazy val repl = Project("repl", file("repl"), settings = replSettings) - .dependsOn(core, graphx, bagel, mllib) + .dependsOn(core, graphx, bagel, mllib, sql) lazy val tools = Project("tools", file("tools"), settings = toolsSettings) dependsOn(core) dependsOn(streaming) @@ -60,12 +60,19 @@ object SparkBuild extends Build { lazy val graphx = Project("graphx", file("graphx"), settings = graphxSettings) dependsOn(core) + lazy val catalyst = Project("catalyst", file("sql/catalyst"), settings = catalystSettings) dependsOn(core) + + lazy val sql = Project("sql", file("sql/core"), settings = sqlCoreSettings) dependsOn(core, catalyst) + + // Since hive is its own assembly, it depends on all of the modules. + lazy val hive = Project("hive", file("sql/hive"), settings = hiveSettings) dependsOn(sql, graphx, bagel, mllib, streaming, repl) + lazy val streaming = Project("streaming", file("streaming"), settings = streamingSettings) dependsOn(core) lazy val mllib = Project("mllib", file("mllib"), settings = mllibSettings) dependsOn(core) lazy val assemblyProj = Project("assembly", file("assembly"), settings = assemblyProjSettings) - .dependsOn(core, graphx, bagel, mllib, repl, streaming) dependsOn(maybeYarn: _*) dependsOn(maybeGanglia: _*) + .dependsOn(core, graphx, bagel, mllib, streaming, repl, sql) dependsOn(maybeYarn: _*) dependsOn(maybeGanglia: _*) lazy val assembleDeps = TaskKey[Unit]("assemble-deps", "Build assembly of dependencies and packages Spark projects") @@ -131,13 +138,13 @@ object SparkBuild extends Build { lazy val allExternalRefs = Seq[ProjectReference](externalTwitter, externalKafka, externalFlume, externalZeromq, externalMqtt) lazy val examples = Project("examples", 
file("examples"), settings = examplesSettings) - .dependsOn(core, mllib, graphx, bagel, streaming, externalTwitter) dependsOn(allExternal: _*) + .dependsOn(core, mllib, graphx, bagel, streaming, externalTwitter, hive) dependsOn(allExternal: _*) - // Everything except assembly, tools, java8Tests and examples belong to packageProjects - lazy val packageProjects = Seq[ProjectReference](core, repl, bagel, streaming, mllib, graphx) ++ maybeYarnRef ++ maybeGangliaRef + // Everything except assembly, hive, tools, java8Tests and examples belong to packageProjects + lazy val packageProjects = Seq[ProjectReference](core, repl, bagel, streaming, mllib, graphx, catalyst, sql) ++ maybeYarnRef ++ maybeGangliaRef lazy val allProjects = packageProjects ++ allExternalRefs ++ - Seq[ProjectReference](examples, tools, assemblyProj) ++ maybeJava8Tests + Seq[ProjectReference](examples, tools, assemblyProj, hive) ++ maybeJava8Tests def sharedSettings = Defaults.defaultSettings ++ Seq( organization := "org.apache.spark", @@ -164,7 +171,7 @@ object SparkBuild extends Build { // Show full stack trace and duration in test cases. testOptions in Test += Tests.Argument("-oDF"), // Remove certain packages from Scaladoc - scalacOptions in (Compile,doc) := Seq("-skip-packages", Seq( + scalacOptions in (Compile,doc) := Seq("-groups", "-skip-packages", Seq( "akka", "org.apache.spark.network", "org.apache.spark.deploy", @@ -362,6 +369,61 @@ object SparkBuild extends Build { ) ) + def catalystSettings = sharedSettings ++ Seq( + name := "catalyst", + // The mechanics of rewriting expression ids to compare trees in some test cases makes + // assumptions about the expression ids being contiguous. Running tests in parallel breaks + // this non-deterministically. TODO: FIX THIS. 
+ parallelExecution in Test := false, + libraryDependencies ++= Seq( + "org.scalatest" %% "scalatest" % "1.9.1" % "test", + "com.typesafe" %% "scalalogging-slf4j" % "1.0.1" + ) + ) + + def sqlCoreSettings = sharedSettings ++ Seq( + name := "spark-sql", + libraryDependencies ++= Seq( + "com.twitter" % "parquet-column" % "1.3.2", + "com.twitter" % "parquet-hadoop" % "1.3.2" + ) + ) + + // Since we don't include hive in the main assembly this project also acts as an alternative + // assembly jar. + def hiveSettings = sharedSettings ++ assemblyProjSettings ++ Seq( + name := "spark-hive", + jarName in assembly <<= version map { v => "spark-hive-assembly-" + v + "-hadoop" + hadoopVersion + ".jar" }, + jarName in packageDependency <<= version map { v => "spark-hive-assembly-" + v + "-hadoop" + hadoopVersion + "-deps.jar" }, + javaOptions += "-XX:MaxPermSize=1g", + libraryDependencies ++= Seq( + "org.apache.hive" % "hive-metastore" % "0.12.0", + "org.apache.hive" % "hive-exec" % "0.12.0", + "org.apache.hive" % "hive-serde" % "0.12.0" + ), + // Multiple queries rely on the TestHive singleton. See comments there for more details. + parallelExecution in Test := false, + // Supporting all SerDes requires us to depend on deprecated APIs, so we turn off the warnings + // only for this subproject. 
+ scalacOptions <<= scalacOptions map { currentOpts: Seq[String] => + currentOpts.filterNot(_ == "-deprecation") + }, + initialCommands in console := + """ + |import org.apache.spark.sql.catalyst.analysis._ + |import org.apache.spark.sql.catalyst.dsl._ + |import org.apache.spark.sql.catalyst.errors._ + |import org.apache.spark.sql.catalyst.expressions._ + |import org.apache.spark.sql.catalyst.plans.logical._ + |import org.apache.spark.sql.catalyst.rules._ + |import org.apache.spark.sql.catalyst.types._ + |import org.apache.spark.sql.catalyst.util._ + |import org.apache.spark.sql.execution + |import org.apache.spark.sql.hive._ + |import org.apache.spark.sql.hive.TestHive._ + |import org.apache.spark.sql.parquet.ParquetTestData""".stripMargin + ) + def streamingSettings = sharedSettings ++ Seq( name := "spark-streaming", libraryDependencies ++= Seq( |