import sbt._
import Keys._
import classpath.ClasspathUtilities.isArchive
import java.io.FileOutputStream
import sbtassembly.Plugin._
import AssemblyKeys._

object SparkBuild extends Build {
  // Hadoop version to build against. For example, "0.20.2", "0.20.205.0", or
  // "1.0.1" for Apache releases, or "0.20.2-cdh3u3" for Cloudera Hadoop.
  val HADOOP_VERSION = "0.20.205.0"

  lazy val root = Project("root", file("."), settings = sharedSettings) aggregate(core, repl, examples, bagel)

  lazy val core = Project("core", file("core"), settings = coreSettings)

  lazy val repl = Project("repl", file("repl"), settings = replSettings) dependsOn (core)

  lazy val examples = Project("examples", file("examples"), settings = examplesSettings) dependsOn (core)

  lazy val bagel = Project("bagel", file("bagel"), settings = bagelSettings) dependsOn (core)

  def sharedSettings = Defaults.defaultSettings ++ Seq(
    organization := "org.spark-project",
    version := "0.6.0-SNAPSHOT",
    scalaVersion := "2.9.1",
    // -deprecation is too noisy due to usage of the old Hadoop API; enable it once that's no longer an issue.
    scalacOptions := Seq(/*"-deprecation",*/ "-unchecked", "-optimize"),
    unmanagedJars in Compile <<= baseDirectory map { base => (base / "lib" ** "*.jar").classpath },
    retrieveManaged := true,
    transitiveClassifiers in Scope.GlobalScope := Seq("sources"),
    testListeners <<= target.map(t => Seq(new eu.henkelmann.sbt.JUnitXmlTestsListener(t.getAbsolutePath))),

    publishTo <<= baseDirectory { base =>
      Some(Resolver.file("Local", base / "target" / "maven" asFile)(Patterns(true, Resolver.mavenStyleBasePattern)))
    },

    libraryDependencies ++= Seq(
      "org.eclipse.jetty" % "jetty-server" % "7.5.3.v20111011",
      "org.scalatest" %% "scalatest" % "1.6.1" % "test",
      "org.scalacheck" %% "scalacheck" % "1.9" % "test",
      "com.novocode" % "junit-interface" % "0.8" % "test"
    ),

    parallelExecution := false,

    /* Workaround for issue #206 (fixed after SBT 0.11.0) */
    watchTransitiveSources <<= Defaults.inDependencies[Task[Seq[File]]](watchSources.task,
      const(std.TaskExtra.constant(Nil)), aggregate = true, includeRoot = true) apply { _.join.map(_.flatten) }
  )

  val slf4jVersion = "1.6.1"

  def coreSettings = sharedSettings ++ Seq(
    name := "spark-core",
    resolvers ++= Seq(
      "Typesafe Repository" at "http://repo.typesafe.com/typesafe/releases/",
      "JBoss Repository" at "http://repository.jboss.org/nexus/content/repositories/releases/",
      "Cloudera Repository" at "http://repository.cloudera.com/artifactory/cloudera-repos/",
      "Spray Repository" at "http://repo.spray.cc/"
    ),

    libraryDependencies ++= Seq(
      "com.google.guava" % "guava" % "11.0.1",
      "log4j" % "log4j" % "1.2.16",
      "org.slf4j" % "slf4j-api" % slf4jVersion,
      "org.slf4j" % "slf4j-log4j12" % slf4jVersion,
      "com.ning" % "compress-lzf" % "0.8.4",
      "org.apache.hadoop" % "hadoop-core" % HADOOP_VERSION,
      "asm" % "asm-all" % "3.3.1",
      "com.google.protobuf" % "protobuf-java" % "2.4.1",
      "de.javakaffee" % "kryo-serializers" % "0.9",
      "com.typesafe.akka" % "akka-actor" % "2.0.2",
      "com.typesafe.akka" % "akka-remote" % "2.0.2",
      "com.typesafe.akka" % "akka-slf4j" % "2.0.2",
      "org.jboss.netty" % "netty" % "3.2.6.Final",
      "it.unimi.dsi" % "fastutil" % "6.4.4",
      "colt" % "colt" % "1.2.0",
      "cc.spray" % "spray-can" % "1.0-M2.1",
      "cc.spray" % "spray-server" % "1.0-M2.1"
    )
  ) ++ assemblySettings ++ extraAssemblySettings ++ mergeSettings

  def replSettings = sharedSettings ++ Seq(
    name := "spark-repl",
    libraryDependencies <+= scalaVersion("org.scala-lang" % "scala-compiler" % _)
  ) ++ assemblySettings ++ extraAssemblySettings ++ mergeSettings
  def examplesSettings = sharedSettings ++ Seq(
    name := "spark-examples"
  )

  def bagelSettings = sharedSettings ++ Seq(name := "spark-bagel")

  // Fix for "No configuration setting found for key 'akka.version'" exception
  // when running Spark from the jar generated by the "assembly" task; see
  // http://letitcrash.com/post/21025950392/howto-sbt-assembly-vs-reference-conf
  lazy val merge = TaskKey[File]("merge-reference", "merge all reference.conf")

  lazy val mergeSettings: Seq[Project.Setting[_]] = Seq(
    merge <<= (fullClasspath in assembly) map { c =>
      // Collect from all elements of the full classpath
      val (libs, dirs) = c map (_.data) partition (isArchive)
      // The goal is to simply concatenate the files here
      val dest = file("reference.conf")
      val out = new FileOutputStream(dest)
      val append = IO.transfer(_: File, out)
      try {
        // First collect from managed sources
        (dirs * "reference.conf").get foreach append
        // Then from dependency jars, by unzipping each one and
        // collecting its reference.conf if present
        for (lib <- libs) {
          IO withTemporaryDirectory { dir =>
            IO.unzip(lib, dir, "reference.conf")
            (dir * "reference.conf").get foreach append
          }
        }
        // Return the merged file's location as the task result
        dest
      } finally {
        out.close()
      }
    },

    // Get rid of the individual reference.conf files that come from jars
    excludedFiles in assembly <<= (excludedFiles in assembly) { (old) => (bases) =>
      old(bases) ++ (bases flatMap (base => (base / "reference.conf").get))
    },

    // Tell sbt-assembly to include our merged file
    assembledMappings in assembly <<= (assembledMappings in assembly, merge) map { (old, merged) => (f) =>
      old(f) :+ (merged, "reference.conf")
    }
  )

  def extraAssemblySettings() = Seq(test in assembly := {}) ++ Seq(
    mergeStrategy in assembly := {
      case m if m.toLowerCase.endsWith("manifest.mf") => MergeStrategy.discard
      case _ => MergeStrategy.first
    }
  )
}
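
// Usage sketch (a note on how these settings compose, not part of the build
// definition itself; assumes sbt 0.11.x with the sbt-assembly plugin on the
// plugin classpath, as the imports above imply): running the assembly task on a
// project that includes mergeSettings, e.g.
//
//   sbt "project core" assembly
//
// first executes the merge-reference task defined above, because
// assembledMappings in assembly depends on it, and then bundles the
// concatenated reference.conf into the resulting fat jar while the
// excludedFiles setting drops the per-jar copies it was built from.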