path: root/project/SparkBuild.scala
import sbt._
import Keys._
import classpath.ClasspathUtilities.isArchive
import java.io.FileOutputStream
import sbtassembly.Plugin._
import AssemblyKeys._

object SparkBuild extends Build {
  // Hadoop version to build against. For example, "0.20.2", "0.20.205.0", or
  // "1.0.1" for Apache releases, or "0.20.2-cdh3u3" for Cloudera Hadoop.
  val HADOOP_VERSION = "0.20.205.0"
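
  // A possible extension (a sketch, not part of this build): let the Hadoop
  // version be overridden from the environment rather than by editing this
  // file, e.g.
  //   val HADOOP_VERSION = sys.env.getOrElse("SPARK_HADOOP_VERSION", "0.20.205.0")
  // where SPARK_HADOOP_VERSION is a hypothetical variable name.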

  lazy val root = Project("root", file("."), settings = sharedSettings) aggregate(core, repl, examples, bagel)

  lazy val core = Project("core", file("core"), settings = coreSettings)

  lazy val repl = Project("repl", file("repl"), settings = replSettings) dependsOn (core)

  lazy val examples = Project("examples", file("examples"), settings = examplesSettings) dependsOn (core)

  lazy val bagel = Project("bagel", file("bagel"), settings = bagelSettings) dependsOn (core)
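
  // Example usage (hypothetical sbt shell session): since root aggregates the
  // other projects, tasks invoked there fan out to every module:
  //   > compile            // compiles core, repl, examples and bagel
  //   > project core
  //   > assembly           // builds the spark-core assembly jar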

  // Settings shared by all of the sub-projects.
  def sharedSettings = Defaults.defaultSettings ++ Seq(
    organization := "org.spark-project",
    version := "0.6.0-SNAPSHOT",
    scalaVersion := "2.9.1",
    // -deprecation is too noisy because of our use of the old Hadoop API;
    // enable it once that is no longer an issue.
    scalacOptions := Seq(/*"-deprecation",*/ "-unchecked", "-optimize"),
    unmanagedJars in Compile <<= baseDirectory map { base => (base / "lib" ** "*.jar").classpath },
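    // With retrieveManaged enabled, sbt copies resolved dependencies into
    // lib_managed/, so scripts outside sbt (such as Spark's run script) can
    // build a classpath from them.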
    retrieveManaged := true,
    transitiveClassifiers in Scope.GlobalScope := Seq("sources"),
    testListeners <<= target.map(t => Seq(new eu.henkelmann.sbt.JUnitXmlTestsListener(t.getAbsolutePath))),
    publishTo <<= baseDirectory { base => Some(Resolver.file("Local", base / "target" / "maven" asFile)(Patterns(true, Resolver.mavenStyleBasePattern))) },
    libraryDependencies ++= Seq(
      "org.eclipse.jetty" % "jetty-server" % "7.5.3.v20111011",
      "org.scalatest" %% "scalatest" % "1.6.1" % "test",
      "org.scalacheck" %% "scalacheck" % "1.9" % "test",
      "com.novocode" % "junit-interface" % "0.8" % "test"
    ),
    parallelExecution := false,
    /* Workaround for sbt issue #206 (fixed after sbt 0.11.0): manually
     * aggregate watchSources across all dependent projects so that triggered
     * execution (e.g. ~compile) also reacts to changes in them. */
    watchTransitiveSources <<= Defaults.inDependencies[Task[Seq[File]]](watchSources.task,
      const(std.TaskExtra.constant(Nil)), aggregate = true, includeRoot = true) apply { _.join.map(_.flatten) }
  )

  val slf4jVersion = "1.6.1"

  def coreSettings = sharedSettings ++ Seq(
    name := "spark-core",
    resolvers ++= Seq(
      "Typesafe Repository" at "http://repo.typesafe.com/typesafe/releases/",
      "JBoss Repository" at "http://repository.jboss.org/nexus/content/repositories/releases/",
      "Cloudera Repository" at "http://repository.cloudera.com/artifactory/cloudera-repos/",
      "Spray Repository" at "http://repo.spray.cc/"
    ),
    libraryDependencies ++= Seq(
      "com.google.guava" % "guava" % "11.0.1",
      "log4j" % "log4j" % "1.2.16",
      "org.slf4j" % "slf4j-api" % slf4jVersion,
      "org.slf4j" % "slf4j-log4j12" % slf4jVersion,
      "com.ning" % "compress-lzf" % "0.8.4",
      "org.apache.hadoop" % "hadoop-core" % HADOOP_VERSION,
      "asm" % "asm-all" % "3.3.1",
      "com.google.protobuf" % "protobuf-java" % "2.4.1",
      "de.javakaffee" % "kryo-serializers" % "0.9",
      "com.typesafe.akka" % "akka-actor" % "2.0.2",
      "com.typesafe.akka" % "akka-remote" % "2.0.2",
      "com.typesafe.akka" % "akka-slf4j" % "2.0.2",
      "org.jboss.netty" % "netty" % "3.2.6.Final",
      "it.unimi.dsi" % "fastutil" % "6.4.4",
      "colt" % "colt" % "1.2.0",
      "cc.spray" % "spray-can" % "1.0-M2.1",
      "cc.spray" % "spray-server" % "1.0-M2.1"
    )
  ) ++ assemblySettings ++ extraAssemblySettings ++ mergeSettings

  def replSettings = sharedSettings ++ Seq(
    name := "spark-repl",
    // The REPL embeds the Scala interpreter, so it needs the scala-compiler
    // jar matching the build's scalaVersion.
    libraryDependencies <+= scalaVersion("org.scala-lang" % "scala-compiler" % _)
  ) ++ assemblySettings ++ extraAssemblySettings ++ mergeSettings

  def examplesSettings = sharedSettings ++ Seq(
    name := "spark-examples"
  )

  def bagelSettings = sharedSettings ++ Seq(name := "spark-bagel")

  // Fix for "No configuration setting found for key 'akka.version'" exception
  // when running Spark from the jar generated by the "assembly" task; see
  // http://letitcrash.com/post/21025950392/howto-sbt-assembly-vs-reference-conf
  lazy val merge = TaskKey[File]("merge-reference",
    "Merge all reference.conf files on the classpath into a single file.")

  lazy val mergeSettings: Seq[Project.Setting[_]] = Seq(
    merge <<= (fullClasspath in assembly) map { c =>
      // Split the full classpath into jar files and class directories.
      val (libs, dirs) = c map (_.data) partition (isArchive)
      // The goal is simply to concatenate the files here.
      val dest = file("reference.conf")
      val out = new FileOutputStream(dest)
      val append = IO.transfer(_: File, out)
      try {
        // First collect reference.conf files from class directories...
        (dirs * "reference.conf").get foreach append
        // ...then from dependency jars, by unzipping each one and
        // appending its reference.conf if present.
        for (lib <- libs) {
          IO withTemporaryDirectory { dir =>
            IO.unzip(lib, dir, "reference.conf")
            (dir * "reference.conf").get foreach append
          }
        }
        // Return the merged file's location as the task result.
        dest
      } finally {
        out.close()
      }
    },

    // Exclude the individual reference.conf files unpacked from the jars;
    // the merged file above replaces them.
    excludedFiles in assembly <<=
      (excludedFiles in assembly) { old => bases =>
        old(bases) ++ (bases flatMap (base =>
          (base / "reference.conf").get))
      },

    // Tell sbt-assembly to include our merged file in the assembly.
    assembledMappings in assembly <<=
      (assembledMappings in assembly, merge) map {
        (old, merged) => f =>
          old(f) :+ (merged, "reference.conf")
      }
  )
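
  // Note: later sbt-assembly releases add a MergeStrategy.concat that can
  // concatenate reference.conf entries directly, which would make the
  // hand-rolled merge above unnecessary. On such a version this could look
  // something like (hypothetical for the plugin version pinned here):
  //   mergeStrategy in assembly <<= (mergeStrategy in assembly) { old => {
  //     case "reference.conf" => MergeStrategy.concat
  //     case x => old(x)
  //   }}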

  // Extra assembly settings: skip tests when running `assembly`, drop jar
  // manifests (they collide across dependencies and are regenerated for the
  // assembled jar), and keep the first copy of any other duplicate file.
  def extraAssemblySettings() = Seq(
    test in assembly := {},
    mergeStrategy in assembly := {
      case m if m.toLowerCase.endsWith("manifest.mf") => MergeStrategy.discard
      case _ => MergeStrategy.first
    }
  )

}