diff options
Diffstat (limited to 'project/SparkBuild.scala')
-rw-r--r-- | project/SparkBuild.scala | 142 |
1 files changed, 103 insertions, 39 deletions
diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 2eef2dfa5e..f794918d22 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -20,6 +20,7 @@ import sbt.Classpaths.publishTask import Keys._ import sbtassembly.Plugin._ import AssemblyKeys._ +import scala.util.Properties // For Sonatype publishing //import com.jsuereth.pgp.sbtplugin.PgpKeys._ @@ -49,9 +50,6 @@ object SparkBuild extends Build { lazy val repl = Project("repl", file("repl"), settings = replSettings) .dependsOn(core, bagel, mllib) - lazy val examples = Project("examples", file("examples"), settings = examplesSettings) - .dependsOn(core, mllib, bagel, streaming) - lazy val tools = Project("tools", file("tools"), settings = toolsSettings) dependsOn(core) dependsOn(streaming) lazy val bagel = Project("bagel", file("bagel"), settings = bagelSettings) dependsOn(core) @@ -68,10 +66,11 @@ object SparkBuild extends Build { // A configuration to set an alternative publishLocalConfiguration lazy val MavenCompile = config("m2r") extend(Compile) lazy val publishLocalBoth = TaskKey[Unit]("publish-local", "publish local for m2 and ivy") + val sparkHome = System.getProperty("user.dir") // Allows build configuration to be set through environment variables - lazy val hadoopVersion = scala.util.Properties.envOrElse("SPARK_HADOOP_VERSION", DEFAULT_HADOOP_VERSION) - lazy val isNewHadoop = scala.util.Properties.envOrNone("SPARK_IS_NEW_HADOOP") match { + lazy val hadoopVersion = Properties.envOrElse("SPARK_HADOOP_VERSION", DEFAULT_HADOOP_VERSION) + lazy val isNewHadoop = Properties.envOrNone("SPARK_IS_NEW_HADOOP") match { case None => { val isNewHadoopVersion = "2.[2-9]+".r.findFirstIn(hadoopVersion).isDefined (isNewHadoopVersion|| DEFAULT_IS_NEW_HADOOP) @@ -79,23 +78,43 @@ object SparkBuild extends Build { case Some(v) => v.toBoolean } - lazy val isYarnEnabled = scala.util.Properties.envOrNone("SPARK_YARN") match { + lazy val isYarnEnabled = Properties.envOrNone("SPARK_YARN") match { case None => DEFAULT_YARN case Some(v) => v.toBoolean } // Conditionally include the yarn sub-project - lazy val yarn = Project("yarn", file(if (isNewHadoop) "new-yarn" else "yarn"), settings = yarnSettings) dependsOn(core) - - //lazy val yarn = Project("yarn", file("yarn"), settings = yarnSettings) dependsOn(core) - - lazy val maybeYarn = if (isYarnEnabled) Seq[ClasspathDependency](yarn) else Seq[ClasspathDependency]() - lazy val maybeYarnRef = if (isYarnEnabled) Seq[ProjectReference](yarn) else Seq[ProjectReference]() + lazy val yarnAlpha = Project("yarn-alpha", file("yarn/alpha"), settings = yarnAlphaSettings) dependsOn(core) + lazy val yarn = Project("yarn", file("yarn/stable"), settings = yarnSettings) dependsOn(core) + + lazy val maybeYarn = if (isYarnEnabled) Seq[ClasspathDependency](if (isNewHadoop) yarn else yarnAlpha) else Seq[ClasspathDependency]() + lazy val maybeYarnRef = if (isYarnEnabled) Seq[ProjectReference](if (isNewHadoop) yarn else yarnAlpha) else Seq[ProjectReference]() + + lazy val externalTwitter = Project("external-twitter", file("external/twitter"), settings = twitterSettings) + .dependsOn(streaming % "compile->compile;test->test") + + lazy val externalKafka = Project("external-kafka", file("external/kafka"), settings = kafkaSettings) + .dependsOn(streaming % "compile->compile;test->test") + + lazy val externalFlume = Project("external-flume", file("external/flume"), settings = flumeSettings) + .dependsOn(streaming % "compile->compile;test->test") + + lazy val externalZeromq = Project("external-zeromq", file("external/zeromq"), settings = zeromqSettings) + .dependsOn(streaming % "compile->compile;test->test") + + lazy val externalMqtt = Project("external-mqtt", file("external/mqtt"), settings = mqttSettings) + .dependsOn(streaming % "compile->compile;test->test") + + lazy val allExternal = Seq[ClasspathDependency](externalTwitter, externalKafka, externalFlume, externalZeromq, externalMqtt) + lazy val allExternalRefs = Seq[ProjectReference](externalTwitter, externalKafka, externalFlume, externalZeromq, externalMqtt) + + lazy val examples = Project("examples", file("examples"), settings = examplesSettings) + .dependsOn(core, mllib, bagel, streaming, externalTwitter) dependsOn(allExternal: _*) // Everything except assembly, tools and examples belong to packageProjects lazy val packageProjects = Seq[ProjectReference](core, repl, bagel, streaming, mllib) ++ maybeYarnRef - lazy val allProjects = packageProjects ++ Seq[ProjectReference](examples, tools, assemblyProj) + lazy val allProjects = packageProjects ++ allExternalRefs ++ Seq[ProjectReference](examples, tools, assemblyProj) def sharedSettings = Defaults.defaultSettings ++ Seq( organization := "org.apache.spark", @@ -112,8 +131,9 @@ object SparkBuild extends Build { // Fork new JVMs for tests and set Java options for those fork := true, + javaOptions in Test += "-Dspark.home=" + sparkHome, + javaOptions in Test += "-Dspark.testing=1", javaOptions += "-Xmx3g", - // Show full stack trace and duration in test cases. testOptions in Test += Tests.Argument("-oDF"), @@ -123,7 +143,7 @@ object SparkBuild extends Build { // also check the local Maven repository ~/.m2 resolvers ++= Seq(Resolver.file("Local Maven Repo", file(Path.userHome + "/.m2/repository"))), - // For Sonatype publishing + // For Sonatype publishing resolvers ++= Seq("sonatype-snapshots" at "https://oss.sonatype.org/content/repositories/snapshots", "sonatype-staging" at "https://oss.sonatype.org/service/local/staging/deploy/maven2/"), @@ -165,7 +185,7 @@ object SparkBuild extends Build { </issueManagement> ), -/* + /* publishTo <<= version { (v: String) => val nexus = "https://oss.sonatype.org/" if (v.trim.endsWith("SNAPSHOT")) @@ -174,8 +194,7 @@ object SparkBuild extends Build { Some("sonatype-staging" at nexus + "service/local/staging/deploy/maven2") }, -*/ - + */ libraryDependencies ++= Seq( "io.netty" % "netty-all" % "4.0.13.Final", @@ -226,11 +245,12 @@ object SparkBuild extends Build { "org.slf4j" % "slf4j-api" % slf4jVersion, "org.slf4j" % "slf4j-log4j12" % slf4jVersion, "commons-daemon" % "commons-daemon" % "1.0.10", // workaround for bug HADOOP-9407 - "com.ning" % "compress-lzf" % "0.8.4", + "com.ning" % "compress-lzf" % "1.0.0", "org.xerial.snappy" % "snappy-java" % "1.0.5", "org.ow2.asm" % "asm" % "4.0", "org.spark-project.akka" %% "akka-remote" % "2.2.3-shaded-protobuf" excludeAll(excludeNetty), "org.spark-project.akka" %% "akka-slf4j" % "2.2.3-shaded-protobuf" excludeAll(excludeNetty), + "org.spark-project.akka" %% "akka-testkit" % "2.2.3-shaded-protobuf" % "test", "net.liftweb" %% "lift-json" % "2.5.1" excludeAll(excludeNetty), "it.unimi.dsi" % "fastutil" % "6.4.4", "colt" % "colt" % "1.2.0", @@ -264,7 +284,6 @@ object SparkBuild extends Build { libraryDependencies <+= scalaVersion(v => "org.scala-lang" % "scala-reflect" % v ) ) - def examplesSettings = sharedSettings ++ Seq( name := "spark-examples", libraryDependencies ++= Seq( @@ -301,29 +320,34 @@ object SparkBuild extends Build { def streamingSettings = sharedSettings ++ Seq( name := "spark-streaming", - resolvers ++= Seq( - "Eclipse Repository" at "https://repo.eclipse.org/content/repositories/paho-releases/", - "Apache repo" at "https://repository.apache.org/content/repositories/releases" - ), - libraryDependencies ++= Seq( - "org.apache.flume" % "flume-ng-sdk" % "1.2.0" % "compile" excludeAll(excludeNetty, excludeSnappy), - "com.sksamuel.kafka" %% "kafka" % "0.8.0-beta1" - exclude("com.sun.jdmk", "jmxtools") - exclude("com.sun.jmx", "jmxri") - exclude("net.sf.jopt-simple", "jopt-simple") - excludeAll(excludeNetty), - "org.eclipse.paho" % "mqtt-client" % "0.4.0", - "com.github.sgroschupf" % "zkclient" % "0.1" excludeAll(excludeNetty), - "org.twitter4j" % "twitter4j-stream" % "3.0.3" excludeAll(excludeNetty), - "org.spark-project.akka" %% "akka-zeromq" % "2.2.3-shaded-protobuf" excludeAll(excludeNetty) + "commons-io" % "commons-io" % "2.4" ) ) - def yarnSettings = sharedSettings ++ Seq( - name := "spark-yarn" + def yarnCommonSettings = sharedSettings ++ Seq( + unmanagedSourceDirectories in Compile <++= baseDirectory { base => + Seq( + base / "../common/src/main/scala" + ) + }, + + unmanagedSourceDirectories in Test <++= baseDirectory { base => + Seq( + base / "../common/src/test/scala" + ) + } + ) ++ extraYarnSettings + def yarnAlphaSettings = yarnCommonSettings ++ Seq( + name := "spark-yarn-alpha" + ) + + def yarnSettings = yarnCommonSettings ++ Seq( + name := "spark-yarn" + ) + // Conditionally include the YARN dependencies because some tools look at all sub-projects and will complain // if we refer to nonexistent dependencies (e.g. hadoop-yarn-api from a Hadoop version without YARN). def extraYarnSettings = if(isYarnEnabled) yarnEnabledSettings else Seq() @@ -331,14 +355,15 @@ object SparkBuild extends Build { def yarnEnabledSettings = Seq( libraryDependencies ++= Seq( // Exclude rule required for all ? - "org.apache.hadoop" % "hadoop-client" % hadoopVersion excludeAll(excludeJackson, excludeNetty, excludeAsm, excludeCglib), - "org.apache.hadoop" % "hadoop-yarn-api" % hadoopVersion excludeAll(excludeJackson, excludeNetty, excludeAsm, excludeCglib), + "org.apache.hadoop" % "hadoop-client" % hadoopVersion excludeAll(excludeJackson, excludeNetty, excludeAsm, excludeCglib), + "org.apache.hadoop" % "hadoop-yarn-api" % hadoopVersion excludeAll(excludeJackson, excludeNetty, excludeAsm, excludeCglib), "org.apache.hadoop" % "hadoop-yarn-common" % hadoopVersion excludeAll(excludeJackson, excludeNetty, excludeAsm, excludeCglib), "org.apache.hadoop" % "hadoop-yarn-client" % hadoopVersion excludeAll(excludeJackson, excludeNetty, excludeAsm, excludeCglib) ) ) def assemblyProjSettings = sharedSettings ++ Seq( + libraryDependencies += "net.sf.py4j" % "py4j" % "0.8.1", name := "spark-assembly", assembleDeps in Compile <<= (packageProjects.map(packageBin in Compile in _) ++ Seq(packageDependency in Compile)).dependOn, jarName in assembly <<= version map { v => "spark-assembly-" + v + "-hadoop" + hadoopVersion + ".jar" }, @@ -356,4 +381,43 @@ object SparkBuild extends Build { case _ => MergeStrategy.first } ) + + def twitterSettings() = sharedSettings ++ Seq( + name := "spark-streaming-twitter", + libraryDependencies ++= Seq( + "org.twitter4j" % "twitter4j-stream" % "3.0.3" excludeAll(excludeNetty) + ) + ) + + def kafkaSettings() = sharedSettings ++ Seq( + name := "spark-streaming-kafka", + libraryDependencies ++= Seq( + "com.github.sgroschupf" % "zkclient" % "0.1" excludeAll(excludeNetty), + "com.sksamuel.kafka" %% "kafka" % "0.8.0-beta1" + exclude("com.sun.jdmk", "jmxtools") + exclude("com.sun.jmx", "jmxri") + exclude("net.sf.jopt-simple", "jopt-simple") + excludeAll(excludeNetty) + ) + ) + + def flumeSettings() = sharedSettings ++ Seq( + name := "spark-streaming-flume", + libraryDependencies ++= Seq( + "org.apache.flume" % "flume-ng-sdk" % "1.2.0" % "compile" excludeAll(excludeNetty, excludeSnappy) + ) + ) + + def zeromqSettings() = sharedSettings ++ Seq( + name := "spark-streaming-zeromq", + libraryDependencies ++= Seq( + "org.spark-project.akka" %% "akka-zeromq" % "2.2.3-shaded-protobuf" excludeAll(excludeNetty) + ) + ) + + def mqttSettings() = streamingSettings ++ Seq( + name := "spark-streaming-mqtt", + resolvers ++= Seq("Eclipse Repo" at "https://repo.eclipse.org/content/repositories/paho-releases/"), + libraryDependencies ++= Seq("org.eclipse.paho" % "mqtt-client" % "0.4.0") + ) } |