path: root/project/SparkBuild.scala
author    Prashant Sharma <prashant.s@imaginea.com>    2014-07-10 11:03:37 -0700
committer Patrick Wendell <pwendell@gmail.com>    2014-07-10 11:03:37 -0700
commit    628932b8d0dbbc6c68c61d4bca1c504f38684c2a (patch)
tree      b388e5d9bf73fee4fa8d2a4ddd125688d7f65250 /project/SparkBuild.scala
parent    c2babc089bd97137d240ca119ec4fc4b5a1422c8 (diff)
[SPARK-1776] Have Spark's SBT build read dependencies from Maven.
This patch introduces the new way of working while retaining the existing ways of doing things. For example, the Maven build instruction for YARN is `mvn -Pyarn -Phadoop-2.2 clean package -DskipTests`; with sbt it can become `MAVEN_PROFILES="yarn, hadoop-2.2" sbt/sbt clean assembly`. The build also supports `sbt/sbt -Pyarn -Phadoop-2.2 -Dhadoop.version=2.2.0 clean assembly`.

Author: Prashant Sharma <prashant.s@imaginea.com>
Author: Patrick Wendell <pwendell@gmail.com>

Closes #772 from ScrapCodes/sbt-maven and squashes the following commits:

a8ac951 [Prashant Sharma] Updated sbt version.
62b09bb [Prashant Sharma] Improvements.
fa6221d [Prashant Sharma] Excluding sql from mima
4b8875e [Prashant Sharma] Sbt assembly no longer builds tools by default.
72651ca [Prashant Sharma] Addresses code review comments.
acab73d [Prashant Sharma] Revert "Small fix to run-examples script."
ac4312c [Prashant Sharma] Revert "minor fix"
6af91ac [Prashant Sharma] Ported oldDeps back. + fixes issues with prev commit.
65cf06c [Prashant Sharma] Servlet API jars mess up with the other servlet jars on the class path.
446768e [Prashant Sharma] minor fix
89b9777 [Prashant Sharma] Merge conflicts
d0a02f2 [Prashant Sharma] Bumped up pom versions, since the build now depends on the pom it is better updated there. + general cleanups.
dccc8ac [Prashant Sharma] updated mima to check against 1.0
a49c61b [Prashant Sharma] Fix for tools jar
a2f5ae1 [Prashant Sharma] Fixes a bug in dependencies.
cf88758 [Prashant Sharma] cleanup
9439ea3 [Prashant Sharma] Small fix to run-examples script.
96cea1f [Prashant Sharma] SPARK-1776 Have Spark's SBT build read dependencies from Maven.
36efa62 [Patrick Wendell] Set project name in pom files and added eclipse/intellij plugins.
4973dbd [Patrick Wendell] Example build using pom reader.
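For reference, here is a minimal standalone sketch of how profile names are derived from a `MAVEN_PROFILES` value or from `-P` flags, mirroring the split/replaceAll logic this patch adds to `SparkBuild.profiles` (the `ProfileParsingSketch` object and `parseProfiles` helper below are illustrative only, not part of the patch):

```scala
// Sketch (not part of the patch): derive profile names the same way the new
// SparkBuild.profiles does, from either a MAVEN_PROFILES value or -P flags.
object ProfileParsingSketch {
  // Hypothetical helper; the real build reads the value from the MAVEN_PROFILES
  // environment variable inside SparkBuild.profiles.
  def parseProfiles(value: String): Seq[String] =
    value.split("(\\s+|,)").filterNot(_.isEmpty).map(_.trim.replaceAll("-P", "")).toSeq

  def main(args: Array[String]): Unit = {
    println(parseProfiles("yarn, hadoop-2.2"))     // List(yarn, hadoop-2.2)
    println(parseProfiles("-Pyarn -Phadoop-2.2"))  // List(yarn, hadoop-2.2)
  }
}
```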
Diffstat (limited to 'project/SparkBuild.scala')
-rw-r--r--  project/SparkBuild.scala | 780
1 file changed, 201 insertions(+), 579 deletions(-)
diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
index 599714233c..b55c50560b 100644
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@ -15,524 +15,159 @@
* limitations under the License.
*/
-import sbt._
-import sbt.Classpaths.publishTask
-import sbt.Keys._
-import sbtassembly.Plugin._
-import AssemblyKeys._
import scala.util.Properties
-import org.scalastyle.sbt.ScalastylePlugin.{Settings => ScalaStyleSettings}
-import com.typesafe.tools.mima.plugin.MimaKeys.previousArtifact
-import sbtunidoc.Plugin._
-import UnidocKeys._
-
import scala.collection.JavaConversions._
-// For Sonatype publishing
-// import com.jsuereth.pgp.sbtplugin.PgpKeys._
-
-object SparkBuild extends Build {
- val SPARK_VERSION = "1.1.0-SNAPSHOT"
- val SPARK_VERSION_SHORT = SPARK_VERSION.replaceAll("-SNAPSHOT", "")
-
- // Hadoop version to build against. For example, "1.0.4" for Apache releases, or
- // "2.0.0-mr1-cdh4.2.0" for Cloudera Hadoop. Note that these variables can be set
- // through the environment variables SPARK_HADOOP_VERSION and SPARK_YARN.
- val DEFAULT_HADOOP_VERSION = "1.0.4"
-
- // Whether the Hadoop version to build against is 2.2.x, or a variant of it. This can be set
- // through the SPARK_IS_NEW_HADOOP environment variable.
- val DEFAULT_IS_NEW_HADOOP = false
-
- val DEFAULT_YARN = false
-
- val DEFAULT_HIVE = false
-
- // HBase version; set as appropriate.
- val HBASE_VERSION = "0.94.6"
-
- // Target JVM version
- val SCALAC_JVM_VERSION = "jvm-1.6"
- val JAVAC_JVM_VERSION = "1.6"
-
- lazy val root = Project("root", file("."), settings = rootSettings) aggregate(allProjects: _*)
-
- lazy val core = Project("core", file("core"), settings = coreSettings)
-
- /** Following project only exists to pull previous artifacts of Spark for generating
- Mima ignores. For more information see: SPARK 2071 */
- lazy val oldDeps = Project("oldDeps", file("dev"), settings = oldDepsSettings)
-
- def replDependencies = Seq[ProjectReference](core, graphx, bagel, mllib, sql) ++ maybeHiveRef
-
- lazy val repl = Project("repl", file("repl"), settings = replSettings)
- .dependsOn(replDependencies.map(a => a: sbt.ClasspathDep[sbt.ProjectReference]): _*)
-
- lazy val tools = Project("tools", file("tools"), settings = toolsSettings) dependsOn(core) dependsOn(streaming)
+import sbt._
+import sbt.Keys._
+import org.scalastyle.sbt.ScalastylePlugin.{Settings => ScalaStyleSettings}
+import com.typesafe.sbt.pom.{PomBuild, SbtPomKeys}
+import net.virtualvoid.sbt.graph.Plugin.graphSettings
- lazy val bagel = Project("bagel", file("bagel"), settings = bagelSettings) dependsOn(core)
+object BuildCommons {
- lazy val graphx = Project("graphx", file("graphx"), settings = graphxSettings) dependsOn(core)
+ private val buildLocation = file(".").getAbsoluteFile.getParentFile
- lazy val catalyst = Project("catalyst", file("sql/catalyst"), settings = catalystSettings) dependsOn(core)
+ val allProjects@Seq(bagel, catalyst, core, graphx, hive, mllib, repl, spark, sql, streaming,
+ streamingFlume, streamingKafka, streamingMqtt, streamingTwitter, streamingZeromq) =
+ Seq("bagel", "catalyst", "core", "graphx", "hive", "mllib", "repl", "spark", "sql",
+ "streaming", "streaming-flume", "streaming-kafka", "streaming-mqtt", "streaming-twitter",
+ "streaming-zeromq").map(ProjectRef(buildLocation, _))
- lazy val sql = Project("sql", file("sql/core"), settings = sqlCoreSettings) dependsOn(core) dependsOn(catalyst % "compile->compile;test->test")
+ val optionallyEnabledProjects@Seq(yarn, yarnStable, yarnAlpha, java8Tests, sparkGangliaLgpl) =
+ Seq("yarn", "yarn-stable", "yarn-alpha", "java8-tests", "ganglia-lgpl")
+ .map(ProjectRef(buildLocation, _))
- lazy val hive = Project("hive", file("sql/hive"), settings = hiveSettings) dependsOn(sql)
+ val assemblyProjects@Seq(assembly, examples) = Seq("assembly", "examples")
+ .map(ProjectRef(buildLocation, _))
- lazy val maybeHive: Seq[ClasspathDependency] = if (isHiveEnabled) Seq(hive) else Seq()
- lazy val maybeHiveRef: Seq[ProjectReference] = if (isHiveEnabled) Seq(hive) else Seq()
+ val tools = "tools"
- lazy val streaming = Project("streaming", file("streaming"), settings = streamingSettings) dependsOn(core)
+ val sparkHome = buildLocation
+}
- lazy val mllib = Project("mllib", file("mllib"), settings = mllibSettings) dependsOn(core)
+object SparkBuild extends PomBuild {
- lazy val assemblyProj = Project("assembly", file("assembly"), settings = assemblyProjSettings)
- .dependsOn(core, graphx, bagel, mllib, streaming, repl, sql) dependsOn(maybeYarn: _*) dependsOn(maybeHive: _*) dependsOn(maybeGanglia: _*)
+ import BuildCommons._
+ import scala.collection.mutable.Map
- lazy val assembleDepsTask = TaskKey[Unit]("assemble-deps")
- lazy val assembleDeps = assembleDepsTask := {
- println()
- println("**** NOTE ****")
- println("'sbt/sbt assemble-deps' is no longer supported.")
- println("Instead create a normal assembly and:")
- println(" export SPARK_PREPEND_CLASSES=1 (toggle on)")
- println(" unset SPARK_PREPEND_CLASSES (toggle off)")
- println()
- }
+ val projectsMap: Map[String, Seq[Setting[_]]] = Map.empty
- // A configuration to set an alternative publishLocalConfiguration
- lazy val MavenCompile = config("m2r") extend(Compile)
- lazy val publishLocalBoth = TaskKey[Unit]("publish-local", "publish local for m2 and ivy")
- val sparkHome = System.getProperty("user.dir")
-
- // Allows build configuration to be set through environment variables
- lazy val hadoopVersion = Properties.envOrElse("SPARK_HADOOP_VERSION", DEFAULT_HADOOP_VERSION)
- lazy val isNewHadoop = Properties.envOrNone("SPARK_IS_NEW_HADOOP") match {
- case None => {
- val isNewHadoopVersion = "^2\\.[2-9]+".r.findFirstIn(hadoopVersion).isDefined
- (isNewHadoopVersion|| DEFAULT_IS_NEW_HADOOP)
+ // Provides compatibility for older versions of the Spark build
+ def backwardCompatibility = {
+ import scala.collection.mutable
+ var isAlphaYarn = false
+ var profiles: mutable.Seq[String] = mutable.Seq.empty
+ if (Properties.envOrNone("SPARK_GANGLIA_LGPL").isDefined) {
+ println("NOTE: SPARK_GANGLIA_LGPL is deprecated, please use -Pganglia-lgpl flag.")
+ profiles ++= Seq("spark-ganglia-lgpl")
}
- case Some(v) => v.toBoolean
- }
-
- lazy val isYarnEnabled = Properties.envOrNone("SPARK_YARN") match {
- case None => DEFAULT_YARN
- case Some(v) => v.toBoolean
+ if (Properties.envOrNone("SPARK_HIVE").isDefined) {
+ println("NOTE: SPARK_HIVE is deprecated, please use -Phive flag.")
+ profiles ++= Seq("hive")
+ }
+ Properties.envOrNone("SPARK_HADOOP_VERSION") match {
+ case Some(v) =>
+ if (v.matches("0.23.*")) isAlphaYarn = true
+ println("NOTE: SPARK_HADOOP_VERSION is deprecated, please use -Dhadoop.version=" + v)
+ System.setProperty("hadoop.version", v)
+ case None =>
+ }
+ if (Properties.envOrNone("SPARK_YARN").isDefined) {
+ if(isAlphaYarn) {
+ println("NOTE: SPARK_YARN is deprecated, please use -Pyarn-alpha flag.")
+ profiles ++= Seq("yarn-alpha")
+ }
+ else {
+ println("NOTE: SPARK_YARN is deprecated, please use -Pyarn flag.")
+ profiles ++= Seq("yarn")
+ }
+ }
+ profiles
}
- lazy val hadoopClient = if (hadoopVersion.startsWith("0.20.") || hadoopVersion == "1.0.0") "hadoop-core" else "hadoop-client"
- val maybeAvro = if (hadoopVersion.startsWith("0.23.")) Seq("org.apache.avro" % "avro" % "1.7.4") else Seq()
- lazy val isHiveEnabled = Properties.envOrNone("SPARK_HIVE") match {
- case None => DEFAULT_HIVE
- case Some(v) => v.toBoolean
+ override val profiles = Properties.envOrNone("MAVEN_PROFILES") match {
+ case None => backwardCompatibility
+ // Rationale: If -P option exists no need to support backwardCompatibility.
+ case Some(v) =>
+ if (backwardCompatibility.nonEmpty)
+ println("Note: We ignore environment variables, when use of profile is detected in " +
+ "conjunction with environment variable.")
+ v.split("(\\s+|,)").filterNot(_.isEmpty).map(_.trim.replaceAll("-P", "")).toSeq
}
- // Include Ganglia integration if the user has enabled Ganglia
- // This is isolated from the normal build due to LGPL-licensed code in the library
- lazy val isGangliaEnabled = Properties.envOrNone("SPARK_GANGLIA_LGPL").isDefined
- lazy val gangliaProj = Project("spark-ganglia-lgpl", file("extras/spark-ganglia-lgpl"), settings = gangliaSettings).dependsOn(core)
- val maybeGanglia: Seq[ClasspathDependency] = if (isGangliaEnabled) Seq(gangliaProj) else Seq()
- val maybeGangliaRef: Seq[ProjectReference] = if (isGangliaEnabled) Seq(gangliaProj) else Seq()
+ override val userPropertiesMap = System.getProperties.toMap
- // Include the Java 8 project if the JVM version is 8+
- lazy val javaVersion = System.getProperty("java.specification.version")
- lazy val isJava8Enabled = javaVersion.toDouble >= "1.8".toDouble
- val maybeJava8Tests = if (isJava8Enabled) Seq[ProjectReference](java8Tests) else Seq[ProjectReference]()
- lazy val java8Tests = Project("java8-tests", file("extras/java8-tests"), settings = java8TestsSettings).
- dependsOn(core) dependsOn(streaming % "compile->compile;test->test")
-
- // Include the YARN project if the user has enabled YARN
- lazy val yarnAlpha = Project("yarn-alpha", file("yarn/alpha"), settings = yarnAlphaSettings) dependsOn(core)
- lazy val yarn = Project("yarn", file("yarn/stable"), settings = yarnSettings) dependsOn(core)
-
- lazy val maybeYarn: Seq[ClasspathDependency] = if (isYarnEnabled) Seq(if (isNewHadoop) yarn else yarnAlpha) else Seq()
- lazy val maybeYarnRef: Seq[ProjectReference] = if (isYarnEnabled) Seq(if (isNewHadoop) yarn else yarnAlpha) else Seq()
-
- lazy val externalTwitter = Project("external-twitter", file("external/twitter"), settings = twitterSettings)
- .dependsOn(streaming % "compile->compile;test->test")
-
- lazy val externalKafka = Project("external-kafka", file("external/kafka"), settings = kafkaSettings)
- .dependsOn(streaming % "compile->compile;test->test")
-
- lazy val externalFlume = Project("external-flume", file("external/flume"), settings = flumeSettings)
- .dependsOn(streaming % "compile->compile;test->test")
-
- lazy val externalZeromq = Project("external-zeromq", file("external/zeromq"), settings = zeromqSettings)
- .dependsOn(streaming % "compile->compile;test->test")
-
- lazy val externalMqtt = Project("external-mqtt", file("external/mqtt"), settings = mqttSettings)
- .dependsOn(streaming % "compile->compile;test->test")
-
- lazy val allExternal = Seq[ClasspathDependency](externalTwitter, externalKafka, externalFlume, externalZeromq, externalMqtt)
- lazy val allExternalRefs = Seq[ProjectReference](externalTwitter, externalKafka, externalFlume, externalZeromq, externalMqtt)
-
- lazy val examples = Project("examples", file("examples"), settings = examplesSettings)
- .dependsOn(core, mllib, graphx, bagel, streaming, hive) dependsOn(allExternal: _*)
-
- // Everything except assembly, hive, tools, java8Tests and examples belong to packageProjects
- lazy val packageProjects = Seq[ProjectReference](core, repl, bagel, streaming, mllib, graphx, catalyst, sql) ++ maybeYarnRef ++ maybeHiveRef ++ maybeGangliaRef
-
- lazy val allProjects = packageProjects ++ allExternalRefs ++
- Seq[ProjectReference](examples, tools, assemblyProj) ++ maybeJava8Tests
-
- def sharedSettings = Defaults.defaultSettings ++ MimaBuild.mimaSettings(file(sparkHome)) ++ Seq(
- organization := "org.apache.spark",
- version := SPARK_VERSION,
- scalaVersion := "2.10.4",
- scalacOptions := Seq("-Xmax-classfile-name", "120", "-unchecked", "-deprecation", "-feature",
- "-target:" + SCALAC_JVM_VERSION),
- javacOptions := Seq("-target", JAVAC_JVM_VERSION, "-source", JAVAC_JVM_VERSION),
- unmanagedJars in Compile <<= baseDirectory map { base => (base / "lib" ** "*.jar").classpath },
+ lazy val sharedSettings = graphSettings ++ ScalaStyleSettings ++ Seq (
+ javaHome := Properties.envOrNone("JAVA_HOME").map(file),
+ incOptions := incOptions.value.withNameHashing(true),
retrieveManaged := true,
- javaHome := Properties.envOrNone("JAVA_HOME").map(file),
- // This is to add convenience of enabling sbt -Dsbt.offline=true for making the build offline.
- offline := "true".equalsIgnoreCase(sys.props("sbt.offline")),
retrievePattern := "[type]s/[artifact](-[revision])(-[classifier]).[ext]",
- transitiveClassifiers in Scope.GlobalScope := Seq("sources"),
- testListeners <<= target.map(t => Seq(new eu.henkelmann.sbt.JUnitXmlTestsListener(t.getAbsolutePath))),
- incOptions := incOptions.value.withNameHashing(true),
- // Fork new JVMs for tests and set Java options for those
- fork := true,
- javaOptions in Test += "-Dspark.home=" + sparkHome,
- javaOptions in Test += "-Dspark.testing=1",
- javaOptions in Test += "-Dsun.io.serialization.extendedDebugInfo=true",
- javaOptions in Test ++= System.getProperties.filter(_._1 startsWith "spark").map { case (k,v) => s"-D$k=$v" }.toSeq,
- javaOptions in Test ++= "-Xmx3g -XX:PermSize=128M -XX:MaxNewSize=256m -XX:MaxPermSize=1g".split(" ").toSeq,
- javaOptions += "-Xmx3g",
- // Show full stack trace and duration in test cases.
- testOptions in Test += Tests.Argument("-oDF"),
- // Remove certain packages from Scaladoc
- scalacOptions in (Compile, doc) := Seq(
- "-groups",
- "-skip-packages", Seq(
- "akka",
- "org.apache.spark.api.python",
- "org.apache.spark.network",
- "org.apache.spark.deploy",
- "org.apache.spark.util.collection"
- ).mkString(":"),
- "-doc-title", "Spark " + SPARK_VERSION_SHORT + " ScalaDoc"
- ),
-
- // Only allow one test at a time, even across projects, since they run in the same JVM
- concurrentRestrictions in Global += Tags.limit(Tags.Test, 1),
-
- resolvers ++= Seq(
- // HTTPS is unavailable for Maven Central
- "Maven Repository" at "http://repo.maven.apache.org/maven2",
- "Apache Repository" at "https://repository.apache.org/content/repositories/releases",
- "JBoss Repository" at "https://repository.jboss.org/nexus/content/repositories/releases/",
- "MQTT Repository" at "https://repo.eclipse.org/content/repositories/paho-releases/",
- "Cloudera Repository" at "http://repository.cloudera.com/artifactory/cloudera-repos/",
- "Pivotal Repository" at "http://repo.spring.io/libs-release/",
- // For Sonatype publishing
- // "sonatype-snapshots" at "https://oss.sonatype.org/content/repositories/snapshots",
- // "sonatype-staging" at "https://oss.sonatype.org/service/local/staging/deploy/maven2/",
- // also check the local Maven repository ~/.m2
- Resolver.mavenLocal
- ),
-
- publishMavenStyle := true,
-
- // useGpg in Global := true,
-
- pomExtra := (
- <parent>
- <groupId>org.apache</groupId>
- <artifactId>apache</artifactId>
- <version>14</version>
- </parent>
- <url>http://spark.apache.org/</url>
- <licenses>
- <license>
- <name>Apache 2.0 License</name>
- <url>http://www.apache.org/licenses/LICENSE-2.0.html</url>
- <distribution>repo</distribution>
- </license>
- </licenses>
- <scm>
- <connection>scm:git:git@github.com:apache/spark.git</connection>
- <url>scm:git:git@github.com:apache/spark.git</url>
- </scm>
- <developers>
- <developer>
- <id>matei</id>
- <name>Matei Zaharia</name>
- <email>matei.zaharia@gmail.com</email>
- <url>http://www.cs.berkeley.edu/~matei</url>
- <organization>Apache Software Foundation</organization>
- <organizationUrl>http://spark.apache.org</organizationUrl>
- </developer>
- </developers>
- <issueManagement>
- <system>JIRA</system>
- <url>https://issues.apache.org/jira/browse/SPARK</url>
- </issueManagement>
- ),
-
- /*
- publishTo <<= version { (v: String) =>
- val nexus = "https://oss.sonatype.org/"
- if (v.trim.endsWith("SNAPSHOT"))
- Some("sonatype-snapshots" at nexus + "content/repositories/snapshots")
- else
- Some("sonatype-staging" at nexus + "service/local/staging/deploy/maven2")
- },
+ publishMavenStyle := true
+ )
- */
-
- libraryDependencies ++= Seq(
- "io.netty" % "netty-all" % "4.0.17.Final",
- "org.eclipse.jetty" % "jetty-server" % jettyVersion,
- "org.eclipse.jetty" % "jetty-util" % jettyVersion,
- "org.eclipse.jetty" % "jetty-plus" % jettyVersion,
- "org.eclipse.jetty" % "jetty-security" % jettyVersion,
- "org.scalatest" %% "scalatest" % "2.1.5" % "test",
- "org.scalacheck" %% "scalacheck" % "1.11.3" % "test",
- "com.novocode" % "junit-interface" % "0.10" % "test",
- "org.easymock" % "easymockclassextension" % "3.1" % "test",
- "org.mockito" % "mockito-all" % "1.9.0" % "test",
- "junit" % "junit" % "4.10" % "test",
- // Needed by cglib which is needed by easymock.
- "asm" % "asm" % "3.3.1" % "test"
- ),
+ /** Following project only exists to pull previous artifacts of Spark for generating
+ Mima ignores. For more information see: SPARK 2071 */
+ lazy val oldDeps = Project("oldDeps", file("dev"), settings = oldDepsSettings)
- testOptions += Tests.Argument(TestFrameworks.JUnit, "-v", "-a"),
- parallelExecution := true,
- /* Workaround for issue #206 (fixed after SBT 0.11.0) */
- watchTransitiveSources <<= Defaults.inDependencies[Task[Seq[File]]](watchSources.task,
- const(std.TaskExtra.constant(Nil)), aggregate = true, includeRoot = true) apply { _.join.map(_.flatten) },
-
- otherResolvers := Seq(Resolver.file("dotM2", file(Path.userHome + "/.m2/repository"))),
- publishLocalConfiguration in MavenCompile <<= (packagedArtifacts, deliverLocal, ivyLoggingLevel) map {
- (arts, _, level) => new PublishConfiguration(None, "dotM2", arts, Seq(), level)
- },
- publishMavenStyle in MavenCompile := true,
- publishLocal in MavenCompile <<= publishTask(publishLocalConfiguration in MavenCompile, deliverLocal),
- publishLocalBoth <<= Seq(publishLocal in MavenCompile, publishLocal).dependOn
- ) ++ net.virtualvoid.sbt.graph.Plugin.graphSettings ++ ScalaStyleSettings ++ genjavadocSettings
-
- val akkaVersion = "2.2.3-shaded-protobuf"
- val chillVersion = "0.3.6"
- val codahaleMetricsVersion = "3.0.0"
- val jblasVersion = "1.2.3"
- val jets3tVersion = if ("^2\\.[3-9]+".r.findFirstIn(hadoopVersion).isDefined) "0.9.0" else "0.7.1"
- val jettyVersion = "8.1.14.v20131031"
- val hiveVersion = "0.12.0"
- val parquetVersion = "1.4.3"
- val slf4jVersion = "1.7.5"
-
- val excludeJBossNetty = ExclusionRule(organization = "org.jboss.netty")
- val excludeIONetty = ExclusionRule(organization = "io.netty")
- val excludeEclipseJetty = ExclusionRule(organization = "org.eclipse.jetty")
- val excludeAsm = ExclusionRule(organization = "org.ow2.asm")
- val excludeOldAsm = ExclusionRule(organization = "asm")
- val excludeCommonsLogging = ExclusionRule(organization = "commons-logging")
- val excludeSLF4J = ExclusionRule(organization = "org.slf4j")
- val excludeScalap = ExclusionRule(organization = "org.scala-lang", artifact = "scalap")
- val excludeHadoop = ExclusionRule(organization = "org.apache.hadoop")
- val excludeCurator = ExclusionRule(organization = "org.apache.curator")
- val excludePowermock = ExclusionRule(organization = "org.powermock")
- val excludeFastutil = ExclusionRule(organization = "it.unimi.dsi")
- val excludeJruby = ExclusionRule(organization = "org.jruby")
- val excludeThrift = ExclusionRule(organization = "org.apache.thrift")
- val excludeServletApi = ExclusionRule(organization = "javax.servlet", artifact = "servlet-api")
- val excludeJUnit = ExclusionRule(organization = "junit")
-
- def sparkPreviousArtifact(id: String, organization: String = "org.apache.spark",
- version: String = "1.0.0", crossVersion: String = "2.10"): Option[sbt.ModuleID] = {
- val fullId = if (crossVersion.isEmpty) id else id + "_" + crossVersion
- Some(organization % fullId % version) // the artifact to compare binary compatibility with
+ def versionArtifact(id: String): Option[sbt.ModuleID] = {
+ val fullId = id + "_2.10"
+ Some("org.apache.spark" % fullId % "1.0.0")
}
- def coreSettings = sharedSettings ++ Seq(
- name := "spark-core",
- libraryDependencies ++= Seq(
- "com.google.guava" % "guava" % "14.0.1",
- "org.apache.commons" % "commons-lang3" % "3.3.2",
- "org.apache.commons" % "commons-math3" % "3.3" % "test",
- "com.google.code.findbugs" % "jsr305" % "1.3.9",
- "log4j" % "log4j" % "1.2.17",
- "org.slf4j" % "slf4j-api" % slf4jVersion,
- "org.slf4j" % "slf4j-log4j12" % slf4jVersion,
- "org.slf4j" % "jul-to-slf4j" % slf4jVersion,
- "org.slf4j" % "jcl-over-slf4j" % slf4jVersion,
- "commons-daemon" % "commons-daemon" % "1.0.10", // workaround for bug HADOOP-9407
- "com.ning" % "compress-lzf" % "1.0.0",
- "org.xerial.snappy" % "snappy-java" % "1.0.5",
- "org.spark-project.akka" %% "akka-remote" % akkaVersion,
- "org.spark-project.akka" %% "akka-slf4j" % akkaVersion,
- "org.spark-project.akka" %% "akka-testkit" % akkaVersion % "test",
- "org.json4s" %% "json4s-jackson" % "3.2.6" excludeAll(excludeScalap),
- "colt" % "colt" % "1.2.0",
- "org.apache.mesos" % "mesos" % "0.18.1" classifier("shaded-protobuf") exclude("com.google.protobuf", "protobuf-java"),
- "commons-net" % "commons-net" % "2.2",
- "net.java.dev.jets3t" % "jets3t" % jets3tVersion excludeAll(excludeCommonsLogging),
- "commons-codec" % "commons-codec" % "1.5", // Prevent jets3t from including the older version of commons-codec
- "org.apache.derby" % "derby" % "10.4.2.0" % "test",
- "org.apache.hadoop" % hadoopClient % hadoopVersion excludeAll(excludeJBossNetty, excludeAsm, excludeCommonsLogging, excludeSLF4J, excludeOldAsm, excludeServletApi),
- "org.apache.curator" % "curator-recipes" % "2.4.0" excludeAll(excludeJBossNetty),
- "com.codahale.metrics" % "metrics-core" % codahaleMetricsVersion,
- "com.codahale.metrics" % "metrics-jvm" % codahaleMetricsVersion,
- "com.codahale.metrics" % "metrics-json" % codahaleMetricsVersion,
- "com.codahale.metrics" % "metrics-graphite" % codahaleMetricsVersion,
- "com.twitter" %% "chill" % chillVersion excludeAll(excludeAsm),
- "com.twitter" % "chill-java" % chillVersion excludeAll(excludeAsm),
- "org.tachyonproject" % "tachyon" % "0.4.1-thrift" excludeAll(excludeHadoop, excludeCurator, excludeEclipseJetty, excludePowermock),
- "com.clearspring.analytics" % "stream" % "2.7.0" excludeAll(excludeFastutil), // Only HyperLogLogPlus is used, which does not depend on fastutil.
- "org.spark-project" % "pyrolite" % "2.0.1",
- "net.sf.py4j" % "py4j" % "0.8.1"
- ),
- libraryDependencies ++= maybeAvro,
- assembleDeps,
- previousArtifact := sparkPreviousArtifact("spark-core")
+ def oldDepsSettings() = Defaults.defaultSettings ++ Seq(
+ name := "old-deps",
+ scalaVersion := "2.10.4",
+ retrieveManaged := true,
+ retrievePattern := "[type]s/[artifact](-[revision])(-[classifier]).[ext]",
+ libraryDependencies := Seq("spark-streaming-mqtt", "spark-streaming-zeromq",
+ "spark-streaming-flume", "spark-streaming-kafka", "spark-streaming-twitter",
+ "spark-streaming", "spark-mllib", "spark-bagel", "spark-graphx",
+ "spark-core").map(versionArtifact(_).get intransitive())
)
- // Create a colon-separate package list adding "org.apache.spark" in front of all of them,
- // for easier specification of JavaDoc package groups
- def packageList(names: String*): String = {
- names.map(s => "org.apache.spark." + s).mkString(":")
+ def enable(settings: Seq[Setting[_]])(projectRef: ProjectRef) = {
+ val existingSettings = projectsMap.getOrElse(projectRef.project, Seq[Setting[_]]())
+ projectsMap += (projectRef.project -> (existingSettings ++ settings))
}
- def rootSettings = sharedSettings ++ scalaJavaUnidocSettings ++ Seq(
- publish := {},
+ // Note ordering of these settings matter.
+ /* Enable shared settings on all projects */
+ (allProjects ++ optionallyEnabledProjects ++ assemblyProjects).foreach(enable(sharedSettings))
- unidocProjectFilter in (ScalaUnidoc, unidoc) :=
- inAnyProject -- inProjects(repl, examples, tools, catalyst, yarn, yarnAlpha),
- unidocProjectFilter in (JavaUnidoc, unidoc) :=
- inAnyProject -- inProjects(repl, examples, bagel, graphx, catalyst, tools, yarn, yarnAlpha),
+ /* Enable tests settings for all projects except examples, assembly and tools */
+ (allProjects ++ optionallyEnabledProjects).foreach(enable(TestSettings.settings))
- // Skip class names containing $ and some internal packages in Javadocs
- unidocAllSources in (JavaUnidoc, unidoc) := {
- (unidocAllSources in (JavaUnidoc, unidoc)).value
- .map(_.filterNot(_.getName.contains("$")))
- .map(_.filterNot(_.getCanonicalPath.contains("akka")))
- .map(_.filterNot(_.getCanonicalPath.contains("deploy")))
- .map(_.filterNot(_.getCanonicalPath.contains("network")))
- .map(_.filterNot(_.getCanonicalPath.contains("executor")))
- .map(_.filterNot(_.getCanonicalPath.contains("python")))
- .map(_.filterNot(_.getCanonicalPath.contains("collection")))
- },
+ /* Enable Mima for all projects except spark, hive, catalyst, sql and repl */
+ // TODO: Add Sql to mima checks
+ allProjects.filterNot(y => Seq(spark, sql, hive, catalyst, repl).exists(x => x == y)).
+ foreach (x => enable(MimaBuild.mimaSettings(sparkHome, x))(x))
- // Javadoc options: create a window title, and group key packages on index page
- javacOptions in doc := Seq(
- "-windowtitle", "Spark " + SPARK_VERSION_SHORT + " JavaDoc",
- "-public",
- "-group", "Core Java API", packageList("api.java", "api.java.function"),
- "-group", "Spark Streaming", packageList(
- "streaming.api.java", "streaming.flume", "streaming.kafka",
- "streaming.mqtt", "streaming.twitter", "streaming.zeromq"
- ),
- "-group", "MLlib", packageList(
- "mllib.classification", "mllib.clustering", "mllib.evaluation.binary", "mllib.linalg",
- "mllib.linalg.distributed", "mllib.optimization", "mllib.rdd", "mllib.recommendation",
- "mllib.regression", "mllib.stat", "mllib.tree", "mllib.tree.configuration",
- "mllib.tree.impurity", "mllib.tree.model", "mllib.util"
- ),
- "-group", "Spark SQL", packageList("sql.api.java", "sql.hive.api.java"),
- "-noqualifier", "java.lang"
- )
- )
+ /* Enable Assembly for all assembly projects */
+ assemblyProjects.foreach(enable(Assembly.settings))
- def replSettings = sharedSettings ++ Seq(
- name := "spark-repl",
- libraryDependencies <+= scalaVersion(v => "org.scala-lang" % "scala-compiler" % v),
- libraryDependencies <+= scalaVersion(v => "org.scala-lang" % "jline" % v),
- libraryDependencies <+= scalaVersion(v => "org.scala-lang" % "scala-reflect" % v)
- )
+ /* Enable unidoc only for the root spark project */
+ enable(Unidoc.settings)(spark)
- def examplesSettings = sharedSettings ++ Seq(
- name := "spark-examples",
- jarName in assembly <<= version map {
- v => "spark-examples-" + v + "-hadoop" + hadoopVersion + ".jar" },
- libraryDependencies ++= Seq(
- "com.twitter" %% "algebird-core" % "0.1.11",
- "org.apache.hbase" % "hbase" % HBASE_VERSION excludeAll(excludeIONetty, excludeJBossNetty, excludeAsm, excludeOldAsm, excludeCommonsLogging, excludeJruby),
- "org.apache.cassandra" % "cassandra-all" % "1.2.6"
- exclude("com.google.guava", "guava")
- exclude("com.googlecode.concurrentlinkedhashmap", "concurrentlinkedhashmap-lru")
- exclude("com.ning","compress-lzf")
- exclude("io.netty", "netty")
- exclude("jline","jline")
- exclude("org.apache.cassandra.deps", "avro")
- excludeAll(excludeSLF4J, excludeIONetty),
- "com.github.scopt" %% "scopt" % "3.2.0"
- )
- ) ++ assemblySettings ++ extraAssemblySettings
-
- def toolsSettings = sharedSettings ++ Seq(
- name := "spark-tools",
- libraryDependencies <+= scalaVersion(v => "org.scala-lang" % "scala-compiler" % v),
- libraryDependencies <+= scalaVersion(v => "org.scala-lang" % "scala-reflect" % v )
- ) ++ assemblySettings ++ extraAssemblySettings
-
- def graphxSettings = sharedSettings ++ Seq(
- name := "spark-graphx",
- previousArtifact := sparkPreviousArtifact("spark-graphx"),
- libraryDependencies ++= Seq(
- "org.jblas" % "jblas" % jblasVersion
- )
- )
+ /* Hive console settings */
+ enable(Hive.settings)(hive)
- def bagelSettings = sharedSettings ++ Seq(
- name := "spark-bagel",
- previousArtifact := sparkPreviousArtifact("spark-bagel")
- )
+ // TODO: move this to its upstream project.
+ override def projectDefinitions(baseDirectory: File): Seq[Project] = {
+ super.projectDefinitions(baseDirectory).map { x =>
+ if (projectsMap.exists(_._1 == x.id)) x.settings(projectsMap(x.id): _*)
+ else x.settings(Seq[Setting[_]](): _*)
+ } ++ Seq[Project](oldDeps)
+ }
- def mllibSettings = sharedSettings ++ Seq(
- name := "spark-mllib",
- previousArtifact := sparkPreviousArtifact("spark-mllib"),
- libraryDependencies ++= Seq(
- "org.jblas" % "jblas" % jblasVersion,
- "org.scalanlp" %% "breeze" % "0.7" excludeAll(excludeJUnit)
- )
- )
+}
- def catalystSettings = sharedSettings ++ Seq(
- name := "catalyst",
- // The mechanics of rewriting expression ids to compare trees in some test cases makes
- // assumptions about the the expression ids being contiguous. Running tests in parallel breaks
- // this non-deterministically. TODO: FIX THIS.
- parallelExecution in Test := false,
- libraryDependencies ++= Seq(
- "com.typesafe" %% "scalalogging-slf4j" % "1.0.1"
- )
- )
+object Hive {
- def sqlCoreSettings = sharedSettings ++ Seq(
- name := "spark-sql",
- libraryDependencies ++= Seq(
- "com.twitter" % "parquet-column" % parquetVersion,
- "com.twitter" % "parquet-hadoop" % parquetVersion,
- "com.fasterxml.jackson.core" % "jackson-databind" % "2.3.0" // json4s-jackson 3.2.6 requires jackson-databind 2.3.0.
- ),
- initialCommands in console :=
- """
- |import org.apache.spark.sql.catalyst.analysis._
- |import org.apache.spark.sql.catalyst.dsl._
- |import org.apache.spark.sql.catalyst.errors._
- |import org.apache.spark.sql.catalyst.expressions._
- |import org.apache.spark.sql.catalyst.plans.logical._
- |import org.apache.spark.sql.catalyst.rules._
- |import org.apache.spark.sql.catalyst.types._
- |import org.apache.spark.sql.catalyst.util._
- |import org.apache.spark.sql.execution
- |import org.apache.spark.sql.test.TestSQLContext._
- |import org.apache.spark.sql.parquet.ParquetTestData""".stripMargin
- )
+ lazy val settings = Seq(
- // Since we don't include hive in the main assembly this project also acts as an alternative
- // assembly jar.
- def hiveSettings = sharedSettings ++ Seq(
- name := "spark-hive",
javaOptions += "-XX:MaxPermSize=1g",
- libraryDependencies ++= Seq(
- "org.spark-project.hive" % "hive-metastore" % hiveVersion,
- "org.spark-project.hive" % "hive-exec" % hiveVersion excludeAll(excludeCommonsLogging),
- "org.spark-project.hive" % "hive-serde" % hiveVersion
- ),
- // Multiple queries rely on the TestHive singleton. See comments there for more details.
+ // Multiple queries rely on the TestHive singleton. See comments there for more details.
parallelExecution in Test := false,
// Supporting all SerDes requires us to depend on deprecated APIs, so we turn off the warnings
// only for this subproject.
@@ -555,67 +190,16 @@ object SparkBuild extends Build {
|import org.apache.spark.sql.parquet.ParquetTestData""".stripMargin
)
- def streamingSettings = sharedSettings ++ Seq(
- name := "spark-streaming",
- previousArtifact := sparkPreviousArtifact("spark-streaming")
- )
-
- def yarnCommonSettings = sharedSettings ++ Seq(
- unmanagedSourceDirectories in Compile <++= baseDirectory { base =>
- Seq(
- base / "../common/src/main/scala"
- )
- },
-
- unmanagedSourceDirectories in Test <++= baseDirectory { base =>
- Seq(
- base / "../common/src/test/scala"
- )
- }
-
- ) ++ extraYarnSettings
-
- def yarnAlphaSettings = yarnCommonSettings ++ Seq(
- name := "spark-yarn-alpha"
- )
-
- def yarnSettings = yarnCommonSettings ++ Seq(
- name := "spark-yarn"
- )
-
- def gangliaSettings = sharedSettings ++ Seq(
- name := "spark-ganglia-lgpl",
- libraryDependencies += "com.codahale.metrics" % "metrics-ganglia" % "3.0.0"
- )
-
- def java8TestsSettings = sharedSettings ++ Seq(
- name := "java8-tests",
- javacOptions := Seq("-target", "1.8", "-source", "1.8"),
- testOptions += Tests.Argument(TestFrameworks.JUnit, "-v", "-a")
- )
-
- // Conditionally include the YARN dependencies because some tools look at all sub-projects and will complain
- // if we refer to nonexistent dependencies (e.g. hadoop-yarn-api from a Hadoop version without YARN).
- def extraYarnSettings = if(isYarnEnabled) yarnEnabledSettings else Seq()
-
- def yarnEnabledSettings = Seq(
- libraryDependencies ++= Seq(
- // Exclude rule required for all ?
- "org.apache.hadoop" % hadoopClient % hadoopVersion excludeAll(excludeJBossNetty, excludeAsm, excludeOldAsm),
- "org.apache.hadoop" % "hadoop-yarn-api" % hadoopVersion excludeAll(excludeJBossNetty, excludeAsm, excludeOldAsm, excludeCommonsLogging),
- "org.apache.hadoop" % "hadoop-yarn-common" % hadoopVersion excludeAll(excludeJBossNetty, excludeAsm, excludeOldAsm, excludeCommonsLogging),
- "org.apache.hadoop" % "hadoop-yarn-client" % hadoopVersion excludeAll(excludeJBossNetty, excludeAsm, excludeOldAsm, excludeCommonsLogging),
- "org.apache.hadoop" % "hadoop-yarn-server-web-proxy" % hadoopVersion excludeAll(excludeJBossNetty, excludeAsm, excludeOldAsm, excludeCommonsLogging, excludeServletApi)
- )
- )
+}
- def assemblyProjSettings = sharedSettings ++ Seq(
- name := "spark-assembly",
- jarName in assembly <<= version map { v => "spark-assembly-" + v + "-hadoop" + hadoopVersion + ".jar" }
- ) ++ assemblySettings ++ extraAssemblySettings
+object Assembly {
+ import sbtassembly.Plugin._
+ import AssemblyKeys._
- def extraAssemblySettings() = Seq(
+ lazy val settings = assemblySettings ++ Seq(
test in assembly := {},
+ jarName in assembly <<= (version, moduleName) map { (v, mName) => mName + "-"+v + "-hadoop" +
+ Option(System.getProperty("hadoop.version")).getOrElse("1.0.4") + ".jar" },
mergeStrategy in assembly := {
case PathList("org", "datanucleus", xs @ _*) => MergeStrategy.discard
case m if m.toLowerCase.endsWith("manifest.mf") => MergeStrategy.discard
@@ -627,57 +211,95 @@ object SparkBuild extends Build {
}
)
- def oldDepsSettings() = Defaults.defaultSettings ++ Seq(
- name := "old-deps",
- scalaVersion := "2.10.4",
- retrieveManaged := true,
- retrievePattern := "[type]s/[artifact](-[revision])(-[classifier]).[ext]",
- libraryDependencies := Seq("spark-streaming-mqtt", "spark-streaming-zeromq",
- "spark-streaming-flume", "spark-streaming-kafka", "spark-streaming-twitter",
- "spark-streaming", "spark-mllib", "spark-bagel", "spark-graphx",
- "spark-core").map(sparkPreviousArtifact(_).get intransitive())
- )
+}
- def twitterSettings() = sharedSettings ++ Seq(
- name := "spark-streaming-twitter",
- previousArtifact := sparkPreviousArtifact("spark-streaming-twitter"),
- libraryDependencies ++= Seq(
- "org.twitter4j" % "twitter4j-stream" % "3.0.3"
- )
- )
+object Unidoc {
- def kafkaSettings() = sharedSettings ++ Seq(
- name := "spark-streaming-kafka",
- previousArtifact := sparkPreviousArtifact("spark-streaming-kafka"),
- libraryDependencies ++= Seq(
- "com.github.sgroschupf" % "zkclient" % "0.1",
- "org.apache.kafka" %% "kafka" % "0.8.0"
- exclude("com.sun.jdmk", "jmxtools")
- exclude("com.sun.jmx", "jmxri")
- exclude("net.sf.jopt-simple", "jopt-simple")
- excludeAll(excludeSLF4J)
- )
- )
+ import BuildCommons._
+ import sbtunidoc.Plugin._
+ import UnidocKeys._
+
+ // for easier specification of JavaDoc package groups
+ private def packageList(names: String*): String = {
+ names.map(s => "org.apache.spark." + s).mkString(":")
+ }
- def flumeSettings() = sharedSettings ++ Seq(
- name := "spark-streaming-flume",
- previousArtifact := sparkPreviousArtifact("spark-streaming-flume"),
- libraryDependencies ++= Seq(
- "org.apache.flume" % "flume-ng-sdk" % "1.4.0" % "compile" excludeAll(excludeIONetty, excludeThrift)
+ lazy val settings = scalaJavaUnidocSettings ++ Seq (
+ publish := {},
+
+ unidocProjectFilter in(ScalaUnidoc, unidoc) :=
+ inAnyProject -- inProjects(repl, examples, tools, catalyst, yarn, yarnAlpha),
+ unidocProjectFilter in(JavaUnidoc, unidoc) :=
+ inAnyProject -- inProjects(repl, bagel, graphx, examples, tools, catalyst, yarn, yarnAlpha),
+
+ // Skip class names containing $ and some internal packages in Javadocs
+ unidocAllSources in (JavaUnidoc, unidoc) := {
+ (unidocAllSources in (JavaUnidoc, unidoc)).value
+ .map(_.filterNot(_.getName.contains("$")))
+ .map(_.filterNot(_.getCanonicalPath.contains("akka")))
+ .map(_.filterNot(_.getCanonicalPath.contains("deploy")))
+ .map(_.filterNot(_.getCanonicalPath.contains("network")))
+ .map(_.filterNot(_.getCanonicalPath.contains("executor")))
+ .map(_.filterNot(_.getCanonicalPath.contains("python")))
+ .map(_.filterNot(_.getCanonicalPath.contains("collection")))
+ },
+
+ // Javadoc options: create a window title, and group key packages on index page
+ javacOptions in doc := Seq(
+ "-windowtitle", "Spark " + version.value.replaceAll("-SNAPSHOT", "") + " JavaDoc",
+ "-public",
+ "-group", "Core Java API", packageList("api.java", "api.java.function"),
+ "-group", "Spark Streaming", packageList(
+ "streaming.api.java", "streaming.flume", "streaming.kafka",
+ "streaming.mqtt", "streaming.twitter", "streaming.zeromq"
+ ),
+ "-group", "MLlib", packageList(
+ "mllib.classification", "mllib.clustering", "mllib.evaluation.binary", "mllib.linalg",
+ "mllib.linalg.distributed", "mllib.optimization", "mllib.rdd", "mllib.recommendation",
+ "mllib.regression", "mllib.stat", "mllib.tree", "mllib.tree.configuration",
+ "mllib.tree.impurity", "mllib.tree.model", "mllib.util"
+ ),
+ "-group", "Spark SQL", packageList("sql.api.java", "sql.hive.api.java"),
+ "-noqualifier", "java.lang"
)
)
+}
- def zeromqSettings() = sharedSettings ++ Seq(
- name := "spark-streaming-zeromq",
- previousArtifact := sparkPreviousArtifact("spark-streaming-zeromq"),
- libraryDependencies ++= Seq(
- "org.spark-project.akka" %% "akka-zeromq" % akkaVersion
+object TestSettings {
+ import BuildCommons._
+
+ lazy val settings = Seq (
+ // Fork new JVMs for tests and set Java options for those
+ fork := true,
+ javaOptions in Test += "-Dspark.home=" + sparkHome,
+ javaOptions in Test += "-Dspark.testing=1",
+ javaOptions in Test += "-Dsun.io.serialization.extendedDebugInfo=true",
+ javaOptions in Test ++= System.getProperties.filter(_._1 startsWith "spark")
+ .map { case (k,v) => s"-D$k=$v" }.toSeq,
+ javaOptions in Test ++= "-Xmx3g -XX:PermSize=128M -XX:MaxNewSize=256m -XX:MaxPermSize=1g"
+ .split(" ").toSeq,
+ javaOptions += "-Xmx3g",
+
+ // Show full stack trace and duration in test cases.
+ testOptions in Test += Tests.Argument("-oDF"),
+ testOptions += Tests.Argument(TestFrameworks.JUnit, "-v", "-a"),
+ // Enable Junit testing.
+ libraryDependencies += "com.novocode" % "junit-interface" % "0.9" % "test",
+ // Only allow one test at a time, even across projects, since they run in the same JVM
+ parallelExecution in Test := false,
+ concurrentRestrictions in Global += Tags.limit(Tags.Test, 1),
+ // Remove certain packages from Scaladoc
+ scalacOptions in (Compile, doc) := Seq(
+ "-groups",
+ "-skip-packages", Seq(
+ "akka",
+ "org.apache.spark.api.python",
+ "org.apache.spark.network",
+ "org.apache.spark.deploy",
+ "org.apache.spark.util.collection"
+ ).mkString(":"),
+ "-doc-title", "Spark " + version.value.replaceAll("-SNAPSHOT", "") + " ScalaDoc"
)
)
- def mqttSettings() = streamingSettings ++ Seq(
- name := "spark-streaming-mqtt",
- previousArtifact := sparkPreviousArtifact("spark-streaming-mqtt"),
- libraryDependencies ++= Seq("org.eclipse.paho" % "mqtt-client" % "0.4.0")
- )
}
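As an aside for readers new to this build layout: the new SparkBuild collects per-project settings in a mutable map via `enable(...)` and only attaches them when `projectDefinitions` materializes the projects. A minimal, self-contained sketch of that pattern follows; `Setting`, `Project`, and `SettingsMapSketch` below are illustrative stand-ins, not the real sbt types:

```scala
// Sketch of the enable/projectsMap pattern used by the new SparkBuild:
// settings are accumulated per project id, then applied when projects are built up.
object SettingsMapSketch {
  import scala.collection.mutable

  type Setting = String                                    // stand-in for sbt.Setting[_]
  final case class Project(id: String, settings: Seq[Setting] = Nil)

  private val projectsMap = mutable.Map.empty[String, Seq[Setting]]

  // Mirrors SparkBuild.enable: append a batch of settings for one project.
  def enable(settings: Seq[Setting])(projectId: String): Unit = {
    val existing = projectsMap.getOrElse(projectId, Seq.empty[Setting])
    projectsMap += (projectId -> (existing ++ settings))
  }

  // Mirrors projectDefinitions: attach whatever was accumulated for each project.
  def applySettings(projects: Seq[Project]): Seq[Project] =
    projects.map(p => p.copy(settings = p.settings ++ projectsMap.getOrElse(p.id, Seq.empty)))

  def main(args: Array[String]): Unit = {
    Seq("core", "sql", "examples").foreach(enable(Seq("sharedSettings")))
    Seq("core", "sql").foreach(enable(Seq("testSettings")))
    applySettings(Seq(Project("core"), Project("sql"), Project("examples"))).foreach(println)
    // e.g. Project(core,List(sharedSettings, testSettings))
  }
}
```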