-rw-r--r--  assembly/pom.xml                                                | 10
-rw-r--r--  core/pom.xml                                                    |  4
-rw-r--r--  dev/audit-release/README.md                                     | 11
-rw-r--r--  dev/audit-release/sbt_app_core/src/main/scala/SparkApp.scala    | 15
-rw-r--r--  dev/audit-release/sbt_app_ganglia/build.sbt                     | 31
-rw-r--r--  dev/audit-release/sbt_app_ganglia/src/main/scala/SparkApp.scala | 39
-rwxr-xr-x  dev/create-release/create-release.sh                            |  4
-rw-r--r--  docs/monitoring.md                                              | 13
-rw-r--r--  extras/spark-ganglia-lgpl/pom.xml                               | 45
-rw-r--r--  extras/spark-ganglia-lgpl/src/main/scala/org/apache/spark/metrics/sink/GangliaSink.scala (renamed from core/src/main/scala/org/apache/spark/metrics/sink/GangliaSink.scala) | 0
-rw-r--r--  pom.xml                                                         |  9
-rw-r--r--  project/SparkBuild.scala                                        | 25
12 files changed, 190 insertions, 16 deletions
diff --git a/assembly/pom.xml b/assembly/pom.xml
index 82a5985504..22bbbc57d8 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -159,6 +159,16 @@
       </dependencies>
     </profile>
     <profile>
+      <id>spark-ganglia-lgpl</id>
+      <dependencies>
+        <dependency>
+          <groupId>org.apache.spark</groupId>
+          <artifactId>spark-ganglia-lgpl_${scala.binary.version}</artifactId>
+          <version>${project.version}</version>
+        </dependency>
+      </dependencies>
+    </profile>
+    <profile>
       <id>bigtop-dist</id>
       <!-- This profile uses the assembly plugin to create a special "dist" package for BigTop
            that contains Spark but not the Hadoop JARs it depends on. -->
diff --git a/core/pom.xml b/core/pom.xml
index 4d7d41a971..2248f9d044 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -181,10 +181,6 @@
     </dependency>
     <dependency>
       <groupId>com.codahale.metrics</groupId>
-      <artifactId>metrics-ganglia</artifactId>
-    </dependency>
-    <dependency>
-      <groupId>com.codahale.metrics</groupId>
       <artifactId>metrics-graphite</artifactId>
     </dependency>
     <dependency>
diff --git a/dev/audit-release/README.md b/dev/audit-release/README.md
new file mode 100644
index 0000000000..2437a98672
--- /dev/null
+++ b/dev/audit-release/README.md
@@ -0,0 +1,11 @@
+# Test Application Builds
+This directory includes test applications which are built when auditing releases. You can
+run them locally by setting appropriate environment variables.
+
+```
+$ cd sbt_app_core
+$ SCALA_VERSION=2.10.3 \
+  SPARK_VERSION=1.0.0-SNAPSHOT \
+  SPARK_RELEASE_REPOSITORY=file:///home/patrick/.ivy2/local \
+  sbt run
+```
diff --git a/dev/audit-release/sbt_app_core/src/main/scala/SparkApp.scala b/dev/audit-release/sbt_app_core/src/main/scala/SparkApp.scala
index d49de8b73a..53fe43215e 100644
--- a/dev/audit-release/sbt_app_core/src/main/scala/SparkApp.scala
+++ b/dev/audit-release/sbt_app_core/src/main/scala/SparkApp.scala
@@ -17,6 +17,8 @@
 
 package main.scala
 
+import scala.util.Try
+
 import org.apache.spark.SparkContext
 import org.apache.spark.SparkContext._
 
@@ -31,6 +33,17 @@ object SimpleApp {
       println("Failed to parse log files with Spark")
       System.exit(-1)
     }
-    println("Test succeeded")
+
+    // Regression test for SPARK-1167: Remove metrics-ganglia from default build due to LGPL issue
+    val foundConsole = Try(Class.forName("org.apache.spark.metrics.sink.ConsoleSink")).isSuccess
+    val foundGanglia = Try(Class.forName("org.apache.spark.metrics.sink.GangliaSink")).isSuccess
+    if (!foundConsole) {
+      println("Console sink not loaded via spark-core")
+      System.exit(-1)
+    }
+    if (foundGanglia) {
+      println("Ganglia sink was loaded via spark-core")
+      System.exit(-1)
+    }
   }
 }
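The audit check above is just a reflective classpath probe. A minimal, self-contained sketch of the same pattern (the sink class names come from the diff; the `SinkProbe` object name is hypothetical):

```scala
import scala.util.Try

// Classpath probe as used by the audit apps: Class.forName throws
// ClassNotFoundException when the class is absent, and Try converts
// that failure into a Boolean instead of crashing the program.
object SinkProbe {
  def isLoadable(className: String): Boolean =
    Try(Class.forName(className)).isSuccess

  def main(args: Array[String]): Unit = {
    // ConsoleSink ships with spark-core; GangliaSink only arrives via spark-ganglia-lgpl.
    Seq("org.apache.spark.metrics.sink.ConsoleSink",
        "org.apache.spark.metrics.sink.GangliaSink")
      .foreach(name => println(s"$name loadable: ${isLoadable(name)}"))
  }
}
```

Run with and without the Ganglia module on the classpath to see the two outcomes the audit apps assert on.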
diff --git a/dev/audit-release/sbt_app_ganglia/build.sbt b/dev/audit-release/sbt_app_ganglia/build.sbt
new file mode 100644
index 0000000000..55db675c72
--- /dev/null
+++ b/dev/audit-release/sbt_app_ganglia/build.sbt
@@ -0,0 +1,31 @@
+//
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+name := "Ganglia Test"
+
+version := "1.0"
+
+scalaVersion := System.getenv.get("SCALA_VERSION")
+
+libraryDependencies += "org.apache.spark" %% "spark-core" % System.getenv.get("SPARK_VERSION")
+
+libraryDependencies += "org.apache.spark" %% "spark-ganglia-lgpl" % System.getenv.get("SPARK_VERSION")
+
+resolvers ++= Seq(
+  "Spark Release Repository" at System.getenv.get("SPARK_RELEASE_REPOSITORY"),
+  "Akka Repository" at "http://repo.akka.io/releases/",
+  "Spray Repository" at "http://repo.spray.cc/")
diff --git a/dev/audit-release/sbt_app_ganglia/src/main/scala/SparkApp.scala b/dev/audit-release/sbt_app_ganglia/src/main/scala/SparkApp.scala
new file mode 100644
index 0000000000..0be8e64fbf
--- /dev/null
+++ b/dev/audit-release/sbt_app_ganglia/src/main/scala/SparkApp.scala
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package main.scala
+
+import scala.util.Try
+
+import org.apache.spark.SparkContext
+import org.apache.spark.SparkContext._
+
+object SimpleApp {
+  def main(args: Array[String]) {
+    // Regression test for SPARK-1167: Remove metrics-ganglia from default build due to LGPL issue
+    val foundConsole = Try(Class.forName("org.apache.spark.metrics.sink.ConsoleSink")).isSuccess
+    val foundGanglia = Try(Class.forName("org.apache.spark.metrics.sink.GangliaSink")).isSuccess
+    if (!foundConsole) {
+      println("Console sink not loaded via spark-core")
+      System.exit(-1)
+    }
+    if (!foundGanglia) {
+      println("Ganglia sink not loaded via spark-ganglia-lgpl")
+      System.exit(-1)
+    }
+  }
+}
diff --git a/dev/create-release/create-release.sh b/dev/create-release/create-release.sh
index b9088eac37..995106f111 100755
--- a/dev/create-release/create-release.sh
+++ b/dev/create-release/create-release.sh
@@ -49,14 +49,14 @@ mvn -DskipTests \
   -Darguments="-DskipTests=true -Dhadoop.version=2.2.0 -Dyarn.version=2.2.0 -Dgpg.passphrase=${GPG_PASSPHRASE}" \
   -Dusername=$GIT_USERNAME -Dpassword=$GIT_PASSWORD \
   -Dhadoop.version=2.2.0 -Dyarn.version=2.2.0 \
-  -Pyarn \
+  -Pyarn -Pspark-ganglia-lgpl \
   -Dtag=$GIT_TAG -DautoVersionSubmodules=true \
   --batch-mode release:prepare
 
 mvn -DskipTests \
   -Darguments="-DskipTests=true -Dhadoop.version=2.2.0 -Dyarn.version=2.2.0 -Dgpg.passphrase=${GPG_PASSPHRASE}" \
   -Dhadoop.version=2.2.0 -Dyarn.version=2.2.0 \
-  -Pyarn \
+  -Pyarn -Pspark-ganglia-lgpl \
   release:perform
 
 rm -rf spark
diff --git a/docs/monitoring.md b/docs/monitoring.md
index e9b1d2b2f4..15bfb04178 100644
--- a/docs/monitoring.md
+++ b/docs/monitoring.md
@@ -48,11 +48,22 @@ Each instance can report to zero or more _sinks_. Sinks are contained in the
 
 * `ConsoleSink`: Logs metrics information to the console.
 * `CSVSink`: Exports metrics data to CSV files at regular intervals.
-* `GangliaSink`: Sends metrics to a Ganglia node or multicast group.
 * `JmxSink`: Registers metrics for viewing in a JMX console.
 * `MetricsServlet`: Adds a servlet within the existing Spark UI to serve metrics data as JSON data.
 * `GraphiteSink`: Sends metrics to a Graphite node.
 
+Spark also supports a Ganglia sink which is not included in the default build due to
+licensing restrictions:
+
+* `GangliaSink`: Sends metrics to a Ganglia node or multicast group.
+
+To install the `GangliaSink` you'll need to perform a custom build of Spark. _**Note that
+by embedding this library you will include [LGPL](http://www.gnu.org/copyleft/lesser.html)-licensed
+code in your Spark package**_. For sbt users, set the
+`SPARK_GANGLIA_LGPL` environment variable before building. For Maven users, enable
+the `-Pspark-ganglia-lgpl` profile. In addition to modifying the cluster's Spark build,
+user applications will also need to link to the `spark-ganglia-lgpl` artifact.
+
 The syntax of the metrics configuration file is defined in an example configuration file,
 `$SPARK_HOME/conf/metrics.properties.template`.
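With a Ganglia-enabled build in place, the sink is activated through the metrics configuration file mentioned above. A sketch of plausible `conf/metrics.properties` entries, assuming the `host`/`port` property names documented in `metrics.properties.template`; the multicast group and port below are illustrative:

```
# Attach the Ganglia sink to all instances (requires a spark-ganglia-lgpl build).
*.sink.ganglia.class=org.apache.spark.metrics.sink.GangliaSink
# Illustrative targets; point these at your Ganglia node or multicast group.
*.sink.ganglia.host=239.2.11.71
*.sink.ganglia.port=8649
```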
diff --git a/extras/spark-ganglia-lgpl/pom.xml b/extras/spark-ganglia-lgpl/pom.xml
new file mode 100644
index 0000000000..11ac827ed5
--- /dev/null
+++ b/extras/spark-ganglia-lgpl/pom.xml
@@ -0,0 +1,45 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  ~ Licensed to the Apache Software Foundation (ASF) under one or more
+  ~ contributor license agreements. See the NOTICE file distributed with
+  ~ this work for additional information regarding copyright ownership.
+  ~ The ASF licenses this file to You under the Apache License, Version 2.0
+  ~ (the "License"); you may not use this file except in compliance with
+  ~ the License. You may obtain a copy of the License at
+  ~
+  ~    http://www.apache.org/licenses/LICENSE-2.0
+  ~
+  ~ Unless required by applicable law or agreed to in writing, software
+  ~ distributed under the License is distributed on an "AS IS" BASIS,
+  ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  ~ See the License for the specific language governing permissions and
+  ~ limitations under the License.
+  -->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+  <parent>
+    <groupId>org.apache.spark</groupId>
+    <artifactId>spark-parent</artifactId>
+    <version>1.0.0-SNAPSHOT</version>
+    <relativePath>../../pom.xml</relativePath>
+  </parent>
+
+  <!-- Ganglia integration is not included by default due to LGPL-licensed code -->
+  <groupId>org.apache.spark</groupId>
+  <artifactId>spark-ganglia-lgpl_2.10</artifactId>
+  <packaging>jar</packaging>
+  <name>Spark Ganglia Integration</name>
+
+  <dependencies>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-core_${scala.binary.version}</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+
+    <dependency>
+      <groupId>com.codahale.metrics</groupId>
+      <artifactId>metrics-ganglia</artifactId>
+    </dependency>
+  </dependencies>
+</project>
diff --git a/core/src/main/scala/org/apache/spark/metrics/sink/GangliaSink.scala b/extras/spark-ganglia-lgpl/src/main/scala/org/apache/spark/metrics/sink/GangliaSink.scala
index cd37317da7..cd37317da7 100644
--- a/core/src/main/scala/org/apache/spark/metrics/sink/GangliaSink.scala
+++ b/extras/spark-ganglia-lgpl/src/main/scala/org/apache/spark/metrics/sink/GangliaSink.scala
diff --git a/pom.xml b/pom.xml
--- a/pom.xml
+++ b/pom.xml
@@ -756,12 +756,19 @@
         <hadoop.version>0.23.7</hadoop.version>
         <!--<hadoop.version>2.0.5-alpha</hadoop.version> -->
       </properties>
-
       <modules>
         <module>yarn</module>
       </modules>
+    </profile>
 
+    <!-- Ganglia integration is not included by default due to LGPL-licensed code -->
+    <profile>
+      <id>spark-ganglia-lgpl</id>
+      <modules>
+        <module>extras/spark-ganglia-lgpl</module>
+      </modules>
     </profile>
+
     <profile>
       <id>java8-tests</id>
       <build>
diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
index 8fa220c413..b0c3bf29df 100644
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@ -65,7 +65,7 @@ object SparkBuild extends Build {
   lazy val mllib = Project("mllib", file("mllib"), settings = mllibSettings) dependsOn(core)
 
   lazy val assemblyProj = Project("assembly", file("assembly"), settings = assemblyProjSettings)
-    .dependsOn(core, graphx, bagel, mllib, repl, streaming) dependsOn(maybeYarn: _*)
+    .dependsOn(core, graphx, bagel, mllib, repl, streaming) dependsOn(maybeYarn: _*) dependsOn(maybeGanglia: _*)
 
   lazy val assembleDeps = TaskKey[Unit]("assemble-deps", "Build assembly of dependencies and packages Spark projects")
 
@@ -91,19 +91,26 @@ object SparkBuild extends Build {
   lazy val hadoopClient = if (hadoopVersion.startsWith("0.20.") || hadoopVersion == "1.0.0") "hadoop-core" else "hadoop-client"
   val maybeAvro = if (hadoopVersion.startsWith("0.23.") && isYarnEnabled) Seq("org.apache.avro" % "avro" % "1.7.4") else Seq()
 
-  // Conditionally include the java 8 sub-project
+  // Include Ganglia integration if the user has enabled Ganglia
+  // This is isolated from the normal build due to LGPL-licensed code in the library
+  lazy val isGangliaEnabled = Properties.envOrNone("SPARK_GANGLIA_LGPL").isDefined
+  lazy val gangliaProj = Project("spark-ganglia-lgpl", file("extras/spark-ganglia-lgpl"), settings = gangliaSettings).dependsOn(core)
+  val maybeGanglia: Seq[ClasspathDependency] = if (isGangliaEnabled) Seq(gangliaProj) else Seq()
+  val maybeGangliaRef: Seq[ProjectReference] = if (isGangliaEnabled) Seq(gangliaProj) else Seq()
+
+  // Include the Java 8 project if the JVM version is 8+
   lazy val javaVersion = System.getProperty("java.specification.version")
   lazy val isJava8Enabled = javaVersion.toDouble >= "1.8".toDouble
   val maybeJava8Tests = if (isJava8Enabled) Seq[ProjectReference](java8Tests) else Seq[ProjectReference]()
   lazy val java8Tests = Project("java8-tests", file("extras/java8-tests"), settings = java8TestsSettings).
     dependsOn(core) dependsOn(streaming % "compile->compile;test->test")
 
-  // Conditionally include the yarn sub-project
+  // Include the YARN project if the user has enabled YARN
   lazy val yarnAlpha = Project("yarn-alpha", file("yarn/alpha"), settings = yarnAlphaSettings) dependsOn(core)
   lazy val yarn = Project("yarn", file("yarn/stable"), settings = yarnSettings) dependsOn(core)
 
-  lazy val maybeYarn = if (isYarnEnabled) Seq[ClasspathDependency](if (isNewHadoop) yarn else yarnAlpha) else Seq[ClasspathDependency]()
-  lazy val maybeYarnRef = if (isYarnEnabled) Seq[ProjectReference](if (isNewHadoop) yarn else yarnAlpha) else Seq[ProjectReference]()
+  lazy val maybeYarn: Seq[ClasspathDependency] = if (isYarnEnabled) Seq(if (isNewHadoop) yarn else yarnAlpha) else Seq()
+  lazy val maybeYarnRef: Seq[ProjectReference] = if (isYarnEnabled) Seq(if (isNewHadoop) yarn else yarnAlpha) else Seq()
 
   lazy val externalTwitter = Project("external-twitter", file("external/twitter"), settings = twitterSettings)
     .dependsOn(streaming % "compile->compile;test->test")
@@ -127,7 +134,7 @@ object SparkBuild extends Build {
     .dependsOn(core, mllib, graphx, bagel, streaming, externalTwitter) dependsOn(allExternal: _*)
 
   // Everything except assembly, tools, java8Tests and examples belong to packageProjects
-  lazy val packageProjects = Seq[ProjectReference](core, repl, bagel, streaming, mllib, graphx) ++ maybeYarnRef
+  lazy val packageProjects = Seq[ProjectReference](core, repl, bagel, streaming, mllib, graphx) ++ maybeYarnRef ++ maybeGangliaRef
 
   lazy val allProjects = packageProjects ++ allExternalRefs ++
     Seq[ProjectReference](examples, tools, assemblyProj) ++ maybeJava8Tests
 
@@ -296,7 +303,6 @@ object SparkBuild extends Build {
     "com.codahale.metrics" % "metrics-core" % "3.0.0",
     "com.codahale.metrics" % "metrics-jvm" % "3.0.0",
     "com.codahale.metrics" % "metrics-json" % "3.0.0",
-    "com.codahale.metrics" % "metrics-ganglia" % "3.0.0",
     "com.codahale.metrics" % "metrics-graphite" % "3.0.0",
     "com.twitter" %% "chill" % "0.3.1" excludeAll(excludeAsm),
     "com.twitter" % "chill-java" % "0.3.1" excludeAll(excludeAsm),
@@ -384,6 +390,11 @@ object SparkBuild extends Build {
     name := "spark-yarn"
   )
 
+  def gangliaSettings = sharedSettings ++ Seq(
+    name := "spark-ganglia-lgpl",
+    libraryDependencies += "com.codahale.metrics" % "metrics-ganglia" % "3.0.0"
+  )
+
   def java8TestsSettings = sharedSettings ++ Seq(
     name := "java8-tests",
     javacOptions := Seq("-target", "1.8", "-source", "1.8"),
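The sbt side of the change gates the optional module on an environment variable via `Properties.envOrNone`. A stripped-down, runnable sketch of that gating pattern outside the Spark build (the `OptionalModuleGate` name and module list are illustrative):

```scala
import scala.util.Properties

// Env-var-gated optional module, as in SparkBuild.scala above: the extra
// project is added to the build only when SPARK_GANGLIA_LGPL is set.
object OptionalModuleGate {
  def main(args: Array[String]): Unit = {
    val isGangliaEnabled = Properties.envOrNone("SPARK_GANGLIA_LGPL").isDefined
    val modules = Seq("core", "repl", "streaming") ++
      (if (isGangliaEnabled) Seq("spark-ganglia-lgpl") else Nil)
    println(s"modules included in this build: ${modules.mkString(", ")}")
  }
}
```

Running with `SPARK_GANGLIA_LGPL` set to any value includes the extra module, mirroring how `maybeGanglia` and `maybeGangliaRef` fold into the assembly and `packageProjects` above.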