 assembly/pom.xml                                                 | 10
 core/pom.xml                                                     |  4
 dev/audit-release/README.md                                      | 11
 dev/audit-release/sbt_app_core/src/main/scala/SparkApp.scala     | 15
 dev/audit-release/sbt_app_ganglia/build.sbt                      | 31
 dev/audit-release/sbt_app_ganglia/src/main/scala/SparkApp.scala  | 39
 dev/create-release/create-release.sh                             |  4
 docs/monitoring.md                                               | 13
 extras/spark-ganglia-lgpl/pom.xml                                | 45
 core/src/main/scala/org/apache/spark/metrics/sink/GangliaSink.scala => extras/spark-ganglia-lgpl/src/main/scala/org/apache/spark/metrics/sink/GangliaSink.scala | 0
 pom.xml                                                          |  9
 project/SparkBuild.scala                                         | 25
 12 files changed, 190 insertions(+), 16 deletions(-)
diff --git a/assembly/pom.xml b/assembly/pom.xml
index 82a5985504..22bbbc57d8 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -159,6 +159,16 @@
</dependencies>
</profile>
<profile>
+ <id>spark-ganglia-lgpl</id>
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-ganglia-lgpl_${scala.binary.version}</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+ </dependencies>
+ </profile>
+ <profile>
<id>bigtop-dist</id>
<!-- This profile uses the assembly plugin to create a special "dist" package for BigTop
that contains Spark but not the Hadoop JARs it depends on. -->
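With this profile defined, bundling the Ganglia sink into the assembly becomes opt-in. A minimal sketch of such an invocation (flags other than the profile are illustrative, not prescribed by this patch):

```
# Illustrative: package the Spark assembly with the LGPL Ganglia
# module pulled in via the new Maven profile.
mvn -DskipTests -Pspark-ganglia-lgpl package
```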
diff --git a/core/pom.xml b/core/pom.xml
index 4d7d41a971..2248f9d044 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -181,10 +181,6 @@
</dependency>
<dependency>
<groupId>com.codahale.metrics</groupId>
- <artifactId>metrics-ganglia</artifactId>
- </dependency>
- <dependency>
- <groupId>com.codahale.metrics</groupId>
<artifactId>metrics-graphite</artifactId>
</dependency>
<dependency>
diff --git a/dev/audit-release/README.md b/dev/audit-release/README.md
new file mode 100644
index 0000000000..2437a98672
--- /dev/null
+++ b/dev/audit-release/README.md
@@ -0,0 +1,11 @@
+# Test Application Builds
+This directory includes test applications which are built when auditing releases. You can
+run them locally by setting appropriate environment variables.
+
+```
+$ cd sbt_app_core
+$ SCALA_VERSION=2.10.3 \
+ SPARK_VERSION=1.0.0-SNAPSHOT \
+ SPARK_RELEASE_REPOSITORY=file:///home/patrick/.ivy2/local \
+ sbt run
+```
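The same recipe should apply to the new `sbt_app_ganglia` application introduced later in this patch, since its `build.sbt` reads the same three environment variables:

```
$ cd sbt_app_ganglia
$ SCALA_VERSION=2.10.3 \
  SPARK_VERSION=1.0.0-SNAPSHOT \
  SPARK_RELEASE_REPOSITORY=file:///home/patrick/.ivy2/local \
  sbt run
```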
diff --git a/dev/audit-release/sbt_app_core/src/main/scala/SparkApp.scala b/dev/audit-release/sbt_app_core/src/main/scala/SparkApp.scala
index d49de8b73a..53fe43215e 100644
--- a/dev/audit-release/sbt_app_core/src/main/scala/SparkApp.scala
+++ b/dev/audit-release/sbt_app_core/src/main/scala/SparkApp.scala
@@ -17,6 +17,8 @@
package main.scala
+import scala.util.Try
+
import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._
@@ -31,6 +33,17 @@ object SimpleApp {
println("Failed to parse log files with Spark")
System.exit(-1)
}
- println("Test succeeded")
+
+ // Regression test for SPARK-1167: Remove metrics-ganglia from default build due to LGPL issue
+ val foundConsole = Try(Class.forName("org.apache.spark.metrics.sink.ConsoleSink")).isSuccess
+ val foundGanglia = Try(Class.forName("org.apache.spark.metrics.sink.GangliaSink")).isSuccess
+ if (!foundConsole) {
+ println("Console sink not loaded via spark-core")
+ System.exit(-1)
+ }
+ if (foundGanglia) {
+ println("Ganglia sink was loaded via spark-core")
+ System.exit(-1)
+ }
}
}
diff --git a/dev/audit-release/sbt_app_ganglia/build.sbt b/dev/audit-release/sbt_app_ganglia/build.sbt
new file mode 100644
index 0000000000..55db675c72
--- /dev/null
+++ b/dev/audit-release/sbt_app_ganglia/build.sbt
@@ -0,0 +1,31 @@
+//
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+name := "Ganglia Test"
+
+version := "1.0"
+
+scalaVersion := System.getenv.get("SCALA_VERSION")
+
+libraryDependencies += "org.apache.spark" %% "spark-core" % System.getenv.get("SPARK_VERSION")
+
+libraryDependencies += "org.apache.spark" %% "spark-ganglia-lgpl" % System.getenv.get("SPARK_VERSION")
+
+resolvers ++= Seq(
+ "Spark Release Repository" at System.getenv.get("SPARK_RELEASE_REPOSITORY"),
+ "Akka Repository" at "http://repo.akka.io/releases/",
+ "Spray Repository" at "http://repo.spray.cc/")
diff --git a/dev/audit-release/sbt_app_ganglia/src/main/scala/SparkApp.scala b/dev/audit-release/sbt_app_ganglia/src/main/scala/SparkApp.scala
new file mode 100644
index 0000000000..0be8e64fbf
--- /dev/null
+++ b/dev/audit-release/sbt_app_ganglia/src/main/scala/SparkApp.scala
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package main.scala
+
+import scala.util.Try
+
+import org.apache.spark.SparkContext
+import org.apache.spark.SparkContext._
+
+object SimpleApp {
+ def main(args: Array[String]) {
+ // Regression test for SPARK-1167: Remove metrics-ganglia from default build due to LGPL issue
+ val foundConsole = Try(Class.forName("org.apache.spark.metrics.sink.ConsoleSink")).isSuccess
+ val foundGanglia = Try(Class.forName("org.apache.spark.metrics.sink.GangliaSink")).isSuccess
+ if (!foundConsole) {
+ println("Console sink not loaded via spark-core")
+ System.exit(-1)
+ }
+ if (!foundGanglia) {
+ println("Ganglia sink not loaded via spark-ganglia-lgpl")
+ System.exit(-1)
+ }
+ }
+}
diff --git a/dev/create-release/create-release.sh b/dev/create-release/create-release.sh
index b9088eac37..995106f111 100755
--- a/dev/create-release/create-release.sh
+++ b/dev/create-release/create-release.sh
@@ -49,14 +49,14 @@ mvn -DskipTests \
-Darguments="-DskipTests=true -Dhadoop.version=2.2.0 -Dyarn.version=2.2.0 -Dgpg.passphrase=${GPG_PASSPHRASE}" \
-Dusername=$GIT_USERNAME -Dpassword=$GIT_PASSWORD \
-Dhadoop.version=2.2.0 -Dyarn.version=2.2.0 \
- -Pyarn \
+ -Pyarn -Pspark-ganglia-lgpl \
-Dtag=$GIT_TAG -DautoVersionSubmodules=true \
--batch-mode release:prepare
mvn -DskipTests \
-Darguments="-DskipTests=true -Dhadoop.version=2.2.0 -Dyarn.version=2.2.0 -Dgpg.passphrase=${GPG_PASSPHRASE}" \
-Dhadoop.version=2.2.0 -Dyarn.version=2.2.0 \
- -Pyarn \
+ -Pyarn -Pspark-ganglia-lgpl \
release:perform
rm -rf spark
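A dropped or mistyped profile in a script like this fails silently, so it can be worth confirming what Maven will actually activate. One way, using the stock Maven help plugin, is:

```
# Prints the active profiles for this invocation; spark-ganglia-lgpl
# should appear in the list if the flag is spelled correctly.
mvn help:active-profiles -Pyarn -Pspark-ganglia-lgpl
```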
diff --git a/docs/monitoring.md b/docs/monitoring.md
index e9b1d2b2f4..15bfb04178 100644
--- a/docs/monitoring.md
+++ b/docs/monitoring.md
@@ -48,11 +48,22 @@ Each instance can report to zero or more _sinks_. Sinks are contained in the
* `ConsoleSink`: Logs metrics information to the console.
* `CSVSink`: Exports metrics data to CSV files at regular intervals.
-* `GangliaSink`: Sends metrics to a Ganglia node or multicast group.
* `JmxSink`: Registers metrics for viewing in a JMX console.
* `MetricsServlet`: Adds a servlet within the existing Spark UI to serve metrics data as JSON data.
* `GraphiteSink`: Sends metrics to a Graphite node.
+Spark also supports a Ganglia sink, which is not included in the default build due to
+licensing restrictions:
+
+* `GangliaSink`: Sends metrics to a Ganglia node or multicast group.
+
+To install the `GangliaSink`, you'll need to perform a custom build of Spark. _**Note that
+by embedding this library you will include [LGPL](http://www.gnu.org/copyleft/lesser.html)-licensed
+code in your Spark package**_. For sbt users, set the
+`SPARK_GANGLIA_LGPL` environment variable before building. For Maven users, enable
+the `-Pspark-ganglia-lgpl` profile. In addition to modifying the cluster's Spark build,
+user applications will also need to link to the `spark-ganglia-lgpl` artifact.
+
The syntax of the metrics configuration file is defined in an example configuration file,
`$SPARK_HOME/conf/metrics.properties.template`.
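For readers wiring the sink up after a custom build, a minimal `conf/metrics.properties` entry might look like the following. This is a sketch: the property names follow the `*.sink.<name>.<option>` pattern used by the other sinks, and the host/port values are placeholders for your Ganglia deployment:

```
# Sketch: send metrics from all instances to a Ganglia endpoint.
# Requires a Spark build that includes the spark-ganglia-lgpl module.
*.sink.ganglia.class=org.apache.spark.metrics.sink.GangliaSink
*.sink.ganglia.host=ganglia.example.com
*.sink.ganglia.port=8649
*.sink.ganglia.period=10
*.sink.ganglia.unit=seconds
```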
diff --git a/extras/spark-ganglia-lgpl/pom.xml b/extras/spark-ganglia-lgpl/pom.xml
new file mode 100644
index 0000000000..11ac827ed5
--- /dev/null
+++ b/extras/spark-ganglia-lgpl/pom.xml
@@ -0,0 +1,45 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+~ Licensed to the Apache Software Foundation (ASF) under one or more
+~ contributor license agreements. See the NOTICE file distributed with
+~ this work for additional information regarding copyright ownership.
+~ The ASF licenses this file to You under the Apache License, Version 2.0
+~ (the "License"); you may not use this file except in compliance with
+~ the License. You may obtain a copy of the License at
+~
+~ http://www.apache.org/licenses/LICENSE-2.0
+~
+~ Unless required by applicable law or agreed to in writing, software
+~ distributed under the License is distributed on an "AS IS" BASIS,
+~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+~ See the License for the specific language governing permissions and
+~ limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <parent>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-parent</artifactId>
+ <version>1.0.0-SNAPSHOT</version>
+ <relativePath>../../pom.xml</relativePath>
+ </parent>
+
+ <!-- Ganglia integration is not included by default due to LGPL-licensed code -->
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-ganglia-lgpl_2.10</artifactId>
+ <packaging>jar</packaging>
+ <name>Spark Ganglia Integration</name>
+
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-core_${scala.binary.version}</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+
+ <dependency>
+ <groupId>com.codahale.metrics</groupId>
+ <artifactId>metrics-ganglia</artifactId>
+ </dependency>
+ </dependencies>
+</project>
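An application that needs the sink on its classpath links the new artifact alongside `spark-core`, as the monitoring docs above note. A hedged Maven sketch, using coordinates taken from this pom (the version is whatever Spark release you target):

```
<!-- Illustrative application pom snippet: depend on the Ganglia
     module published by this new build module. -->
<dependency>
  <groupId>org.apache.spark</groupId>
  <artifactId>spark-ganglia-lgpl_2.10</artifactId>
  <version>1.0.0-SNAPSHOT</version>
</dependency>
```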
diff --git a/core/src/main/scala/org/apache/spark/metrics/sink/GangliaSink.scala b/extras/spark-ganglia-lgpl/src/main/scala/org/apache/spark/metrics/sink/GangliaSink.scala
index cd37317da7..cd37317da7 100644
--- a/core/src/main/scala/org/apache/spark/metrics/sink/GangliaSink.scala
+++ b/extras/spark-ganglia-lgpl/src/main/scala/org/apache/spark/metrics/sink/GangliaSink.scala
diff --git a/pom.xml b/pom.xml
index f0c877dcfe..986626f029 100644
--- a/pom.xml
+++ b/pom.xml
@@ -756,12 +756,19 @@
<hadoop.version>0.23.7</hadoop.version>
<!--<hadoop.version>2.0.5-alpha</hadoop.version> -->
</properties>
-
<modules>
<module>yarn</module>
</modules>
+ </profile>
+ <!-- Ganglia integration is not included by default due to LGPL-licensed code -->
+ <profile>
+ <id>spark-ganglia-lgpl</id>
+ <modules>
+ <module>extras/spark-ganglia-lgpl</module>
+ </modules>
</profile>
+
<profile>
<id>java8-tests</id>
<build>
diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
index 8fa220c413..b0c3bf29df 100644
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@ -65,7 +65,7 @@ object SparkBuild extends Build {
lazy val mllib = Project("mllib", file("mllib"), settings = mllibSettings) dependsOn(core)
lazy val assemblyProj = Project("assembly", file("assembly"), settings = assemblyProjSettings)
- .dependsOn(core, graphx, bagel, mllib, repl, streaming) dependsOn(maybeYarn: _*)
+ .dependsOn(core, graphx, bagel, mllib, repl, streaming) dependsOn(maybeYarn: _*) dependsOn(maybeGanglia: _*)
lazy val assembleDeps = TaskKey[Unit]("assemble-deps", "Build assembly of dependencies and packages Spark projects")
@@ -91,19 +91,26 @@ object SparkBuild extends Build {
lazy val hadoopClient = if (hadoopVersion.startsWith("0.20.") || hadoopVersion == "1.0.0") "hadoop-core" else "hadoop-client"
val maybeAvro = if (hadoopVersion.startsWith("0.23.") && isYarnEnabled) Seq("org.apache.avro" % "avro" % "1.7.4") else Seq()
- // Conditionally include the java 8 sub-project
+ // Include Ganglia integration if the user has enabled Ganglia
+ // This is isolated from the normal build due to LGPL-licensed code in the library
+ lazy val isGangliaEnabled = Properties.envOrNone("SPARK_GANGLIA_LGPL").isDefined
+ lazy val gangliaProj = Project("spark-ganglia-lgpl", file("extras/spark-ganglia-lgpl"), settings = gangliaSettings).dependsOn(core)
+ val maybeGanglia: Seq[ClasspathDependency] = if (isGangliaEnabled) Seq(gangliaProj) else Seq()
+ val maybeGangliaRef: Seq[ProjectReference] = if (isGangliaEnabled) Seq(gangliaProj) else Seq()
+
+ // Include the Java 8 project if the JVM version is 8+
lazy val javaVersion = System.getProperty("java.specification.version")
lazy val isJava8Enabled = javaVersion.toDouble >= "1.8".toDouble
val maybeJava8Tests = if (isJava8Enabled) Seq[ProjectReference](java8Tests) else Seq[ProjectReference]()
lazy val java8Tests = Project("java8-tests", file("extras/java8-tests"), settings = java8TestsSettings).
dependsOn(core) dependsOn(streaming % "compile->compile;test->test")
- // Conditionally include the yarn sub-project
+ // Include the YARN project if the user has enabled YARN
lazy val yarnAlpha = Project("yarn-alpha", file("yarn/alpha"), settings = yarnAlphaSettings) dependsOn(core)
lazy val yarn = Project("yarn", file("yarn/stable"), settings = yarnSettings) dependsOn(core)
- lazy val maybeYarn = if (isYarnEnabled) Seq[ClasspathDependency](if (isNewHadoop) yarn else yarnAlpha) else Seq[ClasspathDependency]()
- lazy val maybeYarnRef = if (isYarnEnabled) Seq[ProjectReference](if (isNewHadoop) yarn else yarnAlpha) else Seq[ProjectReference]()
+ lazy val maybeYarn: Seq[ClasspathDependency] = if (isYarnEnabled) Seq(if (isNewHadoop) yarn else yarnAlpha) else Seq()
+ lazy val maybeYarnRef: Seq[ProjectReference] = if (isYarnEnabled) Seq(if (isNewHadoop) yarn else yarnAlpha) else Seq()
lazy val externalTwitter = Project("external-twitter", file("external/twitter"), settings = twitterSettings)
.dependsOn(streaming % "compile->compile;test->test")
@@ -127,7 +134,7 @@ object SparkBuild extends Build {
.dependsOn(core, mllib, graphx, bagel, streaming, externalTwitter) dependsOn(allExternal: _*)
// Everything except assembly, tools, java8Tests and examples belong to packageProjects
- lazy val packageProjects = Seq[ProjectReference](core, repl, bagel, streaming, mllib, graphx) ++ maybeYarnRef
+ lazy val packageProjects = Seq[ProjectReference](core, repl, bagel, streaming, mllib, graphx) ++ maybeYarnRef ++ maybeGangliaRef
lazy val allProjects = packageProjects ++ allExternalRefs ++
Seq[ProjectReference](examples, tools, assemblyProj) ++ maybeJava8Tests
@@ -296,7 +303,6 @@ object SparkBuild extends Build {
"com.codahale.metrics" % "metrics-core" % "3.0.0",
"com.codahale.metrics" % "metrics-jvm" % "3.0.0",
"com.codahale.metrics" % "metrics-json" % "3.0.0",
- "com.codahale.metrics" % "metrics-ganglia" % "3.0.0",
"com.codahale.metrics" % "metrics-graphite" % "3.0.0",
"com.twitter" %% "chill" % "0.3.1" excludeAll(excludeAsm),
"com.twitter" % "chill-java" % "0.3.1" excludeAll(excludeAsm),
@@ -384,6 +390,11 @@ object SparkBuild extends Build {
name := "spark-yarn"
)
+ def gangliaSettings = sharedSettings ++ Seq(
+ name := "spark-ganglia-lgpl",
+ libraryDependencies += "com.codahale.metrics" % "metrics-ganglia" % "3.0.0"
+ )
+
def java8TestsSettings = sharedSettings ++ Seq(
name := "java8-tests",
javacOptions := Seq("-target", "1.8", "-source", "1.8"),
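Since `isGangliaEnabled` only checks that `SPARK_GANGLIA_LGPL` is defined, any value enables the module. An opt-in sbt assembly build would therefore look roughly like:

```
# Any value works; SparkBuild only tests that the variable is set.
SPARK_GANGLIA_LGPL=true sbt/sbt assembly
```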