From 666d93c294458cb056cb590eb11bb6cf979861e5 Mon Sep 17 00:00:00 2001 From: Matei Zaharia Date: Tue, 27 Aug 2013 19:23:54 -0700 Subject: Update Maven build to create assemblies expected by new scripts This includes the following changes: - The "assembly" package now builds in Maven by default, and creates an assembly containing both hadoop-client and Spark, unlike the old BigTop distribution assembly that skipped hadoop-client - There is now a bigtop-dist package to build the old BigTop assembly - The repl-bin package is no longer built by default since the scripts don't rely on it; instead it can be enabled with -Prepl-bin - Py4J is now included in the assembly/lib folder as a local Maven repo, so that the Maven package can link to it - run-example now adds the original Spark classpath as well because the Maven examples assembly lists spark-core and such as provided - The various Maven projects add a spark-yarn dependency correctly --- assembly/lib/PY4J_LICENSE.txt | 27 ++++ assembly/lib/PY4J_VERSION.txt | 1 + assembly/lib/net/sf/py4j/py4j/0.7/py4j-0.7.jar | Bin 0 -> 103286 bytes assembly/lib/net/sf/py4j/py4j/0.7/py4j-0.7.pom | 9 ++ .../lib/net/sf/py4j/py4j/maven-metadata-local.xml | 12 ++ assembly/pom.xml | 138 +++++++++++++++++---- assembly/src/main/assembly/assembly.xml | 19 ++- core/lib/PY4J_LICENSE.txt | 27 ---- core/lib/PY4J_VERSION.txt | 1 - core/lib/py4j0.7.jar | Bin 103286 -> 0 bytes examples/pom.xml | 62 ++++++++- pom.xml | 16 +-- project/SparkBuild.scala | 4 +- run-example | 9 +- 14 files changed, 250 insertions(+), 75 deletions(-) create mode 100644 assembly/lib/PY4J_LICENSE.txt create mode 100644 assembly/lib/PY4J_VERSION.txt create mode 100644 assembly/lib/net/sf/py4j/py4j/0.7/py4j-0.7.jar create mode 100644 assembly/lib/net/sf/py4j/py4j/0.7/py4j-0.7.pom create mode 100644 assembly/lib/net/sf/py4j/py4j/maven-metadata-local.xml delete mode 100644 core/lib/PY4J_LICENSE.txt delete mode 100644 core/lib/PY4J_VERSION.txt delete mode 100644 
core/lib/py4j0.7.jar diff --git a/assembly/lib/PY4J_LICENSE.txt b/assembly/lib/PY4J_LICENSE.txt new file mode 100644 index 0000000000..a70279ca14 --- /dev/null +++ b/assembly/lib/PY4J_LICENSE.txt @@ -0,0 +1,27 @@ + +Copyright (c) 2009-2011, Barthelemy Dagenais All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +- Redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. + +- Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. + +- The name of the author may not be used to endorse or promote products +derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
diff --git a/assembly/lib/PY4J_VERSION.txt b/assembly/lib/PY4J_VERSION.txt new file mode 100644 index 0000000000..04a0cd52a8 --- /dev/null +++ b/assembly/lib/PY4J_VERSION.txt @@ -0,0 +1 @@ +b7924aabe9c5e63f0a4d8bbd17019534c7ec014e diff --git a/assembly/lib/net/sf/py4j/py4j/0.7/py4j-0.7.jar b/assembly/lib/net/sf/py4j/py4j/0.7/py4j-0.7.jar new file mode 100644 index 0000000000..73b7ddb7d1 Binary files /dev/null and b/assembly/lib/net/sf/py4j/py4j/0.7/py4j-0.7.jar differ diff --git a/assembly/lib/net/sf/py4j/py4j/0.7/py4j-0.7.pom b/assembly/lib/net/sf/py4j/py4j/0.7/py4j-0.7.pom new file mode 100644 index 0000000000..1c730e19b4 --- /dev/null +++ b/assembly/lib/net/sf/py4j/py4j/0.7/py4j-0.7.pom @@ -0,0 +1,9 @@ + + + 4.0.0 + net.sf.py4j + py4j + 0.7 + POM was created from install:install-file + diff --git a/assembly/lib/net/sf/py4j/py4j/maven-metadata-local.xml b/assembly/lib/net/sf/py4j/py4j/maven-metadata-local.xml new file mode 100644 index 0000000000..6942ff45e7 --- /dev/null +++ b/assembly/lib/net/sf/py4j/py4j/maven-metadata-local.xml @@ -0,0 +1,12 @@ + + + net.sf.py4j + py4j + + 0.7 + + 0.7 + + 20130828020333 + + diff --git a/assembly/pom.xml b/assembly/pom.xml index ca20ccadba..74990b6361 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -1,4 +1,21 @@ + + 4.0.0 @@ -13,29 +30,13 @@ Spark Project Assembly http://spark-project.org/ - - - - org.apache.maven.plugins - maven-assembly-plugin - 2.4 - - - dist - package - - single - - - - src/main/assembly/assembly.xml - - - - - - - + + + + lib + file://${project.basedir}/lib + + @@ -63,5 +64,96 @@ spark-streaming ${project.version} + + net.sf.py4j + py4j + 0.7 + + + + + + + org.apache.maven.plugins + maven-shade-plugin + + false + ${project.build.directory}/scala-${scala.version}/${project.artifactId}-${project.version}-hadoop${hadoop.version}.jar + + + *:* + + + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + + + + + package + + shade + + + + + + reference.conf + + + + + + + + + + + + hadoop2-yarn 
+ + + org.spark-project + spark-yarn + ${project.version} + + + + + bigtop-dist + + + + + org.apache.maven.plugins + maven-assembly-plugin + 2.4 + + + dist + package + + single + + + + src/main/assembly/assembly.xml + + + + + + + + + diff --git a/assembly/src/main/assembly/assembly.xml b/assembly/src/main/assembly/assembly.xml index 14485b7181..4543b52c93 100644 --- a/assembly/src/main/assembly/assembly.xml +++ b/assembly/src/main/assembly/assembly.xml @@ -1,3 +1,19 @@ + dist @@ -36,7 +52,8 @@ /bin - run* + run-example* + spark-class* spark-shell* spark-executor* diff --git a/core/lib/PY4J_LICENSE.txt b/core/lib/PY4J_LICENSE.txt deleted file mode 100644 index a70279ca14..0000000000 --- a/core/lib/PY4J_LICENSE.txt +++ /dev/null @@ -1,27 +0,0 @@ - -Copyright (c) 2009-2011, Barthelemy Dagenais All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -- Redistributions of source code must retain the above copyright notice, this -list of conditions and the following disclaimer. - -- Redistributions in binary form must reproduce the above copyright notice, -this list of conditions and the following disclaimer in the documentation -and/or other materials provided with the distribution. - -- The name of the author may not be used to endorse or promote products -derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. diff --git a/core/lib/PY4J_VERSION.txt b/core/lib/PY4J_VERSION.txt deleted file mode 100644 index 04a0cd52a8..0000000000 --- a/core/lib/PY4J_VERSION.txt +++ /dev/null @@ -1 +0,0 @@ -b7924aabe9c5e63f0a4d8bbd17019534c7ec014e diff --git a/core/lib/py4j0.7.jar b/core/lib/py4j0.7.jar deleted file mode 100644 index 73b7ddb7d1..0000000000 Binary files a/core/lib/py4j0.7.jar and /dev/null differ diff --git a/examples/pom.xml b/examples/pom.xml index d24bd404fa..687fbcca8f 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -36,21 +36,25 @@ org.spark-project spark-core ${project.version} + provided org.spark-project spark-streaming ${project.version} + provided org.spark-project spark-mllib ${project.version} + provided org.spark-project spark-bagel ${project.version} + provided org.apache.hbase @@ -67,10 +71,6 @@ - - org.scala-lang - scala-library - org.eclipse.jetty jetty-server @@ -126,13 +126,63 @@ + + + + hadoop2-yarn + + + org.spark-project + spark-yarn + ${project.version} + provided + + + + + target/scala-${scala.version}/classes target/scala-${scala.version}/test-classes - org.scalatest - scalatest-maven-plugin + org.apache.maven.plugins + maven-shade-plugin + + false + ${project.build.directory}/scala-${scala.version}/${project.artifactId}-assembly-${project.version}.jar + + + *:* + + + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + + + + + package + + shade + + + + + + reference.conf + + + + + diff --git a/pom.xml b/pom.xml 
index ea9548359f..e2fd54a966 100644 --- a/pom.xml +++ b/pom.xml @@ -62,6 +62,7 @@ tools streaming repl + assembly @@ -75,7 +76,7 @@ 1.7.2 1.2.17 1.0.4 - + 0.94.6 64m 512m @@ -743,21 +744,10 @@ - assembly + repl-bin false - - assembly - - - - expensive-modules - - - !noExpensive - - repl-bin diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 8797e65b8d..2e26812671 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -41,7 +41,7 @@ object SparkBuild extends Build { .dependsOn(core, bagel, mllib) dependsOn(maybeYarn: _*) lazy val examples = Project("examples", file("examples"), settings = examplesSettings) - .dependsOn(core, mllib, bagel, streaming) + .dependsOn(core, mllib, bagel, streaming) dependsOn(maybeYarn: _*) lazy val tools = Project("tools", file("tools"), settings = toolsSettings) dependsOn(core) dependsOn(streaming) @@ -261,7 +261,7 @@ object SparkBuild extends Build { def yarnSettings = sharedSettings ++ Seq( name := "spark-yarn" - ) ++ extraYarnSettings ++ assemblySettings ++ extraAssemblySettings + ) ++ extraYarnSettings // Conditionally include the YARN dependencies because some tools look at all sub-projects and will complain // if we refer to nonexistent dependencies (e.g. hadoop-yarn-api from a Hadoop version without YARN). diff --git a/run-example b/run-example index e1b26257e1..ccd4356bdf 100755 --- a/run-example +++ b/run-example @@ -54,6 +54,11 @@ if [[ -z $SPARK_EXAMPLES_JAR ]]; then exit 1 fi +# Since the examples JAR ideally shouldn't include spark-core (that dependency should be +# "provided"), also add our standard Spark classpath, built using compute-classpath.sh. 
+CLASSPATH=`$FWDIR/bin/compute-classpath.sh` +CLASSPATH="$SPARK_EXAMPLES_JAR:$CLASSPATH" + # Find java binary if [ -n "${JAVA_HOME}" ]; then RUNNER="${JAVA_HOME}/bin/java" @@ -68,9 +73,9 @@ fi if [ "$SPARK_PRINT_LAUNCH_COMMAND" == "1" ]; then echo -n "Spark Command: " - echo "$RUNNER" -cp "$SPARK_EXAMPLES_JAR" "$@" + echo "$RUNNER" -cp "$CLASSPATH" "$@" echo "========================================" echo fi -exec "$RUNNER" -cp "$SPARK_EXAMPLES_JAR" "$@" +exec "$RUNNER" -cp "$CLASSPATH" "$@" -- cgit v1.2.3