aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--assembly/lib/PY4J_LICENSE.txt (renamed from core/lib/PY4J_LICENSE.txt)0
-rw-r--r--assembly/lib/PY4J_VERSION.txt (renamed from core/lib/PY4J_VERSION.txt)0
-rw-r--r--assembly/lib/net/sf/py4j/py4j/0.7/py4j-0.7.jar (renamed from core/lib/py4j0.7.jar)bin103286 -> 103286 bytes
-rw-r--r--assembly/lib/net/sf/py4j/py4j/0.7/py4j-0.7.pom9
-rw-r--r--assembly/lib/net/sf/py4j/py4j/maven-metadata-local.xml12
-rw-r--r--assembly/pom.xml138
-rw-r--r--assembly/src/main/assembly/assembly.xml19
-rw-r--r--examples/pom.xml62
-rw-r--r--pom.xml16
-rw-r--r--project/SparkBuild.scala4
-rwxr-xr-xrun-example9
11 files changed, 222 insertions, 47 deletions
diff --git a/core/lib/PY4J_LICENSE.txt b/assembly/lib/PY4J_LICENSE.txt
index a70279ca14..a70279ca14 100644
--- a/core/lib/PY4J_LICENSE.txt
+++ b/assembly/lib/PY4J_LICENSE.txt
diff --git a/core/lib/PY4J_VERSION.txt b/assembly/lib/PY4J_VERSION.txt
index 04a0cd52a8..04a0cd52a8 100644
--- a/core/lib/PY4J_VERSION.txt
+++ b/assembly/lib/PY4J_VERSION.txt
diff --git a/core/lib/py4j0.7.jar b/assembly/lib/net/sf/py4j/py4j/0.7/py4j-0.7.jar
index 73b7ddb7d1..73b7ddb7d1 100644
--- a/core/lib/py4j0.7.jar
+++ b/assembly/lib/net/sf/py4j/py4j/0.7/py4j-0.7.jar
Binary files differ
diff --git a/assembly/lib/net/sf/py4j/py4j/0.7/py4j-0.7.pom b/assembly/lib/net/sf/py4j/py4j/0.7/py4j-0.7.pom
new file mode 100644
index 0000000000..1c730e19b4
--- /dev/null
+++ b/assembly/lib/net/sf/py4j/py4j/0.7/py4j-0.7.pom
@@ -0,0 +1,9 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd" xmlns="http://maven.apache.org/POM/4.0.0"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+ <modelVersion>4.0.0</modelVersion>
+ <groupId>net.sf.py4j</groupId>
+ <artifactId>py4j</artifactId>
+ <version>0.7</version>
+ <description>POM was created from install:install-file</description>
+</project>
diff --git a/assembly/lib/net/sf/py4j/py4j/maven-metadata-local.xml b/assembly/lib/net/sf/py4j/py4j/maven-metadata-local.xml
new file mode 100644
index 0000000000..6942ff45e7
--- /dev/null
+++ b/assembly/lib/net/sf/py4j/py4j/maven-metadata-local.xml
@@ -0,0 +1,12 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<metadata>
+ <groupId>net.sf.py4j</groupId>
+ <artifactId>py4j</artifactId>
+ <versioning>
+ <release>0.7</release>
+ <versions>
+ <version>0.7</version>
+ </versions>
+ <lastUpdated>20130828020333</lastUpdated>
+ </versioning>
+</metadata>
diff --git a/assembly/pom.xml b/assembly/pom.xml
index ca20ccadba..74990b6361 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -1,4 +1,21 @@
<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ ~ Licensed to the Apache Software Foundation (ASF) under one or more
+ ~ contributor license agreements. See the NOTICE file distributed with
+ ~ this work for additional information regarding copyright ownership.
+ ~ The ASF licenses this file to You under the Apache License, Version 2.0
+ ~ (the "License"); you may not use this file except in compliance with
+ ~ the License. You may obtain a copy of the License at
+ ~
+ ~ http://www.apache.org/licenses/LICENSE-2.0
+ ~
+ ~ Unless required by applicable law or agreed to in writing, software
+ ~ distributed under the License is distributed on an "AS IS" BASIS,
+ ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ~ See the License for the specific language governing permissions and
+ ~ limitations under the License.
+ -->
+
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
@@ -13,29 +30,13 @@
<name>Spark Project Assembly</name>
<url>http://spark-project.org/</url>
- <build>
- <plugins>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-assembly-plugin</artifactId>
- <version>2.4</version>
- <executions>
- <execution>
- <id>dist</id>
- <phase>package</phase>
- <goals>
- <goal>single</goal>
- </goals>
- <configuration>
- <descriptors>
- <descriptor>src/main/assembly/assembly.xml</descriptor>
- </descriptors>
- </configuration>
- </execution>
- </executions>
- </plugin>
- </plugins>
- </build>
+ <repositories>
+ <!-- A repository in the local filesystem for the Py4J JAR, which is not in Maven central -->
+ <repository>
+ <id>lib</id>
+ <url>file://${project.basedir}/lib</url>
+ </repository>
+ </repositories>
<dependencies>
<dependency>
@@ -63,5 +64,96 @@
<artifactId>spark-streaming</artifactId>
<version>${project.version}</version>
</dependency>
+ <dependency>
+ <groupId>net.sf.py4j</groupId>
+ <artifactId>py4j</artifactId>
+ <version>0.7</version>
+ </dependency>
</dependencies>
+
+ <build>
+ <plugins>
+ <!-- Use the shade plugin to create a big JAR with all the dependencies -->
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-shade-plugin</artifactId>
+ <configuration>
+ <shadedArtifactAttached>false</shadedArtifactAttached>
+ <outputFile>${project.build.directory}/scala-${scala.version}/${project.artifactId}-${project.version}-hadoop${hadoop.version}.jar</outputFile>
+ <artifactSet>
+ <includes>
+ <include>*:*</include>
+ </includes>
+ </artifactSet>
+ <filters>
+ <filter>
+ <artifact>*:*</artifact>
+ <excludes>
+ <exclude>META-INF/*.SF</exclude>
+ <exclude>META-INF/*.DSA</exclude>
+ <exclude>META-INF/*.RSA</exclude>
+ </excludes>
+ </filter>
+ </filters>
+ </configuration>
+ <executions>
+ <execution>
+ <phase>package</phase>
+ <goals>
+ <goal>shade</goal>
+ </goals>
+ <configuration>
+ <transformers>
+ <transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/>
+ <transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
+ <resource>reference.conf</resource>
+ </transformer>
+ </transformers>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ </plugins>
+ </build>
+
+ <profiles>
+ <profile>
+ <id>hadoop2-yarn</id>
+ <dependencies>
+ <dependency>
+ <groupId>org.spark-project</groupId>
+ <artifactId>spark-yarn</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+ </dependencies>
+ </profile>
+ <profile>
+ <id>bigtop-dist</id>
+ <!-- This profile uses the assembly plugin to create a special "dist" package for BigTop
+ that contains Spark but not the Hadoop JARs it depends on. -->
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-assembly-plugin</artifactId>
+ <version>2.4</version>
+ <executions>
+ <execution>
+ <id>dist</id>
+ <phase>package</phase>
+ <goals>
+ <goal>single</goal>
+ </goals>
+ <configuration>
+ <descriptors>
+ <descriptor>src/main/assembly/assembly.xml</descriptor>
+ </descriptors>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ </plugins>
+ </build>
+ </profile>
+ </profiles>
</project>
diff --git a/assembly/src/main/assembly/assembly.xml b/assembly/src/main/assembly/assembly.xml
index 14485b7181..4543b52c93 100644
--- a/assembly/src/main/assembly/assembly.xml
+++ b/assembly/src/main/assembly/assembly.xml
@@ -1,3 +1,19 @@
+<!--
+ ~ Licensed to the Apache Software Foundation (ASF) under one or more
+ ~ contributor license agreements. See the NOTICE file distributed with
+ ~ this work for additional information regarding copyright ownership.
+ ~ The ASF licenses this file to You under the Apache License, Version 2.0
+ ~ (the "License"); you may not use this file except in compliance with
+ ~ the License. You may obtain a copy of the License at
+ ~
+ ~ http://www.apache.org/licenses/LICENSE-2.0
+ ~
+ ~ Unless required by applicable law or agreed to in writing, software
+ ~ distributed under the License is distributed on an "AS IS" BASIS,
+ ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ~ See the License for the specific language governing permissions and
+ ~ limitations under the License.
+ -->
<assembly>
<id>dist</id>
<formats>
@@ -36,7 +52,8 @@
</directory>
<outputDirectory>/bin</outputDirectory>
<includes>
- <include>run*</include>
+ <include>run-example*</include>
+ <include>spark-class*</include>
<include>spark-shell*</include>
<include>spark-executor*</include>
</includes>
diff --git a/examples/pom.xml b/examples/pom.xml
index d24bd404fa..687fbcca8f 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -36,21 +36,25 @@
<groupId>org.spark-project</groupId>
<artifactId>spark-core</artifactId>
<version>${project.version}</version>
+ <scope>provided</scope>
</dependency>
<dependency>
<groupId>org.spark-project</groupId>
<artifactId>spark-streaming</artifactId>
<version>${project.version}</version>
+ <scope>provided</scope>
</dependency>
<dependency>
<groupId>org.spark-project</groupId>
<artifactId>spark-mllib</artifactId>
<version>${project.version}</version>
+ <scope>provided</scope>
</dependency>
<dependency>
<groupId>org.spark-project</groupId>
<artifactId>spark-bagel</artifactId>
<version>${project.version}</version>
+ <scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
@@ -68,10 +72,6 @@
</exclusions>
</dependency>
<dependency>
- <groupId>org.scala-lang</groupId>
- <artifactId>scala-library</artifactId>
- </dependency>
- <dependency>
<groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-server</artifactId>
</dependency>
@@ -126,13 +126,63 @@
</exclusions>
</dependency>
</dependencies>
+
+ <profiles>
+ <profile>
+ <id>hadoop2-yarn</id>
+ <dependencies>
+ <dependency>
+ <groupId>org.spark-project</groupId>
+ <artifactId>spark-yarn</artifactId>
+ <version>${project.version}</version>
+ <scope>provided</scope>
+ </dependency>
+ </dependencies>
+ </profile>
+ </profiles>
+
<build>
<outputDirectory>target/scala-${scala.version}/classes</outputDirectory>
<testOutputDirectory>target/scala-${scala.version}/test-classes</testOutputDirectory>
<plugins>
<plugin>
- <groupId>org.scalatest</groupId>
- <artifactId>scalatest-maven-plugin</artifactId>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-shade-plugin</artifactId>
+ <configuration>
+ <shadedArtifactAttached>false</shadedArtifactAttached>
+ <outputFile>${project.build.directory}/scala-${scala.version}/${project.artifactId}-assembly-${project.version}.jar</outputFile>
+ <artifactSet>
+ <includes>
+ <include>*:*</include>
+ </includes>
+ </artifactSet>
+ <filters>
+ <filter>
+ <artifact>*:*</artifact>
+ <excludes>
+ <exclude>META-INF/*.SF</exclude>
+ <exclude>META-INF/*.DSA</exclude>
+ <exclude>META-INF/*.RSA</exclude>
+ </excludes>
+ </filter>
+ </filters>
+ </configuration>
+ <executions>
+ <execution>
+ <phase>package</phase>
+ <goals>
+ <goal>shade</goal>
+ </goals>
+ <configuration>
+ <transformers>
+ <transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/>
+ <transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
+ <resource>reference.conf</resource>
+ </transformer>
+ </transformers>
+ </configuration>
+ </execution>
+ </executions>
</plugin>
</plugins>
</build>
diff --git a/pom.xml b/pom.xml
index ea9548359f..e2fd54a966 100644
--- a/pom.xml
+++ b/pom.xml
@@ -62,6 +62,7 @@
<module>tools</module>
<module>streaming</module>
<module>repl</module>
+ <module>assembly</module>
</modules>
<properties>
@@ -75,7 +76,7 @@
<slf4j.version>1.7.2</slf4j.version>
<log4j.version>1.2.17</log4j.version>
<hadoop.version>1.0.4</hadoop.version>
- <!-- <hadoop.version>2.0.0-mr1-cdh4.1.2</hadoop.version> -->
+ <hbase.version>0.94.6</hbase.version>
<PermGen>64m</PermGen>
<MaxPermGen>512m</MaxPermGen>
@@ -743,22 +744,11 @@
</dependencyManagement>
</profile>
<profile>
- <id>assembly</id>
+ <id>repl-bin</id>
<activation>
<activeByDefault>false</activeByDefault>
</activation>
<modules>
- <module>assembly</module>
- </modules>
- </profile>
- <profile>
- <id>expensive-modules</id>
- <activation>
- <property>
- <name>!noExpensive</name>
- </property>
- </activation>
- <modules>
<module>repl-bin</module>
</modules>
</profile>
diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
index 8797e65b8d..2e26812671 100644
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@ -41,7 +41,7 @@ object SparkBuild extends Build {
.dependsOn(core, bagel, mllib) dependsOn(maybeYarn: _*)
lazy val examples = Project("examples", file("examples"), settings = examplesSettings)
- .dependsOn(core, mllib, bagel, streaming)
+ .dependsOn(core, mllib, bagel, streaming) dependsOn(maybeYarn: _*)
lazy val tools = Project("tools", file("tools"), settings = toolsSettings) dependsOn(core) dependsOn(streaming)
@@ -261,7 +261,7 @@ object SparkBuild extends Build {
def yarnSettings = sharedSettings ++ Seq(
name := "spark-yarn"
- ) ++ extraYarnSettings ++ assemblySettings ++ extraAssemblySettings
+ ) ++ extraYarnSettings
// Conditionally include the YARN dependencies because some tools look at all sub-projects and will complain
// if we refer to nonexistent dependencies (e.g. hadoop-yarn-api from a Hadoop version without YARN).
diff --git a/run-example b/run-example
index e1b26257e1..ccd4356bdf 100755
--- a/run-example
+++ b/run-example
@@ -54,6 +54,11 @@ if [[ -z $SPARK_EXAMPLES_JAR ]]; then
exit 1
fi
+# Since the examples JAR ideally shouldn't include spark-core (that dependency should be
+# "provided"), also add our standard Spark classpath, built using compute-classpath.sh.
+CLASSPATH=`$FWDIR/bin/compute-classpath.sh`
+CLASSPATH="$SPARK_EXAMPLES_JAR:$CLASSPATH"
+
# Find java binary
if [ -n "${JAVA_HOME}" ]; then
RUNNER="${JAVA_HOME}/bin/java"
@@ -68,9 +73,9 @@ fi
if [ "$SPARK_PRINT_LAUNCH_COMMAND" == "1" ]; then
echo -n "Spark Command: "
- echo "$RUNNER" -cp "$SPARK_EXAMPLES_JAR" "$@"
+ echo "$RUNNER" -cp "$CLASSPATH" "$@"
echo "========================================"
echo
fi
-exec "$RUNNER" -cp "$SPARK_EXAMPLES_JAR" "$@"
+exec "$RUNNER" -cp "$CLASSPATH" "$@"