-rwxr-xr-x  bin/run-example                                                                         55
-rw-r--r--  bin/run-example.cmd                                                                      7
-rw-r--r--  bin/run-example2.cmd                                                                    85
-rwxr-xr-x  dev/make-distribution.sh                                                                 5
-rw-r--r--  examples/pom.xml                                                                        54
-rw-r--r--  launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java         68
-rw-r--r--  launcher/src/test/java/org/apache/spark/launcher/SparkSubmitCommandBuilderSuite.java    18
-rw-r--r--  pom.xml                                                                                  3
-rw-r--r--  project/SparkBuild.scala                                                                41
9 files changed, 157 insertions, 179 deletions
diff --git a/bin/run-example b/bin/run-example
index e1b0d5789b..dd0e3c4120 100755
--- a/bin/run-example
+++ b/bin/run-example
@@ -21,56 +21,5 @@ if [ -z "${SPARK_HOME}" ]; then
export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
fi
-EXAMPLES_DIR="${SPARK_HOME}"/examples
-
-. "${SPARK_HOME}"/bin/load-spark-env.sh
-
-if [ -n "$1" ]; then
- EXAMPLE_CLASS="$1"
- shift
-else
- echo "Usage: ./bin/run-example <example-class> [example-args]" 1>&2
- echo " - set MASTER=XX to use a specific master" 1>&2
- echo " - can use abbreviated example class name relative to com.apache.spark.examples" 1>&2
- echo " (e.g. SparkPi, mllib.LinearRegression, streaming.KinesisWordCountASL)" 1>&2
- exit 1
-fi
-
-if [ -f "${SPARK_HOME}/RELEASE" ]; then
- JAR_PATH="${SPARK_HOME}/lib"
-else
- JAR_PATH="${EXAMPLES_DIR}/target/scala-${SPARK_SCALA_VERSION}"
-fi
-
-JAR_COUNT=0
-
-for f in "${JAR_PATH}"/spark-examples-*hadoop*.jar; do
- if [[ ! -e "$f" ]]; then
- echo "Failed to find Spark examples assembly in ${SPARK_HOME}/lib or ${SPARK_HOME}/examples/target" 1>&2
- echo "You need to build Spark before running this program" 1>&2
- exit 1
- fi
- SPARK_EXAMPLES_JAR="$f"
- JAR_COUNT=$((JAR_COUNT+1))
-done
-
-if [ "$JAR_COUNT" -gt "1" ]; then
- echo "Found multiple Spark examples assembly jars in ${JAR_PATH}" 1>&2
- ls "${JAR_PATH}"/spark-examples-*hadoop*.jar 1>&2
- echo "Please remove all but one jar." 1>&2
- exit 1
-fi
-
-export SPARK_EXAMPLES_JAR
-
-EXAMPLE_MASTER=${MASTER:-"local[*]"}
-
-if [[ ! $EXAMPLE_CLASS == org.apache.spark.examples* ]]; then
- EXAMPLE_CLASS="org.apache.spark.examples.$EXAMPLE_CLASS"
-fi
-
-exec "${SPARK_HOME}"/bin/spark-submit \
- --master $EXAMPLE_MASTER \
- --class $EXAMPLE_CLASS \
- "$SPARK_EXAMPLES_JAR" \
- "$@"
+export _SPARK_CMD_USAGE="Usage: ./bin/run-example [options] example-class [example args]"
+exec "${SPARK_HOME}"/bin/spark-submit run-example "$@"
diff --git a/bin/run-example.cmd b/bin/run-example.cmd
index 64f6bc3728..f9b786e92b 100644
--- a/bin/run-example.cmd
+++ b/bin/run-example.cmd
@@ -17,7 +17,6 @@ rem See the License for the specific language governing permissions and
rem limitations under the License.
rem
-rem This is the entry point for running a Spark example. To avoid polluting
-rem the environment, it just launches a new cmd to do the real work.
-
-cmd /V /E /C "%~dp0run-example2.cmd" %*
+set SPARK_HOME=%~dp0..
+set _SPARK_CMD_USAGE=Usage: ./bin/run-example [options] example-class [example args]
+cmd /V /E /C "%~dp0spark-submit.cmd" run-example %*
diff --git a/bin/run-example2.cmd b/bin/run-example2.cmd
deleted file mode 100644
index fada43581d..0000000000
--- a/bin/run-example2.cmd
+++ /dev/null
@@ -1,85 +0,0 @@
-@echo off
-
-rem
-rem Licensed to the Apache Software Foundation (ASF) under one or more
-rem contributor license agreements. See the NOTICE file distributed with
-rem this work for additional information regarding copyright ownership.
-rem The ASF licenses this file to You under the Apache License, Version 2.0
-rem (the "License"); you may not use this file except in compliance with
-rem the License. You may obtain a copy of the License at
-rem
-rem http://www.apache.org/licenses/LICENSE-2.0
-rem
-rem Unless required by applicable law or agreed to in writing, software
-rem distributed under the License is distributed on an "AS IS" BASIS,
-rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-rem See the License for the specific language governing permissions and
-rem limitations under the License.
-rem
-
-set SCALA_VERSION=2.10
-
-rem Figure out where the Spark framework is installed
-set SPARK_HOME=%~dp0..
-
-call "%SPARK_HOME%\bin\load-spark-env.cmd"
-
-rem Test that an argument was given
-if not "x%1"=="x" goto arg_given
- echo Usage: run-example ^<example-class^> [example-args]
- echo - set MASTER=XX to use a specific master
- echo - can use abbreviated example class name relative to com.apache.spark.examples
- echo (e.g. SparkPi, mllib.LinearRegression, streaming.KinesisWordCountASL)
- goto exit
-:arg_given
-
-set EXAMPLES_DIR=%SPARK_HOME%\examples
-
-rem Figure out the JAR file that our examples were packaged into.
-set SPARK_EXAMPLES_JAR=
-if exist "%SPARK_HOME%\RELEASE" (
- for %%d in ("%SPARK_HOME%\lib\spark-examples*.jar") do (
- set SPARK_EXAMPLES_JAR=%%d
- )
-) else (
- for %%d in ("%EXAMPLES_DIR%\target\scala-%SCALA_VERSION%\spark-examples*.jar") do (
- set SPARK_EXAMPLES_JAR=%%d
- )
-)
-if "x%SPARK_EXAMPLES_JAR%"=="x" (
- echo Failed to find Spark examples assembly JAR.
- echo You need to build Spark before running this program.
- goto exit
-)
-
-rem Set master from MASTER environment variable if given
-if "x%MASTER%"=="x" (
- set EXAMPLE_MASTER=local[*]
-) else (
- set EXAMPLE_MASTER=%MASTER%
-)
-
-rem If the EXAMPLE_CLASS does not start with org.apache.spark.examples, add that
-set EXAMPLE_CLASS=%1
-set PREFIX=%EXAMPLE_CLASS:~0,25%
-if not %PREFIX%==org.apache.spark.examples (
- set EXAMPLE_CLASS=org.apache.spark.examples.%EXAMPLE_CLASS%
-)
-
-rem Get the tail of the argument list, to skip the first one. This is surprisingly
-rem complicated on Windows.
-set "ARGS="
-:top
-shift
-if "%~1" neq "" (
- set ARGS=%ARGS% "%~1"
- goto :top
-)
-if defined ARGS set ARGS=%ARGS:~1%
-
-call "%SPARK_HOME%\bin\spark-submit.cmd" ^
- --master %EXAMPLE_MASTER% ^
- --class %EXAMPLE_CLASS% ^
- "%SPARK_EXAMPLES_JAR%" %ARGS%
-
-:exit
diff --git a/dev/make-distribution.sh b/dev/make-distribution.sh
index ac4e9b90f0..dbdd42ff9e 100755
--- a/dev/make-distribution.sh
+++ b/dev/make-distribution.sh
@@ -166,11 +166,14 @@ echo "Build flags: $@" >> "$DISTDIR/RELEASE"
# Copy jars
cp "$SPARK_HOME"/assembly/target/scala*/*assembly*hadoop*.jar "$DISTDIR/lib/"
-cp "$SPARK_HOME"/examples/target/scala*/spark-examples*.jar "$DISTDIR/lib/"
# This will fail if the -Pyarn profile is not provided
# In this case, silence the error and ignore the return code of this command
cp "$SPARK_HOME"/common/network-yarn/target/scala*/spark-*-yarn-shuffle.jar "$DISTDIR/lib/" &> /dev/null || :
+# Copy examples and dependencies
+mkdir -p "$DISTDIR/examples/jars"
+cp "$SPARK_HOME"/examples/target/scala*/jars/* "$DISTDIR/examples/jars"
+
# Copy example sources (needed for python and SQL)
mkdir -p "$DISTDIR/examples/src/main"
cp -r "$SPARK_HOME"/examples/src/main "$DISTDIR/examples/src/"
diff --git a/examples/pom.xml b/examples/pom.xml
index 92bb373c73..1aa730c0dc 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -322,36 +322,36 @@
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-shade-plugin</artifactId>
+ <artifactId>maven-jar-plugin</artifactId>
+ <executions>
+ <execution>
+ <id>prepare-test-jar</id>
+ <phase>none</phase>
+ <goals>
+ <goal>test-jar</goal>
+ </goals>
+ </execution>
+ </executions>
<configuration>
- <shadedArtifactAttached>false</shadedArtifactAttached>
- <outputFile>${project.build.directory}/scala-${scala.binary.version}/spark-examples-${project.version}-hadoop${hadoop.version}.jar</outputFile>
- <artifactSet>
- <includes>
- <include>*:*</include>
- </includes>
- </artifactSet>
- <filters>
- <filter>
- <artifact>*:*</artifact>
- <excludes>
- <exclude>META-INF/*.SF</exclude>
- <exclude>META-INF/*.DSA</exclude>
- <exclude>META-INF/*.RSA</exclude>
- </excludes>
- </filter>
- </filters>
- <transformers>
- <transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer" />
- <transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
- <resource>reference.conf</resource>
- </transformer>
- <transformer implementation="org.apache.maven.plugins.shade.resource.DontIncludeResourceTransformer">
- <resource>log4j.properties</resource>
- </transformer>
- </transformers>
+ <outputDirectory>${jars.target.dir}</outputDirectory>
</configuration>
</plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-dependency-plugin</artifactId>
+ <executions>
+ <execution>
+ <phase>package</phase>
+ <goals>
+ <goal>copy-dependencies</goal>
+ </goals>
+ <configuration>
+ <includeScope>runtime</includeScope>
+ <outputDirectory>${jars.target.dir}</outputDirectory>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
</plugins>
</build>
<profiles>
diff --git a/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java b/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java
index b2dd6ac4c3..56e4107c5a 100644
--- a/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java
+++ b/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java
@@ -30,7 +30,8 @@ import static org.apache.spark.launcher.CommandBuilderUtils.*;
* driver-side options and special parsing behavior needed for the special-casing certain internal
* Spark applications.
* <p>
- * This class has also some special features to aid launching pyspark.
+ * This class also has some special features to aid launching shells (pyspark and sparkR) and
+ * examples.
*/
class SparkSubmitCommandBuilder extends AbstractCommandBuilder {
@@ -63,6 +64,17 @@ class SparkSubmitCommandBuilder extends AbstractCommandBuilder {
static final String SPARKR_SHELL_RESOURCE = "sparkr-shell";
/**
+ * Name of app resource used to identify examples. When running examples, args[0] should be
+ * this name. The app resource will identify the example class to run.
+ */
+ static final String RUN_EXAMPLE = "run-example";
+
+ /**
+ * Prefix for example class names.
+ */
+ static final String EXAMPLE_CLASS_PREFIX = "org.apache.spark.examples.";
+
+ /**
* This map must match the class names for available special classes, since this modifies the way
* command line parsing works. This maps the class name to the resource to use when calling
* spark-submit.
@@ -78,6 +90,7 @@ class SparkSubmitCommandBuilder extends AbstractCommandBuilder {
final List<String> sparkArgs;
private final boolean printInfo;
+ private final boolean isExample;
/**
* Controls whether mixing spark-submit arguments with app arguments is allowed. This is needed
@@ -89,10 +102,13 @@ class SparkSubmitCommandBuilder extends AbstractCommandBuilder {
SparkSubmitCommandBuilder() {
this.sparkArgs = new ArrayList<>();
this.printInfo = false;
+ this.isExample = false;
}
SparkSubmitCommandBuilder(List<String> args) {
- this.sparkArgs = new ArrayList<>();
+ this.allowsMixedArguments = false;
+
+ boolean isExample = false;
List<String> submitArgs = args;
if (args.size() > 0 && args.get(0).equals(PYSPARK_SHELL)) {
this.allowsMixedArguments = true;
@@ -102,10 +118,14 @@ class SparkSubmitCommandBuilder extends AbstractCommandBuilder {
this.allowsMixedArguments = true;
appResource = SPARKR_SHELL_RESOURCE;
submitArgs = args.subList(1, args.size());
- } else {
- this.allowsMixedArguments = false;
+ } else if (args.size() > 0 && args.get(0).equals(RUN_EXAMPLE)) {
+ isExample = true;
+ submitArgs = args.subList(1, args.size());
}
+ this.sparkArgs = new ArrayList<>();
+ this.isExample = isExample;
+
OptionParser parser = new OptionParser();
parser.parse(submitArgs);
this.printInfo = parser.infoRequested;
@@ -155,6 +175,10 @@ class SparkSubmitCommandBuilder extends AbstractCommandBuilder {
args.add(propertiesFile);
}
+ if (isExample) {
+ jars.addAll(findExamplesJars());
+ }
+
if (!jars.isEmpty()) {
args.add(parser.JARS);
args.add(join(",", jars));
@@ -170,6 +194,9 @@ class SparkSubmitCommandBuilder extends AbstractCommandBuilder {
args.add(join(",", pyFiles));
}
+ if (!printInfo) {
+ checkArgument(!isExample || mainClass != null, "Missing example class name.");
+ }
if (mainClass != null) {
args.add(parser.CLASS);
args.add(mainClass);
@@ -308,6 +335,25 @@ class SparkSubmitCommandBuilder extends AbstractCommandBuilder {
mainClass.equals("org.apache.spark.sql.hive.thriftserver.HiveThriftServer2"));
}
+ private List<String> findExamplesJars() {
+ List<String> examplesJars = new ArrayList<>();
+ String sparkHome = getSparkHome();
+
+ File jarsDir;
+ if (new File(sparkHome, "RELEASE").isFile()) {
+ jarsDir = new File(sparkHome, "examples/jars");
+ } else {
+ jarsDir = new File(sparkHome,
+ String.format("examples/target/scala-%s/jars", getScalaVersion()));
+ }
+ checkState(jarsDir.isDirectory(), "Examples jars directory '%s' does not exist.",
+ jarsDir.getAbsolutePath());
+
+ for (File f: jarsDir.listFiles()) {
+ examplesJars.add(f.getAbsolutePath());
+ }
+ return examplesJars;
+ }
private class OptionParser extends SparkSubmitOptionParser {
@@ -367,6 +413,14 @@ class SparkSubmitCommandBuilder extends AbstractCommandBuilder {
if (allowsMixedArguments) {
appArgs.add(opt);
return true;
+ } else if (isExample) {
+ String className = opt;
+ if (!className.startsWith(EXAMPLE_CLASS_PREFIX)) {
+ className = EXAMPLE_CLASS_PREFIX + className;
+ }
+ mainClass = className;
+ appResource = "spark-internal";
+ return false;
} else {
checkArgument(!opt.startsWith("-"), "Unrecognized option: %s", opt);
sparkArgs.add(opt);
@@ -376,8 +430,10 @@ class SparkSubmitCommandBuilder extends AbstractCommandBuilder {
@Override
protected void handleExtraArgs(List<String> extra) {
- for (String arg : extra) {
- sparkArgs.add(arg);
+ if (isExample) {
+ appArgs.addAll(extra);
+ } else {
+ sparkArgs.addAll(extra);
}
}
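Taken together, the builder changes mean that when args[0] is "run-example" the parser switches into example mode: the first non-option argument is treated as the example class (prefixed with org.apache.spark.examples. when abbreviated), the remaining arguments become application arguments, and every jar found in the examples jars directory is passed via --jars with "spark-internal" as the app resource. A rough Scala sketch of the two resolution steps, under the directory layout this patch assumes (examples/jars in a release, examples/target/scala-<version>/jars in a development tree); the object and method names are illustrative, not launcher API:

import java.io.File

object ExampleResolutionSketch {
  val ExampleClassPrefix = "org.apache.spark.examples."

  // "SparkPi" -> "org.apache.spark.examples.SparkPi"; fully qualified names pass through.
  def expandExampleClass(name: String): String =
    if (name.startsWith(ExampleClassPrefix)) name else ExampleClassPrefix + name

  // Collect the example jars to hand to --jars, depending on whether this is a
  // packaged release (RELEASE marker file present) or a development build.
  def findExamplesJars(sparkHome: String, scalaVersion: String): Seq[String] = {
    val jarsDir =
      if (new File(sparkHome, "RELEASE").isFile()) new File(sparkHome, "examples/jars")
      else new File(sparkHome, s"examples/target/scala-$scalaVersion/jars")
    Option(jarsDir.listFiles()).toSeq.flatten.map(_.getAbsolutePath)
  }
}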
diff --git a/launcher/src/test/java/org/apache/spark/launcher/SparkSubmitCommandBuilderSuite.java b/launcher/src/test/java/org/apache/spark/launcher/SparkSubmitCommandBuilderSuite.java
index 00f967122b..b7f4f2efc5 100644
--- a/launcher/src/test/java/org/apache/spark/launcher/SparkSubmitCommandBuilderSuite.java
+++ b/launcher/src/test/java/org/apache/spark/launcher/SparkSubmitCommandBuilderSuite.java
@@ -151,6 +151,24 @@ public class SparkSubmitCommandBuilderSuite extends BaseSuite {
assertEquals("arg1", cmd.get(cmd.size() - 1));
}
+ @Test
+ public void testExamplesRunner() throws Exception {
+ List<String> sparkSubmitArgs = Arrays.asList(
+ SparkSubmitCommandBuilder.RUN_EXAMPLE,
+ parser.MASTER + "=foo",
+ parser.DEPLOY_MODE + "=bar",
+ "SparkPi",
+ "42");
+
+ Map<String, String> env = new HashMap<String, String>();
+ List<String> cmd = buildCommand(sparkSubmitArgs, env);
+ assertEquals("foo", findArgValue(cmd, parser.MASTER));
+ assertEquals("bar", findArgValue(cmd, parser.DEPLOY_MODE));
+ assertEquals(SparkSubmitCommandBuilder.EXAMPLE_CLASS_PREFIX + "SparkPi",
+ findArgValue(cmd, parser.CLASS));
+ assertEquals("42", cmd.get(cmd.size() - 1));
+ }
+
private void testCmdBuilder(boolean isDriver, boolean useDefaultPropertyFile) throws Exception {
String deployMode = isDriver ? "client" : "cluster";
diff --git a/pom.xml b/pom.xml
index 0faa691c5e..92a32e7797 100644
--- a/pom.xml
+++ b/pom.xml
@@ -178,6 +178,9 @@
<test.java.home>${java.home}</test.java.home>
<test.exclude.tags></test.exclude.tags>
+ <!-- Modules that copy jars to the build directory should do so under this location. -->
+ <jars.target.dir>${project.build.directory}/scala-${scala.binary.version}/jars</jars.target.dir>
+
<!--
Dependency scopes that can be overridden by enabling certain profiles. These profiles are
declared in the projects that build assemblies.
diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
index d7519e82b8..f76cda08ec 100644
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@ -55,10 +55,12 @@ object BuildCommons {
Seq("yarn", "java8-tests", "ganglia-lgpl", "streaming-kinesis-asl",
"docker-integration-tests").map(ProjectRef(buildLocation, _))
- val assemblyProjects@Seq(assembly, examples, networkYarn, streamingKafkaAssembly, streamingKinesisAslAssembly) =
- Seq("assembly", "examples", "network-yarn", "streaming-kafka-assembly", "streaming-kinesis-asl-assembly")
+ val assemblyProjects@Seq(assembly, networkYarn, streamingKafkaAssembly, streamingKinesisAslAssembly) =
+ Seq("assembly", "network-yarn", "streaming-kafka-assembly", "streaming-kinesis-asl-assembly")
.map(ProjectRef(buildLocation, _))
+ val copyJarsProjects@Seq(examples) = Seq("examples").map(ProjectRef(buildLocation, _))
+
val tools = ProjectRef(buildLocation, "tools")
// Root project.
val spark = ProjectRef(buildLocation, "spark")
@@ -142,6 +144,8 @@ object SparkBuild extends PomBuild {
scalacOptions <+= target.map(t => "-P:genjavadoc:out=" + (t / "java")))
lazy val sharedSettings = sparkGenjavadocSettings ++ Seq (
+ exportJars in Compile := true,
+ exportJars in Test := false,
javaHome := sys.env.get("JAVA_HOME")
.orElse(sys.props.get("java.home").map { p => new File(p).getParentFile().getAbsolutePath() })
.map(file),
@@ -236,7 +240,7 @@ object SparkBuild extends PomBuild {
// Note ordering of these settings matter.
/* Enable shared settings on all projects */
- (allProjects ++ optionallyEnabledProjects ++ assemblyProjects ++ Seq(spark, tools))
+ (allProjects ++ optionallyEnabledProjects ++ assemblyProjects ++ copyJarsProjects ++ Seq(spark, tools))
.foreach(enable(sharedSettings ++ DependencyOverrides.settings ++
ExcludedDependencies.settings))
@@ -255,6 +259,9 @@ object SparkBuild extends PomBuild {
/* Unsafe settings */
enable(Unsafe.settings)(unsafe)
+ /* Set up tasks to copy dependencies during packaging. */
+ copyJarsProjects.foreach(enable(CopyDependencies.settings))
+
/* Enable Assembly for all assembly projects */
assemblyProjects.foreach(enable(Assembly.settings))
@@ -686,6 +693,34 @@ object Unidoc {
)
}
+object CopyDependencies {
+
+ val copyDeps = TaskKey[Unit]("copyDeps", "Copies needed dependencies to the build directory.")
+ val destPath = (crossTarget in Compile) / "jars"
+
+ lazy val settings = Seq(
+ copyDeps := {
+ val dest = destPath.value
+ if (!dest.isDirectory() && !dest.mkdirs()) {
+ throw new IOException("Failed to create jars directory.")
+ }
+
+ (dependencyClasspath in Compile).value.map(_.data)
+ .filter { jar => jar.isFile() }
+ .foreach { jar =>
+ val destJar = new File(dest, jar.getName())
+ if (destJar.isFile()) {
+ destJar.delete()
+ }
+ Files.copy(jar.toPath(), destJar.toPath())
+ }
+ },
+ crossTarget in (Compile, packageBin) := destPath.value,
+ packageBin in Compile <<= (packageBin in Compile).dependsOn(copyDeps)
+ )
+
+}
+
object Java8TestSettings {
import BuildCommons._
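On the sbt side, CopyDependencies plays the role the maven-dependency-plugin plays in the Maven build: before packageBin runs, every jar on the Compile dependency classpath is copied into <crossTarget>/jars, and the module's own jar is written to the same directory. A standalone sketch of that copy step using plain java.nio rather than sbt's task machinery (the object and method names are illustrative only):

import java.io.{File, IOException}
import java.nio.file.Files

object CopyDepsSketch {
  // Copy each jar on the classpath into the destination directory, replacing a
  // stale copy with the same name, as the copyDeps task defined above does.
  def copyJars(classpath: Seq[File], dest: File): Unit = {
    if (!dest.isDirectory() && !dest.mkdirs()) {
      throw new IOException("Failed to create jars directory.")
    }
    classpath.filter(_.isFile()).foreach { jar =>
      val destJar = new File(dest, jar.getName())
      if (destJar.isFile()) destJar.delete()
      Files.copy(jar.toPath(), destJar.toPath())
    }
  }
}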