-rwxr-xr-x  bin/spark-class                                                                 61
-rw-r--r--  bin/spark-class2.cmd                                                            33
-rw-r--r--  launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java    99
-rwxr-xr-x  make-distribution.sh                                                             1
4 files changed, 69 insertions, 125 deletions
diff --git a/bin/spark-class b/bin/spark-class
index e29b234afa..c03946d92e 100755
--- a/bin/spark-class
+++ b/bin/spark-class
@@ -40,35 +40,46 @@ else
fi
fi
-# Look for the launcher. In non-release mode, add the compiled classes directly to the classpath
-# instead of looking for a jar file.
-SPARK_LAUNCHER_CP=
-if [ -f $SPARK_HOME/RELEASE ]; then
- LAUNCHER_DIR="$SPARK_HOME/lib"
- num_jars="$(ls -1 "$LAUNCHER_DIR" | grep "^spark-launcher.*\.jar$" | wc -l)"
- if [ "$num_jars" -eq "0" -a -z "$SPARK_LAUNCHER_CP" ]; then
- echo "Failed to find Spark launcher in $LAUNCHER_DIR." 1>&2
- echo "You need to build Spark before running this program." 1>&2
- exit 1
- fi
+# Find assembly jar
+SPARK_ASSEMBLY_JAR=
+if [ -f "$SPARK_HOME/RELEASE" ]; then
+ ASSEMBLY_DIR="$SPARK_HOME/lib"
+else
+ ASSEMBLY_DIR="$SPARK_HOME/assembly/target/scala-$SPARK_SCALA_VERSION"
+fi
- LAUNCHER_JARS="$(ls -1 "$LAUNCHER_DIR" | grep "^spark-launcher.*\.jar$" || true)"
- if [ "$num_jars" -gt "1" ]; then
- echo "Found multiple Spark launcher jars in $LAUNCHER_DIR:" 1>&2
- echo "$LAUNCHER_JARS" 1>&2
- echo "Please remove all but one jar." 1>&2
- exit 1
- fi
+num_jars="$(ls -1 "$ASSEMBLY_DIR" | grep "^spark-assembly.*hadoop.*\.jar$" | wc -l)"
+if [ "$num_jars" -eq "0" -a -z "$SPARK_ASSEMBLY_JAR" ]; then
+ echo "Failed to find Spark assembly in $ASSEMBLY_DIR." 1>&2
+ echo "You need to build Spark before running this program." 1>&2
+ exit 1
+fi
+ASSEMBLY_JARS="$(ls -1 "$ASSEMBLY_DIR" | grep "^spark-assembly.*hadoop.*\.jar$" || true)"
+if [ "$num_jars" -gt "1" ]; then
+ echo "Found multiple Spark assembly jars in $ASSEMBLY_DIR:" 1>&2
+ echo "$ASSEMBLY_JARS" 1>&2
+ echo "Please remove all but one jar." 1>&2
+ exit 1
+fi
- SPARK_LAUNCHER_CP="${LAUNCHER_DIR}/${LAUNCHER_JARS}"
+SPARK_ASSEMBLY_JAR="${ASSEMBLY_DIR}/${ASSEMBLY_JARS}"
+
+# Verify that versions of java used to build the jars and run Spark are compatible
+if [ -n "$JAVA_HOME" ]; then
+ JAR_CMD="$JAVA_HOME/bin/jar"
else
- LAUNCHER_DIR="$SPARK_HOME/launcher/target/scala-$SPARK_SCALA_VERSION"
- if [ ! -d "$LAUNCHER_DIR/classes" ]; then
- echo "Failed to find Spark launcher classes in $LAUNCHER_DIR." 1>&2
- echo "You need to build Spark before running this program." 1>&2
+ JAR_CMD="jar"
+fi
+
+if [ $(command -v "$JAR_CMD") ] ; then
+ jar_error_check=$("$JAR_CMD" -tf "$SPARK_ASSEMBLY_JAR" nonexistent/class/path 2>&1)
+ if [[ "$jar_error_check" =~ "invalid CEN header" ]]; then
+ echo "Loading Spark jar with '$JAR_CMD' failed. " 1>&2
+ echo "This is likely because Spark was compiled with Java 7 and run " 1>&2
+ echo "with Java 6. (see SPARK-1703). Please use Java 7 to run Spark " 1>&2
+ echo "or build Spark with Java 6." 1>&2
exit 1
fi
- SPARK_LAUNCHER_CP="$LAUNCHER_DIR/classes"
fi
# The launcher library will print arguments separated by a NULL character, to allow arguments with
@@ -77,7 +88,7 @@ fi
CMD=()
while IFS= read -d '' -r ARG; do
CMD+=("$ARG")
-done < <("$RUNNER" -cp "$SPARK_LAUNCHER_CP" org.apache.spark.launcher.Main "$@")
+done < <("$RUNNER" -cp "$SPARK_ASSEMBLY_JAR" org.apache.spark.launcher.Main "$@")
if [ "${CMD[0]}" = "usage" ]; then
"${CMD[@]}"
diff --git a/bin/spark-class2.cmd b/bin/spark-class2.cmd
index 37d22215a0..4ce727bc99 100644
--- a/bin/spark-class2.cmd
+++ b/bin/spark-class2.cmd
@@ -29,31 +29,20 @@ if "x%1"=="x" (
exit /b 1
)
-set LAUNCHER_CP=0
-if exist %SPARK_HOME%\RELEASE goto find_release_launcher
+rem Find assembly jar
+set SPARK_ASSEMBLY_JAR=0
-rem Look for the Spark launcher in both Scala build directories. The launcher doesn't use Scala so
-rem it doesn't really matter which one is picked up. Add the compiled classes directly to the
-rem classpath instead of looking for a jar file, since it's very common for people using sbt to use
-rem the "assembly" target instead of "package".
-set LAUNCHER_CLASSES=%SPARK_HOME%\launcher\target\scala-2.10\classes
-if exist %LAUNCHER_CLASSES% (
- set LAUNCHER_CP=%LAUNCHER_CLASSES%
+if exist "%SPARK_HOME%\RELEASE" (
+ set ASSEMBLY_DIR=%SPARK_HOME%\lib
+) else (
+ set ASSEMBLY_DIR=%SPARK_HOME%\assembly\target\scala-%SPARK_SCALA_VERSION%
)
-set LAUNCHER_CLASSES=%SPARK_HOME%\launcher\target\scala-2.11\classes
-if exist %LAUNCHER_CLASSES% (
- set LAUNCHER_CP=%LAUNCHER_CLASSES%
-)
-goto check_launcher
-:find_release_launcher
-for %%d in (%SPARK_HOME%\lib\spark-launcher*.jar) do (
- set LAUNCHER_CP=%%d
+for %%d in (%ASSEMBLY_DIR%\spark-assembly*hadoop*.jar) do (
+ set SPARK_ASSEMBLY_JAR=%%d
)
-
-:check_launcher
-if "%LAUNCHER_CP%"=="0" (
- echo Failed to find Spark launcher JAR.
+if "%SPARK_ASSEMBLY_JAR%"=="0" (
+ echo Failed to find Spark assembly JAR.
echo You need to build Spark before running this program.
exit /b 1
)
@@ -64,7 +53,7 @@ if not "x%JAVA_HOME%"=="x" set RUNNER=%JAVA_HOME%\bin\java
rem The launcher library prints the command to be executed in a single line suitable for being
rem executed by the batch interpreter. So read all the output of the launcher into a variable.
-for /f "tokens=*" %%i in ('cmd /C ""%RUNNER%" -cp %LAUNCHER_CP% org.apache.spark.launcher.Main %*"') do (
+for /f "tokens=*" %%i in ('cmd /C ""%RUNNER%" -cp %SPARK_ASSEMBLY_JAR% org.apache.spark.launcher.Main %*"') do (
set SPARK_CMD=%%i
)
%SPARK_CMD%
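
Note on the hunk above: on Windows the launcher's output is captured line by line with for /f, so the command has to come back as one quoted line rather than NUL-separated tokens. The sketch below only illustrates that single-line form; the quoting and paths are assumptions, not the launcher's actual implementation.

    import java.util.Arrays;
    import java.util.List;

    public class SingleLineCommandSketch {
      public static void main(String[] args) {
        // Hypothetical command; the real one is produced by the launcher library.
        List<String> cmd = Arrays.asList(
            "C:\\jdk7\\bin\\java.exe",
            "-cp", "C:\\spark\\lib\\spark-assembly-hadoop.jar",
            "org.apache.spark.deploy.SparkSubmit");
        StringBuilder line = new StringBuilder();
        for (String part : cmd) {
          if (line.length() > 0) {
            line.append(' ');
          }
          // Quote each token so paths containing spaces stay intact.
          line.append('"').append(part).append('"');
        }
        System.out.println(line);
      }
    }
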
diff --git a/launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java b/launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java
index 2da5f72787..d8279145d8 100644
--- a/launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java
+++ b/launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java
@@ -86,10 +86,14 @@ abstract class AbstractCommandBuilder {
*/
List<String> buildJavaCommand(String extraClassPath) throws IOException {
List<String> cmd = new ArrayList<String>();
- if (javaHome == null) {
- cmd.add(join(File.separator, System.getProperty("java.home"), "bin", "java"));
- } else {
+ String envJavaHome;
+
+ if (javaHome != null) {
cmd.add(join(File.separator, javaHome, "bin", "java"));
+ } else if ((envJavaHome = System.getenv("JAVA_HOME")) != null) {
+ cmd.add(join(File.separator, envJavaHome, "bin", "java"));
+ } else {
+ cmd.add(join(File.separator, System.getProperty("java.home"), "bin", "java"));
}
// Load extra JAVA_OPTS from conf/java-opts, if it exists.
@@ -182,59 +186,25 @@ abstract class AbstractCommandBuilder {
addToClassPath(cp, String.format("%s/core/target/jars/*", sparkHome));
}
- String assembly = findAssembly();
+ final String assembly = AbstractCommandBuilder.class.getProtectionDomain().getCodeSource().
+ getLocation().getPath();
addToClassPath(cp, assembly);
- // When Hive support is needed, Datanucleus jars must be included on the classpath. Datanucleus
- // jars do not work if only included in the uber jar as plugin.xml metadata is lost. Both sbt
- // and maven will populate "lib_managed/jars/" with the datanucleus jars when Spark is built
- // with Hive, so first check if the datanucleus jars exist, and then ensure the current Spark
- // assembly is built for Hive, before actually populating the CLASSPATH with the jars.
- //
- // This block also serves as a check for SPARK-1703, when the assembly jar is built with
- // Java 7 and ends up with too many files, causing issues with other JDK versions.
- boolean needsDataNucleus = false;
- JarFile assemblyJar = null;
- try {
- assemblyJar = new JarFile(assembly);
- needsDataNucleus = assemblyJar.getEntry("org/apache/hadoop/hive/ql/exec/") != null;
- } catch (IOException ioe) {
- if (ioe.getMessage().indexOf("invalid CEN header") >= 0) {
- System.err.println(
- "Loading Spark jar failed.\n" +
- "This is likely because Spark was compiled with Java 7 and run\n" +
- "with Java 6 (see SPARK-1703). Please use Java 7 to run Spark\n" +
- "or build Spark with Java 6.");
- System.exit(1);
- } else {
- throw ioe;
- }
- } finally {
- if (assemblyJar != null) {
- try {
- assemblyJar.close();
- } catch (IOException e) {
- // Ignore.
- }
- }
+ // Datanucleus jars must be included on the classpath. Datanucleus jars do not work if only
+ // included in the uber jar as plugin.xml metadata is lost. Both sbt and maven will populate
+ // "lib_managed/jars/" with the datanucleus jars when Spark is built with Hive
+ File libdir;
+ if (new File(sparkHome, "RELEASE").isFile()) {
+ libdir = new File(sparkHome, "lib");
+ } else {
+ libdir = new File(sparkHome, "lib_managed/jars");
}
- if (needsDataNucleus) {
- System.err.println("Spark assembly has been built with Hive, including Datanucleus jars " +
- "in classpath.");
- File libdir;
- if (new File(sparkHome, "RELEASE").isFile()) {
- libdir = new File(sparkHome, "lib");
- } else {
- libdir = new File(sparkHome, "lib_managed/jars");
- }
-
- checkState(libdir.isDirectory(), "Library directory '%s' does not exist.",
- libdir.getAbsolutePath());
- for (File jar : libdir.listFiles()) {
- if (jar.getName().startsWith("datanucleus-")) {
- addToClassPath(cp, jar.getAbsolutePath());
- }
+ checkState(libdir.isDirectory(), "Library directory '%s' does not exist.",
+ libdir.getAbsolutePath());
+ for (File jar : libdir.listFiles()) {
+ if (jar.getName().startsWith("datanucleus-")) {
+ addToClassPath(cp, jar.getAbsolutePath());
}
}
@@ -270,7 +240,6 @@ abstract class AbstractCommandBuilder {
if (scala != null) {
return scala;
}
-
String sparkHome = getSparkHome();
File scala210 = new File(sparkHome, "assembly/target/scala-2.10");
File scala211 = new File(sparkHome, "assembly/target/scala-2.11");
@@ -330,30 +299,6 @@ abstract class AbstractCommandBuilder {
return firstNonEmpty(childEnv.get(key), System.getenv(key));
}
- private String findAssembly() {
- String sparkHome = getSparkHome();
- File libdir;
- if (new File(sparkHome, "RELEASE").isFile()) {
- libdir = new File(sparkHome, "lib");
- checkState(libdir.isDirectory(), "Library directory '%s' does not exist.",
- libdir.getAbsolutePath());
- } else {
- libdir = new File(sparkHome, String.format("assembly/target/scala-%s", getScalaVersion()));
- }
-
- final Pattern re = Pattern.compile("spark-assembly.*hadoop.*\\.jar");
- FileFilter filter = new FileFilter() {
- @Override
- public boolean accept(File file) {
- return file.isFile() && re.matcher(file.getName()).matches();
- }
- };
- File[] assemblies = libdir.listFiles(filter);
- checkState(assemblies != null && assemblies.length > 0, "No assemblies found in '%s'.", libdir);
- checkState(assemblies.length == 1, "Multiple assemblies found in '%s'.", libdir);
- return assemblies[0].getAbsolutePath();
- }
-
private String getConfDir() {
String confDir = getenv("SPARK_CONF_DIR");
return confDir != null ? confDir : join(File.separator, getSparkHome(), "conf");
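
Note on the hunk above: the removed findAssembly() directory scan is replaced by asking the JVM where the launcher class itself was loaded from, via its ProtectionDomain's CodeSource. A standalone sketch of that technique follows; the class name is illustrative and the printed path depends on how the program is launched.

    import java.net.URL;
    import java.security.CodeSource;

    public class CodeSourceLocationDemo {
      public static void main(String[] args) {
        CodeSource source = CodeSourceLocationDemo.class.getProtectionDomain().getCodeSource();
        if (source != null) {
          URL location = source.getLocation();
          // For a class packaged in a jar this is the jar's path;
          // for classes run from a build directory it is that directory.
          System.out.println(location.getPath());
        } else {
          // Classes loaded by the bootstrap class loader have no CodeSource.
          System.out.println("No CodeSource available for this class.");
        }
      }
    }
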
diff --git a/make-distribution.sh b/make-distribution.sh
index 9ed1abfe8c..738a9c4d69 100755
--- a/make-distribution.sh
+++ b/make-distribution.sh
@@ -199,7 +199,6 @@ echo "Build flags: $@" >> "$DISTDIR/RELEASE"
# Copy jars
cp "$SPARK_HOME"/assembly/target/scala*/*assembly*hadoop*.jar "$DISTDIR/lib/"
cp "$SPARK_HOME"/examples/target/scala*/spark-examples*.jar "$DISTDIR/lib/"
-cp "$SPARK_HOME"/launcher/target/spark-launcher_$SCALA_VERSION-$VERSION.jar "$DISTDIR/lib/"
# This will fail if the -Pyarn profile is not provided
# In this case, silence the error and ignore the return code of this command
cp "$SPARK_HOME"/network/yarn/target/scala*/spark-*-yarn-shuffle.jar "$DISTDIR/lib/" &> /dev/null || :