author    Nishkam Ravi <nravi@cloudera.com>  2015-03-29 12:40:37 +0100
committer Sean Owen <sowen@cloudera.com>     2015-03-29 12:40:37 +0100
commit    e3eb393961051a48ed1cac756ac1928156aa161f (patch)
tree      60f5f5e1a0223c8a65b067e2a8e0b0aefe0c2a65 /bin
parent    55153f5c14fad10607b44fbb8eebd9636a6bc2e1 (diff)
download  spark-e3eb393961051a48ed1cac756ac1928156aa161f.tar.gz
          spark-e3eb393961051a48ed1cac756ac1928156aa161f.tar.bz2
          spark-e3eb393961051a48ed1cac756ac1928156aa161f.zip
[SPARK-6406] Launch Spark using assembly jar instead of a separate launcher jar
Author: Nishkam Ravi <nravi@cloudera.com>
Author: nishkamravi2 <nishkamravi@gmail.com>
Author: nravi <nravi@c1704.halxg.cloudera.com>

Closes #5085 from nishkamravi2/master_nravi and squashes the following commits:

bad4349 [nishkamravi2] Update Main.java
36a6f87 [Nishkam Ravi] Minor changes and bug fixes
b7f4ae7 [Nishkam Ravi] Merge branch 'master' of https://github.com/apache/spark into master_nravi
4a45d6a [Nishkam Ravi] Merge branch 'master' of https://github.com/apache/spark into master_nravi
458af39 [Nishkam Ravi] Locate the jar using getLocation, obviates the need to pass assembly path as an argument
d9658d6 [Nishkam Ravi] Changes for SPARK-6406
ccdc334 [Nishkam Ravi] Merge branch 'master' of https://github.com/apache/spark into master_nravi
3faa7a4 [Nishkam Ravi] Launcher library changes (SPARK-6406)
345206a [Nishkam Ravi] spark-class merge Merge branch 'master_nravi' of https://github.com/nishkamravi2/spark into master_nravi
ac58975 [Nishkam Ravi] spark-class changes
06bfeb0 [nishkamravi2] Update spark-class
35af990 [Nishkam Ravi] Merge branch 'master' of https://github.com/apache/spark into master_nravi
32c3ab3 [nishkamravi2] Update AbstractCommandBuilder.java
4bd4489 [nishkamravi2] Update AbstractCommandBuilder.java
746f35b [Nishkam Ravi] "hadoop" string in the assembly name should not be mandatory (everywhere else in spark we mandate spark-assembly*hadoop*.jar)
bfe96e0 [Nishkam Ravi] Merge branch 'master' of https://github.com/apache/spark into master_nravi
ee902fa [Nishkam Ravi] Merge branch 'master' of https://github.com/apache/spark into master_nravi
d453197 [nishkamravi2] Update NewHadoopRDD.scala
6f41a1d [nishkamravi2] Update NewHadoopRDD.scala
0ce2c32 [nishkamravi2] Update HadoopRDD.scala
f7e33c2 [Nishkam Ravi] Merge branch 'master_nravi' of https://github.com/nishkamravi2/spark into master_nravi
ba1eb8b [Nishkam Ravi] Try-catch block around the two occurrences of removeShutDownHook. Deletion of semi-redundant occurrences of expensive operation inShutDown.
71d0e17 [Nishkam Ravi] Merge branch 'master' of https://github.com/apache/spark into master_nravi
494d8c0 [nishkamravi2] Update DiskBlockManager.scala
3c5ddba [nishkamravi2] Update DiskBlockManager.scala
f0d12de [Nishkam Ravi] Workaround for IllegalStateException caused by recent changes to BlockManager.stop
79ea8b4 [Nishkam Ravi] Merge branch 'master' of https://github.com/apache/spark into master_nravi
b446edc [Nishkam Ravi] Merge branch 'master' of https://github.com/apache/spark into master_nravi
5c9a4cb [nishkamravi2] Update TaskSetManagerSuite.scala
535295a [nishkamravi2] Update TaskSetManager.scala
3e1b616 [Nishkam Ravi] Modify test for maxResultSize
9f6583e [Nishkam Ravi] Changes to maxResultSize code (improve error message and add condition to check if maxResultSize > 0)
5f8f9ed [Nishkam Ravi] Merge branch 'master' of https://github.com/apache/spark into master_nravi
636a9ff [nishkamravi2] Update YarnAllocator.scala
8f76c8b [Nishkam Ravi] Doc change for yarn memory overhead
35daa64 [Nishkam Ravi] Slight change in the doc for yarn memory overhead
5ac2ec1 [Nishkam Ravi] Remove out
dac1047 [Nishkam Ravi] Additional documentation for yarn memory overhead issue
42c2c3d [Nishkam Ravi] Additional changes for yarn memory overhead issue
362da5e [Nishkam Ravi] Additional changes for yarn memory overhead
c726bd9 [Nishkam Ravi] Merge branch 'master' of https://github.com/apache/spark into master_nravi
f00fa31 [Nishkam Ravi] Improving logging for AM memoryOverhead
1cf2d1e [nishkamravi2] Update YarnAllocator.scala
ebcde10 [Nishkam Ravi] Modify default YARN memory_overhead-- from an additive constant to a multiplier (redone to resolve merge conflicts)
2e69f11 [Nishkam Ravi] Merge branch 'master' of https://github.com/apache/spark into master_nravi
efd688a [Nishkam Ravi] Merge branch 'master' of https://github.com/apache/spark
2b630f9 [nravi] Accept memory input as "30g", "512M" instead of an int value, to be consistent with rest of Spark
3bf8fad [nravi] Merge branch 'master' of https://github.com/apache/spark
5423a03 [nravi] Merge branch 'master' of https://github.com/apache/spark
eb663ca [nravi] Merge branch 'master' of https://github.com/apache/spark
df2aeb1 [nravi] Improved fix for ConcurrentModificationIssue (Spark-1097, Hadoop-10456)
6b840f0 [nravi] Undo the fix for SPARK-1758 (the problem is fixed)
5108700 [nravi] Fix in Spark for the Concurrent thread modification issue (SPARK-1097, HADOOP-10456)
681b36f [nravi] Fix for SPARK-1758: failing test org.apache.spark.JavaAPISuite.wholeTextFiles
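Commit 458af39 above notes that the jar is now located "using getLocation", so the scripts no longer pass the assembly path in as an argument. A minimal sketch of that general JVM technique follows; the class and method names here are illustrative only, not Spark's actual AbstractCommandBuilder code:

    import java.net.URISyntaxException;

    public class JarLocator {
        // Returns the filesystem path of the jar (or classes directory)
        // that contains cls. Note: getCodeSource() can return null for
        // JVM bootstrap classes, so real code should guard against that.
        public static String jarOf(Class<?> cls) throws URISyntaxException {
            return cls.getProtectionDomain()
                      .getCodeSource()
                      .getLocation()
                      .toURI()
                      .getPath();
        }

        public static void main(String[] args) throws URISyntaxException {
            System.out.println("Running from: " + jarOf(JarLocator.class));
        }
    }

Because the launcher classes now live inside the assembly jar itself, asking the JVM where the launcher class was loaded from yields the assembly path for free.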
Diffstat (limited to 'bin')
-rwxr-xr-x  bin/spark-class        61
-rw-r--r--  bin/spark-class2.cmd   33
2 files changed, 47 insertions(+), 47 deletions(-)
diff --git a/bin/spark-class b/bin/spark-class
index e29b234afa..c03946d92e 100755
--- a/bin/spark-class
+++ b/bin/spark-class
@@ -40,35 +40,46 @@ else
fi
fi
-# Look for the launcher. In non-release mode, add the compiled classes directly to the classpath
-# instead of looking for a jar file.
-SPARK_LAUNCHER_CP=
-if [ -f $SPARK_HOME/RELEASE ]; then
- LAUNCHER_DIR="$SPARK_HOME/lib"
- num_jars="$(ls -1 "$LAUNCHER_DIR" | grep "^spark-launcher.*\.jar$" | wc -l)"
- if [ "$num_jars" -eq "0" -a -z "$SPARK_LAUNCHER_CP" ]; then
- echo "Failed to find Spark launcher in $LAUNCHER_DIR." 1>&2
- echo "You need to build Spark before running this program." 1>&2
- exit 1
- fi
+# Find assembly jar
+SPARK_ASSEMBLY_JAR=
+if [ -f "$SPARK_HOME/RELEASE" ]; then
+ ASSEMBLY_DIR="$SPARK_HOME/lib"
+else
+ ASSEMBLY_DIR="$SPARK_HOME/assembly/target/scala-$SPARK_SCALA_VERSION"
+fi
- LAUNCHER_JARS="$(ls -1 "$LAUNCHER_DIR" | grep "^spark-launcher.*\.jar$" || true)"
- if [ "$num_jars" -gt "1" ]; then
- echo "Found multiple Spark launcher jars in $LAUNCHER_DIR:" 1>&2
- echo "$LAUNCHER_JARS" 1>&2
- echo "Please remove all but one jar." 1>&2
- exit 1
- fi
+num_jars="$(ls -1 "$ASSEMBLY_DIR" | grep "^spark-assembly.*hadoop.*\.jar$" | wc -l)"
+if [ "$num_jars" -eq "0" -a -z "$SPARK_ASSEMBLY_JAR" ]; then
+ echo "Failed to find Spark assembly in $ASSEMBLY_DIR." 1>&2
+ echo "You need to build Spark before running this program." 1>&2
+ exit 1
+fi
+ASSEMBLY_JARS="$(ls -1 "$ASSEMBLY_DIR" | grep "^spark-assembly.*hadoop.*\.jar$" || true)"
+if [ "$num_jars" -gt "1" ]; then
+ echo "Found multiple Spark assembly jars in $ASSEMBLY_DIR:" 1>&2
+ echo "$ASSEMBLY_JARS" 1>&2
+ echo "Please remove all but one jar." 1>&2
+ exit 1
+fi
- SPARK_LAUNCHER_CP="${LAUNCHER_DIR}/${LAUNCHER_JARS}"
+SPARK_ASSEMBLY_JAR="${ASSEMBLY_DIR}/${ASSEMBLY_JARS}"
+
+# Verify that versions of java used to build the jars and run Spark are compatible
+if [ -n "$JAVA_HOME" ]; then
+ JAR_CMD="$JAVA_HOME/bin/jar"
else
- LAUNCHER_DIR="$SPARK_HOME/launcher/target/scala-$SPARK_SCALA_VERSION"
- if [ ! -d "$LAUNCHER_DIR/classes" ]; then
- echo "Failed to find Spark launcher classes in $LAUNCHER_DIR." 1>&2
- echo "You need to build Spark before running this program." 1>&2
+ JAR_CMD="jar"
+fi
+
+if [ $(command -v "$JAR_CMD") ] ; then
+ jar_error_check=$("$JAR_CMD" -tf "$SPARK_ASSEMBLY_JAR" nonexistent/class/path 2>&1)
+ if [[ "$jar_error_check" =~ "invalid CEN header" ]]; then
+ echo "Loading Spark jar with '$JAR_CMD' failed. " 1>&2
+ echo "This is likely because Spark was compiled with Java 7 and run " 1>&2
+ echo "with Java 6. (see SPARK-1703). Please use Java 7 to run Spark " 1>&2
+ echo "or build Spark with Java 6." 1>&2
exit 1
fi
- SPARK_LAUNCHER_CP="$LAUNCHER_DIR/classes"
fi
# The launcher library will print arguments separated by a NULL character, to allow arguments with
@@ -77,7 +88,7 @@ fi
CMD=()
while IFS= read -d '' -r ARG; do
CMD+=("$ARG")
-done < <("$RUNNER" -cp "$SPARK_LAUNCHER_CP" org.apache.spark.launcher.Main "$@")
+done < <("$RUNNER" -cp "$SPARK_ASSEMBLY_JAR" org.apache.spark.launcher.Main "$@")
if [ "${CMD[0]}" = "usage" ]; then
"${CMD[@]}"
diff --git a/bin/spark-class2.cmd b/bin/spark-class2.cmd
index 37d22215a0..4ce727bc99 100644
--- a/bin/spark-class2.cmd
+++ b/bin/spark-class2.cmd
@@ -29,31 +29,20 @@ if "x%1"=="x" (
exit /b 1
)
-set LAUNCHER_CP=0
-if exist %SPARK_HOME%\RELEASE goto find_release_launcher
+rem Find assembly jar
+set SPARK_ASSEMBLY_JAR=0
-rem Look for the Spark launcher in both Scala build directories. The launcher doesn't use Scala so
-rem it doesn't really matter which one is picked up. Add the compiled classes directly to the
-rem classpath instead of looking for a jar file, since it's very common for people using sbt to use
-rem the "assembly" target instead of "package".
-set LAUNCHER_CLASSES=%SPARK_HOME%\launcher\target\scala-2.10\classes
-if exist %LAUNCHER_CLASSES% (
- set LAUNCHER_CP=%LAUNCHER_CLASSES%
+if exist "%SPARK_HOME%\RELEASE" (
+ set ASSEMBLY_DIR=%SPARK_HOME%\lib
+) else (
+ set ASSEMBLY_DIR=%SPARK_HOME%\assembly\target\scala-%SPARK_SCALA_VERSION%
)
-set LAUNCHER_CLASSES=%SPARK_HOME%\launcher\target\scala-2.11\classes
-if exist %LAUNCHER_CLASSES% (
- set LAUNCHER_CP=%LAUNCHER_CLASSES%
-)
-goto check_launcher
-:find_release_launcher
-for %%d in (%SPARK_HOME%\lib\spark-launcher*.jar) do (
- set LAUNCHER_CP=%%d
+for %%d in (%ASSEMBLY_DIR%\spark-assembly*hadoop*.jar) do (
+ set SPARK_ASSEMBLY_JAR=%%d
)
-
-:check_launcher
-if "%LAUNCHER_CP%"=="0" (
- echo Failed to find Spark launcher JAR.
+if "%SPARK_ASSEMBLY_JAR%"=="0" (
+ echo Failed to find Spark assembly JAR.
echo You need to build Spark before running this program.
exit /b 1
)
@@ -64,7 +53,7 @@ if not "x%JAVA_HOME%"=="x" set RUNNER=%JAVA_HOME%\bin\java
rem The launcher library prints the command to be executed in a single line suitable for being
rem executed by the batch interpreter. So read all the output of the launcher into a variable.
-for /f "tokens=*" %%i in ('cmd /C ""%RUNNER%" -cp %LAUNCHER_CP% org.apache.spark.launcher.Main %*"') do (
+for /f "tokens=*" %%i in ('cmd /C ""%RUNNER%" -cp %SPARK_ASSEMBLY_JAR% org.apache.spark.launcher.Main %*"') do (
set SPARK_CMD=%%i
)
%SPARK_CMD%
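On the Windows side, "for /f" reads whole lines and cannot split on NUL bytes, so the batch script captures the launcher's output as a single line and executes it directly. A toy illustration of that single-line variant (again an assumption-laden sketch, not Spark's actual launcher, which must also handle quoting of arguments containing spaces):

    public class SingleLineEcho {
        public static void main(String[] args) {
            // Join the command into one line for the batch interpreter;
            // naive space-joining breaks spaced arguments, so real code
            // would quote each token.
            System.out.println(String.join(" ", args));
        }
    }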