From dfbdc9ddb773e2b1149e6a6c661f14b631b692d0 Mon Sep 17 00:00:00 2001 From: shane-huang Date: Mon, 23 Sep 2013 11:28:58 +0800 Subject: added spark-class and spark-executor to sbin Signed-off-by: shane-huang --- python/pyspark/java_gateway.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'python') diff --git a/python/pyspark/java_gateway.py b/python/pyspark/java_gateway.py index e615c1e9b6..f7834ef803 100644 --- a/python/pyspark/java_gateway.py +++ b/python/pyspark/java_gateway.py @@ -31,7 +31,7 @@ def launch_gateway(): # Launch the Py4j gateway using Spark's run command so that we pick up the # proper classpath and SPARK_MEM settings from spark-env.sh on_windows = platform.system() == "Windows" - script = "spark-class.cmd" if on_windows else "spark-class" + script = "/sbin/spark-class.cmd" if on_windows else "/sbin/spark-class" command = [os.path.join(SPARK_HOME, script), "py4j.GatewayServer", "--die-on-broken-pipe", "0"] if not on_windows: -- cgit v1.2.3 From 1d53792a0a48695824c29274be84b74d8d6a2e6a Mon Sep 17 00:00:00 2001 From: shane-huang Date: Mon, 23 Sep 2013 16:13:46 +0800 Subject: add scripts in bin Signed-off-by: shane-huang --- bin/pyspark | 2 +- bin/pyspark2.cmd | 2 +- bin/run-example | 4 +- bin/run-example2.cmd | 4 +- bin/spark | 117 ++++++++++++++++++++++++++++++++++++++++++++++++++ bin/spark-shell | 4 +- bin/spark-shell.cmd | 5 ++- bin/spark.cmd | 27 ++++++++++++ python/run-tests | 2 +- sbin/spark-class | 4 +- sbin/spark-class2.cmd | 4 +- sbin/spark-executor | 5 ++- 12 files changed, 163 insertions(+), 17 deletions(-) create mode 100755 bin/spark create mode 100644 bin/spark.cmd (limited to 'python') diff --git a/bin/pyspark b/bin/pyspark index 4941a36d0d..45a2308cee 100755 --- a/bin/pyspark +++ b/bin/pyspark @@ -18,7 +18,7 @@ # # Figure out where the Scala framework is installed -FWDIR="$(cd `dirname $0`; pwd)" +FWDIR="$(cd `dirname $0`/..; pwd)" # Export this as SPARK_HOME export SPARK_HOME="$FWDIR" diff --git a/bin/pyspark2.cmd b/bin/pyspark2.cmd index f58e349643..bb8e624c2f 100644 --- a/bin/pyspark2.cmd +++ b/bin/pyspark2.cmd @@ -20,7 +20,7 @@ rem set SCALA_VERSION=2.9.3 rem Figure out where the Spark framework is installed -set FWDIR=%~dp0 +set FWDIR=%~dp0..\ rem Export this as SPARK_HOME set SPARK_HOME=%FWDIR% diff --git a/bin/run-example b/bin/run-example index 08ec717ca5..f29bb2c015 100755 --- a/bin/run-example +++ b/bin/run-example @@ -20,7 +20,7 @@ SCALA_VERSION=2.9.3 # Figure out where the Scala framework is installed -FWDIR="$(cd `dirname $0`; pwd)" +FWDIR="$(cd `dirname $0`/..; pwd)" # Export this as SPARK_HOME export SPARK_HOME="$FWDIR" @@ -56,7 +56,7 @@ fi # Since the examples JAR ideally shouldn't include spark-core (that dependency should be # "provided"), also add our standard Spark classpath, built using compute-classpath.sh. -CLASSPATH=`$FWDIR/bin/compute-classpath.sh` +CLASSPATH=`$FWDIR/sbin/compute-classpath.sh` CLASSPATH="$SPARK_EXAMPLES_JAR:$CLASSPATH" # Find java binary diff --git a/bin/run-example2.cmd b/bin/run-example2.cmd index dbb371ecfc..e250a92965 100644 --- a/bin/run-example2.cmd +++ b/bin/run-example2.cmd @@ -20,7 +20,7 @@ rem set SCALA_VERSION=2.9.3 rem Figure out where the Spark framework is installed -set FWDIR=%~dp0 +set FWDIR=%~dp0..\ rem Export this as SPARK_HOME set SPARK_HOME=%FWDIR% @@ -49,7 +49,7 @@ if "x%SPARK_EXAMPLES_JAR%"=="x" ( rem Compute Spark classpath using external script set DONT_PRINT_CLASSPATH=1 -call "%FWDIR%bin\compute-classpath.cmd" +call "%FWDIR%sbin\compute-classpath.cmd" set DONT_PRINT_CLASSPATH=0 set CLASSPATH=%SPARK_EXAMPLES_JAR%;%CLASSPATH% diff --git a/bin/spark b/bin/spark new file mode 100755 index 0000000000..7f25fe1050 --- /dev/null +++ b/bin/spark @@ -0,0 +1,117 @@ +#!/usr/bin/env bash + +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +SCALA_VERSION=2.9.3 + +# Figure out where the Scala framework is installed +FWDIR="$(cd `dirname $0`/..; pwd)" + +# Export this as SPARK_HOME +export SPARK_HOME="$FWDIR" + +# Load environment variables from conf/spark-env.sh, if it exists +if [ -e $FWDIR/conf/spark-env.sh ] ; then + . $FWDIR/conf/spark-env.sh +fi + +if [ -z "$1" ]; then + echo "Usage: spark-class []" >&2 + exit 1 +fi + +# If this is a standalone cluster daemon, reset SPARK_JAVA_OPTS and SPARK_MEM to reasonable +# values for that; it doesn't need a lot +if [ "$1" = "org.apache.spark.deploy.master.Master" -o "$1" = "org.apache.spark.deploy.worker.Worker" ]; then + SPARK_MEM=${SPARK_DAEMON_MEMORY:-512m} + SPARK_DAEMON_JAVA_OPTS="$SPARK_DAEMON_JAVA_OPTS -Dspark.akka.logLifecycleEvents=true" + # Do not overwrite SPARK_JAVA_OPTS environment variable in this script + OUR_JAVA_OPTS="$SPARK_DAEMON_JAVA_OPTS" # Empty by default +else + OUR_JAVA_OPTS="$SPARK_JAVA_OPTS" +fi + + +# Add java opts for master, worker, executor. The opts maybe null +case "$1" in + 'org.apache.spark.deploy.master.Master') + OUR_JAVA_OPTS="$OUR_JAVA_OPTS $SPARK_MASTER_OPTS" + ;; + 'org.apache.spark.deploy.worker.Worker') + OUR_JAVA_OPTS="$OUR_JAVA_OPTS $SPARK_WORKER_OPTS" + ;; + 'org.apache.spark.executor.StandaloneExecutorBackend') + OUR_JAVA_OPTS="$OUR_JAVA_OPTS $SPARK_EXECUTOR_OPTS" + ;; + 'org.apache.spark.executor.MesosExecutorBackend') + OUR_JAVA_OPTS="$OUR_JAVA_OPTS $SPARK_EXECUTOR_OPTS" + ;; + 'org.apache.spark.repl.Main') + OUR_JAVA_OPTS="$OUR_JAVA_OPTS $SPARK_REPL_OPTS" + ;; +esac + +# Find the java binary +if [ -n "${JAVA_HOME}" ]; then + RUNNER="${JAVA_HOME}/bin/java" +else + if [ `command -v java` ]; then + RUNNER="java" + else + echo "JAVA_HOME is not set" >&2 + exit 1 + fi +fi + +# Set SPARK_MEM if it isn't already set since we also use it for this process +SPARK_MEM=${SPARK_MEM:-512m} +export SPARK_MEM + +# Set JAVA_OPTS to be able to load native libraries and to set heap size +JAVA_OPTS="$OUR_JAVA_OPTS" +JAVA_OPTS="$JAVA_OPTS -Djava.library.path=$SPARK_LIBRARY_PATH" +JAVA_OPTS="$JAVA_OPTS -Xms$SPARK_MEM -Xmx$SPARK_MEM" +# Load extra JAVA_OPTS from conf/java-opts, if it exists +if [ -e $FWDIR/conf/java-opts ] ; then + JAVA_OPTS="$JAVA_OPTS `cat $FWDIR/conf/java-opts`" +fi +export JAVA_OPTS +# Attention: when changing the way the JAVA_OPTS are assembled, the change must be reflected in ExecutorRunner.scala! + +if [ ! -f "$FWDIR/RELEASE" ]; then + # Exit if the user hasn't compiled Spark + ls "$FWDIR"/assembly/target/scala-$SCALA_VERSION/spark-assembly*hadoop*.jar >& /dev/null + if [[ $? != 0 ]]; then + echo "Failed to find Spark assembly in $FWDIR/assembly/target" >&2 + echo "You need to build Spark with sbt/sbt assembly before running this program" >&2 + exit 1 + fi +fi + +# Compute classpath using external script +CLASSPATH=`$FWDIR/sbin/compute-classpath.sh` +export CLASSPATH + +if [ "$SPARK_PRINT_LAUNCH_COMMAND" == "1" ]; then + echo -n "Spark Command: " + echo "$RUNNER" -cp "$CLASSPATH" $JAVA_OPTS "$@" + echo "========================================" + echo +fi + +exec "$RUNNER" -cp "$CLASSPATH" $JAVA_OPTS "$@" diff --git a/bin/spark-shell b/bin/spark-shell index 9608bd3f30..6717fe7049 100755 --- a/bin/spark-shell +++ b/bin/spark-shell @@ -28,7 +28,7 @@ # Enter posix mode for bash set -o posix -FWDIR="`dirname $0`" +FWDIR="$(cd `dirname $0`/..; pwd)" for o in "$@"; do if [ "$1" = "-c" -o "$1" = "--cores" ]; then @@ -79,7 +79,7 @@ if [[ ! $? ]]; then saved_stty="" fi -$FWDIR/spark-class $OPTIONS org.apache.spark.repl.Main "$@" +$FWDIR/sbin/spark-class $OPTIONS org.apache.spark.repl.Main "$@" # record the exit status lest it be overwritten: # then reenable echo and propagate the code. diff --git a/bin/spark-shell.cmd b/bin/spark-shell.cmd index 3e52bf835e..23973e3e3d 100644 --- a/bin/spark-shell.cmd +++ b/bin/spark-shell.cmd @@ -17,6 +17,7 @@ rem See the License for the specific language governing permissions and rem limitations under the License. rem -set FWDIR=%~dp0 +rem Find the path of sbin +set SBIN=%~dp0..\sbin\ -cmd /V /E /C %FWDIR%spark-class2.cmd org.apache.spark.repl.Main %* +cmd /V /E /C %SBIN%spark-class2.cmd org.apache.spark.repl.Main %* diff --git a/bin/spark.cmd b/bin/spark.cmd new file mode 100644 index 0000000000..a162339215 --- /dev/null +++ b/bin/spark.cmd @@ -0,0 +1,27 @@ +@echo off + +rem +rem Licensed to the Apache Software Foundation (ASF) under one or more +rem contributor license agreements. See the NOTICE file distributed with +rem this work for additional information regarding copyright ownership. +rem The ASF licenses this file to You under the Apache License, Version 2.0 +rem (the "License"); you may not use this file except in compliance with +rem the License. You may obtain a copy of the License at +rem +rem http://www.apache.org/licenses/LICENSE-2.0 +rem +rem Unless required by applicable law or agreed to in writing, software +rem distributed under the License is distributed on an "AS IS" BASIS, +rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +rem See the License for the specific language governing permissions and +rem limitations under the License. +rem + +rem This is the entry point for running a Spark class. To avoid polluting +rem the environment, it just launches a new cmd to do the real work. + + +rem Find the path of sbin +set SBIN=%~dp0..\sbin\ + +cmd /V /E /C %SBIN%spark-class2.cmd %* diff --git a/python/run-tests b/python/run-tests index cbc554ea9d..8a08ae3df9 100755 --- a/python/run-tests +++ b/python/run-tests @@ -29,7 +29,7 @@ FAILED=0 rm -f unit-tests.log function run_test() { - $FWDIR/pyspark $1 2>&1 | tee -a unit-tests.log + $FWDIR/bin/pyspark $1 2>&1 | tee -a unit-tests.log FAILED=$((PIPESTATUS[0]||$FAILED)) } diff --git a/sbin/spark-class b/sbin/spark-class index e111ef6da7..7f25fe1050 100755 --- a/sbin/spark-class +++ b/sbin/spark-class @@ -20,7 +20,7 @@ SCALA_VERSION=2.9.3 # Figure out where the Scala framework is installed -FWDIR="$(cd `dirname $0`; pwd)" +FWDIR="$(cd `dirname $0`/..; pwd)" # Export this as SPARK_HOME export SPARK_HOME="$FWDIR" @@ -104,7 +104,7 @@ if [ ! -f "$FWDIR/RELEASE" ]; then fi # Compute classpath using external script -CLASSPATH=`$FWDIR/bin/compute-classpath.sh` +CLASSPATH=`$FWDIR/sbin/compute-classpath.sh` export CLASSPATH if [ "$SPARK_PRINT_LAUNCH_COMMAND" == "1" ]; then diff --git a/sbin/spark-class2.cmd b/sbin/spark-class2.cmd index d4d853e8ad..5e00bd39ea 100644 --- a/sbin/spark-class2.cmd +++ b/sbin/spark-class2.cmd @@ -20,7 +20,7 @@ rem set SCALA_VERSION=2.9.3 rem Figure out where the Spark framework is installed -set FWDIR=%~dp0 +set FWDIR=%~dp0..\ rem Export this as SPARK_HOME set SPARK_HOME=%FWDIR% @@ -67,7 +67,7 @@ if "%FOUND_JAR%"=="0" ( rem Compute classpath using external script set DONT_PRINT_CLASSPATH=1 -call "%FWDIR%bin\compute-classpath.cmd" +call "%FWDIR%sbin\compute-classpath.cmd" set DONT_PRINT_CLASSPATH=0 rem Figure out where java is. diff --git a/sbin/spark-executor b/sbin/spark-executor index 2c07c54843..214e00f6f8 100755 --- a/sbin/spark-executor +++ b/sbin/spark-executor @@ -17,6 +17,7 @@ # limitations under the License. # -FWDIR="`dirname $0`" +FWDIR="$(cd `dirname $0`/..; pwd)" + echo "Running spark-executor with framework dir = $FWDIR" -exec $FWDIR/spark-class org.apache.spark.executor.MesosExecutorBackend +exec $FWDIR/sbin/spark-class org.apache.spark.executor.MesosExecutorBackend -- cgit v1.2.3 From e8b1ee04fcb4cd7b666d3148d6d5ff148551ce72 Mon Sep 17 00:00:00 2001 From: shane-huang Date: Thu, 26 Sep 2013 17:08:47 +0800 Subject: fix paths and change spark to use APP_MEM as application driver memory instead of SPARK_MEM, user should add application jars to SPARK_CLASSPATH Signed-off-by: shane-huang --- bin/spark | 41 +++++----------------- .../mesos/CoarseMesosSchedulerBackend.scala | 2 +- python/pyspark/java_gateway.py | 2 +- 3 files changed, 10 insertions(+), 35 deletions(-) (limited to 'python') diff --git a/bin/spark b/bin/spark index 7f25fe1050..aa005a51f5 100755 --- a/bin/spark +++ b/bin/spark @@ -31,40 +31,11 @@ if [ -e $FWDIR/conf/spark-env.sh ] ; then fi if [ -z "$1" ]; then - echo "Usage: spark-class []" >&2 + echo "Usage: spark []" >&2 + echo "Usage: export SPARK_CLASSPATH before running the command" >&2 exit 1 fi -# If this is a standalone cluster daemon, reset SPARK_JAVA_OPTS and SPARK_MEM to reasonable -# values for that; it doesn't need a lot -if [ "$1" = "org.apache.spark.deploy.master.Master" -o "$1" = "org.apache.spark.deploy.worker.Worker" ]; then - SPARK_MEM=${SPARK_DAEMON_MEMORY:-512m} - SPARK_DAEMON_JAVA_OPTS="$SPARK_DAEMON_JAVA_OPTS -Dspark.akka.logLifecycleEvents=true" - # Do not overwrite SPARK_JAVA_OPTS environment variable in this script - OUR_JAVA_OPTS="$SPARK_DAEMON_JAVA_OPTS" # Empty by default -else - OUR_JAVA_OPTS="$SPARK_JAVA_OPTS" -fi - - -# Add java opts for master, worker, executor. The opts maybe null -case "$1" in - 'org.apache.spark.deploy.master.Master') - OUR_JAVA_OPTS="$OUR_JAVA_OPTS $SPARK_MASTER_OPTS" - ;; - 'org.apache.spark.deploy.worker.Worker') - OUR_JAVA_OPTS="$OUR_JAVA_OPTS $SPARK_WORKER_OPTS" - ;; - 'org.apache.spark.executor.StandaloneExecutorBackend') - OUR_JAVA_OPTS="$OUR_JAVA_OPTS $SPARK_EXECUTOR_OPTS" - ;; - 'org.apache.spark.executor.MesosExecutorBackend') - OUR_JAVA_OPTS="$OUR_JAVA_OPTS $SPARK_EXECUTOR_OPTS" - ;; - 'org.apache.spark.repl.Main') - OUR_JAVA_OPTS="$OUR_JAVA_OPTS $SPARK_REPL_OPTS" - ;; -esac # Find the java binary if [ -n "${JAVA_HOME}" ]; then @@ -78,14 +49,18 @@ else fi fi -# Set SPARK_MEM if it isn't already set since we also use it for this process +# Set SPARK_MEM if it isn't already set SPARK_MEM=${SPARK_MEM:-512m} export SPARK_MEM +# Set APP_MEM if it isn't already set, we use this for this process as the app driver process may need +# as much memory as specified in SPARK_MEM +APP_MEM=${APP_MEM:-512m} + # Set JAVA_OPTS to be able to load native libraries and to set heap size JAVA_OPTS="$OUR_JAVA_OPTS" JAVA_OPTS="$JAVA_OPTS -Djava.library.path=$SPARK_LIBRARY_PATH" -JAVA_OPTS="$JAVA_OPTS -Xms$SPARK_MEM -Xmx$SPARK_MEM" +JAVA_OPTS="$JAVA_OPTS -Xms$APP_MEM -Xmx$APP_MEM" # Load extra JAVA_OPTS from conf/java-opts, if it exists if [ -e $FWDIR/conf/java-opts ] ; then JAVA_OPTS="$JAVA_OPTS `cat $FWDIR/conf/java-opts`" diff --git a/core/src/main/scala/org/apache/spark/scheduler/mesos/CoarseMesosSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/mesos/CoarseMesosSchedulerBackend.scala index 9f93491e5a..544b20550e 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/mesos/CoarseMesosSchedulerBackend.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/mesos/CoarseMesosSchedulerBackend.scala @@ -125,7 +125,7 @@ private[spark] class CoarseMesosSchedulerBackend( StandaloneSchedulerBackend.ACTOR_NAME) val uri = System.getProperty("spark.executor.uri") if (uri == null) { - val runScript = new File(sparkHome, "/sbin/spark-class").getCanonicalPath + val runScript = new File(sparkHome, "./sbin/spark-class").getCanonicalPath command.setValue( "\"%s\" org.apache.spark.executor.StandaloneExecutorBackend %s %s %s %d".format( runScript, driverUrl, offer.getSlaveId.getValue, offer.getHostname, numCores)) diff --git a/python/pyspark/java_gateway.py b/python/pyspark/java_gateway.py index f7834ef803..b872ae61d5 100644 --- a/python/pyspark/java_gateway.py +++ b/python/pyspark/java_gateway.py @@ -31,7 +31,7 @@ def launch_gateway(): # Launch the Py4j gateway using Spark's run command so that we pick up the # proper classpath and SPARK_MEM settings from spark-env.sh on_windows = platform.system() == "Windows" - script = "/sbin/spark-class.cmd" if on_windows else "/sbin/spark-class" + script = "./sbin/spark-class.cmd" if on_windows else "./sbin/spark-class" command = [os.path.join(SPARK_HOME, script), "py4j.GatewayServer", "--die-on-broken-pipe", "0"] if not on_windows: -- cgit v1.2.3