From dfbdc9ddb773e2b1149e6a6c661f14b631b692d0 Mon Sep 17 00:00:00 2001
From: shane-huang <shengsheng.huang@intel.com>
Date: Mon, 23 Sep 2013 11:28:58 +0800
Subject: added spark-class and spark-executor to sbin

Signed-off-by: shane-huang <shengsheng.huang@intel.com>
---
 python/pyspark/java_gateway.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'python')

diff --git a/python/pyspark/java_gateway.py b/python/pyspark/java_gateway.py
index e615c1e9b6..f7834ef803 100644
--- a/python/pyspark/java_gateway.py
+++ b/python/pyspark/java_gateway.py
@@ -31,7 +31,7 @@ def launch_gateway():
     # Launch the Py4j gateway using Spark's run command so that we pick up the
     # proper classpath and SPARK_MEM settings from spark-env.sh
     on_windows = platform.system() == "Windows"
-    script = "spark-class.cmd" if on_windows else "spark-class"
+    script = "/sbin/spark-class.cmd" if on_windows else "/sbin/spark-class"
     command = [os.path.join(SPARK_HOME, script), "py4j.GatewayServer",
                "--die-on-broken-pipe", "0"]
     if not on_windows:
-- 
cgit v1.2.3


From 1d53792a0a48695824c29274be84b74d8d6a2e6a Mon Sep 17 00:00:00 2001
From: shane-huang <shengsheng.huang@intel.com>
Date: Mon, 23 Sep 2013 16:13:46 +0800
Subject: add scripts in bin

Signed-off-by: shane-huang <shengsheng.huang@intel.com>
---
 bin/pyspark           |   2 +-
 bin/pyspark2.cmd      |   2 +-
 bin/run-example       |   4 +-
 bin/run-example2.cmd  |   4 +-
 bin/spark             | 117 ++++++++++++++++++++++++++++++++++++++++++++++++++
 bin/spark-shell       |   4 +-
 bin/spark-shell.cmd   |   5 ++-
 bin/spark.cmd         |  27 ++++++++++++
 python/run-tests      |   2 +-
 sbin/spark-class      |   4 +-
 sbin/spark-class2.cmd |   4 +-
 sbin/spark-executor   |   5 ++-
 12 files changed, 163 insertions(+), 17 deletions(-)
 create mode 100755 bin/spark
 create mode 100644 bin/spark.cmd

(limited to 'python')

diff --git a/bin/pyspark b/bin/pyspark
index 4941a36d0d..45a2308cee 100755
--- a/bin/pyspark
+++ b/bin/pyspark
@@ -18,7 +18,7 @@
 #
 
 # Figure out where the Scala framework is installed
-FWDIR="$(cd `dirname $0`; pwd)"
+FWDIR="$(cd `dirname $0`/..; pwd)"
 
 # Export this as SPARK_HOME
 export SPARK_HOME="$FWDIR"
diff --git a/bin/pyspark2.cmd b/bin/pyspark2.cmd
index f58e349643..bb8e624c2f 100644
--- a/bin/pyspark2.cmd
+++ b/bin/pyspark2.cmd
@@ -20,7 +20,7 @@ rem
 set SCALA_VERSION=2.9.3
 
 rem Figure out where the Spark framework is installed
-set FWDIR=%~dp0
+set FWDIR=%~dp0..\
 
 rem Export this as SPARK_HOME
 set SPARK_HOME=%FWDIR%
diff --git a/bin/run-example b/bin/run-example
index 08ec717ca5..f29bb2c015 100755
--- a/bin/run-example
+++ b/bin/run-example
@@ -20,7 +20,7 @@
 SCALA_VERSION=2.9.3
 
 # Figure out where the Scala framework is installed
-FWDIR="$(cd `dirname $0`; pwd)"
+FWDIR="$(cd `dirname $0`/..; pwd)"
 
 # Export this as SPARK_HOME
 export SPARK_HOME="$FWDIR"
@@ -56,7 +56,7 @@ fi
 
 # Since the examples JAR ideally shouldn't include spark-core (that dependency should be
 # "provided"), also add our standard Spark classpath, built using compute-classpath.sh.
-CLASSPATH=`$FWDIR/bin/compute-classpath.sh`
+CLASSPATH=`$FWDIR/sbin/compute-classpath.sh`
 CLASSPATH="$SPARK_EXAMPLES_JAR:$CLASSPATH"
 
 # Find java binary
diff --git a/bin/run-example2.cmd b/bin/run-example2.cmd
index dbb371ecfc..e250a92965 100644
--- a/bin/run-example2.cmd
+++ b/bin/run-example2.cmd
@@ -20,7 +20,7 @@ rem
 set SCALA_VERSION=2.9.3
 
 rem Figure out where the Spark framework is installed
-set FWDIR=%~dp0
+set FWDIR=%~dp0..\
 
 rem Export this as SPARK_HOME
 set SPARK_HOME=%FWDIR%
@@ -49,7 +49,7 @@ if "x%SPARK_EXAMPLES_JAR%"=="x" (
 
 rem Compute Spark classpath using external script
 set DONT_PRINT_CLASSPATH=1
-call "%FWDIR%bin\compute-classpath.cmd"
+call "%FWDIR%sbin\compute-classpath.cmd"
 set DONT_PRINT_CLASSPATH=0
 set CLASSPATH=%SPARK_EXAMPLES_JAR%;%CLASSPATH%
 
diff --git a/bin/spark b/bin/spark
new file mode 100755
index 0000000000..7f25fe1050
--- /dev/null
+++ b/bin/spark
@@ -0,0 +1,117 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+SCALA_VERSION=2.9.3
+
+# Figure out where the Scala framework is installed
+FWDIR="$(cd `dirname $0`/..; pwd)"
+
+# Export this as SPARK_HOME
+export SPARK_HOME="$FWDIR"
+
+# Load environment variables from conf/spark-env.sh, if it exists
+if [ -e $FWDIR/conf/spark-env.sh ] ; then
+  . $FWDIR/conf/spark-env.sh
+fi
+
+if [ -z "$1" ]; then
+  echo "Usage: spark-class <class> [<args>]" >&2
+  exit 1
+fi
+
+# If this is a standalone cluster daemon, reset SPARK_JAVA_OPTS and SPARK_MEM to reasonable
+# values for that; it doesn't need a lot
+if [ "$1" = "org.apache.spark.deploy.master.Master" -o "$1" = "org.apache.spark.deploy.worker.Worker" ]; then
+  SPARK_MEM=${SPARK_DAEMON_MEMORY:-512m}
+  SPARK_DAEMON_JAVA_OPTS="$SPARK_DAEMON_JAVA_OPTS -Dspark.akka.logLifecycleEvents=true"
+  # Do not overwrite SPARK_JAVA_OPTS environment variable in this script
+  OUR_JAVA_OPTS="$SPARK_DAEMON_JAVA_OPTS"   # Empty by default
+else
+  OUR_JAVA_OPTS="$SPARK_JAVA_OPTS"
+fi
+
+
+# Add java opts for master, worker, executor. The opts maybe null
+case "$1" in
+  'org.apache.spark.deploy.master.Master')
+    OUR_JAVA_OPTS="$OUR_JAVA_OPTS $SPARK_MASTER_OPTS"
+    ;;
+  'org.apache.spark.deploy.worker.Worker')
+    OUR_JAVA_OPTS="$OUR_JAVA_OPTS $SPARK_WORKER_OPTS"
+    ;;
+  'org.apache.spark.executor.StandaloneExecutorBackend')
+    OUR_JAVA_OPTS="$OUR_JAVA_OPTS $SPARK_EXECUTOR_OPTS"
+    ;;
+  'org.apache.spark.executor.MesosExecutorBackend')
+    OUR_JAVA_OPTS="$OUR_JAVA_OPTS $SPARK_EXECUTOR_OPTS"
+    ;;
+  'org.apache.spark.repl.Main')
+    OUR_JAVA_OPTS="$OUR_JAVA_OPTS $SPARK_REPL_OPTS"
+    ;;
+esac
+
+# Find the java binary
+if [ -n "${JAVA_HOME}" ]; then
+  RUNNER="${JAVA_HOME}/bin/java"
+else
+  if [ `command -v java` ]; then
+    RUNNER="java"
+  else
+    echo "JAVA_HOME is not set" >&2
+    exit 1
+  fi
+fi
+
+# Set SPARK_MEM if it isn't already set since we also use it for this process
+SPARK_MEM=${SPARK_MEM:-512m}
+export SPARK_MEM
+
+# Set JAVA_OPTS to be able to load native libraries and to set heap size
+JAVA_OPTS="$OUR_JAVA_OPTS"
+JAVA_OPTS="$JAVA_OPTS -Djava.library.path=$SPARK_LIBRARY_PATH"
+JAVA_OPTS="$JAVA_OPTS -Xms$SPARK_MEM -Xmx$SPARK_MEM"
+# Load extra JAVA_OPTS from conf/java-opts, if it exists
+if [ -e $FWDIR/conf/java-opts ] ; then
+  JAVA_OPTS="$JAVA_OPTS `cat $FWDIR/conf/java-opts`"
+fi
+export JAVA_OPTS
+# Attention: when changing the way the JAVA_OPTS are assembled, the change must be reflected in ExecutorRunner.scala!
+
+if [ ! -f "$FWDIR/RELEASE" ]; then
+  # Exit if the user hasn't compiled Spark
+  ls "$FWDIR"/assembly/target/scala-$SCALA_VERSION/spark-assembly*hadoop*.jar >& /dev/null
+  if [[ $? != 0 ]]; then
+    echo "Failed to find Spark assembly in $FWDIR/assembly/target" >&2
+    echo "You need to build Spark with sbt/sbt assembly before running this program" >&2
+    exit 1
+  fi
+fi
+
+# Compute classpath using external script
+CLASSPATH=`$FWDIR/sbin/compute-classpath.sh`
+export CLASSPATH
+
+if [ "$SPARK_PRINT_LAUNCH_COMMAND" == "1" ]; then
+  echo -n "Spark Command: "
+  echo "$RUNNER" -cp "$CLASSPATH" $JAVA_OPTS "$@"
+  echo "========================================"
+  echo
+fi
+
+exec "$RUNNER" -cp "$CLASSPATH" $JAVA_OPTS "$@"
diff --git a/bin/spark-shell b/bin/spark-shell
index 9608bd3f30..6717fe7049 100755
--- a/bin/spark-shell
+++ b/bin/spark-shell
@@ -28,7 +28,7 @@
 # Enter posix mode for bash
 set -o posix
 
-FWDIR="`dirname $0`"
+FWDIR="$(cd `dirname $0`/..; pwd)"
 
 for o in "$@"; do
   if [ "$1" = "-c" -o "$1" = "--cores" ]; then
@@ -79,7 +79,7 @@ if [[ ! $? ]]; then
   saved_stty=""
 fi
 
-$FWDIR/spark-class $OPTIONS org.apache.spark.repl.Main "$@"
+$FWDIR/sbin/spark-class $OPTIONS org.apache.spark.repl.Main "$@"
 
 # record the exit status lest it be overwritten:
 # then reenable echo and propagate the code.
diff --git a/bin/spark-shell.cmd b/bin/spark-shell.cmd
index 3e52bf835e..23973e3e3d 100644
--- a/bin/spark-shell.cmd
+++ b/bin/spark-shell.cmd
@@ -17,6 +17,7 @@ rem See the License for the specific language governing permissions and
 rem limitations under the License.
 rem
 
-set FWDIR=%~dp0
+rem Find the path of sbin
+set SBIN=%~dp0..\sbin\
 
-cmd /V /E /C %FWDIR%spark-class2.cmd org.apache.spark.repl.Main %*
+cmd /V /E /C %SBIN%spark-class2.cmd org.apache.spark.repl.Main %*
diff --git a/bin/spark.cmd b/bin/spark.cmd
new file mode 100644
index 0000000000..a162339215
--- /dev/null
+++ b/bin/spark.cmd
@@ -0,0 +1,27 @@
+@echo off
+
+rem
+rem Licensed to the Apache Software Foundation (ASF) under one or more
+rem contributor license agreements.  See the NOTICE file distributed with
+rem this work for additional information regarding copyright ownership.
+rem The ASF licenses this file to You under the Apache License, Version 2.0
+rem (the "License"); you may not use this file except in compliance with
+rem the License.  You may obtain a copy of the License at
+rem
+rem    http://www.apache.org/licenses/LICENSE-2.0
+rem
+rem Unless required by applicable law or agreed to in writing, software
+rem distributed under the License is distributed on an "AS IS" BASIS,
+rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+rem See the License for the specific language governing permissions and
+rem limitations under the License.
+rem
+
+rem This is the entry point for running a Spark class. To avoid polluting
+rem the environment, it just launches a new cmd to do the real work.
+
+
+rem Find the path of sbin
+set SBIN=%~dp0..\sbin\
+
+cmd /V /E /C %SBIN%spark-class2.cmd %*
diff --git a/python/run-tests b/python/run-tests
index cbc554ea9d..8a08ae3df9 100755
--- a/python/run-tests
+++ b/python/run-tests
@@ -29,7 +29,7 @@ FAILED=0
 rm -f unit-tests.log
 
 function run_test() {
-    $FWDIR/pyspark $1 2>&1 | tee -a unit-tests.log
+    $FWDIR/bin/pyspark $1 2>&1 | tee -a unit-tests.log
     FAILED=$((PIPESTATUS[0]||$FAILED))
 }
 
diff --git a/sbin/spark-class b/sbin/spark-class
index e111ef6da7..7f25fe1050 100755
--- a/sbin/spark-class
+++ b/sbin/spark-class
@@ -20,7 +20,7 @@
 SCALA_VERSION=2.9.3
 
 # Figure out where the Scala framework is installed
-FWDIR="$(cd `dirname $0`; pwd)"
+FWDIR="$(cd `dirname $0`/..; pwd)"
 
 # Export this as SPARK_HOME
 export SPARK_HOME="$FWDIR"
@@ -104,7 +104,7 @@ if [ ! -f "$FWDIR/RELEASE" ]; then
 fi
 
 # Compute classpath using external script
-CLASSPATH=`$FWDIR/bin/compute-classpath.sh`
+CLASSPATH=`$FWDIR/sbin/compute-classpath.sh`
 export CLASSPATH
 
 if [ "$SPARK_PRINT_LAUNCH_COMMAND" == "1" ]; then
diff --git a/sbin/spark-class2.cmd b/sbin/spark-class2.cmd
index d4d853e8ad..5e00bd39ea 100644
--- a/sbin/spark-class2.cmd
+++ b/sbin/spark-class2.cmd
@@ -20,7 +20,7 @@ rem
 set SCALA_VERSION=2.9.3
 
 rem Figure out where the Spark framework is installed
-set FWDIR=%~dp0
+set FWDIR=%~dp0..\
 
 rem Export this as SPARK_HOME
 set SPARK_HOME=%FWDIR%
@@ -67,7 +67,7 @@ if "%FOUND_JAR%"=="0" (
 
 rem Compute classpath using external script
 set DONT_PRINT_CLASSPATH=1
-call "%FWDIR%bin\compute-classpath.cmd"
+call "%FWDIR%sbin\compute-classpath.cmd"
 set DONT_PRINT_CLASSPATH=0
 
 rem Figure out where java is.
diff --git a/sbin/spark-executor b/sbin/spark-executor
index 2c07c54843..214e00f6f8 100755
--- a/sbin/spark-executor
+++ b/sbin/spark-executor
@@ -17,6 +17,7 @@
 # limitations under the License.
 #
 
-FWDIR="`dirname $0`"
+FWDIR="$(cd `dirname $0`/..; pwd)"
+
 echo "Running spark-executor with framework dir = $FWDIR"
-exec $FWDIR/spark-class org.apache.spark.executor.MesosExecutorBackend
+exec $FWDIR/sbin/spark-class org.apache.spark.executor.MesosExecutorBackend
-- 
cgit v1.2.3


From e8b1ee04fcb4cd7b666d3148d6d5ff148551ce72 Mon Sep 17 00:00:00 2001
From: shane-huang <shengsheng.huang@intel.com>
Date: Thu, 26 Sep 2013 17:08:47 +0800
Subject: fix paths and change spark to use APP_MEM as application driver
 memory instead of SPARK_MEM, user should add application jars to
 SPARK_CLASSPATH

Signed-off-by: shane-huang <shengsheng.huang@intel.com>
---
 bin/spark                                          | 41 +++++-----------------
 .../mesos/CoarseMesosSchedulerBackend.scala        |  2 +-
 python/pyspark/java_gateway.py                     |  2 +-
 3 files changed, 10 insertions(+), 35 deletions(-)

(limited to 'python')

diff --git a/bin/spark b/bin/spark
index 7f25fe1050..aa005a51f5 100755
--- a/bin/spark
+++ b/bin/spark
@@ -31,40 +31,11 @@ if [ -e $FWDIR/conf/spark-env.sh ] ; then
 fi
 
 if [ -z "$1" ]; then
-  echo "Usage: spark-class <class> [<args>]" >&2
+  echo "Usage: spark <class> [<args>]" >&2
+  echo "Usage: export SPARK_CLASSPATH before running the command" >&2
   exit 1
 fi
 
-# If this is a standalone cluster daemon, reset SPARK_JAVA_OPTS and SPARK_MEM to reasonable
-# values for that; it doesn't need a lot
-if [ "$1" = "org.apache.spark.deploy.master.Master" -o "$1" = "org.apache.spark.deploy.worker.Worker" ]; then
-  SPARK_MEM=${SPARK_DAEMON_MEMORY:-512m}
-  SPARK_DAEMON_JAVA_OPTS="$SPARK_DAEMON_JAVA_OPTS -Dspark.akka.logLifecycleEvents=true"
-  # Do not overwrite SPARK_JAVA_OPTS environment variable in this script
-  OUR_JAVA_OPTS="$SPARK_DAEMON_JAVA_OPTS"   # Empty by default
-else
-  OUR_JAVA_OPTS="$SPARK_JAVA_OPTS"
-fi
-
-
-# Add java opts for master, worker, executor. The opts maybe null
-case "$1" in
-  'org.apache.spark.deploy.master.Master')
-    OUR_JAVA_OPTS="$OUR_JAVA_OPTS $SPARK_MASTER_OPTS"
-    ;;
-  'org.apache.spark.deploy.worker.Worker')
-    OUR_JAVA_OPTS="$OUR_JAVA_OPTS $SPARK_WORKER_OPTS"
-    ;;
-  'org.apache.spark.executor.StandaloneExecutorBackend')
-    OUR_JAVA_OPTS="$OUR_JAVA_OPTS $SPARK_EXECUTOR_OPTS"
-    ;;
-  'org.apache.spark.executor.MesosExecutorBackend')
-    OUR_JAVA_OPTS="$OUR_JAVA_OPTS $SPARK_EXECUTOR_OPTS"
-    ;;
-  'org.apache.spark.repl.Main')
-    OUR_JAVA_OPTS="$OUR_JAVA_OPTS $SPARK_REPL_OPTS"
-    ;;
-esac
 
 # Find the java binary
 if [ -n "${JAVA_HOME}" ]; then
@@ -78,14 +49,18 @@ else
   fi
 fi
 
-# Set SPARK_MEM if it isn't already set since we also use it for this process
+# Set SPARK_MEM if it isn't already set
 SPARK_MEM=${SPARK_MEM:-512m}
 export SPARK_MEM
 
+# Set APP_MEM if it isn't already set, we use this for this process as the app driver process may need 
+# as much memory as specified in SPARK_MEM
+APP_MEM=${APP_MEM:-512m}
+
 # Set JAVA_OPTS to be able to load native libraries and to set heap size
 JAVA_OPTS="$OUR_JAVA_OPTS"
 JAVA_OPTS="$JAVA_OPTS -Djava.library.path=$SPARK_LIBRARY_PATH"
-JAVA_OPTS="$JAVA_OPTS -Xms$SPARK_MEM -Xmx$SPARK_MEM"
+JAVA_OPTS="$JAVA_OPTS -Xms$APP_MEM -Xmx$APP_MEM"
 # Load extra JAVA_OPTS from conf/java-opts, if it exists
 if [ -e $FWDIR/conf/java-opts ] ; then
   JAVA_OPTS="$JAVA_OPTS `cat $FWDIR/conf/java-opts`"
diff --git a/core/src/main/scala/org/apache/spark/scheduler/mesos/CoarseMesosSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/mesos/CoarseMesosSchedulerBackend.scala
index 9f93491e5a..544b20550e 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/mesos/CoarseMesosSchedulerBackend.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/mesos/CoarseMesosSchedulerBackend.scala
@@ -125,7 +125,7 @@ private[spark] class CoarseMesosSchedulerBackend(
       StandaloneSchedulerBackend.ACTOR_NAME)
     val uri = System.getProperty("spark.executor.uri")
     if (uri == null) {
-      val runScript = new File(sparkHome, "/sbin/spark-class").getCanonicalPath
+      val runScript = new File(sparkHome, "./sbin/spark-class").getCanonicalPath
       command.setValue(
         "\"%s\" org.apache.spark.executor.StandaloneExecutorBackend %s %s %s %d".format(
           runScript, driverUrl, offer.getSlaveId.getValue, offer.getHostname, numCores))
diff --git a/python/pyspark/java_gateway.py b/python/pyspark/java_gateway.py
index f7834ef803..b872ae61d5 100644
--- a/python/pyspark/java_gateway.py
+++ b/python/pyspark/java_gateway.py
@@ -31,7 +31,7 @@ def launch_gateway():
     # Launch the Py4j gateway using Spark's run command so that we pick up the
     # proper classpath and SPARK_MEM settings from spark-env.sh
     on_windows = platform.system() == "Windows"
-    script = "/sbin/spark-class.cmd" if on_windows else "/sbin/spark-class"
+    script = "./sbin/spark-class.cmd" if on_windows else "./sbin/spark-class"
     command = [os.path.join(SPARK_HOME, script), "py4j.GatewayServer",
                "--die-on-broken-pipe", "0"]
     if not on_windows:
-- 
cgit v1.2.3