From 85a35c68401e171df0b72b172a689d8c4e412199 Mon Sep 17 00:00:00 2001 From: Christoph Grothaus Date: Fri, 15 Feb 2013 14:11:34 +0100 Subject: Fix SPARK-698. From ExecutorRunner, launch java directly instead via the run scripts. --- run2.cmd | 3 +++ 1 file changed, 3 insertions(+) (limited to 'run2.cmd') diff --git a/run2.cmd b/run2.cmd index c913a5195e..a93bbad0b9 100644 --- a/run2.cmd +++ b/run2.cmd @@ -22,6 +22,8 @@ if "%1"=="spark.deploy.master.Master" set RUNNING_DAEMON=1 if "%1"=="spark.deploy.worker.Worker" set RUNNING_DAEMON=1 if "x%SPARK_DAEMON_MEMORY%" == "x" set SPARK_DAEMON_MEMORY=512m if "%RUNNING_DAEMON%"=="1" set SPARK_MEM=%SPARK_DAEMON_MEMORY% +rem Backup current SPARK_JAVA_OPTS for use in ExecutorRunner.scala +if "%RUNNING_DAEMON%"=="1" set SPARK_NONDAEMON_JAVA_OPTS=%SPARK_JAVA_OPTS% if "%RUNNING_DAEMON%"=="1" set SPARK_JAVA_OPTS=%SPARK_DAEMON_JAVA_OPTS% rem Check that SCALA_HOME has been specified @@ -42,6 +44,7 @@ rem Set JAVA_OPTS to be able to load native libraries and to set heap size set JAVA_OPTS=%SPARK_JAVA_OPTS% -Djava.library.path=%SPARK_LIBRARY_PATH% -Xms%SPARK_MEM% -Xmx%SPARK_MEM% rem Load extra JAVA_OPTS from conf/java-opts, if it exists if exist "%FWDIR%conf\java-opts.cmd" call "%FWDIR%conf\java-opts.cmd" +rem Attention: when changing the way the JAVA_OPTS are assembled, the change must be reflected in ExecutorRunner.scala! set CORE_DIR=%FWDIR%core set REPL_DIR=%FWDIR%repl -- cgit v1.2.3 From f39f2b7636f52568a556987c8b7f7393299b0351 Mon Sep 17 00:00:00 2001 From: Christoph Grothaus Date: Sun, 24 Feb 2013 21:24:30 +0100 Subject: Incorporate feedback from mateiz: - we do not need getEnvOrEmpty - Instead of saving SPARK_NONDAEMON_JAVA_OPTS, it would be better to modify the scripts to use a different variable name for the JAVA_OPTS they do eventually use --- .../scala/spark/deploy/worker/ExecutorRunner.scala | 24 +++++++--------------- run | 9 ++++---- run2.cmd | 8 ++++---- 3 files changed, 16 insertions(+), 25 deletions(-) (limited to 'run2.cmd') diff --git a/core/src/main/scala/spark/deploy/worker/ExecutorRunner.scala b/core/src/main/scala/spark/deploy/worker/ExecutorRunner.scala index 214c44fc88..38216ce62f 100644 --- a/core/src/main/scala/spark/deploy/worker/ExecutorRunner.scala +++ b/core/src/main/scala/spark/deploy/worker/ExecutorRunner.scala @@ -75,10 +75,10 @@ private[spark] class ExecutorRunner( def buildCommandSeq(): Seq[String] = { val command = appDesc.command - val runner = if (getEnvOrEmpty("JAVA_HOME") == "") { + val runner = if (System.getenv("JAVA_HOME") == null) { "java" } else { - getEnvOrEmpty("JAVA_HOME") + "/bin/java" + System.getenv("JAVA_HOME") + "/bin/java" } // SPARK-698: do not call the run.cmd script, as process.destroy() // fails to kill a process tree on Windows @@ -91,29 +91,19 @@ private[spark] class ExecutorRunner( * way the JAVA_OPTS are assembled there. 
*/ def buildJavaOpts(): Seq[String] = { - val _javaLibPath = if (getEnvOrEmpty("SPARK_LIBRARY_PATH") == "") { + val _javaLibPath = if (System.getenv("SPARK_LIBRARY_PATH") == null) { "" } else { - "-Djava.library.path=" + getEnvOrEmpty("SPARK_LIBRARY_PATH") + "-Djava.library.path=" + System.getenv("SPARK_LIBRARY_PATH") } Seq("-cp", - getEnvOrEmpty("CLASSPATH"), - // SPARK_JAVA_OPTS is overwritten with SPARK_DAEMON_JAVA_OPTS for running the worker - getEnvOrEmpty("SPARK_NONDAEMON_JAVA_OPTS"), + System.getenv("CLASSPATH"), + System.getenv("SPARK_JAVA_OPTS"), _javaLibPath, "-Xms" + memory.toString + "M", "-Xmx" + memory.toString + "M") - .filter(_ != "") - } - - def getEnvOrEmpty(key: String): String = { - val result = System.getenv(key) - if (result == null) { - "" - } else { - result - } + .filter(_ != null) } /** Spawn a thread that will redirect a given stream to a file */ diff --git a/run b/run index b5f693f1fa..e1482dafbe 100755 --- a/run +++ b/run @@ -22,9 +22,10 @@ fi # values for that; it doesn't need a lot if [ "$1" = "spark.deploy.master.Master" -o "$1" = "spark.deploy.worker.Worker" ]; then SPARK_MEM=${SPARK_DAEMON_MEMORY:-512m} - # Backup current SPARK_JAVA_OPTS for use in ExecutorRunner.scala - SPARK_NONDAEMON_JAVA_OPTS=$SPARK_JAVA_OPTS - SPARK_JAVA_OPTS=$SPARK_DAEMON_JAVA_OPTS # Empty by default + # Do not overwrite SPARK_JAVA_OPTS environment variable in this script + OUR_JAVA_OPTS=$SPARK_DAEMON_JAVA_OPTS # Empty by default +else + OUR_JAVA_OPTS=$SPARK_JAVA_OPTS fi if [ "$SPARK_LAUNCH_WITH_SCALA" == "1" ]; then @@ -64,7 +65,7 @@ fi export SPARK_MEM # Set JAVA_OPTS to be able to load native libraries and to set heap size -JAVA_OPTS="$SPARK_JAVA_OPTS" +JAVA_OPTS="$OUR_JAVA_OPTS" JAVA_OPTS+=" -Djava.library.path=$SPARK_LIBRARY_PATH" JAVA_OPTS+=" -Xms$SPARK_MEM -Xmx$SPARK_MEM" # Load extra JAVA_OPTS from conf/java-opts, if it exists diff --git a/run2.cmd b/run2.cmd index a93bbad0b9..8648c0380a 100644 --- a/run2.cmd +++ b/run2.cmd @@ -22,9 +22,9 @@ if "%1"=="spark.deploy.master.Master" set RUNNING_DAEMON=1 if "%1"=="spark.deploy.worker.Worker" set RUNNING_DAEMON=1 if "x%SPARK_DAEMON_MEMORY%" == "x" set SPARK_DAEMON_MEMORY=512m if "%RUNNING_DAEMON%"=="1" set SPARK_MEM=%SPARK_DAEMON_MEMORY% -rem Backup current SPARK_JAVA_OPTS for use in ExecutorRunner.scala -if "%RUNNING_DAEMON%"=="1" set SPARK_NONDAEMON_JAVA_OPTS=%SPARK_JAVA_OPTS% -if "%RUNNING_DAEMON%"=="1" set SPARK_JAVA_OPTS=%SPARK_DAEMON_JAVA_OPTS% +rem Do not overwrite SPARK_JAVA_OPTS environment variable in this script +if "%RUNNING_DAEMON%"=="0" set OUR_JAVA_OPTS=%SPARK_JAVA_OPTS% +if "%RUNNING_DAEMON%"=="1" set OUR_JAVA_OPTS=%SPARK_DAEMON_JAVA_OPTS% rem Check that SCALA_HOME has been specified if not "x%SCALA_HOME%"=="x" goto scala_exists @@ -41,7 +41,7 @@ rem variable so that our process sees it and can report it to Mesos if "x%SPARK_MEM%"=="x" set SPARK_MEM=512m rem Set JAVA_OPTS to be able to load native libraries and to set heap size -set JAVA_OPTS=%SPARK_JAVA_OPTS% -Djava.library.path=%SPARK_LIBRARY_PATH% -Xms%SPARK_MEM% -Xmx%SPARK_MEM% +set JAVA_OPTS=%OUR_JAVA_OPTS% -Djava.library.path=%SPARK_LIBRARY_PATH% -Xms%SPARK_MEM% -Xmx%SPARK_MEM% rem Load extra JAVA_OPTS from conf/java-opts, if it exists if exist "%FWDIR%conf\java-opts.cmd" call "%FWDIR%conf\java-opts.cmd" rem Attention: when changing the way the JAVA_OPTS are assembled, the change must be reflected in ExecutorRunner.scala! 
-- cgit v1.2.3 From ee37612bc95e8486fa328908005293585912db71 Mon Sep 17 00:00:00 2001 From: Mridul Muralidharan Date: Sat, 11 May 2013 11:12:22 +0530 Subject: 1) Add support for HADOOP_CONF_DIR (and/or YARN_CONF_DIR - use either) : which is used to specify the client side configuration directory : which needs to be part of the CLASSPATH. 2) Move from var+=".." to var="$var.." : the former does not work on older bash shells unfortunately. --- docs/running-on-yarn.md | 3 +++ run | 65 +++++++++++++++++++++++++++++-------------------- run2.cmd | 13 ++++++++++ 3 files changed, 54 insertions(+), 27 deletions(-) (limited to 'run2.cmd') diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md index 26424bbe52..c8cf8ffc35 100644 --- a/docs/running-on-yarn.md +++ b/docs/running-on-yarn.md @@ -30,6 +30,9 @@ If you want to test out the YARN deployment mode, you can use the current Spark # Launching Spark on YARN +Ensure that HADOOP_CONF_DIR or YARN_CONF_DIR points to the directory which contains the (client side) configuration files for the hadoop cluster. +This would be used to connect to the cluster, write to the dfs and submit jobs to the resource manager. + The command to launch the YARN Client is as follows: SPARK_JAR= ./run spark.deploy.yarn.Client \ diff --git a/run b/run index 0a58ac4a36..c744bbd3dc 100755 --- a/run +++ b/run @@ -22,7 +22,7 @@ fi # values for that; it doesn't need a lot if [ "$1" = "spark.deploy.master.Master" -o "$1" = "spark.deploy.worker.Worker" ]; then SPARK_MEM=${SPARK_DAEMON_MEMORY:-512m} - SPARK_DAEMON_JAVA_OPTS+=" -Dspark.akka.logLifecycleEvents=true" + SPARK_DAEMON_JAVA_OPTS="$SPARK_DAEMON_JAVA_OPTS -Dspark.akka.logLifecycleEvents=true" SPARK_JAVA_OPTS=$SPARK_DAEMON_JAVA_OPTS # Empty by default fi @@ -30,19 +30,19 @@ fi # Add java opts for master, worker, executor. 
The opts maybe null case "$1" in 'spark.deploy.master.Master') - SPARK_JAVA_OPTS+=" $SPARK_MASTER_OPTS" + SPARK_JAVA_OPTS="$SPARK_JAVA_OPTS $SPARK_MASTER_OPTS" ;; 'spark.deploy.worker.Worker') - SPARK_JAVA_OPTS+=" $SPARK_WORKER_OPTS" + SPARK_JAVA_OPTS="$SPARK_JAVA_OPTS $SPARK_WORKER_OPTS" ;; 'spark.executor.StandaloneExecutorBackend') - SPARK_JAVA_OPTS+=" $SPARK_EXECUTOR_OPTS" + SPARK_JAVA_OPTS="$SPARK_JAVA_OPTS $SPARK_EXECUTOR_OPTS" ;; 'spark.executor.MesosExecutorBackend') - SPARK_JAVA_OPTS+=" $SPARK_EXECUTOR_OPTS" + SPARK_JAVA_OPTS="$SPARK_JAVA_OPTS $SPARK_EXECUTOR_OPTS" ;; 'spark.repl.Main') - SPARK_JAVA_OPTS+=" $SPARK_REPL_OPTS" + SPARK_JAVA_OPTS="$SPARK_JAVA_OPTS $SPARK_REPL_OPTS" ;; esac @@ -85,11 +85,11 @@ export SPARK_MEM # Set JAVA_OPTS to be able to load native libraries and to set heap size JAVA_OPTS="$SPARK_JAVA_OPTS" -JAVA_OPTS+=" -Djava.library.path=$SPARK_LIBRARY_PATH" -JAVA_OPTS+=" -Xms$SPARK_MEM -Xmx$SPARK_MEM" +JAVA_OPTS="$JAVA_OPTS -Djava.library.path=$SPARK_LIBRARY_PATH" +JAVA_OPTS="$JAVA_OPTS -Xms$SPARK_MEM -Xmx$SPARK_MEM" # Load extra JAVA_OPTS from conf/java-opts, if it exists if [ -e $FWDIR/conf/java-opts ] ; then - JAVA_OPTS+=" `cat $FWDIR/conf/java-opts`" + JAVA_OPTS="$JAVA_OPTS `cat $FWDIR/conf/java-opts`" fi export JAVA_OPTS @@ -110,30 +110,30 @@ fi # Build up classpath CLASSPATH="$SPARK_CLASSPATH" -CLASSPATH+=":$FWDIR/conf" -CLASSPATH+=":$CORE_DIR/target/scala-$SCALA_VERSION/classes" +CLASSPATH="$CLASSPATH:$FWDIR/conf" +CLASSPATH="$CLASSPATH:$CORE_DIR/target/scala-$SCALA_VERSION/classes" if [ -n "$SPARK_TESTING" ] ; then - CLASSPATH+=":$CORE_DIR/target/scala-$SCALA_VERSION/test-classes" - CLASSPATH+=":$STREAMING_DIR/target/scala-$SCALA_VERSION/test-classes" + CLASSPATH="$CLASSPATH:$CORE_DIR/target/scala-$SCALA_VERSION/test-classes" + CLASSPATH="$CLASSPATH:$STREAMING_DIR/target/scala-$SCALA_VERSION/test-classes" fi -CLASSPATH+=":$CORE_DIR/src/main/resources" -CLASSPATH+=":$REPL_DIR/target/scala-$SCALA_VERSION/classes" -CLASSPATH+=":$EXAMPLES_DIR/target/scala-$SCALA_VERSION/classes" -CLASSPATH+=":$STREAMING_DIR/target/scala-$SCALA_VERSION/classes" -CLASSPATH+=":$STREAMING_DIR/lib/org/apache/kafka/kafka/0.7.2-spark/*" # <-- our in-project Kafka Jar +CLASSPATH="$CLASSPATH:$CORE_DIR/src/main/resources" +CLASSPATH="$CLASSPATH:$REPL_DIR/target/scala-$SCALA_VERSION/classes" +CLASSPATH="$CLASSPATH:$EXAMPLES_DIR/target/scala-$SCALA_VERSION/classes" +CLASSPATH="$CLASSPATH:$STREAMING_DIR/target/scala-$SCALA_VERSION/classes" +CLASSPATH="$CLASSPATH:$STREAMING_DIR/lib/org/apache/kafka/kafka/0.7.2-spark/*" # <-- our in-project Kafka Jar if [ -e "$FWDIR/lib_managed" ]; then - CLASSPATH+=":$FWDIR/lib_managed/jars/*" - CLASSPATH+=":$FWDIR/lib_managed/bundles/*" + CLASSPATH="$CLASSPATH:$FWDIR/lib_managed/jars/*" + CLASSPATH="$CLASSPATH:$FWDIR/lib_managed/bundles/*" fi -CLASSPATH+=":$REPL_DIR/lib/*" +CLASSPATH="$CLASSPATH:$REPL_DIR/lib/*" if [ -e $REPL_BIN_DIR/target ]; then for jar in `find "$REPL_BIN_DIR/target" -name 'spark-repl-*-shaded-hadoop*.jar'`; do - CLASSPATH+=":$jar" + CLASSPATH="$CLASSPATH:$jar" done fi -CLASSPATH+=":$BAGEL_DIR/target/scala-$SCALA_VERSION/classes" +CLASSPATH="$CLASSPATH:$BAGEL_DIR/target/scala-$SCALA_VERSION/classes" for jar in `find $PYSPARK_DIR/lib -name '*jar'`; do - CLASSPATH+=":$jar" + CLASSPATH="$CLASSPATH:$jar" done # Figure out the JAR file that our examples were packaged into. 
This includes a bit of a hack @@ -147,6 +147,17 @@ if [ -e "$EXAMPLES_DIR/target/spark-examples-"*hadoop[12].jar ]; then export SPARK_EXAMPLES_JAR=`ls "$EXAMPLES_DIR/target/spark-examples-"*hadoop[12].jar` fi +# Add hadoop conf dir - else FileSystem.*, etc fail ! +# Note, this assumes that there is either a HADOOP_CONF_DIR or YARN_CONF_DIR which hosts +# the configurtion files. +if [ "x" != "x$HADOOP_CONF_DIR" ]; then + CLASSPATH="$CLASSPATH:$HADOOP_CONF_DIR" +fi +if [ "x" != "x$YARN_CONF_DIR" ]; then + CLASSPATH="$CLASSPATH:$YARN_CONF_DIR" +fi + + # Figure out whether to run our class with java or with the scala launcher. # In most cases, we'd prefer to execute our process with java because scala # creates a shell script as the parent of its Java process, which makes it @@ -156,9 +167,9 @@ fi if [ "$SPARK_LAUNCH_WITH_SCALA" == "1" ]; then EXTRA_ARGS="" # Java options will be passed to scala as JAVA_OPTS else - CLASSPATH+=":$SCALA_LIBRARY_PATH/scala-library.jar" - CLASSPATH+=":$SCALA_LIBRARY_PATH/scala-compiler.jar" - CLASSPATH+=":$SCALA_LIBRARY_PATH/jline.jar" + CLASSPATH="$CLASSPATH:$SCALA_LIBRARY_PATH/scala-library.jar" + CLASSPATH="$CLASSPATH:$SCALA_LIBRARY_PATH/scala-compiler.jar" + CLASSPATH="$CLASSPATH:$SCALA_LIBRARY_PATH/jline.jar" # The JVM doesn't read JAVA_OPTS by default so we need to pass it in EXTRA_ARGS="$JAVA_OPTS" fi diff --git a/run2.cmd b/run2.cmd index d2d4807971..c6f43dde5b 100644 --- a/run2.cmd +++ b/run2.cmd @@ -63,6 +63,19 @@ set CLASSPATH=%CLASSPATH%;%FWDIR%repl\lib\* set CLASSPATH=%CLASSPATH%;%FWDIR%python\lib\* set CLASSPATH=%CLASSPATH%;%BAGEL_DIR%\target\scala-%SCALA_VERSION%\classes +rem Add hadoop conf dir - else FileSystem.*, etc fail +rem Note, this assumes that there is either a HADOOP_CONF_DIR or YARN_CONF_DIR which hosts +rem the configurtion files. +if "x%HADOOP_CONF_DIR%"=="x" goto no_hadoop_conf_dir + set CLASSPATH=%CLASSPATH%;%HADOOP_CONF_DIR% +:no_hadoop_conf_dir + +if "x%YARN_CONF_DIR%"=="x" goto no_yarn_conf_dir + set CLASSPATH=%CLASSPATH%;%YARN_CONF_DIR% +:no_yarn_conf_dir + + + rem Figure out the JAR file that our examples were packaged into. rem First search in the build path from SBT: for %%d in ("examples/target/scala-%SCALA_VERSION%/spark-examples*.jar") do ( -- cgit v1.2.3 From 6c8d1b2ca618a1a17566ede46821c0807a1b11f5 Mon Sep 17 00:00:00 2001 From: Matei Zaharia Date: Tue, 25 Jun 2013 18:21:00 -0400 Subject: Fix computation of classpath when we launch java directly The previous version assumed that a CLASSPATH environment variable was set by the "run" script when launching the process that starts the ExecutorRunner, but unfortunately this is not true in tests. Instead, we factor the classpath calculation into an extenral script and call that. NOTE: This includes a Windows version but hasn't yet been tested there. 
--- bin/compute-classpath.cmd | 52 +++++++++++++ bin/compute-classpath.sh | 89 ++++++++++++++++++++++ core/src/main/scala/spark/Utils.scala | 31 ++++++++ .../scala/spark/deploy/worker/ExecutorRunner.scala | 19 +---- run | 67 +++------------- run2.cmd | 38 +-------- 6 files changed, 189 insertions(+), 107 deletions(-) create mode 100644 bin/compute-classpath.cmd create mode 100755 bin/compute-classpath.sh (limited to 'run2.cmd') diff --git a/bin/compute-classpath.cmd b/bin/compute-classpath.cmd new file mode 100644 index 0000000000..1dff8fea22 --- /dev/null +++ b/bin/compute-classpath.cmd @@ -0,0 +1,52 @@ +@echo off + +rem This script computes Spark's classpath and prints it to stdout; it's used by both the "run" +rem script and the ExecutorRunner in standalone cluster mode. + +set SCALA_VERSION=2.9.3 + +rem Figure out where the Spark framework is installed +set FWDIR=%~dp0\.. + +rem Load environment variables from conf\spark-env.cmd, if it exists +if exist "%FWDIR%conf\spark-env.cmd" call "%FWDIR%conf\spark-env.cmd" + +set CORE_DIR=%FWDIR%core +set REPL_DIR=%FWDIR%repl +set EXAMPLES_DIR=%FWDIR%examples +set BAGEL_DIR=%FWDIR%bagel +set STREAMING_DIR=%FWDIR%streaming +set PYSPARK_DIR=%FWDIR%python + +rem Build up classpath +set CLASSPATH=%SPARK_CLASSPATH%;%MESOS_CLASSPATH%;%FWDIR%conf;%CORE_DIR%\target\scala-%SCALA_VERSION%\classes +set CLASSPATH=%CLASSPATH%;%CORE_DIR%\target\scala-%SCALA_VERSION%\test-classes;%CORE_DIR%\src\main\resources +set CLASSPATH=%CLASSPATH%;%STREAMING_DIR%\target\scala-%SCALA_VERSION%\classes;%STREAMING_DIR%\target\scala-%SCALA_VERSION%\test-classes +set CLASSPATH=%CLASSPATH%;%STREAMING_DIR%\lib\org\apache\kafka\kafka\0.7.2-spark\* +set CLASSPATH=%CLASSPATH%;%REPL_DIR%\target\scala-%SCALA_VERSION%\classes;%EXAMPLES_DIR%\target\scala-%SCALA_VERSION%\classes +set CLASSPATH=%CLASSPATH%;%FWDIR%lib_managed\jars\* +set CLASSPATH=%CLASSPATH%;%FWDIR%lib_managed\bundles\* +set CLASSPATH=%CLASSPATH%;%FWDIR%repl\lib\* +set CLASSPATH=%CLASSPATH%;%FWDIR%python\lib\* +set CLASSPATH=%CLASSPATH%;%BAGEL_DIR%\target\scala-%SCALA_VERSION%\classes + +rem Add hadoop conf dir - else FileSystem.*, etc fail +rem Note, this assumes that there is either a HADOOP_CONF_DIR or YARN_CONF_DIR which hosts +rem the configurtion files. +if "x%HADOOP_CONF_DIR%"=="x" goto no_hadoop_conf_dir + set CLASSPATH=%CLASSPATH%;%HADOOP_CONF_DIR% +:no_hadoop_conf_dir + +if "x%YARN_CONF_DIR%"=="x" goto no_yarn_conf_dir + set CLASSPATH=%CLASSPATH%;%YARN_CONF_DIR% +:no_yarn_conf_dir + +rem Add Scala standard library +set CLASSPATH=%CLASSPATH%;%SCALA_HOME%\lib\scala-library.jar;%SCALA_HOME%\lib\scala-compiler.jar;%SCALA_HOME%\lib\jline.jar + +rem A bit of a hack to allow calling this script within run2.cmd without seeing output +if "x%DONT_PRINT_CLASSPATH%"=="x1" goto exit + +echo %CLASSPATH% + +:exit diff --git a/bin/compute-classpath.sh b/bin/compute-classpath.sh new file mode 100755 index 0000000000..3a78880290 --- /dev/null +++ b/bin/compute-classpath.sh @@ -0,0 +1,89 @@ +#!/bin/bash + +# This script computes Spark's classpath and prints it to stdout; it's used by both the "run" +# script and the ExecutorRunner in standalone cluster mode. + +SCALA_VERSION=2.9.3 + +# Figure out where Spark is installed +FWDIR="$(cd `dirname $0`/..; pwd)" + +# Load environment variables from conf/spark-env.sh, if it exists +if [ -e $FWDIR/conf/spark-env.sh ] ; then + . 
$FWDIR/conf/spark-env.sh +fi + +CORE_DIR="$FWDIR/core" +REPL_DIR="$FWDIR/repl" +REPL_BIN_DIR="$FWDIR/repl-bin" +EXAMPLES_DIR="$FWDIR/examples" +BAGEL_DIR="$FWDIR/bagel" +STREAMING_DIR="$FWDIR/streaming" +PYSPARK_DIR="$FWDIR/python" + +# Build up classpath +CLASSPATH="$SPARK_CLASSPATH" +CLASSPATH="$CLASSPATH:$FWDIR/conf" +CLASSPATH="$CLASSPATH:$CORE_DIR/target/scala-$SCALA_VERSION/classes" +if [ -n "$SPARK_TESTING" ] ; then + CLASSPATH="$CLASSPATH:$CORE_DIR/target/scala-$SCALA_VERSION/test-classes" + CLASSPATH="$CLASSPATH:$STREAMING_DIR/target/scala-$SCALA_VERSION/test-classes" +fi +CLASSPATH="$CLASSPATH:$CORE_DIR/src/main/resources" +CLASSPATH="$CLASSPATH:$REPL_DIR/target/scala-$SCALA_VERSION/classes" +CLASSPATH="$CLASSPATH:$EXAMPLES_DIR/target/scala-$SCALA_VERSION/classes" +CLASSPATH="$CLASSPATH:$STREAMING_DIR/target/scala-$SCALA_VERSION/classes" +CLASSPATH="$CLASSPATH:$STREAMING_DIR/lib/org/apache/kafka/kafka/0.7.2-spark/*" # <-- our in-project Kafka Jar +if [ -e "$FWDIR/lib_managed" ]; then + CLASSPATH="$CLASSPATH:$FWDIR/lib_managed/jars/*" + CLASSPATH="$CLASSPATH:$FWDIR/lib_managed/bundles/*" +fi +CLASSPATH="$CLASSPATH:$REPL_DIR/lib/*" +# Add the shaded JAR for Maven builds +if [ -e $REPL_BIN_DIR/target ]; then + for jar in `find "$REPL_BIN_DIR/target" -name 'spark-repl-*-shaded-hadoop*.jar'`; do + CLASSPATH="$CLASSPATH:$jar" + done + # The shaded JAR doesn't contain examples, so include those separately + EXAMPLES_JAR=`ls "$EXAMPLES_DIR/target/spark-examples"*[0-9T].jar` + CLASSPATH+=":$EXAMPLES_JAR" +fi +CLASSPATH="$CLASSPATH:$BAGEL_DIR/target/scala-$SCALA_VERSION/classes" +for jar in `find $PYSPARK_DIR/lib -name '*jar'`; do + CLASSPATH="$CLASSPATH:$jar" +done + +# Figure out the JAR file that our examples were packaged into. This includes a bit of a hack +# to avoid the -sources and -doc packages that are built by publish-local. +if [ -e "$EXAMPLES_DIR/target/scala-$SCALA_VERSION/spark-examples"*[0-9T].jar ]; then + # Use the JAR from the SBT build + export SPARK_EXAMPLES_JAR=`ls "$EXAMPLES_DIR/target/scala-$SCALA_VERSION/spark-examples"*[0-9T].jar` +fi +if [ -e "$EXAMPLES_DIR/target/spark-examples"*[0-9T].jar ]; then + # Use the JAR from the Maven build + export SPARK_EXAMPLES_JAR=`ls "$EXAMPLES_DIR/target/spark-examples"*[0-9T].jar` +fi + +# Add hadoop conf dir - else FileSystem.*, etc fail ! +# Note, this assumes that there is either a HADOOP_CONF_DIR or YARN_CONF_DIR which hosts +# the configurtion files. +if [ "x" != "x$HADOOP_CONF_DIR" ]; then + CLASSPATH="$CLASSPATH:$HADOOP_CONF_DIR" +fi +if [ "x" != "x$YARN_CONF_DIR" ]; then + CLASSPATH="$CLASSPATH:$YARN_CONF_DIR" +fi + +# Add Scala standard library +if [ -z "$SCALA_LIBRARY_PATH" ]; then + if [ -z "$SCALA_HOME" ]; then + echo "SCALA_HOME is not set" >&2 + exit 1 + fi + SCALA_LIBRARY_PATH="$SCALA_HOME/lib" +fi +CLASSPATH="$CLASSPATH:$SCALA_LIBRARY_PATH/scala-library.jar" +CLASSPATH="$CLASSPATH:$SCALA_LIBRARY_PATH/scala-compiler.jar" +CLASSPATH="$CLASSPATH:$SCALA_LIBRARY_PATH/jline.jar" + +echo "$CLASSPATH" diff --git a/core/src/main/scala/spark/Utils.scala b/core/src/main/scala/spark/Utils.scala index bdc1494cc9..f41efa9d29 100644 --- a/core/src/main/scala/spark/Utils.scala +++ b/core/src/main/scala/spark/Utils.scala @@ -522,6 +522,37 @@ private object Utils extends Logging { execute(command, new File(".")) } + /** + * Execute a command and get its output, throwing an exception if it yields a code other than 0. 
+ */ + def executeAndGetOutput(command: Seq[String], workingDir: File = new File(".")): String = { + val process = new ProcessBuilder(command: _*) + .directory(workingDir) + .start() + new Thread("read stderr for " + command(0)) { + override def run() { + for (line <- Source.fromInputStream(process.getErrorStream).getLines) { + System.err.println(line) + } + } + }.start() + val output = new StringBuffer + val stdoutThread = new Thread("read stdout for " + command(0)) { + override def run() { + for (line <- Source.fromInputStream(process.getInputStream).getLines) { + output.append(line) + } + } + } + stdoutThread.start() + val exitCode = process.waitFor() + stdoutThread.join() // Wait for it to finish reading output + if (exitCode != 0) { + throw new SparkException("Process " + command + " exited with code " + exitCode) + } + output.toString + } + private[spark] class CallSiteInfo(val lastSparkMethod: String, val firstUserFile: String, val firstUserLine: Int, val firstUserClass: String) /** diff --git a/core/src/main/scala/spark/deploy/worker/ExecutorRunner.scala b/core/src/main/scala/spark/deploy/worker/ExecutorRunner.scala index db580e39ab..4f8e1dcb26 100644 --- a/core/src/main/scala/spark/deploy/worker/ExecutorRunner.scala +++ b/core/src/main/scala/spark/deploy/worker/ExecutorRunner.scala @@ -107,21 +107,9 @@ private[spark] class ExecutorRunner( val memoryOpts = Seq("-Xms" + memory + "M", "-Xmx" + memory + "M") - var classPath = System.getenv("CLASSPATH") - if (System.getenv("SPARK_LAUNCH_WITH_SCALA") == "1") { - // Add the Scala library JARs to the classpath; this is needed when the ExecutorRunner - // was launched with "scala" as the runner (e.g. in spark-shell in local-cluster mode) - // and the Scala libraries won't be in the CLASSPATH environment variable by defalt. - if (System.getenv("SCALA_LIBRARY_PATH") == null && System.getenv("SCALA_HOME") == null) { - logError("Cloud not launch executors: neither SCALA_LIBRARY_PATH nor SCALA_HOME are set") - System.exit(1) - } - val scalaLib = Option(System.getenv("SCALA_LIBRARY_PATH")).getOrElse( - System.getenv("SCALA_HOME") + "/lib") - classPath += ":" + scalaLib + "/scala-library.jar" + - ":" + scalaLib + "/scala-compiler.jar" + - ":" + scalaLib + "/jline.jar" - } + // Figure out our classpath with the external compute-classpath script + val ext = if (System.getProperty("os.name").startsWith("Windows")) ".cmd" else ".sh" + val classPath = Utils.executeAndGetOutput(Seq(sparkHome + "/bin/compute-classpath" + ext)) Seq("-cp", classPath) ++ libraryOpts ++ userOpts ++ memoryOpts } @@ -154,7 +142,6 @@ private[spark] class ExecutorRunner( // Launch the process val command = buildCommandSeq() - println("COMMAND: " + command.mkString(" ")) val builder = new ProcessBuilder(command: _*).directory(executorDir) val env = builder.environment() for ((key, value) <- appDesc.command.environment) { diff --git a/run b/run index 0fb15f8b24..7c06a55062 100755 --- a/run +++ b/run @@ -49,6 +49,12 @@ case "$1" in ;; esac +# Figure out whether to run our class with java or with the scala launcher. +# In most cases, we'd prefer to execute our process with java because scala +# creates a shell script as the parent of its Java process, which makes it +# hard to kill the child with stuff like Process.destroy(). However, for +# the Spark shell, the wrapper is necessary to properly reset the terminal +# when we exit, so we allow it to set a variable to launch with scala. 
if [ "$SPARK_LAUNCH_WITH_SCALA" == "1" ]; then if [ "$SCALA_HOME" ]; then RUNNER="${SCALA_HOME}/bin/scala" @@ -98,12 +104,8 @@ export JAVA_OPTS # Attention: when changing the way the JAVA_OPTS are assembled, the change must be reflected in ExecutorRunner.scala! CORE_DIR="$FWDIR/core" -REPL_DIR="$FWDIR/repl" -REPL_BIN_DIR="$FWDIR/repl-bin" EXAMPLES_DIR="$FWDIR/examples" -BAGEL_DIR="$FWDIR/bagel" -STREAMING_DIR="$FWDIR/streaming" -PYSPARK_DIR="$FWDIR/python" +REPL_DIR="$FWDIR/repl" # Exit if the user hasn't compiled Spark if [ ! -e "$CORE_DIR/target" ]; then @@ -118,37 +120,9 @@ if [[ "$@" = *repl* && ! -e "$REPL_DIR/target" ]]; then exit 1 fi -# Build up classpath -CLASSPATH="$SPARK_CLASSPATH" -CLASSPATH="$CLASSPATH:$FWDIR/conf" -CLASSPATH="$CLASSPATH:$CORE_DIR/target/scala-$SCALA_VERSION/classes" -if [ -n "$SPARK_TESTING" ] ; then - CLASSPATH="$CLASSPATH:$CORE_DIR/target/scala-$SCALA_VERSION/test-classes" - CLASSPATH="$CLASSPATH:$STREAMING_DIR/target/scala-$SCALA_VERSION/test-classes" -fi -CLASSPATH="$CLASSPATH:$CORE_DIR/src/main/resources" -CLASSPATH="$CLASSPATH:$REPL_DIR/target/scala-$SCALA_VERSION/classes" -CLASSPATH="$CLASSPATH:$EXAMPLES_DIR/target/scala-$SCALA_VERSION/classes" -CLASSPATH="$CLASSPATH:$STREAMING_DIR/target/scala-$SCALA_VERSION/classes" -CLASSPATH="$CLASSPATH:$STREAMING_DIR/lib/org/apache/kafka/kafka/0.7.2-spark/*" # <-- our in-project Kafka Jar -if [ -e "$FWDIR/lib_managed" ]; then - CLASSPATH="$CLASSPATH:$FWDIR/lib_managed/jars/*" - CLASSPATH="$CLASSPATH:$FWDIR/lib_managed/bundles/*" -fi -CLASSPATH="$CLASSPATH:$REPL_DIR/lib/*" -# Add the shaded JAR for Maven builds -if [ -e $REPL_BIN_DIR/target ]; then - for jar in `find "$REPL_BIN_DIR/target" -name 'spark-repl-*-shaded-hadoop*.jar'`; do - CLASSPATH="$CLASSPATH:$jar" - done - # The shaded JAR doesn't contain examples, so include those separately - EXAMPLES_JAR=`ls "$EXAMPLES_DIR/target/spark-examples"*[0-9T].jar` - CLASSPATH+=":$EXAMPLES_JAR" -fi -CLASSPATH="$CLASSPATH:$BAGEL_DIR/target/scala-$SCALA_VERSION/classes" -for jar in `find $PYSPARK_DIR/lib -name '*jar'`; do - CLASSPATH="$CLASSPATH:$jar" -done +# Compute classpath using external script +CLASSPATH=`$FWDIR/bin/compute-classpath.sh` +export CLASSPATH # Figure out the JAR file that our examples were packaged into. This includes a bit of a hack # to avoid the -sources and -doc packages that are built by publish-local. @@ -161,32 +135,11 @@ if [ -e "$EXAMPLES_DIR/target/spark-examples"*[0-9T].jar ]; then export SPARK_EXAMPLES_JAR=`ls "$EXAMPLES_DIR/target/spark-examples"*[0-9T].jar` fi -# Add hadoop conf dir - else FileSystem.*, etc fail ! -# Note, this assumes that there is either a HADOOP_CONF_DIR or YARN_CONF_DIR which hosts -# the configurtion files. -if [ "x" != "x$HADOOP_CONF_DIR" ]; then - CLASSPATH="$CLASSPATH:$HADOOP_CONF_DIR" -fi -if [ "x" != "x$YARN_CONF_DIR" ]; then - CLASSPATH="$CLASSPATH:$YARN_CONF_DIR" -fi - - -# Figure out whether to run our class with java or with the scala launcher. -# In most cases, we'd prefer to execute our process with java because scala -# creates a shell script as the parent of its Java process, which makes it -# hard to kill the child with stuff like Process.destroy(). However, for -# the Spark shell, the wrapper is necessary to properly reset the terminal -# when we exit, so we allow it to set a variable to launch with scala. 
if [ "$SPARK_LAUNCH_WITH_SCALA" == "1" ]; then EXTRA_ARGS="" # Java options will be passed to scala as JAVA_OPTS else - CLASSPATH="$CLASSPATH:$SCALA_LIBRARY_PATH/scala-library.jar" - CLASSPATH="$CLASSPATH:$SCALA_LIBRARY_PATH/scala-compiler.jar" - CLASSPATH="$CLASSPATH:$SCALA_LIBRARY_PATH/jline.jar" # The JVM doesn't read JAVA_OPTS by default so we need to pass it in EXTRA_ARGS="$JAVA_OPTS" fi -export CLASSPATH # Needed for spark-shell exec "$RUNNER" -cp "$CLASSPATH" $EXTRA_ARGS "$@" diff --git a/run2.cmd b/run2.cmd index bf76844d11..25e4f3b57c 100644 --- a/run2.cmd +++ b/run2.cmd @@ -33,51 +33,21 @@ if not "x%SCALA_HOME%"=="x" goto scala_exists goto exit :scala_exists -rem If the user specifies a Mesos JAR, put it before our included one on the classpath -set MESOS_CLASSPATH= -if not "x%MESOS_JAR%"=="x" set MESOS_CLASSPATH=%MESOS_JAR% - rem Figure out how much memory to use per executor and set it as an environment rem variable so that our process sees it and can report it to Mesos if "x%SPARK_MEM%"=="x" set SPARK_MEM=512m rem Set JAVA_OPTS to be able to load native libraries and to set heap size set JAVA_OPTS=%OUR_JAVA_OPTS% -Djava.library.path=%SPARK_LIBRARY_PATH% -Xms%SPARK_MEM% -Xmx%SPARK_MEM% -rem Load extra JAVA_OPTS from conf/java-opts, if it exists -if exist "%FWDIR%conf\java-opts.cmd" call "%FWDIR%conf\java-opts.cmd" rem Attention: when changing the way the JAVA_OPTS are assembled, the change must be reflected in ExecutorRunner.scala! set CORE_DIR=%FWDIR%core -set REPL_DIR=%FWDIR%repl set EXAMPLES_DIR=%FWDIR%examples -set BAGEL_DIR=%FWDIR%bagel -set STREAMING_DIR=%FWDIR%streaming -set PYSPARK_DIR=%FWDIR%python - -rem Build up classpath -set CLASSPATH=%SPARK_CLASSPATH%;%MESOS_CLASSPATH%;%FWDIR%conf;%CORE_DIR%\target\scala-%SCALA_VERSION%\classes -set CLASSPATH=%CLASSPATH%;%CORE_DIR%\target\scala-%SCALA_VERSION%\test-classes;%CORE_DIR%\src\main\resources -set CLASSPATH=%CLASSPATH%;%STREAMING_DIR%\target\scala-%SCALA_VERSION%\classes;%STREAMING_DIR%\target\scala-%SCALA_VERSION%\test-classes -set CLASSPATH=%CLASSPATH%;%STREAMING_DIR%\lib\org\apache\kafka\kafka\0.7.2-spark\* -set CLASSPATH=%CLASSPATH%;%REPL_DIR%\target\scala-%SCALA_VERSION%\classes;%EXAMPLES_DIR%\target\scala-%SCALA_VERSION%\classes -set CLASSPATH=%CLASSPATH%;%FWDIR%lib_managed\jars\* -set CLASSPATH=%CLASSPATH%;%FWDIR%lib_managed\bundles\* -set CLASSPATH=%CLASSPATH%;%FWDIR%repl\lib\* -set CLASSPATH=%CLASSPATH%;%FWDIR%python\lib\* -set CLASSPATH=%CLASSPATH%;%BAGEL_DIR%\target\scala-%SCALA_VERSION%\classes - -rem Add hadoop conf dir - else FileSystem.*, etc fail -rem Note, this assumes that there is either a HADOOP_CONF_DIR or YARN_CONF_DIR which hosts -rem the configurtion files. -if "x%HADOOP_CONF_DIR%"=="x" goto no_hadoop_conf_dir - set CLASSPATH=%CLASSPATH%;%HADOOP_CONF_DIR% -:no_hadoop_conf_dir - -if "x%YARN_CONF_DIR%"=="x" goto no_yarn_conf_dir - set CLASSPATH=%CLASSPATH%;%YARN_CONF_DIR% -:no_yarn_conf_dir - +set REPL_DIR=%FWDIR%repl +rem Compute classpath using external script +set DONT_PRINT_CLASSPATH=1 +call "%FWDIR%bin\compute-classpath.cmd" rem Figure out the JAR file that our examples were packaged into. 
rem First search in the build path from SBT: -- cgit v1.2.3 From 03906f7f0a8c93b09e3b47ccaad5b5f72c29302b Mon Sep 17 00:00:00 2001 From: Matei Zaharia Date: Wed, 26 Jun 2013 17:40:22 -0700 Subject: Fixes to compute-classpath on Windows --- bin/compute-classpath.cmd | 4 ++-- run2.cmd | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'run2.cmd') diff --git a/bin/compute-classpath.cmd b/bin/compute-classpath.cmd index 1dff8fea22..6e7efbd334 100644 --- a/bin/compute-classpath.cmd +++ b/bin/compute-classpath.cmd @@ -6,7 +6,7 @@ rem script and the ExecutorRunner in standalone cluster mode. set SCALA_VERSION=2.9.3 rem Figure out where the Spark framework is installed -set FWDIR=%~dp0\.. +set FWDIR=%~dp0..\ rem Load environment variables from conf\spark-env.cmd, if it exists if exist "%FWDIR%conf\spark-env.cmd" call "%FWDIR%conf\spark-env.cmd" @@ -45,7 +45,7 @@ rem Add Scala standard library set CLASSPATH=%CLASSPATH%;%SCALA_HOME%\lib\scala-library.jar;%SCALA_HOME%\lib\scala-compiler.jar;%SCALA_HOME%\lib\jline.jar rem A bit of a hack to allow calling this script within run2.cmd without seeing output -if "x%DONT_PRINT_CLASSPATH%"=="x1" goto exit +if "%DONT_PRINT_CLASSPATH%"=="1" goto exit echo %CLASSPATH% diff --git a/run2.cmd b/run2.cmd index 25e4f3b57c..a9c4df180f 100644 --- a/run2.cmd +++ b/run2.cmd @@ -48,6 +48,7 @@ set REPL_DIR=%FWDIR%repl rem Compute classpath using external script set DONT_PRINT_CLASSPATH=1 call "%FWDIR%bin\compute-classpath.cmd" +set DONT_PRINT_CLASSPATH=0 rem Figure out the JAR file that our examples were packaged into. rem First search in the build path from SBT: -- cgit v1.2.3
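
As an aside for readers following the series, here is a minimal standalone sketch (illustrative only, not part of any patch above, and assuming SPARK_HOME points at a Spark checkout that actually contains bin/compute-classpath.sh) of the launch pattern the scripts converge on: build JAVA_OPTS with the portable VAR="$VAR ..." append rather than VAR+=..., and take the classpath from the shared compute-classpath helper, much as ExecutorRunner now does:

#!/bin/bash
# Illustrative sketch only; mirrors the pattern in the patches above.
# Assumes SPARK_HOME is a Spark checkout containing bin/compute-classpath.sh.
SPARK_HOME="${SPARK_HOME:-$(pwd)}"
SPARK_MEM="${SPARK_MEM:-512m}"

# Portable string append; the += operator is not available on older bash releases.
JAVA_OPTS="$SPARK_JAVA_OPTS"
JAVA_OPTS="$JAVA_OPTS -Djava.library.path=$SPARK_LIBRARY_PATH"
JAVA_OPTS="$JAVA_OPTS -Xms$SPARK_MEM -Xmx$SPARK_MEM"

# Classpath comes from the shared helper script instead of being rebuilt inline
# in every launcher.
CLASSPATH=$("$SPARK_HOME/bin/compute-classpath.sh")

exec java -cp "$CLASSPATH" $JAVA_OPTS "$@"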