From 995982b3c9fdd4b031ccca4dfe76b4951ce1fcff Mon Sep 17 00:00:00 2001
From: Matei Zaharia
Date: Fri, 7 Sep 2012 17:08:36 -0700
Subject: Added a unit test for local-cluster mode and simplified some of the code involved in that

---
 run | 1 +
 1 file changed, 1 insertion(+)
(limited to 'run')

diff --git a/run b/run
index 8f7256b4e5..2946a04d3f 100755
--- a/run
+++ b/run
@@ -52,6 +52,7 @@ CLASSPATH="$SPARK_CLASSPATH"
 CLASSPATH+=":$MESOS_CLASSPATH"
 CLASSPATH+=":$FWDIR/conf"
 CLASSPATH+=":$CORE_DIR/target/scala-$SCALA_VERSION/classes"
+CLASSPATH+=":$CORE_DIR/target/scala-$SCALA_VERSION/test-classes"
 CLASSPATH+=":$CORE_DIR/src/main/resources"
 CLASSPATH+=":$REPL_DIR/target/scala-$SCALA_VERSION/classes"
 CLASSPATH+=":$EXAMPLES_DIR/target/scala-$SCALA_VERSION/classes"
--
cgit v1.2.3

From 1f539aa473981ec47b253dfe86c84ed2bcf286ba Mon Sep 17 00:00:00 2001
From: Matei Zaharia
Date: Mon, 24 Sep 2012 14:12:48 -0700
Subject: Update Scala version dependency to 2.9.2

---
 project/SparkBuild.scala | 2 +-
 run                      | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)
(limited to 'run')

diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
index 203001954a..9440b0f112 100644
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@ -22,7 +22,7 @@ object SparkBuild extends Build {
   def sharedSettings = Defaults.defaultSettings ++ Seq(
     organization := "org.spark-project",
     version := "0.6.0-SNAPSHOT",
-    scalaVersion := "2.9.1",
+    scalaVersion := "2.9.2",
     scalacOptions := Seq(/*"-deprecation",*/ "-unchecked", "-optimize"), // -deprecation is too noisy due to usage of old Hadoop API, enable it once that's no longer an issue
     unmanagedJars in Compile <<= baseDirectory map { base => (base / "lib" ** "*.jar").classpath },
     retrieveManaged := true,
diff --git a/run b/run
index 2946a04d3f..5f640789ff 100755
--- a/run
+++ b/run
@@ -1,6 +1,6 @@
 #!/bin/bash
 
-SCALA_VERSION=2.9.1
+SCALA_VERSION=2.9.2
 
 # Figure out where the Scala framework is installed
 FWDIR="$(cd `dirname $0`; pwd)"
--
cgit v1.2.3
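Note: the Scala bump above only retargets the scala-$SCALA_VERSION build directories; neither patch checks which Scala is actually installed on the machine. A minimal sanity check one could add to such a script is sketched below. It is not part of these patches, and the parsing of `scala -version` output assumes its usual "Scala code runner version X.Y.Z ..." format:

    #!/bin/bash
    SCALA_VERSION=2.9.2
    if command -v scala >/dev/null 2>&1; then
      # `scala -version` reports its version on stderr; capture both streams
      # and pull out the first dotted version number.
      INSTALLED=$(scala -version 2>&1 | grep -o '[0-9][0-9.]*' | head -n 1)
      if [ "$INSTALLED" != "$SCALA_VERSION" ]; then
        echo "Warning: scala $INSTALLED on PATH, build targets $SCALA_VERSION" >&2
      fi
    fi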
From c535762debf56c64183734935e6e3207720df5b0 Mon Sep 17 00:00:00 2001
From: Matei Zaharia
Date: Thu, 4 Oct 2012 15:11:43 -0700
Subject: Don't check for JARs in core/lib anymore

---
 run      | 3 ---
 run2.cmd | 3 +--
 2 files changed, 1 insertion(+), 5 deletions(-)
(limited to 'run')

diff --git a/run b/run
index 5f640789ff..6cab4765ee 100755
--- a/run
+++ b/run
@@ -56,9 +56,6 @@ CLASSPATH+=":$CORE_DIR/target/scala-$SCALA_VERSION/test-classes"
 CLASSPATH+=":$CORE_DIR/src/main/resources"
 CLASSPATH+=":$REPL_DIR/target/scala-$SCALA_VERSION/classes"
 CLASSPATH+=":$EXAMPLES_DIR/target/scala-$SCALA_VERSION/classes"
-for jar in `find $CORE_DIR/lib -name '*jar'`; do
-  CLASSPATH+=":$jar"
-done
 for jar in `find $FWDIR/lib_managed/jars -name '*jar'`; do
   CLASSPATH+=":$jar"
 done
diff --git a/run2.cmd b/run2.cmd
index 9fc4d5054b..097718b526 100644
--- a/run2.cmd
+++ b/run2.cmd
@@ -39,7 +39,6 @@ rem Build up classpath
 set CLASSPATH=%SPARK_CLASSPATH%;%MESOS_CLASSPATH%;%FWDIR%conf;%CORE_DIR%\target\scala-%SCALA_VERSION%\classes
 set CLASSPATH=%CLASSPATH%;%CORE_DIR%\target\scala-%SCALA_VERSION%\test-classes;%CORE_DIR%\src\main\resources
 set CLASSPATH=%CLASSPATH%;%REPL_DIR%\target\scala-%SCALA_VERSION%\classes;%EXAMPLES_DIR%\target\scala-%SCALA_VERSION%\classes
-for /R "%CORE_DIR%\lib" %%j in (*.jar) do set CLASSPATH=!CLASSPATH!;%%j
 for /R "%FWDIR%\lib_managed\jars" %%j in (*.jar) do set CLASSPATH=!CLASSPATH!;%%j
 for /R "%FWDIR%\lib_managed\bundles" %%j in (*.jar) do set CLASSPATH=!CLASSPATH!;%%j
 for /R "%REPL_DIR%\lib" %%j in (*.jar) do set CLASSPATH=!CLASSPATH!;%%j
@@ -65,4 +64,4 @@ if "%SPARK_LAUNCH_WITH_SCALA%" NEQ 1 goto java_runner
 
 :run_spark
 %RUNNER% -cp "%CLASSPATH%" %EXTRA_ARGS% %*
-:exit
\ No newline at end of file
+:exit
--
cgit v1.2.3

From ce915cadee1de8e265f090b7be2f6e70d1b4062e Mon Sep 17 00:00:00 2001
From: root
Date: Sun, 7 Oct 2012 04:19:16 +0000
Subject: Made run script add test-classes onto the classpath only if SPARK_TESTING is set; fixes #216

---
 .../scala/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala | 3 ++-
 .../scala/spark/scheduler/mesos/CoarseMesosSchedulerBackend.scala   | 3 ++-
 .../main/scala/spark/scheduler/mesos/MesosSchedulerBackend.scala    | 3 ++-
 run                                                                 | 6 ++++--
 sbt/sbt                                                             | 1 +
 5 files changed, 11 insertions(+), 5 deletions(-)
(limited to 'run')

diff --git a/core/src/main/scala/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala b/core/src/main/scala/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala
index 0043dbeb10..88cb114544 100644
--- a/core/src/main/scala/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala
+++ b/core/src/main/scala/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala
@@ -25,7 +25,8 @@ private[spark] class SparkDeploySchedulerBackend(
     "SPARK_MEM",
     "SPARK_CLASSPATH",
     "SPARK_LIBRARY_PATH",
-    "SPARK_JAVA_OPTS"
+    "SPARK_JAVA_OPTS",
+    "SPARK_TESTING"
   )
 
   // Memory used by each executor (in megabytes)
diff --git a/core/src/main/scala/spark/scheduler/mesos/CoarseMesosSchedulerBackend.scala b/core/src/main/scala/spark/scheduler/mesos/CoarseMesosSchedulerBackend.scala
index 9737c6b63e..e6d8b9d822 100644
--- a/core/src/main/scala/spark/scheduler/mesos/CoarseMesosSchedulerBackend.scala
+++ b/core/src/main/scala/spark/scheduler/mesos/CoarseMesosSchedulerBackend.scala
@@ -38,7 +38,8 @@ private[spark] class CoarseMesosSchedulerBackend(
     "SPARK_MEM",
     "SPARK_CLASSPATH",
     "SPARK_LIBRARY_PATH",
-    "SPARK_JAVA_OPTS"
+    "SPARK_JAVA_OPTS",
+    "SPARK_TESTING"
   )
 
   val MAX_SLAVE_FAILURES = 2 // Blacklist a slave after this many failures
diff --git a/core/src/main/scala/spark/scheduler/mesos/MesosSchedulerBackend.scala b/core/src/main/scala/spark/scheduler/mesos/MesosSchedulerBackend.scala
index e85e4ef318..6f01c8c09d 100644
--- a/core/src/main/scala/spark/scheduler/mesos/MesosSchedulerBackend.scala
+++ b/core/src/main/scala/spark/scheduler/mesos/MesosSchedulerBackend.scala
@@ -34,7 +34,8 @@ private[spark] class MesosSchedulerBackend(
     "SPARK_MEM",
     "SPARK_CLASSPATH",
     "SPARK_LIBRARY_PATH",
-    "SPARK_JAVA_OPTS"
+    "SPARK_JAVA_OPTS",
+    "SPARK_TESTING"
   )
 
   // Memory used by each executor (in megabytes)
diff --git a/run b/run
index 6cab4765ee..cb1499c6f9 100755
--- a/run
+++ b/run
@@ -21,7 +21,7 @@ fi
 
 # If the user specifies a Mesos JAR, put it before our included one on the classpath
 MESOS_CLASSPATH=""
-if [ -z "$MESOS_JAR" ] ; then
+if [ -n "$MESOS_JAR" ] ; then
   MESOS_CLASSPATH="$MESOS_JAR"
 fi
 
@@ -52,7 +52,9 @@ CLASSPATH="$SPARK_CLASSPATH"
 CLASSPATH+=":$MESOS_CLASSPATH"
 CLASSPATH+=":$FWDIR/conf"
 CLASSPATH+=":$CORE_DIR/target/scala-$SCALA_VERSION/classes"
-CLASSPATH+=":$CORE_DIR/target/scala-$SCALA_VERSION/test-classes"
+if [ -n "$SPARK_TESTING" ] ; then
+  CLASSPATH+=":$CORE_DIR/target/scala-$SCALA_VERSION/test-classes"
+fi
 CLASSPATH+=":$CORE_DIR/src/main/resources"
 CLASSPATH+=":$REPL_DIR/target/scala-$SCALA_VERSION/classes"
 CLASSPATH+=":$EXAMPLES_DIR/target/scala-$SCALA_VERSION/classes"
diff --git a/sbt/sbt b/sbt/sbt
index fab9967286..a3055c13c1 100755
--- a/sbt/sbt
+++ b/sbt/sbt
@@ -4,4 +4,5 @@ if [ "$MESOS_HOME" != "" ]; then
   EXTRA_ARGS="-Djava.library.path=$MESOS_HOME/lib/java"
 fi
 export SPARK_HOME=$(cd "$(dirname $0)/.."; pwd)
+export SPARK_TESTING=1 # To put test classes on classpath
 java -Xmx1200M -XX:MaxPermSize=200m $EXTRA_ARGS -jar $SPARK_HOME/sbt/sbt-launch-*.jar "$@"
--
cgit v1.2.3
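Note: besides gating test-classes behind SPARK_TESTING, the hunk at line 21 of `run` quietly fixes an inverted test. With `-z`, MESOS_CLASSPATH was assigned only when MESOS_JAR was empty, so a user-supplied JAR was never picked up. A standalone illustration of the two operators (the JAR path is hypothetical, and this snippet is not from the patch):

    #!/bin/bash
    MESOS_JAR="/path/to/custom-mesos.jar"   # hypothetical user setting
    # -z succeeds when a string is EMPTY; -n succeeds when it is NON-empty.
    if [ -z "$MESOS_JAR" ]; then echo "never reached while MESOS_JAR is set"; fi
    if [ -n "$MESOS_JAR" ]; then MESOS_CLASSPATH="$MESOS_JAR"; fi  # the fixed logic
    echo "MESOS_CLASSPATH=$MESOS_CLASSPATH"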
From 4a3e9cf69c7ccca8bc287de57e51f5ed0ab55a8a Mon Sep 17 00:00:00 2001
From: Matei Zaharia
Date: Sat, 13 Oct 2012 16:20:25 -0700
Subject: Document how to configure SPARK_MEM & co on a per-job basis

---
 conf/spark-env.sh.template | 29 ++++++++++++++++-------------
 docs/configuration.md      | 26 +++++++++++++++++++-------
 run                        |  7 -------
 3 files changed, 35 insertions(+), 27 deletions(-)
(limited to 'run')

diff --git a/conf/spark-env.sh.template b/conf/spark-env.sh.template
index 64eacce8a2..6d71ec5691 100755
--- a/conf/spark-env.sh.template
+++ b/conf/spark-env.sh.template
@@ -1,21 +1,24 @@
 #!/usr/bin/env bash
 
-# Set Spark environment variables for your site in this file. Some useful
-# variables to set are:
+# This file contains environment variables required to run Spark. Copy it as
+# spark-env.sh and edit that to configure Spark for your site. At a minimum,
+# the following two variables should be set:
 # - MESOS_NATIVE_LIBRARY, to point to your Mesos native library (libmesos.so)
 # - SCALA_HOME, to point to your Scala installation
+#
+# If using the standalone deploy mode, you can also set variables for it:
+# - SPARK_MASTER_IP, to bind the master to a different IP address
+# - SPARK_MASTER_PORT / SPARK_MASTER_WEBUI_PORT, to use non-default ports
+# - SPARK_WORKER_CORES, to set the number of cores to use on this machine
+# - SPARK_WORKER_MEMORY, to set how much memory to use (e.g. 1000m, 2g)
+# - SPARK_WORKER_PORT / SPARK_WORKER_WEBUI_PORT
+#
+# Finally, Spark also relies on the following variables, but these can be set
+# on just the *master* (i.e. in your driver program), and will automatically
+# be propagated to workers:
+# - SPARK_MEM, to change the amount of memory used per node (this should
+#   be in the same format as the JVM's -Xmx option, e.g. 300m or 1g)
 # - SPARK_CLASSPATH, to add elements to Spark's classpath
 # - SPARK_JAVA_OPTS, to add JVM options
-# - SPARK_MEM, to change the amount of memory used per node (this should
-#   be in the same format as the JVM's -Xmx option, e.g. 300m or 1g).
 # - SPARK_LIBRARY_PATH, to add extra search paths for native libraries.
-
-# Settings used by the scripts in the bin/ directory, apply to standalone mode only.
-# Note that the same worker settings apply to all of the workers.
-# - SPARK_MASTER_IP, to bind the master to a different ip address, for example a public one (Default: local ip address)
-# - SPARK_MASTER_PORT, to start the spark master on a different port (Default: 7077)
-# - SPARK_MASTER_WEBUI_PORT, to specify a different port for the Master WebUI (Default: 8080)
-# - SPARK_WORKER_PORT, to start the spark worker on a specific port (Default: random)
-# - SPARK_WORKER_CORES, to specify the number of cores to use (Default: all available cores)
-# - SPARK_WORKER_MEMORY, to specify how much memory to use, e.g. 1000M, 2G (Default: MAX(Available - 1024MB, 512MB))
-# - SPARK_WORKER_WEBUI_PORT, to specify a different port for the Worker WebUI (Default: 8081)
\ No newline at end of file
diff --git a/docs/configuration.md b/docs/configuration.md
index 4270e50f47..08174878f2 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -5,33 +5,45 @@ title: Spark Configuration
 
 Spark provides three main locations to configure the system:
 
-* The [`conf/spark-env.sh` script](#environment-variables-in-spark-envsh), in which you can set environment variables
-  that affect how the JVM is launched, such as, most notably, the amount of memory per JVM.
+* [Environment variables](#environment-variables) for launching Spark workers, which can
+  be set either in your driver program or in the `conf/spark-env.sh` script.
 * [Java system properties](#system-properties), which control internal configuration parameters and can be set either
   programmatically (by calling `System.setProperty` *before* creating a `SparkContext`) or through the
   `SPARK_JAVA_OPTS` environment variable in `spark-env.sh`.
 * [Logging configuration](#configuring-logging), which is done through `log4j.properties`.
 
-# Environment Variables in spark-env.sh
+# Environment Variables
 
 Spark determines how to initialize the JVM on worker nodes, or even on the local node when you run `spark-shell`,
 by running the `conf/spark-env.sh` script in the directory where it is installed. This script does not exist by default
 in the Git repository, but you can create it by copying `conf/spark-env.sh.template`. Make sure that you make the copy executable.
 
-Inside `spark-env.sh`, you can set the following environment variables:
+Inside `spark-env.sh`, you *must* set at least the following two environment variables:
 
 * `SCALA_HOME` to point to your Scala installation.
 * `MESOS_NATIVE_LIBRARY` if you are [running on a Mesos cluster](running-on-mesos.html).
-* `SPARK_MEM` to set the amount of memory used per node (this should be in the same format as the JVM's -Xmx option, e.g. `300m` or `1g`)
+
+In addition, there are four other variables that control execution. These can be set *either in `spark-env.sh`
+or in each job's driver program*, because they will automatically be propagated to workers from the driver.
+For a multi-user environment, we recommend setting them in the driver program instead of `spark-env.sh`, so
+that different user jobs can use different amounts of memory, JVM options, etc.
+
+* `SPARK_MEM` to set the amount of memory used per node (this should be in the same format as the
+  JVM's -Xmx option, e.g. `300m` or `1g`)
 * `SPARK_JAVA_OPTS` to add JVM options. This includes any system properties that you'd like to pass with `-D`.
 * `SPARK_CLASSPATH` to add elements to Spark's classpath.
 * `SPARK_LIBRARY_PATH` to add search directories for native libraries.
 
-The most important things to set first will be `SCALA_HOME`, without which `spark-shell` cannot run, and `MESOS_NATIVE_LIBRARY`
-if running on Mesos. The next setting will probably be the memory (`SPARK_MEM`). Make sure you set it high enough to be able to run your job but lower than the total memory on the machines (leave at least 1 GB for the operating system).
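Note: the `if [ -z ... ]` pattern the new documentation recommends for spark-env.sh has an equivalent POSIX parameter-expansion form; both leave any value already set by the user's driver program untouched. A sketch with illustrative values only (the "-verbose:gc" default is not from the patch):

    #!/bin/bash
    # Long form, as shown in the documentation patch above:
    if [ -z "$SPARK_MEM" ] ; then
      SPARK_MEM="1g"
    fi
    # Equivalent one-liner: assign the default only if unset or empty.
    : "${SPARK_JAVA_OPTS:=-verbose:gc}"
    export SPARK_MEM SPARK_JAVA_OPTS
    echo "SPARK_MEM=$SPARK_MEM SPARK_JAVA_OPTS=$SPARK_JAVA_OPTS"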
+Note that if you do set these in `spark-env.sh`, they will override the values set by user programs, which
+is undesirable; you can choose to have `spark-env.sh` set them only if the user program hasn't, as follows:
+{% highlight bash %}
+if [ -z "$SPARK_MEM" ] ; then
+  SPARK_MEM="1g"
+fi
+{% endhighlight %}
 
 # System Properties
diff --git a/run b/run
index cb1499c6f9..15db23bbe0 100755
--- a/run
+++ b/run
@@ -19,12 +19,6 @@ if [ -z "$SCALA_HOME" ]; then
   exit 1
 fi
 
-# If the user specifies a Mesos JAR, put it before our included one on the classpath
-MESOS_CLASSPATH=""
-if [ -n "$MESOS_JAR" ] ; then
-  MESOS_CLASSPATH="$MESOS_JAR"
-fi
-
 # Figure out how much memory to use per executor and set it as an environment
 # variable so that our process sees it and can report it to Mesos
 if [ -z "$SPARK_MEM" ] ; then
@@ -49,7 +43,6 @@ BAGEL_DIR="$FWDIR/bagel"
 
 # Build up classpath
 CLASSPATH="$SPARK_CLASSPATH"
-CLASSPATH+=":$MESOS_CLASSPATH"
 CLASSPATH+=":$FWDIR/conf"
 CLASSPATH+=":$CORE_DIR/target/scala-$SCALA_VERSION/classes"
 if [ -n "$SPARK_TESTING" ] ; then
--
cgit v1.2.3

From f595bb53d17d1ff84453214376445ae3c82c1222 Mon Sep 17 00:00:00 2001
From: Thomas Dudziak
Date: Mon, 22 Oct 2012 13:10:47 -0700
Subject: Tweaked run file to live more happily with typesafe's debian package

---
 run | 43 ++++++++++++++++++++++++++++++------------
 1 file changed, 30 insertions(+), 13 deletions(-)
(limited to 'run')

diff --git a/run b/run
index 15db23bbe0..83175e84de 100755
--- a/run
+++ b/run
@@ -13,10 +13,33 @@ if [ -e $FWDIR/conf/spark-env.sh ] ; then
   . $FWDIR/conf/spark-env.sh
 fi
 
-# Check that SCALA_HOME has been specified
-if [ -z "$SCALA_HOME" ]; then
-  echo "SCALA_HOME is not set" >&2
-  exit 1
+if [ "$SPARK_LAUNCH_WITH_SCALA" == "1" ]; then
+  if [ `command -v scala` ]; then
+    RUNNER="scala"
+  else
+    if [ -z "$SCALA_HOME" ]; then
+      echo "SCALA_HOME is not set" >&2
+      exit 1
+    fi
+    RUNNER="${SCALA_HOME}/bin/scala"
+  fi
+else
+  if [ `command -v java` ]; then
+    RUNNER="java"
+  else
+    if [ -z "$JAVA_HOME" ]; then
+      echo "JAVA_HOME is not set" >&2
+      exit 1
+    fi
+    RUNNER="${JAVA_HOME}/bin/java"
+  fi
+  if [ -z "$SCALA_LIBRARY_PATH" ]; then
+    if [ -z "$SCALA_HOME" ]; then
+      echo "SCALA_HOME is not set" >&2
+      exit 1
+    fi
+    SCALA_LIBRARY_PATH="$SCALA_HOME/lib"
+  fi
 fi
 
 # Figure out how much memory to use per executor and set it as an environment
@@ -70,17 +93,11 @@ export CLASSPATH # Needed for spark-shell
 # the Spark shell, the wrapper is necessary to properly reset the terminal
 # when we exit, so we allow it to set a variable to launch with scala.
 if [ "$SPARK_LAUNCH_WITH_SCALA" == "1" ]; then
-  RUNNER="${SCALA_HOME}/bin/scala"
   EXTRA_ARGS="" # Java options will be passed to scala as JAVA_OPTS
 else
-  CLASSPATH+=":$SCALA_HOME/lib/scala-library.jar"
-  CLASSPATH+=":$SCALA_HOME/lib/scala-compiler.jar"
-  CLASSPATH+=":$SCALA_HOME/lib/jline.jar"
-  if [ -n "$JAVA_HOME" ]; then
-    RUNNER="${JAVA_HOME}/bin/java"
-  else
-    RUNNER=java
-  fi
+  CLASSPATH+=":$SCALA_LIBRARY_PATH/scala-library.jar"
+  CLASSPATH+=":$SCALA_LIBRARY_PATH/scala-compiler.jar"
+  CLASSPATH+=":$SCALA_LIBRARY_PATH/jline.jar"
  # The JVM doesn't read JAVA_OPTS by default so we need to pass it in
   EXTRA_ARGS="$JAVA_OPTS"
 fi
--
cgit v1.2.3
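Note: the runner-selection logic added in the last commit reduces to "prefer a launcher on the PATH, fall back to an explicit home directory, fail loudly otherwise". Condensed into one fragment for the java case (a sketch, not the script itself; it also swaps the script's backtick-based test for the quieter redirect form of `command -v`):

    #!/bin/bash
    if command -v java >/dev/null 2>&1; then
      RUNNER="java"                      # found on the PATH
    elif [ -n "$JAVA_HOME" ]; then
      RUNNER="${JAVA_HOME}/bin/java"     # explicit installation directory
    else
      echo "JAVA_HOME is not set" >&2
      exit 1
    fi
    "$RUNNER" -version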