 README | 23 ++++++++++++++++++++++-
 run    | 39 ++++++++++++++++++++++++++-------------
 2 files changed, 48 insertions(+), 14 deletions(-)
diff --git a/README b/README
index e0c7603632..6af34a2294 100644
--- a/README
+++ b/README
@@ -1,4 +1,6 @@
-Spark requires Scala 2.8. This version has been tested with 2.8.0RC3.
+BUILDING
+
+Spark requires Scala 2.8. This version has been tested with 2.8.0.final.
To build and run Spark, you will need to have Scala's bin in your $PATH,
or you will need to set the SCALA_HOME environment variable to point
to where you've installed Scala.
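
For example, on a typical Unix setup you might prepare the environment like
this before building (the install path is an assumption; use wherever Scala
actually lives on your machine):

    # Assumed install location for Scala 2.8.0.final; adjust as needed
    export SCALA_HOME=/usr/local/scala-2.8.0.final
    export PATH=$SCALA_HOME/bin:$PATH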
@@ -13,3 +15,22 @@ example programs prints usage help if no params are given.
Tip: If you are building Spark and examples repeatedly, export USE_FSC=1
to have the Makefile use the fsc compiler daemon instead of scalac.
+
+CONFIGURATION
+
+Spark can be configured through two files: conf/java-opts and conf/spark-env.sh.
+
+In java-opts, you can add flags to be passed to the JVM when running Spark.
+
+In spark-env.sh, you can set any environment variables you wish to be available
+when running Spark programs, such as PATH, SCALA_HOME, etc. There are also
+several Spark-specific variables you can set:
+- SPARK_CLASSPATH: Extra entries to be added to the classpath, separated by ":".
+- SPARK_MEM: Memory for Spark to use, in the format used by java's -Xmx option
+ (for example, 200m means 200 MB, 1g means 1 GB, etc.).
+- SPARK_LIBRARY_PATH: Extra entries to add to java.library.path for locating
+ shared libraries.
+- SPARK_JAVA_OPTS: Extra options to pass to the JVM.
+
+Note that spark-env.sh must be a shell script (it must be executable and start
+with a #! header to specify the shell to use).
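
To make the two files concrete, here is a minimal sketch of what they might
contain; every value below is an illustrative assumption, not a shipped
default:

    #!/bin/sh
    # conf/spark-env.sh -- sourced by ./run before Spark starts
    export SCALA_HOME=/usr/local/scala-2.8.0.final      # assumed install path
    export SPARK_MEM=1g                                 # becomes -Xms1g -Xmx1g
    export SPARK_CLASSPATH=/opt/jars/extra.jar          # hypothetical extra jar
    export SPARK_JAVA_OPTS="-Dspark.example.flag=true"  # hypothetical JVM property

conf/java-opts, by contrast, is not a script: it holds raw JVM flags on a
single line, for example:

    -verbose:gc -XX:+PrintGCDetails

Remember that, per the note above, spark-env.sh needs the #! line and the
executable bit.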
diff --git a/run b/run
index e6723ccd7c..36fbd9d23d 100755
--- a/run
+++ b/run
@@ -3,26 +3,39 @@
# Figure out where the Scala framework is installed
FWDIR=`dirname $0`
-# Set JAVA_OPTS to be able to load libnexus.so and set various other misc options
-export JAVA_OPTS="-Djava.library.path=$FWDIR/third_party:$FWDIR/src/native -Xms100m -Xmx750m"
+# Load environment variables from conf/spark-env.sh, if it exists
+if [ -e $FWDIR/conf/spark-env.sh ] ; then
+ . $FWDIR/conf/spark-env.sh
+fi
+
+if [ "$SPARK_MEM" == "" ] ; then
+ SPARK_MEM="200m"
+fi
+
+# Set JAVA_OPTS to be able to load native libraries and to set heap size
+JAVA_OPTS="$SPARK_JAVA_OPTS"
+JAVA_OPTS+=" -Djava.library.path=$SPARK_LIBRARY_PATH:$FWDIR/third_party:$FWDIR/src/native"
+JAVA_OPTS+=" -Xms$SPARK_MEM -Xmx$SPARK_MEM"
+# Load extra JAVA_OPTS from conf/java-opts, if it exists
if [ -e $FWDIR/conf/java-opts ] ; then
JAVA_OPTS+=" `cat $FWDIR/conf/java-opts`"
fi
export JAVA_OPTS
# Build up classpath
-CLASSPATH=$FWDIR/build/classes
-CLASSPATH+=:$FWDIR/third_party/nexus.jar
-CLASSPATH+=:$FWDIR/third_party/asm-3.2/lib/all/asm-all-3.2.jar
-CLASSPATH+=:$FWDIR/third_party/colt.jar
-CLASSPATH+=:$FWDIR/third_party/google-collect-1.0-rc5/google-collect-1.0-rc5.jar
-CLASSPATH+=:$FWDIR/third_party/hadoop-0.20.0/hadoop-0.20.0-core.jar
-CLASSPATH+=:third_party/scalatest-1.2-for-scala-2.8.0.RC3-SNAPSHOT.jar
-CLASSPATH+=:third_party/scalacheck_2.8.0.RC3-1.7.jar
+SPARK_CLASSPATH="$SPARK_CLASSPATH:$FWDIR/build/classes"
+SPARK_CLASSPATH+=:$FWDIR/third_party/nexus.jar
+SPARK_CLASSPATH+=:$FWDIR/third_party/asm-3.2/lib/all/asm-all-3.2.jar
+SPARK_CLASSPATH+=:$FWDIR/third_party/colt.jar
+SPARK_CLASSPATH+=:$FWDIR/third_party/google-collect-1.0-rc5/google-collect-1.0-rc5.jar
+SPARK_CLASSPATH+=:$FWDIR/third_party/hadoop-0.20.0/hadoop-0.20.0-core.jar
+SPARK_CLASSPATH+=:$FWDIR/third_party/scalatest-1.2-for-scala-2.8.0.RC3-SNAPSHOT.jar
+SPARK_CLASSPATH+=:$FWDIR/third_party/scalacheck_2.8.0.RC3-1.7.jar
for jar in $FWDIR/third_party/hadoop-0.20.0/lib/*.jar; do
- CLASSPATH+=:$jar
+ SPARK_CLASSPATH+=:$jar
done
-export CLASSPATH
+export SPARK_CLASSPATH
+export CLASSPATH=$SPARK_CLASSPATH # Needed for spark-shell
if [ -n "$SCALA_HOME" ]; then
SCALA=${SCALA_HOME}/bin/scala
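
As a sanity check of the new logic: if neither conf file exists and no SPARK_*
variables are set, the hunk above expands to roughly the following (SPARK_DIR
stands for the directory containing run):

    JAVA_OPTS=' -Djava.library.path=:SPARK_DIR/third_party:SPARK_DIR/src/native -Xms200m -Xmx200m'
    CLASSPATH=':SPARK_DIR/build/classes:SPARK_DIR/third_party/nexus.jar:...'

Note the leading ':' in both: with SPARK_LIBRARY_PATH and SPARK_CLASSPATH
unset, each list begins with an empty entry, and on the classpath the JVM
treats an empty entry as the current directory.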
@@ -30,4 +43,4 @@ else
SCALA=scala
fi
-exec $SCALA -cp $CLASSPATH $@
+exec $SCALA -cp "$SPARK_CLASSPATH" "$@"
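
Putting it together, a run after this change might look like the following;
the example class name is an assumption (any of the bundled example programs
works the same way):

    # Override the heap for this run only; spark-env.sh supplies the rest
    SPARK_MEM=1g ./run spark.examples.SparkLR local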