aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--bin/compute-classpath.cmd46
-rw-r--r--core/src/main/scala/org/apache/spark/SparkContext.scala28
-rw-r--r--docs/spark-standalone.md4
-rw-r--r--run-example.cmd23
-rw-r--r--run-example2.cmd61
-rw-r--r--sbt/sbt.cmd2
-rwxr-xr-xspark-class2
-rw-r--r--spark-class.cmd (renamed from run.cmd)5
-rw-r--r--spark-class2.cmd (renamed from run2.cmd)56
-rw-r--r--spark-shell.cmd4
10 files changed, 148 insertions, 83 deletions
diff --git a/bin/compute-classpath.cmd b/bin/compute-classpath.cmd
index 9178b852e6..cf38188c4b 100644
--- a/bin/compute-classpath.cmd
+++ b/bin/compute-classpath.cmd
@@ -28,30 +28,27 @@ set FWDIR=%~dp0..\
rem Load environment variables from conf\spark-env.cmd, if it exists
if exist "%FWDIR%conf\spark-env.cmd" call "%FWDIR%conf\spark-env.cmd"
-set CORE_DIR=%FWDIR%core
-set REPL_DIR=%FWDIR%repl
-set EXAMPLES_DIR=%FWDIR%examples
-set BAGEL_DIR=%FWDIR%bagel
-set MLLIB_DIR=%FWDIR%mllib
-set TOOLS_DIR=%FWDIR%tools
-set YARN_DIR=%FWDIR%yarn
-set STREAMING_DIR=%FWDIR%streaming
-set PYSPARK_DIR=%FWDIR%python
-
rem Build up classpath
-set CLASSPATH=%SPARK_CLASSPATH%;%MESOS_CLASSPATH%;%FWDIR%conf;%CORE_DIR%\target\scala-%SCALA_VERSION%\classes
-set CLASSPATH=%CLASSPATH%;%CORE_DIR%\target\scala-%SCALA_VERSION%\test-classes;%CORE_DIR%\src\main\resources
-set CLASSPATH=%CLASSPATH%;%STREAMING_DIR%\target\scala-%SCALA_VERSION%\classes;%STREAMING_DIR%\target\scala-%SCALA_VERSION%\test-classes
-set CLASSPATH=%CLASSPATH%;%STREAMING_DIR%\lib\org\apache\kafka\kafka\0.7.2-spark\*
-set CLASSPATH=%CLASSPATH%;%REPL_DIR%\target\scala-%SCALA_VERSION%\classes;%EXAMPLES_DIR%\target\scala-%SCALA_VERSION%\classes
-set CLASSPATH=%CLASSPATH%;%FWDIR%lib_managed\jars\*
-set CLASSPATH=%CLASSPATH%;%FWDIR%lib_managed\bundles\*
-set CLASSPATH=%CLASSPATH%;%FWDIR%repl\lib\*
-set CLASSPATH=%CLASSPATH%;%FWDIR%python\lib\*
-set CLASSPATH=%CLASSPATH%;%BAGEL_DIR%\target\scala-%SCALA_VERSION%\classes
-set CLASSPATH=%CLASSPATH%;%MLLIB_DIR%\target\scala-%SCALA_VERSION%\classes
-set CLASSPATH=%CLASSPATH%;%TOOLS_DIR%\target\scala-%SCALA_VERSION%\classes
-set CLASSPATH=%CLASSPATH%;%YARN_DIR%\target\scala-%SCALA_VERSION%\classes
+set CLASSPATH=%SPARK_CLASSPATH%;%FWDIR%conf
+if exist "%FWDIR%RELEASE" (
+ for %%d in ("%FWDIR%jars\spark-assembly*.jar") do (
+ set ASSEMBLY_JAR=%%d
+ )
+) else (
+ for %%d in ("%FWDIR%assembly\target\scala-%SCALA_VERSION%\spark-assembly*hadoop*.jar") do (
+ set ASSEMBLY_JAR=%%d
+ )
+)
+set CLASSPATH=%CLASSPATH%;%ASSEMBLY_JAR%
+
+if "x%SPARK_TESTING%"=="x1" (
+ rem Add test clases to path
+ set CLASSPATH=%CLASSPATH%;%FWDIR%core\target\scala-%SCALA_VERSION%\test-classes
+ set CLASSPATH=%CLASSPATH%;%FWDIR%repl\target\scala-%SCALA_VERSION%\test-classes
+ set CLASSPATH=%CLASSPATH%;%FWDIR%mllib\target\scala-%SCALA_VERSION%\test-classes
+ set CLASSPATH=%CLASSPATH%;%FWDIR%bagel\target\scala-%SCALA_VERSION%\test-classes
+ set CLASSPATH=%CLASSPATH%;%FWDIR%streaming\target\scala-%SCALA_VERSION%\test-classes
+)
rem Add hadoop conf dir - else FileSystem.*, etc fail
rem Note, this assumes that there is either a HADOOP_CONF_DIR or YARN_CONF_DIR which hosts
@@ -64,9 +61,6 @@ if "x%YARN_CONF_DIR%"=="x" goto no_yarn_conf_dir
set CLASSPATH=%CLASSPATH%;%YARN_CONF_DIR%
:no_yarn_conf_dir
-rem Add Scala standard library
-set CLASSPATH=%CLASSPATH%;%SCALA_HOME%\lib\scala-library.jar;%SCALA_HOME%\lib\scala-compiler.jar;%SCALA_HOME%\lib\jline.jar
-
rem A bit of a hack to allow calling this script within run2.cmd without seeing output
if "%DONT_PRINT_CLASSPATH%"=="1" goto exit
diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala
index faf0c2362a..f2641851cb 100644
--- a/core/src/main/scala/org/apache/spark/SparkContext.scala
+++ b/core/src/main/scala/org/apache/spark/SparkContext.scala
@@ -631,20 +631,26 @@ class SparkContext(
* filesystems), or an HTTP, HTTPS or FTP URI.
*/
def addJar(path: String) {
- if (null == path) {
+ if (path == null) {
logWarning("null specified as parameter to addJar",
new SparkException("null specified as parameter to addJar"))
} else {
- val env = SparkEnv.get
- val uri = new URI(path)
- val key = uri.getScheme match {
- case null | "file" =>
- if (env.hadoop.isYarnMode()) {
- logWarning("local jar specified as parameter to addJar under Yarn mode")
- return
- }
- env.httpFileServer.addJar(new File(uri.getPath))
- case _ => path
+ var key = ""
+ if (path.contains("\\")) {
+ // For local paths with backslashes on Windows, URI throws an exception
+ key = env.httpFileServer.addJar(new File(path))
+ } else {
+ val uri = new URI(path)
+ key = uri.getScheme match {
+ case null | "file" =>
+ if (env.hadoop.isYarnMode()) {
+ logWarning("local jar specified as parameter to addJar under Yarn mode")
+ return
+ }
+ env.httpFileServer.addJar(new File(uri.getPath))
+ case _ =>
+ path
+ }
}
addedJars(key) = System.currentTimeMillis
logInfo("Added JAR " + path + " at " + key + " with timestamp " + addedJars(key))
diff --git a/docs/spark-standalone.md b/docs/spark-standalone.md
index 9ab6ba0830..994a96f2c9 100644
--- a/docs/spark-standalone.md
+++ b/docs/spark-standalone.md
@@ -9,7 +9,7 @@ In addition to running on the Mesos or YARN cluster managers, Spark also provide
You can start a standalone master server by executing:
- ./spark-class spark.deploy.master.Master
+ ./spark-class org.apache.spark.deploy.master.Master
Once started, the master will print out a `spark://HOST:PORT` URL for itself, which you can use to connect workers to it,
or pass as the "master" argument to `SparkContext`. You can also find this URL on
@@ -17,7 +17,7 @@ the master's web UI, which is [http://localhost:8080](http://localhost:8080) by
Similarly, you can start one or more workers and connect them to the master via:
- ./spark-class spark.deploy.worker.Worker spark://IP:PORT
+ ./spark-class org.apache.spark.deploy.worker.Worker spark://IP:PORT
Once you have started a worker, look at the master's web UI ([http://localhost:8080](http://localhost:8080) by default).
You should see the new node listed there, along with its number of CPUs and memory (minus one gigabyte left for the OS).
diff --git a/run-example.cmd b/run-example.cmd
new file mode 100644
index 0000000000..5b2d048d6e
--- /dev/null
+++ b/run-example.cmd
@@ -0,0 +1,23 @@
+@echo off
+
+rem
+rem Licensed to the Apache Software Foundation (ASF) under one or more
+rem contributor license agreements. See the NOTICE file distributed with
+rem this work for additional information regarding copyright ownership.
+rem The ASF licenses this file to You under the Apache License, Version 2.0
+rem (the "License"); you may not use this file except in compliance with
+rem the License. You may obtain a copy of the License at
+rem
+rem http://www.apache.org/licenses/LICENSE-2.0
+rem
+rem Unless required by applicable law or agreed to in writing, software
+rem distributed under the License is distributed on an "AS IS" BASIS,
+rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+rem See the License for the specific language governing permissions and
+rem limitations under the License.
+rem
+
+rem This is the entry point for running a Spark example. To avoid polluting
+rem the environment, it just launches a new cmd to do the real work.
+
+cmd /V /E /C %~dp0run-example2.cmd %*
diff --git a/run-example2.cmd b/run-example2.cmd
new file mode 100644
index 0000000000..dbb371ecfc
--- /dev/null
+++ b/run-example2.cmd
@@ -0,0 +1,61 @@
+@echo off
+
+rem
+rem Licensed to the Apache Software Foundation (ASF) under one or more
+rem contributor license agreements. See the NOTICE file distributed with
+rem this work for additional information regarding copyright ownership.
+rem The ASF licenses this file to You under the Apache License, Version 2.0
+rem (the "License"); you may not use this file except in compliance with
+rem the License. You may obtain a copy of the License at
+rem
+rem http://www.apache.org/licenses/LICENSE-2.0
+rem
+rem Unless required by applicable law or agreed to in writing, software
+rem distributed under the License is distributed on an "AS IS" BASIS,
+rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+rem See the License for the specific language governing permissions and
+rem limitations under the License.
+rem
+
+set SCALA_VERSION=2.9.3
+
+rem Figure out where the Spark framework is installed
+set FWDIR=%~dp0
+
+rem Export this as SPARK_HOME
+set SPARK_HOME=%FWDIR%
+
+rem Load environment variables from conf\spark-env.cmd, if it exists
+if exist "%FWDIR%conf\spark-env.cmd" call "%FWDIR%conf\spark-env.cmd"
+
+rem Test that an argument was given
+if not "x%1"=="x" goto arg_given
+ echo Usage: run-example ^<example-class^> [^<args^>]
+ goto exit
+:arg_given
+
+set EXAMPLES_DIR=%FWDIR%examples
+
+rem Figure out the JAR file that our examples were packaged into.
+set SPARK_EXAMPLES_JAR=
+for %%d in ("%EXAMPLES_DIR%\target\scala-%SCALA_VERSION%\spark-examples*assembly*.jar") do (
+ set SPARK_EXAMPLES_JAR=%%d
+)
+if "x%SPARK_EXAMPLES_JAR%"=="x" (
+ echo Failed to find Spark examples assembly JAR.
+ echo You need to build Spark with sbt\sbt assembly before running this program.
+ goto exit
+)
+
+rem Compute Spark classpath using external script
+set DONT_PRINT_CLASSPATH=1
+call "%FWDIR%bin\compute-classpath.cmd"
+set DONT_PRINT_CLASSPATH=0
+set CLASSPATH=%SPARK_EXAMPLES_JAR%;%CLASSPATH%
+
+rem Figure out where java is.
+set RUNNER=java
+if not "x%JAVA_HOME%"=="x" set RUNNER=%JAVA_HOME%\bin\java
+
+"%RUNNER%" -cp "%CLASSPATH%" %JAVA_OPTS% %*
+:exit
diff --git a/sbt/sbt.cmd b/sbt/sbt.cmd
index 56a16d77f2..681fe00f92 100644
--- a/sbt/sbt.cmd
+++ b/sbt/sbt.cmd
@@ -22,4 +22,4 @@ if not "%MESOS_HOME%x"=="x" set EXTRA_ARGS=-Djava.library.path=%MESOS_HOME%\lib\
set SPARK_HOME=%~dp0..
-java -Xmx1200M -XX:MaxPermSize=200m %EXTRA_ARGS% -jar %SPARK_HOME%\sbt\sbt-launch-0.11.3-2.jar "%*"
+java -Xmx1200M -XX:MaxPermSize=200m -XX:ReservedCodeCacheSize=256m %EXTRA_ARGS% -jar %SPARK_HOME%\sbt\sbt-launch-0.11.3-2.jar "%*"
diff --git a/spark-class b/spark-class
index 71dfb9c4e6..037abda3b7 100755
--- a/spark-class
+++ b/spark-class
@@ -31,7 +31,7 @@ if [ -e $FWDIR/conf/spark-env.sh ] ; then
fi
if [ -z "$1" ]; then
- echo "Usage: run <spark-class> [<args>]" >&2
+ echo "Usage: spark-class <class> [<args>]" >&2
exit 1
fi
diff --git a/run.cmd b/spark-class.cmd
index c91764e617..19850db9e1 100644
--- a/run.cmd
+++ b/spark-class.cmd
@@ -17,4 +17,7 @@ rem See the License for the specific language governing permissions and
rem limitations under the License.
rem
-cmd /V /E /C %~dp0run2.cmd %*
+rem This is the entry point for running a Spark class. To avoid polluting
+rem the environment, it just launches a new cmd to do the real work.
+
+cmd /V /E /C %~dp0spark-class2.cmd %*
diff --git a/run2.cmd b/spark-class2.cmd
index dc5e50931e..d4d853e8ad 100644
--- a/run2.cmd
+++ b/spark-class2.cmd
@@ -30,7 +30,7 @@ if exist "%FWDIR%conf\spark-env.cmd" call "%FWDIR%conf\spark-env.cmd"
rem Test that an argument was given
if not "x%1"=="x" goto arg_given
- echo Usage: run ^<spark-class^> [^<args^>]
+ echo Usage: spark-class ^<class^> [^<args^>]
goto exit
:arg_given
@@ -44,12 +44,6 @@ rem Do not overwrite SPARK_JAVA_OPTS environment variable in this script
if "%RUNNING_DAEMON%"=="0" set OUR_JAVA_OPTS=%SPARK_JAVA_OPTS%
if "%RUNNING_DAEMON%"=="1" set OUR_JAVA_OPTS=%SPARK_DAEMON_JAVA_OPTS%
-rem Check that SCALA_HOME has been specified
-if not "x%SCALA_HOME%"=="x" goto scala_exists
- echo SCALA_HOME is not set
- goto exit
-:scala_exists
-
rem Figure out how much memory to use per executor and set it as an environment
rem variable so that our process sees it and can report it to Mesos
if "x%SPARK_MEM%"=="x" set SPARK_MEM=512m
@@ -58,43 +52,27 @@ rem Set JAVA_OPTS to be able to load native libraries and to set heap size
set JAVA_OPTS=%OUR_JAVA_OPTS% -Djava.library.path=%SPARK_LIBRARY_PATH% -Xms%SPARK_MEM% -Xmx%SPARK_MEM%
rem Attention: when changing the way the JAVA_OPTS are assembled, the change must be reflected in ExecutorRunner.scala!
-set CORE_DIR=%FWDIR%core
-set EXAMPLES_DIR=%FWDIR%examples
-set REPL_DIR=%FWDIR%repl
+rem Test whether the user has built Spark
+if exist "%FWDIR%RELEASE" goto skip_build_test
+set FOUND_JAR=0
+for %%d in ("%FWDIR%assembly\target\scala-%SCALA_VERSION%\spark-assembly*hadoop*.jar") do (
+ set FOUND_JAR=1
+)
+if "%FOUND_JAR%"=="0" (
+ echo Failed to find Spark assembly JAR.
+ echo You need to build Spark with sbt\sbt assembly before running this program.
+ goto exit
+)
+:skip_build_test
rem Compute classpath using external script
set DONT_PRINT_CLASSPATH=1
call "%FWDIR%bin\compute-classpath.cmd"
set DONT_PRINT_CLASSPATH=0
-rem Figure out the JAR file that our examples were packaged into.
-rem First search in the build path from SBT:
-for %%d in ("examples/target/scala-%SCALA_VERSION%/spark-examples*.jar") do (
- set SPARK_EXAMPLES_JAR=examples/target/scala-%SCALA_VERSION%/%%d
-)
-rem Then search in the build path from Maven:
-for %%d in ("examples/target/spark-examples*hadoop*.jar") do (
- set SPARK_EXAMPLES_JAR=examples/target/%%d
-)
-
-rem Figure out whether to run our class with java or with the scala launcher.
-rem In most cases, we'd prefer to execute our process with java because scala
-rem creates a shell script as the parent of its Java process, which makes it
-rem hard to kill the child with stuff like Process.destroy(). However, for
-rem the Spark shell, the wrapper is necessary to properly reset the terminal
-rem when we exit, so we allow it to set a variable to launch with scala.
-if "%SPARK_LAUNCH_WITH_SCALA%" NEQ 1 goto java_runner
- set RUNNER=%SCALA_HOME%\bin\scala
- # Java options will be passed to scala as JAVA_OPTS
- set EXTRA_ARGS=
- goto run_spark
-:java_runner
- set CLASSPATH=%CLASSPATH%;%SCALA_HOME%\lib\scala-library.jar;%SCALA_HOME%\lib\scala-compiler.jar;%SCALA_HOME%\lib\jline.jar
- set RUNNER=java
- if not "x%JAVA_HOME%"=="x" set RUNNER=%JAVA_HOME%\bin\java
- rem The JVM doesn't read JAVA_OPTS by default so we need to pass it in
- set EXTRA_ARGS=%JAVA_OPTS%
-:run_spark
+rem Figure out where java is.
+set RUNNER=java
+if not "x%JAVA_HOME%"=="x" set RUNNER=%JAVA_HOME%\bin\java
-"%RUNNER%" -cp "%CLASSPATH%" %EXTRA_ARGS% %*
+"%RUNNER%" -cp "%CLASSPATH%" %JAVA_OPTS% %*
:exit
diff --git a/spark-shell.cmd b/spark-shell.cmd
index b9b4d4bfb2..3e52bf835e 100644
--- a/spark-shell.cmd
+++ b/spark-shell.cmd
@@ -18,5 +18,5 @@ rem limitations under the License.
rem
set FWDIR=%~dp0
-set SPARK_LAUNCH_WITH_SCALA=1
-cmd /V /E /C %FWDIR%run2.cmd org.apache.spark.repl.Main %*
+
+cmd /V /E /C %FWDIR%spark-class2.cmd org.apache.spark.repl.Main %*