author     Matei Zaharia <matei@databricks.com>          2014-05-19 15:02:35 -0700
committer  Tathagata Das <tathagata.das1565@gmail.com>   2014-05-19 15:02:35 -0700
commit     7b70a7071894dd90ea1d0091542b3e13e7ef8d3a
tree       e24b0a208b0c2290e6f1b6a6beda520f36ed1fa3
parent     df0aa8353ab6d3b19d838c6fa95a93a64948309f
[SPARK-1876] Windows fixes to deal with latest distribution layout changes
- Look for JARs in the right place
- Launch examples the same way as on Unix
- Load datanucleus JARs if they exist
- Don't attempt to parse local paths as URIs in SparkSubmit, since Windows paths like C:\ are not valid URIs (see the sketch after the commit log below)
- Also fixed POM exclusion rules for datanucleus (it wasn't properly excluding it, whereas SBT was)
Author: Matei Zaharia <matei@databricks.com>
Closes #819 from mateiz/win-fixes and squashes the following commits:
d558f96 [Matei Zaharia] Fix comment
228577b [Matei Zaharia] Review comments
d3b71c7 [Matei Zaharia] Properly exclude datanucleus files in Maven assembly
144af84 [Matei Zaharia] Update Windows scripts to match latest binary package layout
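The URI point above is the subtle one, so here is a minimal Scala sketch of the failure mode. This is not code from the commit; the path and object name are made up for illustration:

```scala
import java.io.File
import java.net.URI
import scala.util.Try

object WindowsPathSketch {
  def main(args: Array[String]): Unit = {
    val jar = """C:\spark\lib\app.jar"""   // hypothetical local Windows path

    // Parsing the path as a URI fails: backslashes are illegal URI
    // characters, and even with forward slashes "C:" would be taken as
    // the URI scheme, so getPath would drop the drive letter.
    println(Try(new URI(jar).getPath))     // Failure(java.net.URISyntaxException: ...)

    // Treating the string as a plain filesystem path, as the patched
    // addJarToClasspath does, raises no exception on any OS.
    println(new File(jar).getPath)         // C:\spark\lib\app.jar
  }
}
```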
-rw-r--r--  README.md                                                        7
-rw-r--r--  assembly/pom.xml                                                 2
-rw-r--r--  bin/compute-classpath.cmd                                       24
-rwxr-xr-x  bin/run-example                                                 23
-rw-r--r--  bin/run-example2.cmd                                            51
-rwxr-xr-x  bin/spark-class2.cmd                                             2
-rw-r--r--  core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala    2
7 files changed, 81 insertions, 30 deletions
@@ -9,13 +9,14 @@ You can find the latest Spark documentation, including a programming guide,
on the project webpage at <http://spark.apache.org/documentation.html>.
This README file only contains basic setup instructions.

-
## Building Spark

Spark is built on Scala 2.10. To build Spark and its example programs, run:

    ./sbt/sbt assembly

+(You do not need to do this if you downloaded a pre-built package.)
+
## Interactive Scala Shell

The easiest way to start using Spark is through the Scala shell:
@@ -41,9 +42,9 @@ And run the following command, which should also return 1000:
Spark also comes with several sample programs in the `examples` directory.
To run one of them, use `./bin/run-example <class> [params]`. For example:

-    ./bin/run-example org.apache.spark.examples.SparkLR
+    ./bin/run-example SparkPi

-will run the Logistic Regression example locally.
+will run the Pi example locally.

You can set the MASTER environment variable when running examples to submit
examples to a cluster. This can be a mesos:// or spark:// URL,
diff --git a/assembly/pom.xml b/assembly/pom.xml
index abd8935339..963357b9ab 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -96,7 +96,7 @@
      <filter>
        <artifact>*:*</artifact>
        <excludes>
-          <exclude>org.datanucleus:*</exclude>
+          <exclude>org/datanucleus/**</exclude>
          <exclude>META-INF/*.SF</exclude>
          <exclude>META-INF/*.DSA</exclude>
          <exclude>META-INF/*.RSA</exclude>
diff --git a/bin/compute-classpath.cmd b/bin/compute-classpath.cmd
index 065553eb31..58710cd1bd 100644
--- a/bin/compute-classpath.cmd
+++ b/bin/compute-classpath.cmd
@@ -20,6 +20,13 @@ rem
rem This script computes Spark's classpath and prints it to stdout; it's used by both the "run"
rem script and the ExecutorRunner in standalone cluster mode.
+rem If we're called from spark-class2.cmd, it already set enabledelayedexpansion and setting
+rem it here would stop us from affecting its copy of the CLASSPATH variable; otherwise we
+rem need to set it here because we use !datanucleus_jars! below.
+if "%DONT_PRINT_CLASSPATH%"=="1" goto skip_delayed_expansion
+setlocal enabledelayedexpansion
+:skip_delayed_expansion
+
set SCALA_VERSION=2.10
rem Figure out where the Spark framework is installed
@@ -31,7 +38,7 @@ if exist "%FWDIR%conf\spark-env.cmd" call "%FWDIR%conf\spark-env.cmd"

rem Build up classpath
set CLASSPATH=%FWDIR%conf
if exist "%FWDIR%RELEASE" (
- for %%d in ("%FWDIR%jars\spark-assembly*.jar") do (
+ for %%d in ("%FWDIR%lib\spark-assembly*.jar") do (
set ASSEMBLY_JAR=%%d
)
) else (
@@ -42,6 +49,21 @@ if exist "%FWDIR%RELEASE" (
set CLASSPATH=%CLASSPATH%;%ASSEMBLY_JAR%
+rem When Hive support is needed, Datanucleus jars must be included on the classpath.
+rem Datanucleus jars do not work if only included in the uber jar as plugin.xml metadata is lost.
+rem Both sbt and maven will populate "lib_managed/jars/" with the datanucleus jars when Spark is
+rem built with Hive, so look for them there.
+if exist "%FWDIR%RELEASE" (
+ set datanucleus_dir=%FWDIR%lib
+) else (
+ set datanucleus_dir=%FWDIR%lib_managed\jars
+)
+set "datanucleus_jars="
+for %%d in ("%datanucleus_dir%\datanucleus-*.jar") do (
+ set datanucleus_jars=!datanucleus_jars!;%%d
+)
+set CLASSPATH=%CLASSPATH%;%datanucleus_jars%
+
set SPARK_CLASSES=%FWDIR%core\target\scala-%SCALA_VERSION%\classes
set SPARK_CLASSES=%SPARK_CLASSES%;%FWDIR%repl\target\scala-%SCALA_VERSION%\classes
set SPARK_CLASSES=%SPARK_CLASSES%;%FWDIR%mllib\target\scala-%SCALA_VERSION%\classes
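An aside for readers following the batch logic above: the datanucleus handling is plain directory scanning plus string concatenation. A rough Scala equivalent, assuming a `sparkHome` directory laid out the way the script expects (object, function, and parameter names here are illustrative, not from the commit):

```scala
import java.io.File

object DatanucleusClasspathSketch {
  // Mirrors compute-classpath.cmd: release packages keep the datanucleus
  // jars in lib\, while sbt/maven builds put them in lib_managed\jars\.
  def datanucleusJars(sparkHome: String): Seq[File] = {
    val isRelease = new File(sparkHome, "RELEASE").exists()
    val dir = new File(sparkHome, if (isRelease) "lib" else "lib_managed/jars")
    Option(dir.listFiles()).getOrElse(Array.empty[File]).toSeq
      .filter(f => f.getName.startsWith("datanucleus-") && f.getName.endsWith(".jar"))
  }

  // Joins with the platform separator (";" on Windows), like the script's
  // CLASSPATH=%CLASSPATH%;%datanucleus_jars% step.
  def appendToClasspath(classpath: String, jars: Seq[File]): String =
    (classpath +: jars.map(_.getAbsolutePath)).mkString(File.pathSeparator)
}
```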
diff --git a/bin/run-example b/bin/run-example
index 146951ac0e..7caab31dae 100755
--- a/bin/run-example
+++ b/bin/run-example
@@ -23,6 +23,16 @@ FWDIR="$(cd `dirname $0`/..; pwd)"
export SPARK_HOME="$FWDIR"
EXAMPLES_DIR="$FWDIR"/examples

+if [ -n "$1" ]; then
+  EXAMPLE_CLASS="$1"
+  shift
+else
+  echo "Usage: ./bin/run-example <example-class> [example-args]"
+  echo "  - set MASTER=XX to use a specific master"
+  echo "  - can use abbreviated example class name (e.g. SparkPi, mllib.LinearRegression)"
+  exit 1
+fi
+
if [ -f "$FWDIR/RELEASE" ]; then
  export SPARK_EXAMPLES_JAR=`ls "$FWDIR"/lib/spark-examples-*hadoop*.jar`
elif [ -e "$EXAMPLES_DIR"/target/scala-$SCALA_VERSION/spark-examples-*hadoop*.jar ]; then
@@ -37,17 +47,6 @@ fi

EXAMPLE_MASTER=${MASTER:-"local[*]"}

-if [ -n "$1" ]; then
-  EXAMPLE_CLASS="$1"
-  shift
-else
-  echo "usage: ./bin/run-example <example-class> [example-args]"
-  echo "  - set MASTER=XX to use a specific master"
-  echo "  - can use abbreviated example class name (e.g. SparkPi, mllib.MovieLensALS)"
-  echo
-  exit -1
-fi
-
if [[ ! $EXAMPLE_CLASS == org.apache.spark.examples* ]]; then
  EXAMPLE_CLASS="org.apache.spark.examples.$EXAMPLE_CLASS"
fi
@@ -55,5 +54,5 @@ fi
./bin/spark-submit \
  --master $EXAMPLE_MASTER \
  --class $EXAMPLE_CLASS \
-  $SPARK_EXAMPLES_JAR \
+  "$SPARK_EXAMPLES_JAR" \
  "$@"
diff --git a/bin/run-example2.cmd b/bin/run-example2.cmd
index 40abb9af74..eadedd7fa6 100644
--- a/bin/run-example2.cmd
+++ b/bin/run-example2.cmd
@@ -30,7 +30,9 @@ if exist "%FWDIR%conf\spark-env.cmd" call "%FWDIR%conf\spark-env.cmd"

rem Test that an argument was given
if not "x%1"=="x" goto arg_given
-  echo Usage: run-example ^<example-class^> [^<args^>]
+  echo Usage: run-example ^<example-class^> [example-args]
+  echo   - set MASTER=XX to use a specific master
+  echo   - can use abbreviated example class name (e.g. SparkPi, mllib.LinearRegression)
  goto exit
:arg_given

@@ -38,8 +40,14 @@ set EXAMPLES_DIR=%FWDIR%examples

rem Figure out the JAR file that our examples were packaged into.
set SPARK_EXAMPLES_JAR=
-for %%d in ("%EXAMPLES_DIR%\target\scala-%SCALA_VERSION%\spark-examples*assembly*.jar") do (
-  set SPARK_EXAMPLES_JAR=%%d
+if exist "%FWDIR%RELEASE" (
+  for %%d in ("%FWDIR%lib\spark-examples*.jar") do (
+    set SPARK_EXAMPLES_JAR=%%d
+  )
+) else (
+  for %%d in ("%EXAMPLES_DIR%\target\scala-%SCALA_VERSION%\spark-examples*.jar") do (
+    set SPARK_EXAMPLES_JAR=%%d
+  )
)
if "x%SPARK_EXAMPLES_JAR%"=="x" (
  echo Failed to find Spark examples assembly JAR.
@@ -47,15 +55,34 @@ if "x%SPARK_EXAMPLES_JAR%"=="x" (
  goto exit
)

-rem Compute Spark classpath using external script
-set DONT_PRINT_CLASSPATH=1
-call "%FWDIR%bin\compute-classpath.cmd"
-set DONT_PRINT_CLASSPATH=0
-set CLASSPATH=%SPARK_EXAMPLES_JAR%;%CLASSPATH%
+rem Set master from MASTER environment variable if given
+if "x%MASTER%"=="x" (
+  set EXAMPLE_MASTER=local[*]
+) else (
+  set EXAMPLE_MASTER=%MASTER%
+)
+
+rem If the EXAMPLE_CLASS does not start with org.apache.spark.examples, add that
+set EXAMPLE_CLASS=%1
+set PREFIX=%EXAMPLE_CLASS:~0,25%
+if not %PREFIX%==org.apache.spark.examples (
+  set EXAMPLE_CLASS=org.apache.spark.examples.%EXAMPLE_CLASS%
+)
+
+rem Get the tail of the argument list, to skip the first one. This is surprisingly
+rem complicated on Windows.
+set "ARGS="
+:top
+shift
+if "%~1" neq "" (
+  set ARGS=%ARGS% "%~1"
+  goto :top
+)
+if defined ARGS set ARGS=%ARGS:~1%

-rem Figure out where java is.
-set RUNNER=java
-if not "x%JAVA_HOME%"=="x" set RUNNER=%JAVA_HOME%\bin\java
+call "%FWDIR%bin\spark-submit.cmd" ^
+  --master %EXAMPLE_MASTER% ^
+  --class %EXAMPLE_CLASS% ^
+  "%SPARK_EXAMPLES_JAR%" %ARGS%

-"%RUNNER%" -cp "%CLASSPATH%" %JAVA_OPTS% %*
:exit
diff --git a/bin/spark-class2.cmd b/bin/spark-class2.cmd
index 4302c1b6b7..266edd9fa9 100755
--- a/bin/spark-class2.cmd
+++ b/bin/spark-class2.cmd
@@ -17,6 +17,8 @@ rem See the License for the specific language governing permissions and
rem limitations under the License.
rem

+setlocal enabledelayedexpansion
+
set SCALA_VERSION=2.10

rem Figure out where the Spark framework is installed
diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
index a99b2176e2..c54331c00f 100644
--- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
@@ -299,7 +299,7 @@ object SparkSubmit {
  }

  private def addJarToClasspath(localJar: String, loader: ExecutorURLClassLoader) {
-    val localJarFile = new File(new URI(localJar).getPath)
+    val localJarFile = new File(localJar)
    if (!localJarFile.exists()) {
      printWarning(s"Jar $localJar does not exist, skipping.")
    }
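One detail of the new run-example2.cmd worth spelling out: `%EXAMPLE_CLASS:~0,25%` is batch substring syntax taking the first 25 characters, which is exactly the length of `org.apache.spark.examples`. The same qualification rule, sketched in Scala with hypothetical names (not code from the commit):

```scala
object QualifyExampleClass {
  // Same rule as the bash [[ $EXAMPLE_CLASS == org.apache.spark.examples* ]]
  // test and the batch prefix comparison: qualify bare example class names.
  def qualify(name: String): String =
    if (name.startsWith("org.apache.spark.examples")) name
    else s"org.apache.spark.examples.$name"

  def main(args: Array[String]): Unit = {
    println(qualify("SparkPi"))                // org.apache.spark.examples.SparkPi
    println(qualify("mllib.LinearRegression")) // org.apache.spark.examples.mllib.LinearRegression
    println(qualify("org.apache.spark.examples.SparkPi")) // unchanged
  }
}
```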