author     Matei Zaharia <matei@databricks.com>          2014-05-19 15:02:35 -0700
committer  Tathagata Das <tathagata.das1565@gmail.com>   2014-05-19 15:02:35 -0700
commit     7b70a7071894dd90ea1d0091542b3e13e7ef8d3a (patch)
tree       e24b0a208b0c2290e6f1b6a6beda520f36ed1fa3
parent     df0aa8353ab6d3b19d838c6fa95a93a64948309f (diff)
[SPARK-1876] Windows fixes to deal with latest distribution layout changes
- Look for JARs in the right place
- Launch examples the same way as on Unix
- Load datanucleus JARs if they exist
- Don't attempt to parse local paths as URIs in SparkSubmit, since paths with C:\ are not valid URIs
- Also fixed POM exclusion rules for datanucleus (it wasn't properly excluding it, whereas SBT was)

Author: Matei Zaharia <matei@databricks.com>

Closes #819 from mateiz/win-fixes and squashes the following commits:

d558f96 [Matei Zaharia] Fix comment
228577b [Matei Zaharia] Review comments
d3b71c7 [Matei Zaharia] Properly exclude datanucleus files in Maven assembly
144af84 [Matei Zaharia] Update Windows scripts to match latest binary package layout
-rw-r--r--  README.md                                                        |  7
-rw-r--r--  assembly/pom.xml                                                 |  2
-rw-r--r--  bin/compute-classpath.cmd                                        | 24
-rwxr-xr-x  bin/run-example                                                  | 23
-rw-r--r--  bin/run-example2.cmd                                             | 51
-rwxr-xr-x  bin/spark-class2.cmd                                             |  2
-rw-r--r--  core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala    |  2

7 files changed, 81 insertions, 30 deletions
diff --git a/README.md b/README.md
index 9c2e32b90f..6211a5889a 100644
--- a/README.md
+++ b/README.md
@@ -9,13 +9,14 @@ You can find the latest Spark documentation, including a programming
guide, on the project webpage at <http://spark.apache.org/documentation.html>.
This README file only contains basic setup instructions.
-
## Building Spark
Spark is built on Scala 2.10. To build Spark and its example programs, run:
./sbt/sbt assembly
+(You do not need to do this if you downloaded a pre-built package.)
+
## Interactive Scala Shell
The easiest way to start using Spark is through the Scala shell:
@@ -41,9 +42,9 @@ And run the following command, which should also return 1000:
Spark also comes with several sample programs in the `examples` directory.
To run one of them, use `./bin/run-example <class> [params]`. For example:
- ./bin/run-example org.apache.spark.examples.SparkLR
+ ./bin/run-example SparkPi
-will run the Logistic Regression example locally.
+will run the Pi example locally.
You can set the MASTER environment variable when running examples to submit
examples to a cluster. This can be a mesos:// or spark:// URL,
diff --git a/assembly/pom.xml b/assembly/pom.xml
index abd8935339..963357b9ab 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -96,7 +96,7 @@
<filter>
<artifact>*:*</artifact>
<excludes>
- <exclude>org.datanucleus:*</exclude>
+ <exclude>org/datanucleus/**</exclude>
<exclude>META-INF/*.SF</exclude>
<exclude>META-INF/*.DSA</exclude>
<exclude>META-INF/*.RSA</exclude>
diff --git a/bin/compute-classpath.cmd b/bin/compute-classpath.cmd
index 065553eb31..58710cd1bd 100644
--- a/bin/compute-classpath.cmd
+++ b/bin/compute-classpath.cmd
@@ -20,6 +20,13 @@ rem
rem This script computes Spark's classpath and prints it to stdout; it's used by both the "run"
rem script and the ExecutorRunner in standalone cluster mode.
+rem If we're called from spark-class2.cmd, it already set enabledelayedexpansion and setting
+rem it here would stop us from affecting its copy of the CLASSPATH variable; otherwise we
+rem need to set it here because we use !datanucleus_jars! below.
+if "%DONT_PRINT_CLASSPATH%"=="1" goto skip_delayed_expansion
+setlocal enabledelayedexpansion
+:skip_delayed_expansion
+
set SCALA_VERSION=2.10
rem Figure out where the Spark framework is installed
@@ -31,7 +38,7 @@ if exist "%FWDIR%conf\spark-env.cmd" call "%FWDIR%conf\spark-env.cmd"
rem Build up classpath
set CLASSPATH=%FWDIR%conf
if exist "%FWDIR%RELEASE" (
- for %%d in ("%FWDIR%jars\spark-assembly*.jar") do (
+ for %%d in ("%FWDIR%lib\spark-assembly*.jar") do (
set ASSEMBLY_JAR=%%d
)
) else (
@@ -42,6 +49,21 @@ if exist "%FWDIR%RELEASE" (
set CLASSPATH=%CLASSPATH%;%ASSEMBLY_JAR%
+rem When Hive support is needed, Datanucleus jars must be included on the classpath.
+rem Datanucleus jars do not work if only included in the uber jar as plugin.xml metadata is lost.
+rem Both sbt and maven will populate "lib_managed/jars/" with the datanucleus jars when Spark is
+rem built with Hive, so look for them there.
+if exist "%FWDIR%RELEASE" (
+ set datanucleus_dir=%FWDIR%lib
+) else (
+ set datanucleus_dir=%FWDIR%lib_managed\jars
+)
+set "datanucleus_jars="
+for %%d in ("%datanucleus_dir%\datanucleus-*.jar") do (
+ set datanucleus_jars=!datanucleus_jars!;%%d
+)
+set CLASSPATH=%CLASSPATH%;%datanucleus_jars%
+
set SPARK_CLASSES=%FWDIR%core\target\scala-%SCALA_VERSION%\classes
set SPARK_CLASSES=%SPARK_CLASSES%;%FWDIR%repl\target\scala-%SCALA_VERSION%\classes
set SPARK_CLASSES=%SPARK_CLASSES%;%FWDIR%mllib\target\scala-%SCALA_VERSION%\classes
diff --git a/bin/run-example b/bin/run-example
index 146951ac0e..7caab31dae 100755
--- a/bin/run-example
+++ b/bin/run-example
@@ -23,6 +23,16 @@ FWDIR="$(cd `dirname $0`/..; pwd)"
export SPARK_HOME="$FWDIR"
EXAMPLES_DIR="$FWDIR"/examples
+if [ -n "$1" ]; then
+ EXAMPLE_CLASS="$1"
+ shift
+else
+ echo "Usage: ./bin/run-example <example-class> [example-args]"
+ echo " - set MASTER=XX to use a specific master"
+ echo " - can use abbreviated example class name (e.g. SparkPi, mllib.LinearRegression)"
+ exit 1
+fi
+
if [ -f "$FWDIR/RELEASE" ]; then
export SPARK_EXAMPLES_JAR=`ls "$FWDIR"/lib/spark-examples-*hadoop*.jar`
elif [ -e "$EXAMPLES_DIR"/target/scala-$SCALA_VERSION/spark-examples-*hadoop*.jar ]; then
@@ -37,17 +47,6 @@ fi
EXAMPLE_MASTER=${MASTER:-"local[*]"}
-if [ -n "$1" ]; then
- EXAMPLE_CLASS="$1"
- shift
-else
- echo "usage: ./bin/run-example <example-class> [example-args]"
- echo " - set MASTER=XX to use a specific master"
- echo " - can use abbreviated example class name (e.g. SparkPi, mllib.MovieLensALS)"
- echo
- exit -1
-fi
-
if [[ ! $EXAMPLE_CLASS == org.apache.spark.examples* ]]; then
EXAMPLE_CLASS="org.apache.spark.examples.$EXAMPLE_CLASS"
fi
@@ -55,5 +54,5 @@ fi
./bin/spark-submit \
--master $EXAMPLE_MASTER \
--class $EXAMPLE_CLASS \
- $SPARK_EXAMPLES_JAR \
+ "$SPARK_EXAMPLES_JAR" \
"$@"
diff --git a/bin/run-example2.cmd b/bin/run-example2.cmd
index 40abb9af74..eadedd7fa6 100644
--- a/bin/run-example2.cmd
+++ b/bin/run-example2.cmd
@@ -30,7 +30,9 @@ if exist "%FWDIR%conf\spark-env.cmd" call "%FWDIR%conf\spark-env.cmd"
rem Test that an argument was given
if not "x%1"=="x" goto arg_given
- echo Usage: run-example ^<example-class^> [^<args^>]
+ echo Usage: run-example ^<example-class^> [example-args]
+ echo - set MASTER=XX to use a specific master
+ echo - can use abbreviated example class name (e.g. SparkPi, mllib.LinearRegression)
goto exit
:arg_given
@@ -38,8 +40,14 @@ set EXAMPLES_DIR=%FWDIR%examples
rem Figure out the JAR file that our examples were packaged into.
set SPARK_EXAMPLES_JAR=
-for %%d in ("%EXAMPLES_DIR%\target\scala-%SCALA_VERSION%\spark-examples*assembly*.jar") do (
- set SPARK_EXAMPLES_JAR=%%d
+if exist "%FWDIR%RELEASE" (
+ for %%d in ("%FWDIR%lib\spark-examples*.jar") do (
+ set SPARK_EXAMPLES_JAR=%%d
+ )
+) else (
+ for %%d in ("%EXAMPLES_DIR%\target\scala-%SCALA_VERSION%\spark-examples*.jar") do (
+ set SPARK_EXAMPLES_JAR=%%d
+ )
)
if "x%SPARK_EXAMPLES_JAR%"=="x" (
echo Failed to find Spark examples assembly JAR.
@@ -47,15 +55,34 @@ if "x%SPARK_EXAMPLES_JAR%"=="x" (
goto exit
)
-rem Compute Spark classpath using external script
-set DONT_PRINT_CLASSPATH=1
-call "%FWDIR%bin\compute-classpath.cmd"
-set DONT_PRINT_CLASSPATH=0
-set CLASSPATH=%SPARK_EXAMPLES_JAR%;%CLASSPATH%
+rem Set master from MASTER environment variable if given
+if "x%MASTER%"=="x" (
+ set EXAMPLE_MASTER=local[*]
+) else (
+ set EXAMPLE_MASTER=%MASTER%
+)
+
+rem If the EXAMPLE_CLASS does not start with org.apache.spark.examples, add that
+set EXAMPLE_CLASS=%1
+set PREFIX=%EXAMPLE_CLASS:~0,25%
+if not %PREFIX%==org.apache.spark.examples (
+ set EXAMPLE_CLASS=org.apache.spark.examples.%EXAMPLE_CLASS%
+)
+
+rem Get the tail of the argument list, to skip the first one. This is surprisingly
+rem complicated on Windows.
+set "ARGS="
+:top
+shift
+if "%~1" neq "" (
+ set ARGS=%ARGS% "%~1"
+ goto :top
+)
+if defined ARGS set ARGS=%ARGS:~1%
-rem Figure out where java is.
-set RUNNER=java
-if not "x%JAVA_HOME%"=="x" set RUNNER=%JAVA_HOME%\bin\java
+call "%FWDIR%bin\spark-submit.cmd" ^
+ --master %EXAMPLE_MASTER% ^
+ --class %EXAMPLE_CLASS% ^
+ "%SPARK_EXAMPLES_JAR%" %ARGS%
-"%RUNNER%" -cp "%CLASSPATH%" %JAVA_OPTS% %*
:exit
diff --git a/bin/spark-class2.cmd b/bin/spark-class2.cmd
index 4302c1b6b7..266edd9fa9 100755
--- a/bin/spark-class2.cmd
+++ b/bin/spark-class2.cmd
@@ -17,6 +17,8 @@ rem See the License for the specific language governing permissions and
rem limitations under the License.
rem
+setlocal enabledelayedexpansion
+
set SCALA_VERSION=2.10
rem Figure out where the Spark framework is installed
diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
index a99b2176e2..c54331c00f 100644
--- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
@@ -299,7 +299,7 @@ object SparkSubmit {
}
private def addJarToClasspath(localJar: String, loader: ExecutorURLClassLoader) {
- val localJarFile = new File(new URI(localJar).getPath)
+ val localJarFile = new File(localJar)
if (!localJarFile.exists()) {
printWarning(s"Jar $localJar does not exist, skipping.")
}
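
The SparkSubmit hunk above drops the URI round-trip because a local Windows path such as C:\spark\lib\extra.jar is not a valid URI: the backslashes are illegal URI characters, and a forward-slash variant would have its drive letter read as a URI scheme. Below is a minimal standalone Scala sketch, not part of the commit, that illustrates the difference; the object name LocalPathCheck and the sample path are hypothetical.

import java.io.File
import java.net.{URI, URISyntaxException}

object LocalPathCheck {
  def main(args: Array[String]): Unit = {
    val windowsPath = """C:\spark\lib\extra.jar"""

    // Old approach: treat the argument as a URI. Backslashes are illegal in URIs,
    // so parsing a C:\-style path throws instead of yielding a usable path.
    try {
      val viaUri = new File(new URI(windowsPath).getPath)
      println(s"Parsed via URI: $viaUri")
    } catch {
      case e: URISyntaxException =>
        println(s"URI parsing failed: ${e.getMessage}")
    }

    // New approach (what the commit switches to): hand the local path straight to
    // java.io.File, which understands platform-specific separators, so drive-letter
    // paths work on Windows.
    val direct = new File(windowsPath)
    println(s"Constructed directly: $direct")
  }
}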