From f8ba89da217a1f1fd5c856a95a27a3e535017643 Mon Sep 17 00:00:00 2001
From: Josh Rosen
Date: Sun, 15 Dec 2013 18:39:30 -0800
Subject: Fix Cygwin support in several scripts.

This allows the spark-shell, spark-class, run-example, make-distribution.sh, and ./bin/start-* scripts to work under Cygwin. Note that this doesn't support PySpark under Cygwin, since that requires many additional `cygpath` calls from within Python and will be non-trivial to implement.

This PR was inspired by, and subsumes, #253 (so close #253 after this is merged).
---
 sbt/sbt | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

(limited to 'sbt')

diff --git a/sbt/sbt b/sbt/sbt
index c31a0280ff..5942280585 100755
--- a/sbt/sbt
+++ b/sbt/sbt
@@ -17,12 +17,27 @@
 # limitations under the License.
 #
 
-EXTRA_ARGS=""
+cygwin=false
+case "`uname`" in
+    CYGWIN*) cygwin=true;;
+esac
+
+EXTRA_ARGS="-Xmx1200m -XX:MaxPermSize=350m -XX:ReservedCodeCacheSize=256m"
 if [ "$MESOS_HOME" != "" ]; then
-  EXTRA_ARGS="-Djava.library.path=$MESOS_HOME/lib/java"
+  EXTRA_ARGS="$EXTRA_ARGS -Djava.library.path=$MESOS_HOME/lib/java"
 fi
 
 export SPARK_HOME=$(cd "$(dirname $0)/.." 2>&1 >/dev/null ; pwd)
 export SPARK_TESTING=1 # To put test classes on classpath
 
-java -Xmx1200m -XX:MaxPermSize=350m -XX:ReservedCodeCacheSize=256m $EXTRA_ARGS $SBT_OPTS -jar "$SPARK_HOME"/sbt/sbt-launch-*.jar "$@"
+SBT_JAR="$SPARK_HOME"/sbt/sbt-launch-*.jar
+if $cygwin; then
+    SBT_JAR=`cygpath -w $SBT_JAR`
+    export SPARK_HOME=`cygpath -w $SPARK_HOME`
+    EXTRA_ARGS="$EXTRA_ARGS -Djline.terminal=jline.UnixTerminal -Dsbt.cygwin=true"
+    stty -icanon min 1 -echo > /dev/null 2>&1
+    java $EXTRA_ARGS $SBT_OPTS -jar $SBT_JAR "$@"
+    stty icanon echo > /dev/null 2>&1
+else
+    java $EXTRA_ARGS $SBT_OPTS -jar $SBT_JAR "$@"
+fi
\ No newline at end of file
-- cgit v1.2.3


From 6be4c1119493dea2af9734ad8b59fcded31f2676 Mon Sep 17 00:00:00 2001
From: Prashant Sharma
Date: Thu, 2 Jan 2014 14:09:37 +0530
Subject: Removed sbt folder and changed docs accordingly

---
 README.md                                | 30 ++++++++++++++++-----
 docs/README.md                           |  4 +--
 docs/_plugins/copy_api_dirs.rb           |  4 +--
 docs/api.md                              |  2 +-
 docs/hadoop-third-party-distributions.md |  2 +-
 docs/index.md                            |  6 ++---
 docs/python-programming-guide.md         |  2 +-
 docs/quick-start.md                      |  2 +-
 docs/running-on-yarn.md                  |  6 ++---
 docs/scala-programming-guide.md          |  2 +-
 make-distribution.sh                     | 12 +++++++--
 pyspark                                  |  2 +-
 run-example                              |  2 +-
 sbt/sbt                                  | 43 -------------------------------
 sbt/sbt-launch-0.11.3-2.jar              | Bin 1096763 -> 0 bytes
 sbt/sbt.cmd                              | 25 ------------------
 spark-class                              |  2 +-
 17 files changed, 51 insertions(+), 95 deletions(-)
 delete mode 100755 sbt/sbt
 delete mode 100644 sbt/sbt-launch-0.11.3-2.jar
 delete mode 100644 sbt/sbt.cmd

(limited to 'sbt')

diff --git a/README.md b/README.md
index 1550a8b551..22e7ab8245 100644
--- a/README.md
+++ b/README.md
@@ -13,9 +13,9 @@ This README file only contains basic setup instructions.
 ## Building
 
 Spark requires Scala 2.10. The project is built using Simple Build Tool (SBT),
-which is packaged with it. To build Spark and its example programs, run:
+which can be obtained from [here](http://www.scala-sbt.org/release/docs/Getting-Started/Setup.html). To build Spark and its example programs, run:
 
-    sbt/sbt assembly
+    sbt assembly
 
 Once you've built Spark, the easiest way to start using it is the shell:
 
@@ -36,6 +36,22 @@ All of the Spark samples take a `` parameter that is the cluster URL to connect to.
This can be a mesos:// or spark:// URL, or "local" to run locally with one thread, or "local[N]" to run locally with N threads. +## Running tests + +### With sbt. (you need sbt installed) +Once you have built spark with `sbt assembly` mentioned in [Building](#Building) section. Test suits can be run as follows on *nix based systems using sbt. + +`SPARK_HOME=$(pwd) SPARK_TESTING=1 sbt test` + +TODO: figure out instructions for windows. + +### With maven. + +1. Build assembly by +`mvn package -DskipTests` + +2. Run tests +`mvn test` ## A Note About Hadoop Versions @@ -49,22 +65,22 @@ For Apache Hadoop versions 1.x, Cloudera CDH MRv1, and other Hadoop versions without YARN, use: # Apache Hadoop 1.2.1 - $ SPARK_HADOOP_VERSION=1.2.1 sbt/sbt assembly + $ SPARK_HADOOP_VERSION=1.2.1 sbt assembly # Cloudera CDH 4.2.0 with MapReduce v1 - $ SPARK_HADOOP_VERSION=2.0.0-mr1-cdh4.2.0 sbt/sbt assembly + $ SPARK_HADOOP_VERSION=2.0.0-mr1-cdh4.2.0 sbt assembly For Apache Hadoop 2.2.X, 2.1.X, 2.0.X, 0.23.x, Cloudera CDH MRv2, and other Hadoop versions with YARN, also set `SPARK_YARN=true`: # Apache Hadoop 2.0.5-alpha - $ SPARK_HADOOP_VERSION=2.0.5-alpha SPARK_YARN=true sbt/sbt assembly + $ SPARK_HADOOP_VERSION=2.0.5-alpha SPARK_YARN=true sbt assembly # Cloudera CDH 4.2.0 with MapReduce v2 - $ SPARK_HADOOP_VERSION=2.0.0-cdh4.2.0 SPARK_YARN=true sbt/sbt assembly + $ SPARK_HADOOP_VERSION=2.0.0-cdh4.2.0 SPARK_YARN=true sbt assembly # Apache Hadoop 2.2.X and newer - $ SPARK_HADOOP_VERSION=2.2.0 SPARK_YARN=true sbt/sbt assembly + $ SPARK_HADOOP_VERSION=2.2.0 SPARK_YARN=true sbt assembly When developing a Spark application, specify the Hadoop version by adding the "hadoop-client" artifact to your project's dependencies. For example, if you're diff --git a/docs/README.md b/docs/README.md index dfcf753553..e3d6c9a5bc 100644 --- a/docs/README.md +++ b/docs/README.md @@ -27,10 +27,10 @@ To mark a block of code in your markdown to be syntax highlighted by jekyll duri ## API Docs (Scaladoc and Epydoc) -You can build just the Spark scaladoc by running `sbt/sbt doc` from the SPARK_PROJECT_ROOT directory. +You can build just the Spark scaladoc by running `sbt doc` from the SPARK_PROJECT_ROOT directory. Similarly, you can build just the PySpark epydoc by running `epydoc --config epydoc.conf` from the SPARK_PROJECT_ROOT/pyspark directory. -When you run `jekyll` in the docs directory, it will also copy over the scaladoc for the various Spark subprojects into the docs directory (and then also into the _site directory). We use a jekyll plugin to run `sbt/sbt doc` before building the site so if you haven't run it (recently) it may take some time as it generates all of the scaladoc. The jekyll plugin also generates the PySpark docs using [epydoc](http://epydoc.sourceforge.net/). +When you run `jekyll` in the docs directory, it will also copy over the scaladoc for the various Spark subprojects into the docs directory (and then also into the _site directory). We use a jekyll plugin to run `sbt doc` before building the site so if you haven't run it (recently) it may take some time as it generates all of the scaladoc. The jekyll plugin also generates the PySpark docs using [epydoc](http://epydoc.sourceforge.net/). NOTE: To skip the step of building and copying over the Scala and Python API docs, run `SKIP_API=1 jekyll`. 
diff --git a/docs/_plugins/copy_api_dirs.rb b/docs/_plugins/copy_api_dirs.rb index 431de909cb..ef9912c808 100644 --- a/docs/_plugins/copy_api_dirs.rb +++ b/docs/_plugins/copy_api_dirs.rb @@ -26,8 +26,8 @@ if not (ENV['SKIP_API'] == '1' or ENV['SKIP_SCALADOC'] == '1') curr_dir = pwd cd("..") - puts "Running sbt/sbt doc from " + pwd + "; this may take a few minutes..." - puts `sbt/sbt doc` + puts "Running sbt doc from " + pwd + "; this may take a few minutes..." + puts `sbt doc` puts "Moving back into docs dir." cd("docs") diff --git a/docs/api.md b/docs/api.md index e86d07770a..11e2c15324 100644 --- a/docs/api.md +++ b/docs/api.md @@ -3,7 +3,7 @@ layout: global title: Spark API documentation (Scaladoc) --- -Here you can find links to the Scaladoc generated for the Spark sbt subprojects. If the following links don't work, try running `sbt/sbt doc` from the Spark project home directory. +Here you can find links to the Scaladoc generated for the Spark sbt subprojects. If the following links don't work, try running `sbt doc` from the Spark project home directory. - [Spark](api/core/index.html) - [Spark Examples](api/examples/index.html) diff --git a/docs/hadoop-third-party-distributions.md b/docs/hadoop-third-party-distributions.md index de6a2b0a43..141d475ba6 100644 --- a/docs/hadoop-third-party-distributions.md +++ b/docs/hadoop-third-party-distributions.md @@ -12,7 +12,7 @@ with these distributions: When compiling Spark, you'll need to [set the SPARK_HADOOP_VERSION flag](index.html#a-note-about-hadoop-versions): - SPARK_HADOOP_VERSION=1.0.4 sbt/sbt assembly + SPARK_HADOOP_VERSION=1.0.4 sbt assembly The table below lists the corresponding `SPARK_HADOOP_VERSION` code for each CDH/HDP release. Note that some Hadoop releases are binary compatible across client versions. This means the pre-built Spark diff --git a/docs/index.md b/docs/index.md index d3ac696d1e..5278e33e1c 100644 --- a/docs/index.md +++ b/docs/index.md @@ -17,7 +17,7 @@ Spark runs on both Windows and UNIX-like systems (e.g. Linux, Mac OS). All you n Spark uses [Simple Build Tool](http://www.scala-sbt.org), which is bundled with it. To compile the code, go into the top-level Spark directory and run - sbt/sbt assembly + sbt assembly For its Scala API, Spark {{site.SPARK_VERSION}} depends on Scala {{site.SCALA_VERSION}}. If you write applications in Scala, you will need to use this same version of Scala in your own program -- newer major versions may not work. You can get the right version of Scala from [scala-lang.org](http://www.scala-lang.org/download/). @@ -56,12 +56,12 @@ Hadoop, you must build Spark against the same version that your cluster uses. By default, Spark links to Hadoop 1.0.4. You can change this by setting the `SPARK_HADOOP_VERSION` variable when compiling: - SPARK_HADOOP_VERSION=2.2.0 sbt/sbt assembly + SPARK_HADOOP_VERSION=2.2.0 sbt assembly In addition, if you wish to run Spark on [YARN](running-on-yarn.html), set `SPARK_YARN` to `true`: - SPARK_HADOOP_VERSION=2.0.5-alpha SPARK_YARN=true sbt/sbt assembly + SPARK_HADOOP_VERSION=2.0.5-alpha SPARK_YARN=true sbt assembly Note that on Windows, you need to set the environment variables on separate lines, e.g., `set SPARK_HADOOP_VERSION=1.2.1`. diff --git a/docs/python-programming-guide.md b/docs/python-programming-guide.md index 55e39b1de1..a33977ed82 100644 --- a/docs/python-programming-guide.md +++ b/docs/python-programming-guide.md @@ -69,7 +69,7 @@ The script automatically adds the `pyspark` package to the `PYTHONPATH`. 
The `pyspark` script launches a Python interpreter that is configured to run PySpark applications. To use `pyspark` interactively, first build Spark, then launch it directly from the command line without any options: {% highlight bash %} -$ sbt/sbt assembly +$ sbt assembly $ ./pyspark {% endhighlight %} diff --git a/docs/quick-start.md b/docs/quick-start.md index 8f782db5b8..5c55def398 100644 --- a/docs/quick-start.md +++ b/docs/quick-start.md @@ -12,7 +12,7 @@ See the [programming guide](scala-programming-guide.html) for a more complete re To follow along with this guide, you only need to have successfully built Spark on one machine. Simply go into your Spark directory and run: {% highlight bash %} -$ sbt/sbt assembly +$ sbt assembly {% endhighlight %} # Interactive Analysis with the Spark Shell diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md index aa75ca4324..13d5fd3685 100644 --- a/docs/running-on-yarn.md +++ b/docs/running-on-yarn.md @@ -12,7 +12,7 @@ was added to Spark in version 0.6.0, and improved in 0.7.0 and 0.8.0. We need a consolidated Spark JAR (which bundles all the required dependencies) to run Spark jobs on a YARN cluster. This can be built by setting the Hadoop version and `SPARK_YARN` environment variable, as follows: - SPARK_HADOOP_VERSION=2.0.5-alpha SPARK_YARN=true ./sbt/sbt assembly + SPARK_HADOOP_VERSION=2.0.5-alpha SPARK_YARN=true ./sbt assembly The assembled JAR will be something like this: `./assembly/target/scala-{{site.SCALA_VERSION}}/spark-assembly_{{site.SPARK_VERSION}}-hadoop2.0.5.jar`. @@ -25,7 +25,7 @@ The build process now also supports new YARN versions (2.2.x). See below. - The assembled jar can be installed into HDFS or used locally. - Your application code must be packaged into a separate JAR file. -If you want to test out the YARN deployment mode, you can use the current Spark examples. A `spark-examples_{{site.SCALA_VERSION}}-{{site.SPARK_VERSION}}` file can be generated by running `sbt/sbt assembly`. NOTE: since the documentation you're reading is for Spark version {{site.SPARK_VERSION}}, we are assuming here that you have downloaded Spark {{site.SPARK_VERSION}} or checked it out of source control. If you are using a different version of Spark, the version numbers in the jar generated by the sbt package command will obviously be different. +If you want to test out the YARN deployment mode, you can use the current Spark examples. A `spark-examples_{{site.SCALA_VERSION}}-{{site.SPARK_VERSION}}` file can be generated by running `sbt assembly`. NOTE: since the documentation you're reading is for Spark version {{site.SPARK_VERSION}}, we are assuming here that you have downloaded Spark {{site.SPARK_VERSION}} or checked it out of source control. If you are using a different version of Spark, the version numbers in the jar generated by the sbt package command will obviously be different. 
# Configuration @@ -72,7 +72,7 @@ The command to launch the YARN Client is as follows: For example: # Build the Spark assembly JAR and the Spark examples JAR - $ SPARK_HADOOP_VERSION=2.0.5-alpha SPARK_YARN=true ./sbt/sbt assembly + $ SPARK_HADOOP_VERSION=2.0.5-alpha SPARK_YARN=true ./sbt assembly # Configure logging $ cp conf/log4j.properties.template conf/log4j.properties diff --git a/docs/scala-programming-guide.md b/docs/scala-programming-guide.md index 56d2a3a4a0..3e7075c382 100644 --- a/docs/scala-programming-guide.md +++ b/docs/scala-programming-guide.md @@ -31,7 +31,7 @@ In addition, if you wish to access an HDFS cluster, you need to add a dependency artifactId = hadoop-client version = -For other build systems, you can run `sbt/sbt assembly` to pack Spark and its dependencies into one JAR (`assembly/target/scala-{{site.SCALA_VERSION}}/spark-assembly-{{site.SPARK_VERSION}}-hadoop*.jar`), then add this to your CLASSPATH. Set the HDFS version as described [here](index.html#a-note-about-hadoop-versions). +For other build systems, you can run `sbt assembly` to pack Spark and its dependencies into one JAR (`assembly/target/scala-{{site.SCALA_VERSION}}/spark-assembly-{{site.SPARK_VERSION}}-hadoop*.jar`), then add this to your CLASSPATH. Set the HDFS version as described [here](index.html#a-note-about-hadoop-versions). Finally, you need to import some Spark classes and implicit conversions into your program. Add the following lines: diff --git a/make-distribution.sh b/make-distribution.sh index 32bbdb90a5..a2c8e64597 100755 --- a/make-distribution.sh +++ b/make-distribution.sh @@ -43,7 +43,13 @@ DISTDIR="$FWDIR/dist" # Get version from SBT export TERM=dumb # Prevents color codes in SBT output -VERSION=$($FWDIR/sbt/sbt "show version" | tail -1 | cut -f 2 | sed 's/^\([a-zA-Z0-9.-]*\).*/\1/') + +if ! test `which sbt` ;then + echo -e "You need sbt installed and available on path, please follow the instructions here: http://www.scala-sbt.org/release/docs/Getting-Started/Setup.html" + exit -1; +fi + +VERSION=$(sbt "show version" | tail -1 | cut -f 2 | sed 's/^\([a-zA-Z0-9.-]*\).*/\1/') # Initialize defaults SPARK_HADOOP_VERSION=1.0.4 @@ -83,7 +89,9 @@ fi # Build fat JAR export SPARK_HADOOP_VERSION export SPARK_YARN -"$FWDIR/sbt/sbt" "assembly/assembly" +cd $FWDIR + +"sbt" "assembly/assembly" # Make directories rm -rf "$DISTDIR" diff --git a/pyspark b/pyspark index 12cc926dda..1d003e2a00 100755 --- a/pyspark +++ b/pyspark @@ -31,7 +31,7 @@ if [ ! -f "$FWDIR/RELEASE" ]; then ls "$FWDIR"/assembly/target/scala-$SCALA_VERSION/spark-assembly*hadoop*.jar >& /dev/null if [[ $? 
!= 0 ]]; then echo "Failed to find Spark assembly in $FWDIR/assembly/target" >&2 - echo "You need to build Spark with sbt/sbt assembly before running this program" >&2 + echo "You need to build Spark with sbt assembly before running this program" >&2 exit 1 fi fi diff --git a/run-example b/run-example index a78192d31d..fbd81fe6f3 100755 --- a/run-example +++ b/run-example @@ -55,7 +55,7 @@ if [ -e "$EXAMPLES_DIR"/target/spark-examples*[0-9Tg].jar ]; then fi if [[ -z $SPARK_EXAMPLES_JAR ]]; then echo "Failed to find Spark examples assembly in $FWDIR/examples/target" >&2 - echo "You need to build Spark with sbt/sbt assembly before running this program" >&2 + echo "You need to build Spark with sbt assembly before running this program" >&2 exit 1 fi diff --git a/sbt/sbt b/sbt/sbt deleted file mode 100755 index 5942280585..0000000000 --- a/sbt/sbt +++ /dev/null @@ -1,43 +0,0 @@ -#!/usr/bin/env bash - -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -cygwin=false -case "`uname`" in - CYGWIN*) cygwin=true;; -esac - -EXTRA_ARGS="-Xmx1200m -XX:MaxPermSize=350m -XX:ReservedCodeCacheSize=256m" -if [ "$MESOS_HOME" != "" ]; then - EXTRA_ARGS="$EXTRA_ARGS -Djava.library.path=$MESOS_HOME/lib/java" -fi - -export SPARK_HOME=$(cd "$(dirname $0)/.." 2>&1 >/dev/null ; pwd) -export SPARK_TESTING=1 # To put test classes on classpath - -SBT_JAR="$SPARK_HOME"/sbt/sbt-launch-*.jar -if $cygwin; then - SBT_JAR=`cygpath -w $SBT_JAR` - export SPARK_HOME=`cygpath -w $SPARK_HOME` - EXTRA_ARGS="$EXTRA_ARGS -Djline.terminal=jline.UnixTerminal -Dsbt.cygwin=true" - stty -icanon min 1 -echo > /dev/null 2>&1 - java $EXTRA_ARGS $SBT_OPTS -jar $SBT_JAR "$@" - stty icanon echo > /dev/null 2>&1 -else - java $EXTRA_ARGS $SBT_OPTS -jar $SBT_JAR "$@" -fi \ No newline at end of file diff --git a/sbt/sbt-launch-0.11.3-2.jar b/sbt/sbt-launch-0.11.3-2.jar deleted file mode 100644 index 23e5c3f311..0000000000 Binary files a/sbt/sbt-launch-0.11.3-2.jar and /dev/null differ diff --git a/sbt/sbt.cmd b/sbt/sbt.cmd deleted file mode 100644 index 681fe00f92..0000000000 --- a/sbt/sbt.cmd +++ /dev/null @@ -1,25 +0,0 @@ -@echo off - -rem -rem Licensed to the Apache Software Foundation (ASF) under one or more -rem contributor license agreements. See the NOTICE file distributed with -rem this work for additional information regarding copyright ownership. -rem The ASF licenses this file to You under the Apache License, Version 2.0 -rem (the "License"); you may not use this file except in compliance with -rem the License. 
You may obtain a copy of the License at -rem -rem http://www.apache.org/licenses/LICENSE-2.0 -rem -rem Unless required by applicable law or agreed to in writing, software -rem distributed under the License is distributed on an "AS IS" BASIS, -rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -rem See the License for the specific language governing permissions and -rem limitations under the License. -rem - -set EXTRA_ARGS= -if not "%MESOS_HOME%x"=="x" set EXTRA_ARGS=-Djava.library.path=%MESOS_HOME%\lib\java - -set SPARK_HOME=%~dp0.. - -java -Xmx1200M -XX:MaxPermSize=200m -XX:ReservedCodeCacheSize=256m %EXTRA_ARGS% -jar %SPARK_HOME%\sbt\sbt-launch-0.11.3-2.jar "%*" diff --git a/spark-class b/spark-class index 1858ea6247..254ddee04a 100755 --- a/spark-class +++ b/spark-class @@ -104,7 +104,7 @@ if [ ! -f "$FWDIR/RELEASE" ]; then jars_list=$(ls "$FWDIR"/assembly/target/scala-$SCALA_VERSION/ | grep "spark-assembly.*hadoop.*.jar") if [ "$num_jars" -eq "0" ]; then echo "Failed to find Spark assembly in $FWDIR/assembly/target/scala-$SCALA_VERSION/" >&2 - echo "You need to build Spark with 'sbt/sbt assembly' before running this program." >&2 + echo "You need to build Spark with 'sbt assembly' before running this program." >&2 exit 1 fi if [ "$num_jars" -gt "1" ]; then -- cgit v1.2.3 From 9e9a913c2ffe0b01f051c18018740934a6a8691e Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Sat, 4 Jan 2014 20:08:35 -0800 Subject: Add a script to download sbt if not present on the system --- .gitignore | 1 + project/build.properties | 2 ++ sbt | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 51 insertions(+) create mode 100755 sbt (limited to 'sbt') diff --git a/.gitignore b/.gitignore index 399362f7d3..1692bde20f 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,7 @@ *.iml *.iws .idea/ +.sbtlib/*.jar .settings .cache /build/ diff --git a/project/build.properties b/project/build.properties index 9647277162..059edac7d4 100644 --- a/project/build.properties +++ b/project/build.properties @@ -15,4 +15,6 @@ # limitations under the License. # +# Note: If you change the sbt version please also change the sbt download +# script in the root directory of the project called sbt. sbt.version=0.12.4 diff --git a/sbt b/sbt new file mode 100755 index 0000000000..e04a20c48a --- /dev/null +++ b/sbt @@ -0,0 +1,48 @@ +#!/bin/bash +# This script launches sbt for this project. If present it uses the system +# version of sbt. If there is no system version of sbt it attempts to download +# sbt locally. +SBT_VERSION=0.12.4 +URL1=http://typesafe.artifactoryonline.com/typesafe/ivy-releases/org.scala-sbt/sbt-launch/${SBT_VERSION}/sbt-launch.jar +URL2=http://repo.typesafe.com/typesafe/ivy-releases/org.scala-sbt/sbt-launch/${SBT_VERSION}/sbt-launch.jar +JAR=.sbtlib/sbt-launch-${SBT_VERSION}.jar + +printf "Checking for system sbt [" +if hash sbt 2>/dev/null; then + printf "FOUND]\n" + # Use System SBT + sbt +else + printf "NOT FOUND]\n" + # Download sbt or use allready downloaded + if [ ! -d .sbtlib ]; then + mkdir .sbtlib + fi + if [ ! -f ${JAR} ]; then + # Download + printf "Attempting to fetch sbt\n" + if hash curl 2>/dev/null; then + curl --progress-bar ${URL1} > ${JAR} || curl --progress-bar ${URL2} > ${JAR} + elif hash wget 2>/dev/null; then + wget --progress=bar ${URL1} -O ${JAR} || wget --progress=bar ${URL2} -O ${JAR} + else + printf "You do not have curl or wget installed, please install sbt manually from http://www.scala-sbt.org/\n" + exit + fi + fi + if [ ! 
-f ${JAR} ]; then + # We failed to download + printf "Our attempt to download sbt locally to {$JAR} failed. Please install sbt manually from http://www.scala-sbt.org/\n" + exit + fi + printf "Launching sbt from .sbtlib\n" + java \ + -Duser.timezone=UTC \ + -Djava.awt.headless=true \ + -Dfile.encoding=UTF-8 \ + -XX:MaxPermSize=256m \ + -Xmx1g \ + -noverify \ + -jar ${JAR} \ + "$@" +fi -- cgit v1.2.3 From 97123be1d7d1b68ec0cda09fd5894fc4af5f82c5 Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Sat, 4 Jan 2014 20:16:56 -0800 Subject: Pass commands down to system sbt as well --- sbt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'sbt') diff --git a/sbt b/sbt index e04a20c48a..8cff6df69f 100755 --- a/sbt +++ b/sbt @@ -11,7 +11,7 @@ printf "Checking for system sbt [" if hash sbt 2>/dev/null; then printf "FOUND]\n" # Use System SBT - sbt + sbt $@ else printf "NOT FOUND]\n" # Download sbt or use allready downloaded -- cgit v1.2.3 From d2a5c75a4d3ca5123fb77ad24beb1b4474b36a4f Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Sat, 4 Jan 2014 21:44:04 -0800 Subject: Spelling --- sbt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'sbt') diff --git a/sbt b/sbt index 8cff6df69f..d21806ed83 100755 --- a/sbt +++ b/sbt @@ -14,7 +14,7 @@ if hash sbt 2>/dev/null; then sbt $@ else printf "NOT FOUND]\n" - # Download sbt or use allready downloaded + # Download sbt or use already downloaded if [ ! -d .sbtlib ]; then mkdir .sbtlib fi -- cgit v1.2.3 From 0d6700eb5a901caad511bed3d21e43f16adcf192 Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Sat, 4 Jan 2014 21:44:26 -0800 Subject: Make sbt in the sbt directory --- sbt | 48 ------------------------------------------------ sbt/sbt | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 48 deletions(-) delete mode 100755 sbt create mode 100755 sbt/sbt (limited to 'sbt') diff --git a/sbt b/sbt deleted file mode 100755 index d21806ed83..0000000000 --- a/sbt +++ /dev/null @@ -1,48 +0,0 @@ -#!/bin/bash -# This script launches sbt for this project. If present it uses the system -# version of sbt. If there is no system version of sbt it attempts to download -# sbt locally. -SBT_VERSION=0.12.4 -URL1=http://typesafe.artifactoryonline.com/typesafe/ivy-releases/org.scala-sbt/sbt-launch/${SBT_VERSION}/sbt-launch.jar -URL2=http://repo.typesafe.com/typesafe/ivy-releases/org.scala-sbt/sbt-launch/${SBT_VERSION}/sbt-launch.jar -JAR=.sbtlib/sbt-launch-${SBT_VERSION}.jar - -printf "Checking for system sbt [" -if hash sbt 2>/dev/null; then - printf "FOUND]\n" - # Use System SBT - sbt $@ -else - printf "NOT FOUND]\n" - # Download sbt or use already downloaded - if [ ! -d .sbtlib ]; then - mkdir .sbtlib - fi - if [ ! -f ${JAR} ]; then - # Download - printf "Attempting to fetch sbt\n" - if hash curl 2>/dev/null; then - curl --progress-bar ${URL1} > ${JAR} || curl --progress-bar ${URL2} > ${JAR} - elif hash wget 2>/dev/null; then - wget --progress=bar ${URL1} -O ${JAR} || wget --progress=bar ${URL2} -O ${JAR} - else - printf "You do not have curl or wget installed, please install sbt manually from http://www.scala-sbt.org/\n" - exit - fi - fi - if [ ! -f ${JAR} ]; then - # We failed to download - printf "Our attempt to download sbt locally to {$JAR} failed. 
Please install sbt manually from http://www.scala-sbt.org/\n" - exit - fi - printf "Launching sbt from .sbtlib\n" - java \ - -Duser.timezone=UTC \ - -Djava.awt.headless=true \ - -Dfile.encoding=UTF-8 \ - -XX:MaxPermSize=256m \ - -Xmx1g \ - -noverify \ - -jar ${JAR} \ - "$@" -fi diff --git a/sbt/sbt b/sbt/sbt new file mode 100755 index 0000000000..d21806ed83 --- /dev/null +++ b/sbt/sbt @@ -0,0 +1,48 @@ +#!/bin/bash +# This script launches sbt for this project. If present it uses the system +# version of sbt. If there is no system version of sbt it attempts to download +# sbt locally. +SBT_VERSION=0.12.4 +URL1=http://typesafe.artifactoryonline.com/typesafe/ivy-releases/org.scala-sbt/sbt-launch/${SBT_VERSION}/sbt-launch.jar +URL2=http://repo.typesafe.com/typesafe/ivy-releases/org.scala-sbt/sbt-launch/${SBT_VERSION}/sbt-launch.jar +JAR=.sbtlib/sbt-launch-${SBT_VERSION}.jar + +printf "Checking for system sbt [" +if hash sbt 2>/dev/null; then + printf "FOUND]\n" + # Use System SBT + sbt $@ +else + printf "NOT FOUND]\n" + # Download sbt or use already downloaded + if [ ! -d .sbtlib ]; then + mkdir .sbtlib + fi + if [ ! -f ${JAR} ]; then + # Download + printf "Attempting to fetch sbt\n" + if hash curl 2>/dev/null; then + curl --progress-bar ${URL1} > ${JAR} || curl --progress-bar ${URL2} > ${JAR} + elif hash wget 2>/dev/null; then + wget --progress=bar ${URL1} -O ${JAR} || wget --progress=bar ${URL2} -O ${JAR} + else + printf "You do not have curl or wget installed, please install sbt manually from http://www.scala-sbt.org/\n" + exit + fi + fi + if [ ! -f ${JAR} ]; then + # We failed to download + printf "Our attempt to download sbt locally to {$JAR} failed. Please install sbt manually from http://www.scala-sbt.org/\n" + exit + fi + printf "Launching sbt from .sbtlib\n" + java \ + -Duser.timezone=UTC \ + -Djava.awt.headless=true \ + -Dfile.encoding=UTF-8 \ + -XX:MaxPermSize=256m \ + -Xmx1g \ + -noverify \ + -jar ${JAR} \ + "$@" +fi -- cgit v1.2.3 From d7d95a099f5f215aff465f948446a8b68113b391 Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Sat, 4 Jan 2014 21:45:22 -0800 Subject: And update docs to match --- .gitignore | 2 +- README.md | 4 ++-- sbt/sbt | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'sbt') diff --git a/.gitignore b/.gitignore index 1692bde20f..39635d7eef 100644 --- a/.gitignore +++ b/.gitignore @@ -4,7 +4,7 @@ *.iml *.iws .idea/ -.sbtlib/*.jar +sbt/*.jar .settings .cache /build/ diff --git a/README.md b/README.md index db1e2c4c0a..2c08a4ac63 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,7 @@ This README file only contains basic setup instructions. Spark requires Scala 2.10. The project is built using Simple Build Tool (SBT), which can be obtained [here](http://www.scala-sbt.org). To build Spark and its example programs, run: - ./sbt assembly + ./sbt/sbt assembly Once you've built Spark, the easiest way to start using it is the shell: @@ -41,7 +41,7 @@ locally with one thread, or "local[N]" to run locally with N threads. Testing first requires [Building](#Building) Spark. 
Once Spark is built, tests can be run using: -`./sbt test` +`./sbt/sbt test` ## A Note About Hadoop Versions diff --git a/sbt/sbt b/sbt/sbt index d21806ed83..a7146e3b05 100755 --- a/sbt/sbt +++ b/sbt/sbt @@ -5,7 +5,7 @@ SBT_VERSION=0.12.4 URL1=http://typesafe.artifactoryonline.com/typesafe/ivy-releases/org.scala-sbt/sbt-launch/${SBT_VERSION}/sbt-launch.jar URL2=http://repo.typesafe.com/typesafe/ivy-releases/org.scala-sbt/sbt-launch/${SBT_VERSION}/sbt-launch.jar -JAR=.sbtlib/sbt-launch-${SBT_VERSION}.jar +JAR=sbt/sbt-launch-${SBT_VERSION}.jar printf "Checking for system sbt [" if hash sbt 2>/dev/null; then -- cgit v1.2.3 From df92f1c0254dc9073c18bc7b76f8b9523ecd7cec Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Sat, 4 Jan 2014 21:48:35 -0800 Subject: reindent --- sbt/sbt | 62 +++++++++++++++++++++++++++++++------------------------------- 1 file changed, 31 insertions(+), 31 deletions(-) (limited to 'sbt') diff --git a/sbt/sbt b/sbt/sbt index a7146e3b05..6d2caca120 100755 --- a/sbt/sbt +++ b/sbt/sbt @@ -9,40 +9,40 @@ JAR=sbt/sbt-launch-${SBT_VERSION}.jar printf "Checking for system sbt [" if hash sbt 2>/dev/null; then - printf "FOUND]\n" - # Use System SBT - sbt $@ + printf "FOUND]\n" + # Use System SBT + sbt $@ else - printf "NOT FOUND]\n" - # Download sbt or use already downloaded - if [ ! -d .sbtlib ]; then - mkdir .sbtlib - fi - if [ ! -f ${JAR} ]; then - # Download - printf "Attempting to fetch sbt\n" - if hash curl 2>/dev/null; then + printf "NOT FOUND]\n" + # Download sbt or use already downloaded + if [ ! -d .sbtlib ]; then + mkdir .sbtlib + fi + if [ ! -f ${JAR} ]; then + # Download + printf "Attempting to fetch sbt\n" + if hash curl 2>/dev/null; then curl --progress-bar ${URL1} > ${JAR} || curl --progress-bar ${URL2} > ${JAR} - elif hash wget 2>/dev/null; then + elif hash wget 2>/dev/null; then wget --progress=bar ${URL1} -O ${JAR} || wget --progress=bar ${URL2} -O ${JAR} - else + else printf "You do not have curl or wget installed, please install sbt manually from http://www.scala-sbt.org/\n" exit - fi - fi - if [ ! -f ${JAR} ]; then - # We failed to download - printf "Our attempt to download sbt locally to {$JAR} failed. Please install sbt manually from http://www.scala-sbt.org/\n" - exit - fi - printf "Launching sbt from .sbtlib\n" - java \ - -Duser.timezone=UTC \ - -Djava.awt.headless=true \ - -Dfile.encoding=UTF-8 \ - -XX:MaxPermSize=256m \ - -Xmx1g \ - -noverify \ - -jar ${JAR} \ - "$@" + fi + fi + if [ ! -f ${JAR} ]; then + # We failed to download + printf "Our attempt to download sbt locally to {$JAR} failed. 
Please install sbt manually from http://www.scala-sbt.org/\n" + exit + fi + printf "Launching sbt from .sbtlib\n" + java \ + -Duser.timezone=UTC \ + -Djava.awt.headless=true \ + -Dfile.encoding=UTF-8 \ + -XX:MaxPermSize=256m \ + -Xmx1g \ + -noverify \ + -jar ${JAR} \ + "$@" fi -- cgit v1.2.3 From d86dc74d796121b61ff43c632791c52dd49ff8ad Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Sun, 5 Jan 2014 22:05:30 -0800 Subject: Code review feedback --- README.md | 14 ++++++++------ docs/README.md | 4 ++-- docs/_plugins/copy_api_dirs.rb | 4 ++-- docs/api.md | 2 +- docs/hadoop-third-party-distributions.md | 2 +- docs/index.md | 6 +++--- docs/python-programming-guide.md | 2 +- docs/quick-start.md | 8 ++++---- docs/running-on-yarn.md | 6 +++--- docs/scala-programming-guide.md | 2 +- make-distribution.sh | 7 +++++-- sbt/sbt | 13 ++++--------- 12 files changed, 35 insertions(+), 35 deletions(-) (limited to 'sbt') diff --git a/README.md b/README.md index 2c08a4ac63..b91e4cf867 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,9 @@ This README file only contains basic setup instructions. ## Building Spark requires Scala 2.10. The project is built using Simple Build Tool (SBT), -which can be obtained [here](http://www.scala-sbt.org). To build Spark and its example programs, run: +which can be obtained [here](http://www.scala-sbt.org). If SBT is installed we +will use the system version of sbt otherwise we will attempt to download it +automatically. To build Spark and its example programs, run: ./sbt/sbt assembly @@ -55,22 +57,22 @@ For Apache Hadoop versions 1.x, Cloudera CDH MRv1, and other Hadoop versions without YARN, use: # Apache Hadoop 1.2.1 - $ SPARK_HADOOP_VERSION=1.2.1 sbt assembly + $ SPARK_HADOOP_VERSION=1.2.1 sbt/sbt assembly # Cloudera CDH 4.2.0 with MapReduce v1 - $ SPARK_HADOOP_VERSION=2.0.0-mr1-cdh4.2.0 sbt assembly + $ SPARK_HADOOP_VERSION=2.0.0-mr1-cdh4.2.0 sbt/sbt assembly For Apache Hadoop 2.2.X, 2.1.X, 2.0.X, 0.23.x, Cloudera CDH MRv2, and other Hadoop versions with YARN, also set `SPARK_YARN=true`: # Apache Hadoop 2.0.5-alpha - $ SPARK_HADOOP_VERSION=2.0.5-alpha SPARK_YARN=true sbt assembly + $ SPARK_HADOOP_VERSION=2.0.5-alpha SPARK_YARN=true sbt/sbt assembly # Cloudera CDH 4.2.0 with MapReduce v2 - $ SPARK_HADOOP_VERSION=2.0.0-cdh4.2.0 SPARK_YARN=true sbt assembly + $ SPARK_HADOOP_VERSION=2.0.0-cdh4.2.0 SPARK_YARN=true sbt/sbt assembly # Apache Hadoop 2.2.X and newer - $ SPARK_HADOOP_VERSION=2.2.0 SPARK_YARN=true sbt assembly + $ SPARK_HADOOP_VERSION=2.2.0 SPARK_YARN=true sbt/sbt assembly When developing a Spark application, specify the Hadoop version by adding the "hadoop-client" artifact to your project's dependencies. For example, if you're diff --git a/docs/README.md b/docs/README.md index e3d6c9a5bc..dfcf753553 100644 --- a/docs/README.md +++ b/docs/README.md @@ -27,10 +27,10 @@ To mark a block of code in your markdown to be syntax highlighted by jekyll duri ## API Docs (Scaladoc and Epydoc) -You can build just the Spark scaladoc by running `sbt doc` from the SPARK_PROJECT_ROOT directory. +You can build just the Spark scaladoc by running `sbt/sbt doc` from the SPARK_PROJECT_ROOT directory. Similarly, you can build just the PySpark epydoc by running `epydoc --config epydoc.conf` from the SPARK_PROJECT_ROOT/pyspark directory. -When you run `jekyll` in the docs directory, it will also copy over the scaladoc for the various Spark subprojects into the docs directory (and then also into the _site directory). 
We use a jekyll plugin to run `sbt doc` before building the site so if you haven't run it (recently) it may take some time as it generates all of the scaladoc. The jekyll plugin also generates the PySpark docs using [epydoc](http://epydoc.sourceforge.net/). +When you run `jekyll` in the docs directory, it will also copy over the scaladoc for the various Spark subprojects into the docs directory (and then also into the _site directory). We use a jekyll plugin to run `sbt/sbt doc` before building the site so if you haven't run it (recently) it may take some time as it generates all of the scaladoc. The jekyll plugin also generates the PySpark docs using [epydoc](http://epydoc.sourceforge.net/). NOTE: To skip the step of building and copying over the Scala and Python API docs, run `SKIP_API=1 jekyll`. diff --git a/docs/_plugins/copy_api_dirs.rb b/docs/_plugins/copy_api_dirs.rb index ef9912c808..431de909cb 100644 --- a/docs/_plugins/copy_api_dirs.rb +++ b/docs/_plugins/copy_api_dirs.rb @@ -26,8 +26,8 @@ if not (ENV['SKIP_API'] == '1' or ENV['SKIP_SCALADOC'] == '1') curr_dir = pwd cd("..") - puts "Running sbt doc from " + pwd + "; this may take a few minutes..." - puts `sbt doc` + puts "Running sbt/sbt doc from " + pwd + "; this may take a few minutes..." + puts `sbt/sbt doc` puts "Moving back into docs dir." cd("docs") diff --git a/docs/api.md b/docs/api.md index 11e2c15324..e86d07770a 100644 --- a/docs/api.md +++ b/docs/api.md @@ -3,7 +3,7 @@ layout: global title: Spark API documentation (Scaladoc) --- -Here you can find links to the Scaladoc generated for the Spark sbt subprojects. If the following links don't work, try running `sbt doc` from the Spark project home directory. +Here you can find links to the Scaladoc generated for the Spark sbt subprojects. If the following links don't work, try running `sbt/sbt doc` from the Spark project home directory. - [Spark](api/core/index.html) - [Spark Examples](api/examples/index.html) diff --git a/docs/hadoop-third-party-distributions.md b/docs/hadoop-third-party-distributions.md index 141d475ba6..de6a2b0a43 100644 --- a/docs/hadoop-third-party-distributions.md +++ b/docs/hadoop-third-party-distributions.md @@ -12,7 +12,7 @@ with these distributions: When compiling Spark, you'll need to [set the SPARK_HADOOP_VERSION flag](index.html#a-note-about-hadoop-versions): - SPARK_HADOOP_VERSION=1.0.4 sbt assembly + SPARK_HADOOP_VERSION=1.0.4 sbt/sbt assembly The table below lists the corresponding `SPARK_HADOOP_VERSION` code for each CDH/HDP release. Note that some Hadoop releases are binary compatible across client versions. This means the pre-built Spark diff --git a/docs/index.md b/docs/index.md index bf8d1c3375..86d574daaa 100644 --- a/docs/index.md +++ b/docs/index.md @@ -17,7 +17,7 @@ Spark runs on both Windows and UNIX-like systems (e.g. Linux, Mac OS). All you n Spark uses [Simple Build Tool](http://www.scala-sbt.org), which is bundled with it. To compile the code, go into the top-level Spark directory and run - sbt assembly + sbt/sbt assembly For its Scala API, Spark {{site.SPARK_VERSION}} depends on Scala {{site.SCALA_VERSION}}. If you write applications in Scala, you will need to use this same version of Scala in your own program -- newer major versions may not work. You can get the right version of Scala from [scala-lang.org](http://www.scala-lang.org/download/). @@ -56,12 +56,12 @@ Hadoop, you must build Spark against the same version that your cluster uses. By default, Spark links to Hadoop 1.0.4. 
You can change this by setting the `SPARK_HADOOP_VERSION` variable when compiling: - SPARK_HADOOP_VERSION=2.2.0 sbt assembly + SPARK_HADOOP_VERSION=2.2.0 sbt/sbt assembly In addition, if you wish to run Spark on [YARN](running-on-yarn.html), set `SPARK_YARN` to `true`: - SPARK_HADOOP_VERSION=2.0.5-alpha SPARK_YARN=true sbt assembly + SPARK_HADOOP_VERSION=2.0.5-alpha SPARK_YARN=true sbt/sbt assembly Note that on Windows, you need to set the environment variables on separate lines, e.g., `set SPARK_HADOOP_VERSION=1.2.1`. diff --git a/docs/python-programming-guide.md b/docs/python-programming-guide.md index 5d48cb676a..dc187b3efe 100644 --- a/docs/python-programming-guide.md +++ b/docs/python-programming-guide.md @@ -69,7 +69,7 @@ The script automatically adds the `bin/pyspark` package to the `PYTHONPATH`. The `bin/pyspark` script launches a Python interpreter that is configured to run PySpark applications. To use `pyspark` interactively, first build Spark, then launch it directly from the command line without any options: {% highlight bash %} -$ sbt assembly +$ sbt/sbt assembly $ ./bin/pyspark {% endhighlight %} diff --git a/docs/quick-start.md b/docs/quick-start.md index 9b9261cfff..153081bdaa 100644 --- a/docs/quick-start.md +++ b/docs/quick-start.md @@ -12,7 +12,7 @@ See the [programming guide](scala-programming-guide.html) for a more complete re To follow along with this guide, you only need to have successfully built Spark on one machine. Simply go into your Spark directory and run: {% highlight bash %} -$ sbt assembly +$ sbt/sbt assembly {% endhighlight %} # Interactive Analysis with the Spark Shell @@ -146,7 +146,7 @@ If you also wish to read data from Hadoop's HDFS, you will also need to add a de libraryDependencies += "org.apache.hadoop" % "hadoop-client" % "" {% endhighlight %} -Finally, for sbt to work correctly, we'll need to layout `SimpleApp.scala` and `simple.sbt` according to the typical directory structure. Once that is in place, we can create a JAR package containing the application's code, then use `sbt run` to execute our program. +Finally, for sbt to work correctly, we'll need to layout `SimpleApp.scala` and `simple.sbt` according to the typical directory structure. Once that is in place, we can create a JAR package containing the application's code, then use `sbt/sbt run` to execute our program. {% highlight bash %} $ find . @@ -157,8 +157,8 @@ $ find . ./src/main/scala ./src/main/scala/SimpleApp.scala -$ sbt package -$ sbt run +$ sbt/sbt package +$ sbt/sbt run ... Lines with a: 46, Lines with b: 23 {% endhighlight %} diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md index a35e003cdc..717071d72c 100644 --- a/docs/running-on-yarn.md +++ b/docs/running-on-yarn.md @@ -12,7 +12,7 @@ was added to Spark in version 0.6.0, and improved in 0.7.0 and 0.8.0. We need a consolidated Spark JAR (which bundles all the required dependencies) to run Spark jobs on a YARN cluster. This can be built by setting the Hadoop version and `SPARK_YARN` environment variable, as follows: - SPARK_HADOOP_VERSION=2.0.5-alpha SPARK_YARN=true sbt assembly + SPARK_HADOOP_VERSION=2.0.5-alpha SPARK_YARN=true sbt/sbt assembly The assembled JAR will be something like this: `./assembly/target/scala-{{site.SCALA_VERSION}}/spark-assembly_{{site.SPARK_VERSION}}-hadoop2.0.5.jar`. @@ -25,7 +25,7 @@ The build process now also supports new YARN versions (2.2.x). See below. - The assembled jar can be installed into HDFS or used locally. - Your application code must be packaged into a separate JAR file. 
-If you want to test out the YARN deployment mode, you can use the current Spark examples. A `spark-examples_{{site.SCALA_VERSION}}-{{site.SPARK_VERSION}}` file can be generated by running `sbt assembly`. NOTE: since the documentation you're reading is for Spark version {{site.SPARK_VERSION}}, we are assuming here that you have downloaded Spark {{site.SPARK_VERSION}} or checked it out of source control. If you are using a different version of Spark, the version numbers in the jar generated by the sbt package command will obviously be different. +If you want to test out the YARN deployment mode, you can use the current Spark examples. A `spark-examples_{{site.SCALA_VERSION}}-{{site.SPARK_VERSION}}` file can be generated by running `sbt/sbt assembly`. NOTE: since the documentation you're reading is for Spark version {{site.SPARK_VERSION}}, we are assuming here that you have downloaded Spark {{site.SPARK_VERSION}} or checked it out of source control. If you are using a different version of Spark, the version numbers in the jar generated by the sbt package command will obviously be different. # Configuration @@ -72,7 +72,7 @@ The command to launch the YARN Client is as follows: For example: # Build the Spark assembly JAR and the Spark examples JAR - $ SPARK_HADOOP_VERSION=2.0.5-alpha SPARK_YARN=true sbt assembly + $ SPARK_HADOOP_VERSION=2.0.5-alpha SPARK_YARN=true sbt/sbt assembly # Configure logging $ cp conf/log4j.properties.template conf/log4j.properties diff --git a/docs/scala-programming-guide.md b/docs/scala-programming-guide.md index 3d0e8923d5..c1ef46a1cd 100644 --- a/docs/scala-programming-guide.md +++ b/docs/scala-programming-guide.md @@ -31,7 +31,7 @@ In addition, if you wish to access an HDFS cluster, you need to add a dependency artifactId = hadoop-client version = -For other build systems, you can run `sbt assembly` to pack Spark and its dependencies into one JAR (`assembly/target/scala-{{site.SCALA_VERSION}}/spark-assembly-{{site.SPARK_VERSION}}-hadoop*.jar`), then add this to your CLASSPATH. Set the HDFS version as described [here](index.html#a-note-about-hadoop-versions). +For other build systems, you can run `sbt/sbt assembly` to pack Spark and its dependencies into one JAR (`assembly/target/scala-{{site.SCALA_VERSION}}/spark-assembly-{{site.SPARK_VERSION}}-hadoop*.jar`), then add this to your CLASSPATH. Set the HDFS version as described [here](index.html#a-note-about-hadoop-versions). Finally, you need to import some Spark classes and implicit conversions into your program. Add the following lines: diff --git a/make-distribution.sh b/make-distribution.sh index 6c466c8a06..61e6654dcb 100755 --- a/make-distribution.sh +++ b/make-distribution.sh @@ -44,13 +44,16 @@ DISTDIR="$FWDIR/dist" # Get version from SBT export TERM=dumb # Prevents color codes in SBT output -if ! test `which sbt` ;then +VERSIONSTRING=$FWDIR/sbt/sbt "show version" + +if [ $? == -1 ] ;then echo -e "You need sbt installed and available on your path." 
echo -e "Download sbt from http://www.scala-sbt.org/" exit -1; fi -VERSION=$(sbt "show version" | tail -1 | cut -f 2 | sed 's/^\([a-zA-Z0-9.-]*\).*/\1/') +VERSION=$(echo "${VERSIONSTRING}" | tail -1 | cut -f 2 | sed 's/^\([a-zA-Z0-9.-]*\).*/\1/') +echo "Version is ${VERSION}" # Initialize defaults SPARK_HADOOP_VERSION=1.0.4 diff --git a/sbt/sbt b/sbt/sbt index 6d2caca120..09cc5a0b4a 100755 --- a/sbt/sbt +++ b/sbt/sbt @@ -27,22 +27,17 @@ else wget --progress=bar ${URL1} -O ${JAR} || wget --progress=bar ${URL2} -O ${JAR} else printf "You do not have curl or wget installed, please install sbt manually from http://www.scala-sbt.org/\n" - exit + exit -1 fi fi if [ ! -f ${JAR} ]; then # We failed to download - printf "Our attempt to download sbt locally to {$JAR} failed. Please install sbt manually from http://www.scala-sbt.org/\n" - exit + printf "Our attempt to download sbt locally to ${JAR} failed. Please install sbt manually from http://www.scala-sbt.org/\n" + exit -1 fi printf "Launching sbt from .sbtlib\n" java \ - -Duser.timezone=UTC \ - -Djava.awt.headless=true \ - -Dfile.encoding=UTF-8 \ - -XX:MaxPermSize=256m \ - -Xmx1g \ - -noverify \ + -Xmx1200m -XX:MaxPermSize=350m -XX:ReservedCodeCacheSize=256m \ -jar ${JAR} \ "$@" fi -- cgit v1.2.3 From 5a598b2d7b72db7f732a63e6e92d54786f68ee1e Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Sun, 5 Jan 2014 22:07:32 -0800 Subject: Fix indentatation --- sbt/sbt | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) (limited to 'sbt') diff --git a/sbt/sbt b/sbt/sbt index 09cc5a0b4a..0951e9daa6 100755 --- a/sbt/sbt +++ b/sbt/sbt @@ -16,28 +16,28 @@ else printf "NOT FOUND]\n" # Download sbt or use already downloaded if [ ! -d .sbtlib ]; then - mkdir .sbtlib + mkdir .sbtlib fi if [ ! -f ${JAR} ]; then - # Download - printf "Attempting to fetch sbt\n" - if hash curl 2>/dev/null; then - curl --progress-bar ${URL1} > ${JAR} || curl --progress-bar ${URL2} > ${JAR} - elif hash wget 2>/dev/null; then - wget --progress=bar ${URL1} -O ${JAR} || wget --progress=bar ${URL2} -O ${JAR} - else - printf "You do not have curl or wget installed, please install sbt manually from http://www.scala-sbt.org/\n" - exit -1 - fi + # Download + printf "Attempting to fetch sbt\n" + if hash curl 2>/dev/null; then + curl --progress-bar ${URL1} > ${JAR} || curl --progress-bar ${URL2} > ${JAR} + elif hash wget 2>/dev/null; then + wget --progress=bar ${URL1} -O ${JAR} || wget --progress=bar ${URL2} -O ${JAR} + else + printf "You do not have curl or wget installed, please install sbt manually from http://www.scala-sbt.org/\n" + exit -1 + fi fi if [ ! -f ${JAR} ]; then - # We failed to download - printf "Our attempt to download sbt locally to ${JAR} failed. Please install sbt manually from http://www.scala-sbt.org/\n" - exit -1 + # We failed to download + printf "Our attempt to download sbt locally to ${JAR} failed. 
Please install sbt manually from http://www.scala-sbt.org/\n" + exit -1 fi printf "Launching sbt from .sbtlib\n" java \ -Xmx1200m -XX:MaxPermSize=350m -XX:ReservedCodeCacheSize=256m \ - -jar ${JAR} \ - "$@" + -jar ${JAR} \ + "$@" fi -- cgit v1.2.3 From 2dc83de72e7803fbf69f55fd1edf1969bc48e074 Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Sun, 5 Jan 2014 23:29:26 -0800 Subject: CR feedback (sbt -> sbt/sbt and correct JAR path in script) :) --- bin/run-example | 2 +- project/build.properties | 2 +- sbt/sbt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'sbt') diff --git a/bin/run-example b/bin/run-example index dfb4bf7baf..6c5d4a6a8f 100755 --- a/bin/run-example +++ b/bin/run-example @@ -55,7 +55,7 @@ if [ -e "$EXAMPLES_DIR"/target/spark-examples*[0-9Tg].jar ]; then fi if [[ -z $SPARK_EXAMPLES_JAR ]]; then echo "Failed to find Spark examples assembly in $FWDIR/examples/target" >&2 - echo "You need to build Spark with sbt assembly before running this program" >&2 + echo "You need to build Spark with sbt/sbt assembly before running this program" >&2 exit 1 fi diff --git a/project/build.properties b/project/build.properties index 059edac7d4..03438f6558 100644 --- a/project/build.properties +++ b/project/build.properties @@ -16,5 +16,5 @@ # # Note: If you change the sbt version please also change the sbt download -# script in the root directory of the project called sbt. +# script sbt/sbt sbt.version=0.12.4 diff --git a/sbt/sbt b/sbt/sbt index 0951e9daa6..af422d9e89 100755 --- a/sbt/sbt +++ b/sbt/sbt @@ -35,7 +35,7 @@ else printf "Our attempt to download sbt locally to ${JAR} failed. Please install sbt manually from http://www.scala-sbt.org/\n" exit -1 fi - printf "Launching sbt from .sbtlib\n" + printf "Launching sbt from ${JAR}\n" java \ -Xmx1200m -XX:MaxPermSize=350m -XX:ReservedCodeCacheSize=256m \ -jar ${JAR} \ -- cgit v1.2.3 From b590adb2ad06fbb5f38c55aa16369c878d84839e Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Mon, 6 Jan 2014 23:31:39 -0800 Subject: Put quote arround arguments passed down to system sbt --- sbt/sbt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'sbt') diff --git a/sbt/sbt b/sbt/sbt index af422d9e89..2c19987b92 100755 --- a/sbt/sbt +++ b/sbt/sbt @@ -11,7 +11,7 @@ printf "Checking for system sbt [" if hash sbt 2>/dev/null; then printf "FOUND]\n" # Use System SBT - sbt $@ + sbt "$@" else printf "NOT FOUND]\n" # Download sbt or use already downloaded -- cgit v1.2.3 From 60a7a6b31a441f5f716091316f90819c62b22e70 Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Mon, 6 Jan 2014 23:45:27 -0800 Subject: Use awk to extract the version --- project/build.properties | 3 --- sbt/sbt | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) (limited to 'sbt') diff --git a/project/build.properties b/project/build.properties index 03438f6558..839f5fbb0c 100644 --- a/project/build.properties +++ b/project/build.properties @@ -14,7 +14,4 @@ # See the License for the specific language governing permissions and # limitations under the License. # - -# Note: If you change the sbt version please also change the sbt download -# script sbt/sbt sbt.version=0.12.4 diff --git a/sbt/sbt b/sbt/sbt index 2c19987b92..22672f2346 100755 --- a/sbt/sbt +++ b/sbt/sbt @@ -2,7 +2,7 @@ # This script launches sbt for this project. If present it uses the system # version of sbt. If there is no system version of sbt it attempts to download # sbt locally. 
-SBT_VERSION=0.12.4
+SBT_VERSION=`awk -F "=" '/sbt\\.version/ {print $2}' ./project/build.properties`
 URL1=http://typesafe.artifactoryonline.com/typesafe/ivy-releases/org.scala-sbt/sbt-launch/${SBT_VERSION}/sbt-launch.jar
 URL2=http://repo.typesafe.com/typesafe/ivy-releases/org.scala-sbt/sbt-launch/${SBT_VERSION}/sbt-launch.jar
 JAR=sbt/sbt-launch-${SBT_VERSION}.jar
-- cgit v1.2.3


From 226b58ada2a3a658420ad90f743d8873f1c30445 Mon Sep 17 00:00:00 2001
From: Henry Saputra
Date: Tue, 7 Jan 2014 21:07:27 -0800
Subject: Add ASF header to the new sbt script.

---
 sbt/sbt | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'sbt')

diff --git a/sbt/sbt b/sbt/sbt
index 22672f2346..7f47d90cf1 100755
--- a/sbt/sbt
+++ b/sbt/sbt
@@ -1,4 +1,22 @@
 #!/bin/bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
 # This script launches sbt for this project. If present it uses the system
 # version of sbt. If there is no system version of sbt it attempts to download
 # sbt locally.
-- cgit v1.2.3
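
The resulting wrapper lives at sbt/sbt and is meant to be run from the Spark project root. A minimal usage sketch (assuming a Unix shell; the wrapper uses a system `sbt` if one is on the PATH and otherwise downloads sbt-launch-0.12.4.jar into sbt/ before launching it):

    # Build the Spark assembly, then run the test suite, through the wrapper
    ./sbt/sbt assembly
    ./sbt/sbt test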