path: root/bin/spark-class
Diffstat (limited to 'bin/spark-class')
-rwxr-xr-x  bin/spark-class  |  180
1 file changed, 37 insertions, 143 deletions
diff --git a/bin/spark-class b/bin/spark-class
index 2f0441bb3c..e29b234afa 100755
--- a/bin/spark-class
+++ b/bin/spark-class
@@ -16,89 +16,18 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
-
-# NOTE: Any changes to this file must be reflected in SparkSubmitDriverBootstrapper.scala!
-
-cygwin=false
-case "`uname`" in
- CYGWIN*) cygwin=true;;
-esac
+set -e
# Figure out where Spark is installed
-FWDIR="$(cd "`dirname "$0"`"/..; pwd)"
+export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
-# Export this as SPARK_HOME
-export SPARK_HOME="$FWDIR"
-export SPARK_CONF_DIR="${SPARK_CONF_DIR:-"$SPARK_HOME/conf"}"
-
-. "$FWDIR"/bin/load-spark-env.sh
+. "$SPARK_HOME"/bin/load-spark-env.sh
if [ -z "$1" ]; then
echo "Usage: spark-class <class> [<args>]" 1>&2
exit 1
fi
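For illustration, a typical invocation passes one of Spark's main classes followed by that class's own arguments (the host and port values below are made up):

    # Hypothetical example: launch a standalone Master through spark-class.
    ./bin/spark-class org.apache.spark.deploy.master.Master --host localhost --port 7077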
-if [ -n "$SPARK_MEM" ]; then
- echo -e "Warning: SPARK_MEM is deprecated, please use a more specific config option" 1>&2
- echo -e "(e.g., spark.executor.memory or spark.driver.memory)." 1>&2
-fi
-
-# Use SPARK_MEM or 512m as the default memory, to be overridden by specific options
-DEFAULT_MEM=${SPARK_MEM:-512m}
-
-SPARK_DAEMON_JAVA_OPTS="$SPARK_DAEMON_JAVA_OPTS -Dspark.akka.logLifecycleEvents=true"
-
-# Add java opts and memory settings for master, worker, history server, executors, and repl.
-case "$1" in
- # Master, Worker, and HistoryServer use SPARK_DAEMON_JAVA_OPTS (and specific opts) + SPARK_DAEMON_MEMORY.
- 'org.apache.spark.deploy.master.Master')
- OUR_JAVA_OPTS="$SPARK_DAEMON_JAVA_OPTS $SPARK_MASTER_OPTS"
- OUR_JAVA_MEM=${SPARK_DAEMON_MEMORY:-$DEFAULT_MEM}
- ;;
- 'org.apache.spark.deploy.worker.Worker')
- OUR_JAVA_OPTS="$SPARK_DAEMON_JAVA_OPTS $SPARK_WORKER_OPTS"
- OUR_JAVA_MEM=${SPARK_DAEMON_MEMORY:-$DEFAULT_MEM}
- ;;
- 'org.apache.spark.deploy.history.HistoryServer')
- OUR_JAVA_OPTS="$SPARK_DAEMON_JAVA_OPTS $SPARK_HISTORY_OPTS"
- OUR_JAVA_MEM=${SPARK_DAEMON_MEMORY:-$DEFAULT_MEM}
- ;;
-
- # Executors use SPARK_JAVA_OPTS + SPARK_EXECUTOR_MEMORY.
- 'org.apache.spark.executor.CoarseGrainedExecutorBackend')
- OUR_JAVA_OPTS="$SPARK_JAVA_OPTS $SPARK_EXECUTOR_OPTS"
- OUR_JAVA_MEM=${SPARK_EXECUTOR_MEMORY:-$DEFAULT_MEM}
- ;;
- 'org.apache.spark.executor.MesosExecutorBackend')
- OUR_JAVA_OPTS="$SPARK_JAVA_OPTS $SPARK_EXECUTOR_OPTS"
- OUR_JAVA_MEM=${SPARK_EXECUTOR_MEMORY:-$DEFAULT_MEM}
- export PYTHONPATH="$FWDIR/python:$PYTHONPATH"
- export PYTHONPATH="$FWDIR/python/lib/py4j-0.8.2.1-src.zip:$PYTHONPATH"
- ;;
-
- # Spark submit uses SPARK_JAVA_OPTS + SPARK_SUBMIT_OPTS +
- # SPARK_DRIVER_MEMORY + SPARK_SUBMIT_DRIVER_MEMORY.
- 'org.apache.spark.deploy.SparkSubmit')
- OUR_JAVA_OPTS="$SPARK_JAVA_OPTS $SPARK_SUBMIT_OPTS"
- OUR_JAVA_MEM=${SPARK_DRIVER_MEMORY:-$DEFAULT_MEM}
- if [ -n "$SPARK_SUBMIT_LIBRARY_PATH" ]; then
- if [[ $OSTYPE == darwin* ]]; then
- export DYLD_LIBRARY_PATH="$SPARK_SUBMIT_LIBRARY_PATH:$DYLD_LIBRARY_PATH"
- else
- export LD_LIBRARY_PATH="$SPARK_SUBMIT_LIBRARY_PATH:$LD_LIBRARY_PATH"
- fi
- fi
- if [ -n "$SPARK_SUBMIT_DRIVER_MEMORY" ]; then
- OUR_JAVA_MEM="$SPARK_SUBMIT_DRIVER_MEMORY"
- fi
- ;;
-
- *)
- OUR_JAVA_OPTS="$SPARK_JAVA_OPTS"
- OUR_JAVA_MEM=${SPARK_DRIVER_MEMORY:-$DEFAULT_MEM}
- ;;
-esac
-
# Find the java binary
if [ -n "${JAVA_HOME}" ]; then
RUNNER="${JAVA_HOME}/bin/java"
@@ -110,83 +39,48 @@ else
exit 1
fi
fi
-JAVA_VERSION=$("$RUNNER" -version 2>&1 | grep 'version' | sed 's/.* version "\(.*\)\.\(.*\)\..*"/\1\2/; 1q')
-
-# Set JAVA_OPTS to be able to load native libraries and to set heap size
-if [ "$JAVA_VERSION" -ge 18 ]; then
- JAVA_OPTS="$OUR_JAVA_OPTS"
-else
- JAVA_OPTS="-XX:MaxPermSize=128m $OUR_JAVA_OPTS"
-fi
-JAVA_OPTS="$JAVA_OPTS -Xms$OUR_JAVA_MEM -Xmx$OUR_JAVA_MEM"
-
-# Load extra JAVA_OPTS from conf/java-opts, if it exists
-if [ -e "$SPARK_CONF_DIR/java-opts" ] ; then
- JAVA_OPTS="$JAVA_OPTS `cat "$SPARK_CONF_DIR"/java-opts`"
-fi
-
-# Attention: when changing the way the JAVA_OPTS are assembled, the change must be reflected in CommandUtils.scala!
-
-TOOLS_DIR="$FWDIR"/tools
-SPARK_TOOLS_JAR=""
-if [ -e "$TOOLS_DIR"/target/scala-$SPARK_SCALA_VERSION/spark-tools*[0-9Tg].jar ]; then
- # Use the JAR from the SBT build
- export SPARK_TOOLS_JAR="`ls "$TOOLS_DIR"/target/scala-$SPARK_SCALA_VERSION/spark-tools*[0-9Tg].jar`"
-fi
-if [ -e "$TOOLS_DIR"/target/spark-tools*[0-9Tg].jar ]; then
- # Use the JAR from the Maven build
- # TODO: this also needs to become an assembly!
- export SPARK_TOOLS_JAR="`ls "$TOOLS_DIR"/target/spark-tools*[0-9Tg].jar`"
-fi
-# Compute classpath using external script
-classpath_output=$("$FWDIR"/bin/compute-classpath.sh)
-if [[ "$?" != "0" ]]; then
- echo "$classpath_output"
- exit 1
-else
- CLASSPATH="$classpath_output"
-fi
+# Look for the launcher. In non-release mode, add the compiled classes directly to the classpath
+# instead of looking for a jar file.
+SPARK_LAUNCHER_CP=
+if [ -f "$SPARK_HOME/RELEASE" ]; then
+ LAUNCHER_DIR="$SPARK_HOME/lib"
+ num_jars="$(ls -1 "$LAUNCHER_DIR" | grep "^spark-launcher.*\.jar$" | wc -l)"
+ if [ "$num_jars" -eq "0" -a -z "$SPARK_LAUNCHER_CP" ]; then
+ echo "Failed to find Spark launcher in $LAUNCHER_DIR." 1>&2
+ echo "You need to build Spark before running this program." 1>&2
+ exit 1
+ fi
-if [[ "$1" =~ org.apache.spark.tools.* ]]; then
- if test -z "$SPARK_TOOLS_JAR"; then
- echo "Failed to find Spark Tools Jar in $FWDIR/tools/target/scala-$SPARK_SCALA_VERSION/" 1>&2
- echo "You need to run \"build/sbt tools/package\" before running $1." 1>&2
+ LAUNCHER_JARS="$(ls -1 "$LAUNCHER_DIR" | grep "^spark-launcher.*\.jar$" || true)"
+ if [ "$num_jars" -gt "1" ]; then
+ echo "Found multiple Spark launcher jars in $LAUNCHER_DIR:" 1>&2
+ echo "$LAUNCHER_JARS" 1>&2
+ echo "Please remove all but one jar." 1>&2
exit 1
fi
- CLASSPATH="$CLASSPATH:$SPARK_TOOLS_JAR"
-fi
-if $cygwin; then
- CLASSPATH="`cygpath -wp "$CLASSPATH"`"
- if [ "$1" == "org.apache.spark.tools.JavaAPICompletenessChecker" ]; then
- export SPARK_TOOLS_JAR="`cygpath -w "$SPARK_TOOLS_JAR"`"
+ SPARK_LAUNCHER_CP="${LAUNCHER_DIR}/${LAUNCHER_JARS}"
+else
+ LAUNCHER_DIR="$SPARK_HOME/launcher/target/scala-$SPARK_SCALA_VERSION"
+ if [ ! -d "$LAUNCHER_DIR/classes" ]; then
+ echo "Failed to find Spark launcher classes in $LAUNCHER_DIR." 1>&2
+ echo "You need to build Spark before running this program." 1>&2
+ exit 1
fi
+ SPARK_LAUNCHER_CP="$LAUNCHER_DIR/classes"
fi
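The jar lookup above boils down to "accept exactly one matching jar, otherwise fail loudly". A minimal standalone sketch of that check, using a hypothetical find_single_jar helper that is not part of spark-class:

    # Prints the full path of the single file in directory $1 matching regex $2, or fails.
    find_single_jar() {
      local dir="$1" pattern="$2" matches count
      matches="$(ls -1 "$dir" 2>/dev/null | grep "$pattern" || true)"
      count="$(printf '%s\n' "$matches" | grep -c . || true)"
      if [ "$count" -eq 0 ]; then
        echo "No jar matching $pattern in $dir." 1>&2
        return 1
      elif [ "$count" -gt 1 ]; then
        echo "Multiple jars matching $pattern in $dir:" 1>&2
        echo "$matches" 1>&2
        return 1
      fi
      echo "$dir/$matches"
    }

    # e.g. SPARK_LAUNCHER_CP="$(find_single_jar "$SPARK_HOME/lib" '^spark-launcher.*\.jar$')"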
-export CLASSPATH
-# In Spark submit client mode, the driver is launched in the same JVM as Spark submit itself.
-# Here we must parse the properties file for relevant "spark.driver.*" configs before launching
-# the driver JVM itself. Instead of handling this complexity in Bash, we launch a separate JVM
-# to prepare the launch environment of this driver JVM.
+# The launcher library will print arguments separated by a NULL character, to allow arguments with
+# characters that would otherwise be interpreted by the shell. Read that in a while loop, populating
+# an array that will be used to exec the final command.
+CMD=()
+while IFS= read -d '' -r ARG; do
+ CMD+=("$ARG")
+done < <("$RUNNER" -cp "$SPARK_LAUNCHER_CP" org.apache.spark.launcher.Main "$@")
-if [ -n "$SPARK_SUBMIT_BOOTSTRAP_DRIVER" ]; then
- # This is used only if the properties file actually contains these special configs
- # Export the environment variables needed by SparkSubmitDriverBootstrapper
- export RUNNER
- export CLASSPATH
- export JAVA_OPTS
- export OUR_JAVA_MEM
- export SPARK_CLASS=1
- shift # Ignore main class (org.apache.spark.deploy.SparkSubmit) and use our own
- exec "$RUNNER" org.apache.spark.deploy.SparkSubmitDriverBootstrapper "$@"
+if [ "${CMD[0]}" = "usage" ]; then
+ "${CMD[@]}"
else
- # Note: The format of this command is closely echoed in SparkSubmitDriverBootstrapper.scala
- if [ -n "$SPARK_PRINT_LAUNCH_COMMAND" ]; then
- echo -n "Spark Command: " 1>&2
- echo "$RUNNER" -cp "$CLASSPATH" $JAVA_OPTS "$@" 1>&2
- echo -e "========================================\n" 1>&2
- fi
- exec "$RUNNER" -cp "$CLASSPATH" $JAVA_OPTS "$@"
+ exec "${CMD[@]}"
fi
-
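The NUL-separated handshake between the launcher and this script can be exercised on its own. A minimal sketch, with a stand-in producer in place of org.apache.spark.launcher.Main:

    # Hypothetical stand-in for the launcher: prints arguments separated by NUL bytes,
    # including one that contains a space.
    produce_args() {
      printf '%s\0' echo "hello world" --some-flag
    }

    CMD=()
    while IFS= read -d '' -r ARG; do
      CMD+=("$ARG")
    done < <(produce_args)

    # Every NUL-delimited token becomes exactly one array element, so "hello world"
    # reaches the final command as a single argument.
    "${CMD[@]}"    # runs: echo "hello world" --some-flag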