author     Kousuke Saruta <sarutak@oss.nttdata.co.jp>   2014-08-09 21:10:43 -0700
committer  Patrick Wendell <pwendell@gmail.com>         2014-08-09 21:11:00 -0700
commit     4f4a9884d9268ba9808744b3d612ac23c75f105a
tree       8cf35287ff81866efd3b3ce95d45ce08891b21a0
parent     e45daf226d780f4a7aaabc2de9f04367bee16f26
[SPARK-2894] spark-shell doesn't accept flags
As sryza reported, spark-shell doesn't accept any flags. The root cause is incorrect usage of spark-submit in spark-shell, and it came to the surface with #1801.

Author: Kousuke Saruta <sarutak@oss.nttdata.co.jp>
Author: Cheng Lian <lian.cs.zju@gmail.com>

Closes #1715, closes #1864, and closes #1861.

Closes #1825 from sarutak/SPARK-2894 and squashes the following commits:

47f3510 [Kousuke Saruta] Merge branch 'master' of git://git.apache.org/spark into SPARK-2894
2c899ed [Kousuke Saruta] Removed useless code from java_gateway.py
98287ed [Kousuke Saruta] Removed useless code from java_gateway.py
513ad2e [Kousuke Saruta] Modified utils.sh to enable use of options that include white space
28a374e [Kousuke Saruta] Modified java_gateway.py to recognize arguments
5afc584 [Cheng Lian] Filter out spark-submit options when starting Python gateway
e630d19 [Cheng Lian] Fixing pyspark and spark-shell CLI options
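Before this change, both launchers appended "$@" after the primary-resource token on the spark-submit command line, so spark-submit treated every flag as an application argument instead of one of its own options. With the patch applied, invocations like the following work again (the master URL and memory size are arbitrary examples, not part of the patch):

  ./bin/spark-shell --master local[4] --driver-memory 2g
  ./bin/pyspark --master local[4]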
-rwxr-xr-x  bin/pyspark                                                               18
-rwxr-xr-x  bin/spark-shell                                                           20
-rw-r--r--  bin/utils.sh                                                              59
-rw-r--r--  core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala     4
-rwxr-xr-x  dev/merge_spark_pr.py                                                      2
-rw-r--r--  python/pyspark/java_gateway.py                                             2

6 files changed, 94 insertions(+), 11 deletions(-)
diff --git a/bin/pyspark b/bin/pyspark
index 39a20e2a24..01d42025c9 100755
--- a/bin/pyspark
+++ b/bin/pyspark
@@ -23,12 +23,18 @@ FWDIR="$(cd `dirname $0`/..; pwd)"
# Export this as SPARK_HOME
export SPARK_HOME="$FWDIR"
+source $FWDIR/bin/utils.sh
+
SCALA_VERSION=2.10
-if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then
+function usage() {
echo "Usage: ./bin/pyspark [options]" 1>&2
$FWDIR/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
exit 0
+}
+
+if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then
+ usage
fi
# Exit if the user hasn't compiled Spark
@@ -66,10 +72,11 @@ fi
# Build up arguments list manually to preserve quotes and backslashes.
# We export Spark submit arguments as an environment variable because shell.py must run as a
# PYTHONSTARTUP script, which does not take in arguments. This is required for IPython notebooks.
-
+SUBMIT_USAGE_FUNCTION=usage
+gatherSparkSubmitOpts "$@"
PYSPARK_SUBMIT_ARGS=""
whitespace="[[:space:]]"
-for i in "$@"; do
+for i in "${SUBMISSION_OPTS[@]}"; do
if [[ $i =~ \" ]]; then i=$(echo $i | sed 's/\"/\\\"/g'); fi
if [[ $i =~ $whitespace ]]; then i=\"$i\"; fi
PYSPARK_SUBMIT_ARGS="$PYSPARK_SUBMIT_ARGS $i"
@@ -90,7 +97,10 @@ fi
if [[ "$1" =~ \.py$ ]]; then
echo -e "\nWARNING: Running python applications through ./bin/pyspark is deprecated as of Spark 1.0." 1>&2
echo -e "Use ./bin/spark-submit <python file>\n" 1>&2
- exec $FWDIR/bin/spark-submit "$@"
+ primary=$1
+ shift
+ gatherSparkSubmitOpts "$@"
+ exec $FWDIR/bin/spark-submit "${SUBMISSION_OPTS[@]}" $primary "${APPLICATION_OPTS[@]}"
else
# Only use ipython if no command line arguments were provided [SPARK-1134]
if [[ "$IPYTHON" = "1" ]]; then
diff --git a/bin/spark-shell b/bin/spark-shell
index 756c8179d1..8b7ccd7439 100755
--- a/bin/spark-shell
+++ b/bin/spark-shell
@@ -31,13 +31,21 @@ set -o posix
## Global script variables
FWDIR="$(cd `dirname $0`/..; pwd)"
+function usage() {
+ echo "Usage: ./bin/spark-shell [options]"
+ $FWDIR/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
+ exit 0
+}
+
if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then
- echo "Usage: ./bin/spark-shell [options]"
- $FWDIR/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
- exit 0
+ usage
fi
-function main(){
+source $FWDIR/bin/utils.sh
+SUBMIT_USAGE_FUNCTION=usage
+gatherSparkSubmitOpts "$@"
+
+function main() {
if $cygwin; then
# Workaround for issue involving JLine and Cygwin
# (see http://sourceforge.net/p/jline/bugs/40/).
@@ -46,11 +54,11 @@ function main(){
# (see https://github.com/sbt/sbt/issues/562).
stty -icanon min 1 -echo > /dev/null 2>&1
export SPARK_SUBMIT_OPTS="$SPARK_SUBMIT_OPTS -Djline.terminal=unix"
- $FWDIR/bin/spark-submit --class org.apache.spark.repl.Main spark-shell "$@"
+ $FWDIR/bin/spark-submit --class org.apache.spark.repl.Main "${SUBMISSION_OPTS[@]}" spark-shell "${APPLICATION_OPTS[@]}"
stty icanon echo > /dev/null 2>&1
else
export SPARK_SUBMIT_OPTS
- $FWDIR/bin/spark-submit --class org.apache.spark.repl.Main spark-shell "$@"
+ $FWDIR/bin/spark-submit --class org.apache.spark.repl.Main "${SUBMISSION_OPTS[@]}" spark-shell "${APPLICATION_OPTS[@]}"
fi
}
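With "${SUBMISSION_OPTS[@]}" spliced in ahead of the spark-shell primary-resource token, user flags now reach spark-submit's own parser. A sketch of the effect for a hypothetical invocation (the master URL and name are illustrative):

  ./bin/spark-shell --master yarn --name repl
  # before: spark-submit --class org.apache.spark.repl.Main spark-shell --master yarn --name repl
  #         (the flags trail the primary resource and are passed to the app, not parsed)
  # after:  spark-submit --class org.apache.spark.repl.Main --master yarn --name repl spark-shell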
diff --git a/bin/utils.sh b/bin/utils.sh
new file mode 100644
index 0000000000..0804b1ed9f
--- /dev/null
+++ b/bin/utils.sh
@@ -0,0 +1,59 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Gather all spark-submit options into SUBMISSION_OPTS
+function gatherSparkSubmitOpts() {
+
+ if [ -z "$SUBMIT_USAGE_FUNCTION" ]; then
+ echo "Function for printing usage of $0 is not set." 1>&2
+ echo "Please set usage function to shell variable 'SUBMIT_USAGE_FUNCTION' in $0" 1>&2
+ exit 1
+ fi
+
+ # NOTE: If you add or remove spark-submit options,
+ # modify NOT ONLY this script but also SparkSubmitArguments.scala
+ SUBMISSION_OPTS=()
+ APPLICATION_OPTS=()
+ while (($#)); do
+ case "$1" in
+ --master | --deploy-mode | --class | --name | --jars | --py-files | --files | \
+ --conf | --properties-file | --driver-memory | --driver-java-options | \
+ --driver-library-path | --driver-class-path | --executor-memory | --driver-cores | \
+ --total-executor-cores | --executor-cores | --queue | --num-executors | --archives)
+ if [[ $# -lt 2 ]]; then
+ "$SUBMIT_USAGE_FUNCTION"
+ exit 1;
+ fi
+ SUBMISSION_OPTS+=("$1"); shift
+ SUBMISSION_OPTS+=("$1"); shift
+ ;;
+
+ --verbose | -v | --supervise)
+ SUBMISSION_OPTS+=("$1"); shift
+ ;;
+
+ *)
+ APPLICATION_OPTS+=("$1"); shift
+ ;;
+ esac
+ done
+
+ export SUBMISSION_OPTS
+ export APPLICATION_OPTS
+}
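A quick way to exercise gatherSparkSubmitOpts in isolation, assuming it is run from the Spark root (the jar name, option values, and file names are made up):

  source bin/utils.sh
  usage() { echo "Usage: demo [options]" 1>&2; exit 1; }
  SUBMIT_USAGE_FUNCTION=usage
  gatherSparkSubmitOpts --deploy-mode cluster --supervise app.jar --input data.txt
  printf 'submit: %s\n' "${SUBMISSION_OPTS[@]}"   # --deploy-mode, cluster, --supervise
  printf 'app:    %s\n' "${APPLICATION_OPTS[@]}"  # app.jar, --input, data.txt

Note that classification is by option name only, not by position: a recognized flag such as --master would land in SUBMISSION_OPTS even if it appeared after app.jar, so application options that share a name with spark-submit options will be misrouted.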
diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
index c21f1529a1..d545f58c5d 100644
--- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
@@ -224,6 +224,10 @@ private[spark] class SparkSubmitArguments(args: Seq[String]) {
// Delineates parsing of Spark options from parsing of user options.
parse(opts)
+ /**
+ * NOTE: If you add or remove spark-submit options,
+ * modify NOT ONLY this file but also utils.sh
+ */
def parse(opts: Seq[String]): Unit = opts match {
case ("--name") :: value :: tail =>
name = value
diff --git a/dev/merge_spark_pr.py b/dev/merge_spark_pr.py
index 53df9b5a3f..d48c8bde12 100755
--- a/dev/merge_spark_pr.py
+++ b/dev/merge_spark_pr.py
@@ -74,8 +74,10 @@ def fail(msg):
def run_cmd(cmd):
if isinstance(cmd, list):
+ print " ".join(cmd)
return subprocess.check_output(cmd)
else:
+ print cmd
return subprocess.check_output(cmd.split(" "))
diff --git a/python/pyspark/java_gateway.py b/python/pyspark/java_gateway.py
index 37386ab0d7..c7f7c1fe59 100644
--- a/python/pyspark/java_gateway.py
+++ b/python/pyspark/java_gateway.py
@@ -39,7 +39,7 @@ def launch_gateway():
submit_args = os.environ.get("PYSPARK_SUBMIT_ARGS")
submit_args = submit_args if submit_args is not None else ""
submit_args = shlex.split(submit_args)
- command = [os.path.join(SPARK_HOME, script), "pyspark-shell"] + submit_args
+ command = [os.path.join(SPARK_HOME, script)] + submit_args + ["pyspark-shell"]
if not on_windows:
# Don't send ctrl-c / SIGINT to the Java gateway:
def preexec_func():
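The reordering in this last hunk matters because spark-submit expects [options] <primary resource> [app arguments]; everything after the primary resource is handed to the application. A sketch of the command the gateway builds, assuming bin/pyspark exported PYSPARK_SUBMIT_ARGS="--master local[2]":

  # before: spark-submit pyspark-shell --master local[2]   (flag treated as an app argument)
  # after:  spark-submit --master local[2] pyspark-shell   (flag parsed by spark-submit)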