author     pshearer <pshearer@massmutual.com>  2016-04-30 10:15:20 +0100
committer  Sean Owen <sowen@cloudera.com>      2016-04-30 10:15:20 +0100
commit     0368ff30dd55dd2127d4cb196898c7bd437e9d28 (patch)
tree       c432cd9e41334ba63c5e1a4d274794fbad22a4bc /bin
parent     8dc3987d095ae01ad80c89b8f052f231e0807990 (diff)
[SPARK-13973][PYSPARK] Make pyspark fail noisily if IPYTHON or IPYTHON_OPTS are set
## What changes were proposed in this pull request?

https://issues.apache.org/jira/browse/SPARK-13973

Following discussion with srowen, the IPYTHON and IPYTHON_OPTS variables are removed. If either is set in the user's environment, pyspark will not execute and will print an error message instead. Failing noisily forces users to remove these options and learn the new configuration scheme, which is much more sustainable and less confusing.

## How was this patch tested?

Manual testing; set IPYTHON=1 and verified that the error message prints.

Author: pshearer <pshearer@massmutual.com>
Author: shearerp <shearerp@umich.edu>

Closes #12528 from shearerp/master.
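For context, the replacement configuration scheme looks like the following (a minimal sketch; the 'notebook' option value mirrors the example in the script comment below, other values are up to the user):

    # Old (pre-2.0) style, now rejected at startup:
    #   IPYTHON=1 ./bin/pyspark
    # New style: pick the driver Python explicitly, e.g. an IPython shell:
    PYSPARK_DRIVER_PYTHON=ipython ./bin/pyspark
    # Or a Jupyter notebook driver:
    PYSPARK_DRIVER_PYTHON=jupyter PYSPARK_DRIVER_PYTHON_OPTS='notebook' ./bin/pyspark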
Diffstat (limited to 'bin')
-rwxr-xr-x  bin/pyspark  32
1 file changed, 12 insertions, 20 deletions
diff --git a/bin/pyspark b/bin/pyspark
index a25749964e..d1fe75a08b 100755
--- a/bin/pyspark
+++ b/bin/pyspark
@@ -24,17 +24,11 @@ fi
source "${SPARK_HOME}"/bin/load-spark-env.sh
export _SPARK_CMD_USAGE="Usage: ./bin/pyspark [options]"
-# In Spark <= 1.1, setting IPYTHON=1 would cause the driver to be launched using the `ipython`
-# executable, while the worker would still be launched using PYSPARK_PYTHON.
-#
-# In Spark 1.2, we removed the documentation of the IPYTHON and IPYTHON_OPTS variables and added
-# PYSPARK_DRIVER_PYTHON and PYSPARK_DRIVER_PYTHON_OPTS to allow IPython to be used for the driver.
-# Now, users can simply set PYSPARK_DRIVER_PYTHON=ipython to use IPython and set
-# PYSPARK_DRIVER_PYTHON_OPTS to pass options when starting the Python driver
+# In Spark 2.0, IPYTHON and IPYTHON_OPTS are removed and pyspark fails to launch if either option
+# is set in the user's environment. Instead, users should set PYSPARK_DRIVER_PYTHON=ipython
+# to use IPython and set PYSPARK_DRIVER_PYTHON_OPTS to pass options when starting the Python driver
# (e.g. PYSPARK_DRIVER_PYTHON_OPTS='notebook'). This supports full customization of the IPython
# and executor Python executables.
-#
-# For backwards-compatibility, we retain the old IPYTHON and IPYTHON_OPTS variables.
# Determine the Python executable to use if PYSPARK_PYTHON or PYSPARK_DRIVER_PYTHON isn't set:
if hash python2.7 2>/dev/null; then
@@ -44,17 +38,15 @@ else
DEFAULT_PYTHON="python"
fi
-# Determine the Python executable to use for the driver:
-if [[ -n "$IPYTHON_OPTS" || "$IPYTHON" == "1" ]]; then
- # If IPython options are specified, assume user wants to run IPython
- # (for backwards-compatibility)
- PYSPARK_DRIVER_PYTHON_OPTS="$PYSPARK_DRIVER_PYTHON_OPTS $IPYTHON_OPTS"
- if [ -x "$(command -v jupyter)" ]; then
- PYSPARK_DRIVER_PYTHON="jupyter"
- else
- PYSPARK_DRIVER_PYTHON="ipython"
- fi
-elif [[ -z "$PYSPARK_DRIVER_PYTHON" ]]; then
+# Fail noisily if removed options are set
+if [[ -n "$IPYTHON" || -n "$IPYTHON_OPTS" ]]; then
+ echo "Error in pyspark startup:"
+ echo "IPYTHON and IPYTHON_OPTS are removed in Spark 2.0+. Remove these from the environment and set PYSPARK_DRIVER_PYTHON and PYSPARK_DRIVER_PYTHON_OPTS instead."
+ exit 1
+fi
+
+# Default to standard python interpreter unless told otherwise
+if [[ -z "$PYSPARK_DRIVER_PYTHON" ]]; then
PYSPARK_DRIVER_PYTHON="${PYSPARK_PYTHON:-"$DEFAULT_PYTHON"}"
fi
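With the guard in place, launching with either removed variable set fails immediately. An illustrative transcript (output reconstructed from the echo lines and exit 1 in the diff above):

    $ IPYTHON=1 ./bin/pyspark
    Error in pyspark startup:
    IPYTHON and IPYTHON_OPTS are removed in Spark 2.0+. Remove these from the environment and set PYSPARK_DRIVER_PYTHON and PYSPARK_DRIVER_PYTHON_OPTS instead.
    $ echo $?
    1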