-rwxr-xr-x  bin/pyspark                | 32
-rw-r--r--  docs/programming-guide.md  | 11
2 files changed, 18 insertions(+), 25 deletions(-)
diff --git a/bin/pyspark b/bin/pyspark
index a25749964e..d1fe75a08b 100755
--- a/bin/pyspark
+++ b/bin/pyspark
@@ -24,17 +24,11 @@ fi
source "${SPARK_HOME}"/bin/load-spark-env.sh
export _SPARK_CMD_USAGE="Usage: ./bin/pyspark [options]"
-# In Spark <= 1.1, setting IPYTHON=1 would cause the driver to be launched using the `ipython`
-# executable, while the worker would still be launched using PYSPARK_PYTHON.
-#
-# In Spark 1.2, we removed the documentation of the IPYTHON and IPYTHON_OPTS variables and added
-# PYSPARK_DRIVER_PYTHON and PYSPARK_DRIVER_PYTHON_OPTS to allow IPython to be used for the driver.
-# Now, users can simply set PYSPARK_DRIVER_PYTHON=ipython to use IPython and set
-# PYSPARK_DRIVER_PYTHON_OPTS to pass options when starting the Python driver
+# In Spark 2.0, IPYTHON and IPYTHON_OPTS were removed, and pyspark fails to launch if either
+# variable is set in the user's environment. Instead, users should set PYSPARK_DRIVER_PYTHON=ipython
+# to use IPython and set PYSPARK_DRIVER_PYTHON_OPTS to pass options when starting the Python driver
# (e.g. PYSPARK_DRIVER_PYTHON_OPTS='notebook'). This supports full customization of the IPython
# and executor Python executables.
-#
-# For backwards-compatibility, we retain the old IPYTHON and IPYTHON_OPTS variables.
# Determine the Python executable to use if PYSPARK_PYTHON or PYSPARK_DRIVER_PYTHON isn't set:
if hash python2.7 2>/dev/null; then
@@ -44,17 +38,15 @@ else
DEFAULT_PYTHON="python"
fi
-# Determine the Python executable to use for the driver:
-if [[ -n "$IPYTHON_OPTS" || "$IPYTHON" == "1" ]]; then
- # If IPython options are specified, assume user wants to run IPython
- # (for backwards-compatibility)
- PYSPARK_DRIVER_PYTHON_OPTS="$PYSPARK_DRIVER_PYTHON_OPTS $IPYTHON_OPTS"
- if [ -x "$(command -v jupyter)" ]; then
- PYSPARK_DRIVER_PYTHON="jupyter"
- else
- PYSPARK_DRIVER_PYTHON="ipython"
- fi
-elif [[ -z "$PYSPARK_DRIVER_PYTHON" ]]; then
+# Fail noisily if the removed variables are set
+if [[ -n "$IPYTHON" || -n "$IPYTHON_OPTS" ]]; then
+ echo "Error in pyspark startup:"
+ echo "IPYTHON and IPYTHON_OPTS are removed in Spark 2.0+. Remove these from the environment and set PYSPARK_DRIVER_PYTHON and PYSPARK_DRIVER_PYTHON_OPTS instead."
+ exit 1
+fi
+
+# Default to standard python interpreter unless told otherwise
+if [[ -z "$PYSPARK_DRIVER_PYTHON" ]]; then
PYSPARK_DRIVER_PYTHON="${PYSPARK_PYTHON:-"$DEFAULT_PYTHON"}"
fi
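
The user-visible effect of the bin/pyspark change, sketched as a hypothetical shell session. The error text is taken verbatim from the echo lines in the hunk above; the session itself is illustrative, not output captured from the commit:

    # Legacy launch (Spark <= 1.x style) now aborts at startup:
    $ IPYTHON=1 ./bin/pyspark
    Error in pyspark startup:
    IPYTHON and IPYTHON_OPTS are removed in Spark 2.0+. Remove these from the environment and set PYSPARK_DRIVER_PYTHON and PYSPARK_DRIVER_PYTHON_OPTS instead.

    # Supported replacement: name the driver executable and its options explicitly
    $ PYSPARK_DRIVER_PYTHON=ipython PYSPARK_DRIVER_PYTHON_OPTS="notebook" ./bin/pyspark
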
diff --git a/docs/programming-guide.md b/docs/programming-guide.md
index 601dd578f4..cf6f1d8914 100644
--- a/docs/programming-guide.md
+++ b/docs/programming-guide.md
@@ -240,16 +240,17 @@ use IPython, set the `PYSPARK_DRIVER_PYTHON` variable to `ipython` when running
$ PYSPARK_DRIVER_PYTHON=ipython ./bin/pyspark
{% endhighlight %}
-You can customize the `ipython` command by setting `PYSPARK_DRIVER_PYTHON_OPTS`. For example, to launch
-the [IPython Notebook](http://ipython.org/notebook.html) with PyLab plot support:
+To use the Jupyter notebook (previously known as the IPython notebook), run:
{% highlight bash %}
-$ PYSPARK_DRIVER_PYTHON=ipython PYSPARK_DRIVER_PYTHON_OPTS="notebook" ./bin/pyspark
+$ PYSPARK_DRIVER_PYTHON=jupyter ./bin/pyspark
{% endhighlight %}
-After the IPython Notebook server is launched, you can create a new "Python 2" notebook from
+You can customize the `ipython` or `jupyter` commands by setting `PYSPARK_DRIVER_PYTHON_OPTS`.
+
+After the Jupyter Notebook server is launched, you can create a new "Python 2" notebook from
the "Files" tab. Inside the notebook, you can input the command `%pylab inline` as part of
-your notebook before you start to try Spark from the IPython notebook.
+your notebook before you start to try Spark from the Jupyter notebook.
</div>
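
Putting the two documented variables together, a minimal sketch of launching the Jupyter notebook server with custom options. The --no-browser and --port flags are illustrative assumptions, not part of this commit:

    # Run the driver under Jupyter; notebook options pass through
    # PYSPARK_DRIVER_PYTHON_OPTS (the flags shown are hypothetical examples)
    $ PYSPARK_DRIVER_PYTHON=jupyter PYSPARK_DRIVER_PYTHON_OPTS="notebook --no-browser --port=8888" ./bin/pyspark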