aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rwxr-xr-xbin/pyspark51
-rw-r--r--core/src/main/scala/org/apache/spark/api/python/PythonWorkerFactory.scala8
-rw-r--r--core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala4
-rw-r--r--docs/programming-guide.md8
4 files changed, 51 insertions, 20 deletions
diff --git a/bin/pyspark b/bin/pyspark
index 6655725ef8..96f30a260a 100755
--- a/bin/pyspark
+++ b/bin/pyspark
@@ -50,22 +50,47 @@ fi
. "$FWDIR"/bin/load-spark-env.sh
-# Figure out which Python executable to use
+# In Spark <= 1.1, setting IPYTHON=1 would cause the driver to be launched using the `ipython`
+# executable, while the worker would still be launched using PYSPARK_PYTHON.
+#
+# In Spark 1.2, we removed the documentation of the IPYTHON and IPYTHON_OPTS variables and added
+# PYSPARK_DRIVER_PYTHON and PYSPARK_DRIVER_PYTHON_OPTS to allow IPython to be used for the driver.
+# Now, users can simply set PYSPARK_DRIVER_PYTHON=ipython to use IPython and set
+# PYSPARK_DRIVER_PYTHON_OPTS to pass options when starting the Python driver
+# (e.g. PYSPARK_DRIVER_PYTHON_OPTS='notebook'). This supports full customization of the IPython
+# and executor Python executables.
+#
+# For backwards-compatibility, we retain the old IPYTHON and IPYTHON_OPTS variables.
+
+# Determine the Python executable to use if PYSPARK_PYTHON or PYSPARK_DRIVER_PYTHON isn't set:
+if hash python2.7 2>/dev/null; then
+ # Attempt to use Python 2.7, if installed:
+ DEFAULT_PYTHON="python2.7"
+else
+ DEFAULT_PYTHON="python"
+fi
+
+# Determine the Python executable to use for the driver:
+if [[ -n "$IPYTHON_OPTS" || "$IPYTHON" == "1" ]]; then
+ # If IPython options are specified, assume user wants to run IPython
+ # (for backwards-compatibility)
+ PYSPARK_DRIVER_PYTHON_OPTS="$PYSPARK_DRIVER_PYTHON_OPTS $IPYTHON_OPTS"
+ PYSPARK_DRIVER_PYTHON="ipython"
+elif [[ -z "$PYSPARK_DRIVER_PYTHON" ]]; then
+ PYSPARK_DRIVER_PYTHON="${PYSPARK_PYTHON:-"$DEFAULT_PYTHON"}"
+fi
+
+# Determine the Python executable to use for the executors:
if [[ -z "$PYSPARK_PYTHON" ]]; then
- if [[ "$IPYTHON" = "1" || -n "$IPYTHON_OPTS" ]]; then
- # for backward compatibility
- PYSPARK_PYTHON="ipython"
+ if [[ $PYSPARK_DRIVER_PYTHON == *ipython* && $DEFAULT_PYTHON != "python2.7" ]]; then
+ echo "IPython requires Python 2.7+; please install python2.7 or set PYSPARK_PYTHON" 1>&2
+ exit 1
else
- PYSPARK_PYTHON="python"
+ PYSPARK_PYTHON="$DEFAULT_PYTHON"
fi
fi
export PYSPARK_PYTHON
-if [[ -z "$PYSPARK_PYTHON_OPTS" && -n "$IPYTHON_OPTS" ]]; then
- # for backward compatibility
- PYSPARK_PYTHON_OPTS="$IPYTHON_OPTS"
-fi
-
# Add the PySpark classes to the Python path:
export PYTHONPATH="$SPARK_HOME/python/:$PYTHONPATH"
export PYTHONPATH="$SPARK_HOME/python/lib/py4j-0.8.2.1-src.zip:$PYTHONPATH"
@@ -93,9 +118,9 @@ if [[ -n "$SPARK_TESTING" ]]; then
unset YARN_CONF_DIR
unset HADOOP_CONF_DIR
if [[ -n "$PYSPARK_DOC_TEST" ]]; then
- exec "$PYSPARK_PYTHON" -m doctest $1
+ exec "$PYSPARK_DRIVER_PYTHON" -m doctest $1
else
- exec "$PYSPARK_PYTHON" $1
+ exec "$PYSPARK_DRIVER_PYTHON" $1
fi
exit
fi
@@ -111,5 +136,5 @@ if [[ "$1" =~ \.py$ ]]; then
else
# PySpark shell requires special handling downstream
export PYSPARK_SHELL=1
- exec "$PYSPARK_PYTHON" $PYSPARK_PYTHON_OPTS
+ exec "$PYSPARK_DRIVER_PYTHON" $PYSPARK_DRIVER_PYTHON_OPTS
fi
diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonWorkerFactory.scala b/core/src/main/scala/org/apache/spark/api/python/PythonWorkerFactory.scala
index 71bdf0fe1b..e314408c06 100644
--- a/core/src/main/scala/org/apache/spark/api/python/PythonWorkerFactory.scala
+++ b/core/src/main/scala/org/apache/spark/api/python/PythonWorkerFactory.scala
@@ -108,10 +108,12 @@ private[spark] class PythonWorkerFactory(pythonExec: String, envVars: Map[String
serverSocket = new ServerSocket(0, 1, InetAddress.getByAddress(Array(127, 0, 0, 1)))
// Create and start the worker
- val pb = new ProcessBuilder(Seq(pythonExec, "-u", "-m", "pyspark.worker"))
+ val pb = new ProcessBuilder(Seq(pythonExec, "-m", "pyspark.worker"))
val workerEnv = pb.environment()
workerEnv.putAll(envVars)
workerEnv.put("PYTHONPATH", pythonPath)
+ // This is equivalent to setting the -u flag; we use it because ipython doesn't support -u:
+ workerEnv.put("PYTHONUNBUFFERED", "YES")
val worker = pb.start()
// Redirect worker stdout and stderr
@@ -149,10 +151,12 @@ private[spark] class PythonWorkerFactory(pythonExec: String, envVars: Map[String
try {
// Create and start the daemon
- val pb = new ProcessBuilder(Seq(pythonExec, "-u", "-m", "pyspark.daemon"))
+ val pb = new ProcessBuilder(Seq(pythonExec, "-m", "pyspark.daemon"))
val workerEnv = pb.environment()
workerEnv.putAll(envVars)
workerEnv.put("PYTHONPATH", pythonPath)
+ // This is equivalent to setting the -u flag; we use it because ipython doesn't support -u:
+ workerEnv.put("PYTHONUNBUFFERED", "YES")
daemon = pb.start()
val in = new DataInputStream(daemon.getInputStream)
diff --git a/core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala b/core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala
index 79b4d7ea41..af94b05ce3 100644
--- a/core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala
@@ -34,7 +34,8 @@ object PythonRunner {
val pythonFile = args(0)
val pyFiles = args(1)
val otherArgs = args.slice(2, args.length)
- val pythonExec = sys.env.get("PYSPARK_PYTHON").getOrElse("python") // TODO: get this from conf
+ val pythonExec =
+ sys.env.getOrElse("PYSPARK_DRIVER_PYTHON", sys.env.getOrElse("PYSPARK_PYTHON", "python"))
// Format python file paths before adding them to the PYTHONPATH
val formattedPythonFile = formatPath(pythonFile)
@@ -57,6 +58,7 @@ object PythonRunner {
val builder = new ProcessBuilder(Seq(pythonExec, formattedPythonFile) ++ otherArgs)
val env = builder.environment()
env.put("PYTHONPATH", pythonPath)
+ // This is equivalent to setting the -u flag; we use it because ipython doesn't support -u:
env.put("PYTHONUNBUFFERED", "YES") // value is needed to be set to a non-empty string
env.put("PYSPARK_GATEWAY_PORT", "" + gatewayServer.getListeningPort)
builder.redirectErrorStream(true) // Ugly but needed for stdout and stderr to synchronize
diff --git a/docs/programming-guide.md b/docs/programming-guide.md
index 8e8cc1dd98..18420afb27 100644
--- a/docs/programming-guide.md
+++ b/docs/programming-guide.md
@@ -211,17 +211,17 @@ For a complete list of options, run `pyspark --help`. Behind the scenes,
It is also possible to launch the PySpark shell in [IPython](http://ipython.org), the
enhanced Python interpreter. PySpark works with IPython 1.0.0 and later. To
-use IPython, set the `PYSPARK_PYTHON` variable to `ipython` when running `bin/pyspark`:
+use IPython, set the `PYSPARK_DRIVER_PYTHON` variable to `ipython` when running `bin/pyspark`:
{% highlight bash %}
-$ PYSPARK_PYTHON=ipython ./bin/pyspark
+$ PYSPARK_DRIVER_PYTHON=ipython ./bin/pyspark
{% endhighlight %}
-You can customize the `ipython` command by setting `PYSPARK_PYTHON_OPTS`. For example, to launch
+You can customize the `ipython` command by setting `PYSPARK_DRIVER_PYTHON_OPTS`. For example, to launch
the [IPython Notebook](http://ipython.org/notebook.html) with PyLab plot support:
{% highlight bash %}
-$ PYSPARK_PYTHON=ipython PYSPARK_PYTHON_OPTS="notebook --pylab inline" ./bin/pyspark
+$ PYSPARK_DRIVER_PYTHON=ipython PYSPARK_DRIVER_PYTHON_OPTS="notebook --pylab inline" ./bin/pyspark
{% endhighlight %}
</div>