author     jerryshao <sshao@hortonworks.com>    2015-11-04 10:49:34 +0000
committer  Sean Owen <sowen@cloudera.com>       2015-11-04 10:49:34 +0000
commit     8aff36e91de0fee2f3f56c6d240bb203b5bb48ba
tree       0afdded361cb75e7658053953abfdb484da78ced /bin/pyspark
parent     2692bdb7dbf36d6247f595d5fd0cb9cda89e1fdd
[SPARK-2960][DEPLOY] Support executing Spark from symlinks (reopen)
This PR is based on roji's earlier work to support running Spark scripts from symlinks; thanks for the great work, roji. Would you mind taking a look at this PR?

Distributions such as HDP normally expose the Spark executables as symlinks placed on `PATH`, but Spark's current scripts do not resolve the real path behind a symlink recursively, so Spark fails to execute when launched through one. This PR fixes the issue by resolving the symlink to an absolute path. Unlike the earlier attempt (https://github.com/apache/spark/pull/2386), it does not use `readlink -f`, because the `-f` option is not supported on Mac; instead the path is resolved manually in a loop.

I've tested on Mac and Linux (CentOS) and it looks fine. This PR does not touch the scripts under the `sbin` folder; I'm not sure whether those need the same fix. Please help review; any comment is greatly appreciated.

Author: jerryshao <sshao@hortonworks.com>
Author: Shay Rojansky <roji@roji.org>

Closes #8669 from jerryshao/SPARK-2960.
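For context, the portable alternative to `readlink -f` that the commit message describes generally looks like the loop below. This is an illustrative sketch only, not the exact code added by this PR; the function name `resolve_link` and the final `SPARK_HOME` derivation are assumptions made for demonstration.

#!/usr/bin/env bash
# Illustrative sketch (not the exact code from this PR): follow symlink hops
# manually, since BSD/macOS readlink has no -f option.
resolve_link() {
  local target="$1"
  while [ -h "$target" ]; do
    local dir
    dir="$(cd -P "$(dirname "$target")" && pwd)"   # directory containing the link
    target="$(readlink "$target")"                 # one hop of the symlink chain
    # A relative link target is interpreted relative to the link's directory.
    case "$target" in
      /*) ;;
      *) target="$dir/$target" ;;
    esac
  done
  echo "$target"
}

# Hypothetical usage: derive SPARK_HOME from a possibly-symlinked launcher script.
script="$(resolve_link "$0")"
SPARK_HOME="$(cd "$(dirname "$script")/.." && pwd)"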
Diffstat (limited to 'bin/pyspark')
-rwxr-xr-x  bin/pyspark  14
1 file changed, 8 insertions(+), 6 deletions(-)
diff --git a/bin/pyspark b/bin/pyspark
index 18012ee4a0..5eaa17d3c2 100755
--- a/bin/pyspark
+++ b/bin/pyspark
@@ -17,9 +17,11 @@
# limitations under the License.
#
-export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
+if [ -z "${SPARK_HOME}" ]; then
+ export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
+fi
-source "$SPARK_HOME"/bin/load-spark-env.sh
+source "${SPARK_HOME}"/bin/load-spark-env.sh
export _SPARK_CMD_USAGE="Usage: ./bin/pyspark [options]"
# In Spark <= 1.1, setting IPYTHON=1 would cause the driver to be launched using the `ipython`
@@ -64,12 +66,12 @@ fi
export PYSPARK_PYTHON
# Add the PySpark classes to the Python path:
-export PYTHONPATH="$SPARK_HOME/python/:$PYTHONPATH"
-export PYTHONPATH="$SPARK_HOME/python/lib/py4j-0.9-src.zip:$PYTHONPATH"
+export PYTHONPATH="${SPARK_HOME}/python/:$PYTHONPATH"
+export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.9-src.zip:$PYTHONPATH"
# Load the PySpark shell.py script when ./pyspark is used interactively:
export OLD_PYTHONSTARTUP="$PYTHONSTARTUP"
-export PYTHONSTARTUP="$SPARK_HOME/python/pyspark/shell.py"
+export PYTHONSTARTUP="${SPARK_HOME}/python/pyspark/shell.py"
# For pyspark tests
if [[ -n "$SPARK_TESTING" ]]; then
@@ -82,4 +84,4 @@ fi
export PYSPARK_DRIVER_PYTHON
export PYSPARK_DRIVER_PYTHON_OPTS
-exec "$SPARK_HOME"/bin/spark-submit pyspark-shell-main --name "PySparkShell" "$@"
+exec "${SPARK_HOME}"/bin/spark-submit pyspark-shell-main --name "PySparkShell" "$@"
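With the `if [ -z "${SPARK_HOME}" ]` guard in place, a caller that has already exported SPARK_HOME keeps its value instead of having it re-derived from the script's (possibly symlinked) location. A hypothetical wrapper, with illustrative paths not taken from this PR:

# Example distro wrapper script (paths are hypothetical):
export SPARK_HOME=/usr/lib/spark            # value chosen by the package
exec "${SPARK_HOME}/bin/pyspark" "$@"       # pyspark now honors this SPARK_HOME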