Diffstat (limited to 'python/pyspark/find_spark_home.py')
-rwxr-xr-x  python/pyspark/find_spark_home.py  74
1 file changed, 74 insertions(+), 0 deletions(-)
diff --git a/python/pyspark/find_spark_home.py b/python/pyspark/find_spark_home.py
new file mode 100755
index 0000000000..212a618b76
--- /dev/null
+++ b/python/pyspark/find_spark_home.py
@@ -0,0 +1,74 @@
+#!/usr/bin/env python
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# This script attempts to determine the correct setting for SPARK_HOME given
+# that Spark may have been installed on the system with pip.
+
+from __future__ import print_function
+import os
+import sys
+
+
+def _find_spark_home():
+    """Find the SPARK_HOME."""
+    # If the environment has SPARK_HOME set, trust it.
+    if "SPARK_HOME" in os.environ:
+        return os.environ["SPARK_HOME"]
+
+    def is_spark_home(path):
+        """Takes a path and returns true if the provided path could be a reasonable SPARK_HOME"""
+        return (os.path.isfile(os.path.join(path, "bin/spark-submit")) and
+                (os.path.isdir(os.path.join(path, "jars")) or
+                 os.path.isdir(os.path.join(path, "assembly"))))
+
+    paths = ["../", os.path.dirname(os.path.realpath(__file__))]
+
+    # Add the path of the PySpark module if it exists
+    if sys.version < "3":
+        import imp
+        try:
+            module_home = imp.find_module("pyspark")[1]
+            paths.append(module_home)
+            # If we are installed in edit mode, also look two dirs up
+            paths.append(os.path.join(module_home, "../../"))
+        except ImportError:
+            # Not pip-installed; no worries.
+            pass
+    else:
+        from importlib.util import find_spec
+        try:
+            module_home = os.path.dirname(find_spec("pyspark").origin)
+            paths.append(module_home)
+            # If we are installed in edit mode, also look two dirs up
+            paths.append(os.path.join(module_home, "../../"))
+        except (ImportError, AttributeError):
+            # Not pip-installed (find_spec may return None); no worries.
+            pass
+
+    # Normalize the paths
+    paths = [os.path.abspath(p) for p in paths]
+
+    try:
+        return next(path for path in paths if is_spark_home(path))
+    except StopIteration:
+        print("Could not find valid SPARK_HOME while searching {0}".format(paths), file=sys.stderr)
+        sys.exit(-1)
+
+if __name__ == "__main__":
+    print(_find_spark_home())
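
As a quick usage sketch (not part of the commit; the wrapper below is hypothetical and assumes only the _find_spark_home helper introduced above), a pip-installed PySpark can resolve SPARK_HOME through this module before handing off to bin/spark-submit:

#!/usr/bin/env python
# Hypothetical wrapper sketch, not part of this commit: resolve SPARK_HOME
# with the helper above, export it, then delegate to bin/spark-submit.
import os
import subprocess
import sys

from pyspark.find_spark_home import _find_spark_home

spark_home = _find_spark_home()  # honors a pre-set SPARK_HOME first
os.environ["SPARK_HOME"] = spark_home
subprocess.check_call(
    [os.path.join(spark_home, "bin", "spark-submit")] + sys.argv[1:])

Since running find_spark_home.py directly prints the resolved path, a shell wrapper could just as well capture the script's stdout instead of importing the function.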