aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavies Liu <davies@databricks.com>2015-02-17 12:05:06 -0800
committerPatrick Wendell <patrick@databricks.com>2015-02-17 12:05:06 -0800
commitfc4eb9505adda192eb38cb4454d532027690bfa3 (patch)
tree6414f66156df666029dca51a4a21b8435653da7d
parent49c19fdbad57f0609bbcc9278f9eaa8115a73604 (diff)
downloadspark-fc4eb9505adda192eb38cb4454d532027690bfa3.tar.gz
spark-fc4eb9505adda192eb38cb4454d532027690bfa3.tar.bz2
spark-fc4eb9505adda192eb38cb4454d532027690bfa3.zip
[SPARK-5864] [PySpark] support .jar as python package
A jar file containing Python sources in it could be used as a Python package, just like zip file. spark-submit already put the jar file into PYTHONPATH, this patch also put it in the sys.path, then it could be used in Python worker. Author: Davies Liu <davies@databricks.com> Closes #4652 from davies/jar and squashes the following commits: 17d3f76 [Davies Liu] support .jar as python package
-rw-r--r--python/pyspark/context.py6
1 file changed, 4 insertions, 2 deletions
diff --git a/python/pyspark/context.py b/python/pyspark/context.py
index bf1f61c850..40b3152b23 100644
--- a/python/pyspark/context.py
+++ b/python/pyspark/context.py
@@ -64,6 +64,8 @@ class SparkContext(object):
_lock = Lock()
_python_includes = None # zip and egg files that need to be added to PYTHONPATH
+ PACKAGE_EXTENSIONS = ('.zip', '.egg', '.jar')
+
def __init__(self, master=None, appName=None, sparkHome=None, pyFiles=None,
environment=None, batchSize=0, serializer=PickleSerializer(), conf=None,
gateway=None, jsc=None, profiler_cls=BasicProfiler):
@@ -185,7 +187,7 @@ class SparkContext(object):
for path in self._conf.get("spark.submit.pyFiles", "").split(","):
if path != "":
(dirname, filename) = os.path.split(path)
- if filename.lower().endswith("zip") or filename.lower().endswith("egg"):
+ if filename[-4:].lower() in self.PACKAGE_EXTENSIONS:
self._python_includes.append(filename)
sys.path.insert(1, os.path.join(SparkFiles.getRootDirectory(), filename))
@@ -705,7 +707,7 @@ class SparkContext(object):
self.addFile(path)
(dirname, filename) = os.path.split(path) # dirname may be directory or HDFS/S3 prefix
- if filename.endswith('.zip') or filename.endswith('.ZIP') or filename.endswith('.egg'):
+ if filename[-4:].lower() in self.PACKAGE_EXTENSIONS:
self._python_includes.append(filename)
# for tests in local mode
sys.path.insert(1, os.path.join(SparkFiles.getRootDirectory(), filename))