diff options
author | Davies Liu <davies@databricks.com> | 2015-02-17 12:05:06 -0800 |
---|---|---|
committer | Patrick Wendell <patrick@databricks.com> | 2015-02-17 12:05:06 -0800 |
commit | fc4eb9505adda192eb38cb4454d532027690bfa3 (patch) | |
tree | 6414f66156df666029dca51a4a21b8435653da7d /python/pyspark | |
parent | 49c19fdbad57f0609bbcc9278f9eaa8115a73604 (diff) | |
download | spark-fc4eb9505adda192eb38cb4454d532027690bfa3.tar.gz spark-fc4eb9505adda192eb38cb4454d532027690bfa3.tar.bz2 spark-fc4eb9505adda192eb38cb4454d532027690bfa3.zip |
[SPARK-5864] [PySpark] support .jar as python package
A jar file containing Python sources could be used as a Python package, just like a zip file.
spark-submit already puts the jar file into PYTHONPATH; this patch also puts it in sys.path, so it can be used in the Python worker.
Author: Davies Liu <davies@databricks.com>
Closes #4652 from davies/jar and squashes the following commits:
17d3f76 [Davies Liu] support .jar as python package
Diffstat (limited to 'python/pyspark')
-rw-r--r-- | python/pyspark/context.py | 6 |
1 file changed, 4 insertions, 2 deletions
diff --git a/python/pyspark/context.py b/python/pyspark/context.py index bf1f61c850..40b3152b23 100644 --- a/python/pyspark/context.py +++ b/python/pyspark/context.py @@ -64,6 +64,8 @@ class SparkContext(object): _lock = Lock() _python_includes = None # zip and egg files that need to be added to PYTHONPATH + PACKAGE_EXTENSIONS = ('.zip', '.egg', '.jar') + def __init__(self, master=None, appName=None, sparkHome=None, pyFiles=None, environment=None, batchSize=0, serializer=PickleSerializer(), conf=None, gateway=None, jsc=None, profiler_cls=BasicProfiler): @@ -185,7 +187,7 @@ class SparkContext(object): for path in self._conf.get("spark.submit.pyFiles", "").split(","): if path != "": (dirname, filename) = os.path.split(path) - if filename.lower().endswith("zip") or filename.lower().endswith("egg"): + if filename[-4:].lower() in self.PACKAGE_EXTENSIONS: self._python_includes.append(filename) sys.path.insert(1, os.path.join(SparkFiles.getRootDirectory(), filename)) @@ -705,7 +707,7 @@ class SparkContext(object): self.addFile(path) (dirname, filename) = os.path.split(path) # dirname may be directory or HDFS/S3 prefix - if filename.endswith('.zip') or filename.endswith('.ZIP') or filename.endswith('.egg'): + if filename[-4:].lower() in self.PACKAGE_EXTENSIONS: self._python_includes.append(filename) # for tests in local mode sys.path.insert(1, os.path.join(SparkFiles.getRootDirectory(), filename)) |