diffstat:

 -rw-r--r--  python/pyspark/__init__.py                               | 10 -
 -rw-r--r--  python/pyspark/mllib/__init__.py                         | 34 +
 -rw-r--r--  python/pyspark/mllib/feature.py                          |  8 +-
 -rw-r--r--  python/pyspark/mllib/linalg.py                           |  4 -
 -rw-r--r--  python/pyspark/mllib/rand.py (renamed from python/pyspark/mllib/random.py) | 0
 -rwxr-xr-x  python/run-tests                                         |  2 +-

 6 files changed, 38 insertions(+), 20 deletions(-)
diff --git a/python/pyspark/__init__.py b/python/pyspark/__init__.py index e39e6514d7..9556e4718e 100644 --- a/python/pyspark/__init__.py +++ b/python/pyspark/__init__.py @@ -37,16 +37,6 @@ Public classes: """ -# The following block allows us to import python's random instead of mllib.random for scripts in -# mllib that depend on top level pyspark packages, which transitively depend on python's random. -# Since Python's import logic looks for modules in the current package first, we eliminate -# mllib.random as a candidate for C{import random} by removing the first search path, the script's -# location, in order to force the loader to look in Python's top-level modules for C{random}. -import sys -s = sys.path.pop(0) -import random -sys.path.insert(0, s) - from pyspark.conf import SparkConf from pyspark.context import SparkContext from pyspark.rdd import RDD diff --git a/python/pyspark/mllib/__init__.py b/python/pyspark/mllib/__init__.py index 4149f54931..5030a655fc 100644 --- a/python/pyspark/mllib/__init__.py +++ b/python/pyspark/mllib/__init__.py @@ -24,3 +24,37 @@ Python bindings for MLlib. 
import numpy if numpy.version.version < '1.4': raise Exception("MLlib requires NumPy 1.4+") + +__all__ = ['classification', 'clustering', 'feature', 'linalg', 'random', + 'recommendation', 'regression', 'stat', 'tree', 'util'] + +import sys +import rand as random +random.__name__ = 'random' +random.RandomRDDs.__module__ = __name__ + '.random' + + +class RandomModuleHook(object): + """ + Hook to import pyspark.mllib.random + """ + fullname = __name__ + '.random' + + def find_module(self, name, path=None): + # skip all other modules + if not name.startswith(self.fullname): + return + return self + + def load_module(self, name): + if name == self.fullname: + return random + + cname = name.rsplit('.', 1)[-1] + try: + return getattr(random, cname) + except AttributeError: + raise ImportError + + +sys.meta_path.append(RandomModuleHook()) diff --git a/python/pyspark/mllib/feature.py b/python/pyspark/mllib/feature.py index 9ec28079ae..8cb992df2d 100644 --- a/python/pyspark/mllib/feature.py +++ b/python/pyspark/mllib/feature.py @@ -18,8 +18,11 @@ """ Python package for feature in MLlib. """ +from __future__ import absolute_import + import sys import warnings +import random from py4j.protocol import Py4JJavaError @@ -341,8 +344,6 @@ class Word2Vec(object): """ Construct Word2Vec instance """ - import random # this can't be on the top because of mllib.random - self.vectorSize = 100 self.learningRate = 0.025 self.numPartitions = 1 @@ -411,8 +412,5 @@ def _test(): exit(-1) if __name__ == "__main__": - # remove current path from list of search paths to avoid importing mllib.random - # for C{import random}, which is done in an external dependency of pyspark during doctests. 
- import sys sys.path.pop(0) _test() diff --git a/python/pyspark/mllib/linalg.py b/python/pyspark/mllib/linalg.py index e35202dca0..537b176578 100644 --- a/python/pyspark/mllib/linalg.py +++ b/python/pyspark/mllib/linalg.py @@ -614,8 +614,4 @@ def _test(): exit(-1) if __name__ == "__main__": - # remove current path from list of search paths to avoid importing mllib.random - # for C{import random}, which is done in an external dependency of pyspark during doctests. - import sys - sys.path.pop(0) _test() diff --git a/python/pyspark/mllib/random.py b/python/pyspark/mllib/rand.py index cb4304f921..cb4304f921 100644 --- a/python/pyspark/mllib/random.py +++ b/python/pyspark/mllib/rand.py diff --git a/python/run-tests b/python/run-tests index a4f0cac059..e66854b44d 100755 --- a/python/run-tests +++ b/python/run-tests @@ -72,7 +72,7 @@ function run_mllib_tests() { run_test "pyspark/mllib/clustering.py" run_test "pyspark/mllib/feature.py" run_test "pyspark/mllib/linalg.py" - run_test "pyspark/mllib/random.py" + run_test "pyspark/mllib/rand.py" run_test "pyspark/mllib/recommendation.py" run_test "pyspark/mllib/regression.py" run_test "pyspark/mllib/stat.py" |