aboutsummaryrefslogtreecommitdiff
path: root/python
diff options
context:
space:
mode:
authorDavies Liu <davies@databricks.com>2014-11-13 10:24:54 -0800
committerXiangrui Meng <meng@databricks.com>2014-11-13 10:24:54 -0800
commitce0333f9a008348692bb9a200449d2d992e7825e (patch)
tree03a42efe832281f0cb668d460a53153cd4bbc99b /python
parent484fecbf1402c25f310be0b0a5ec15c11cbd65c3 (diff)
downloadspark-ce0333f9a008348692bb9a200449d2d992e7825e.tar.gz
spark-ce0333f9a008348692bb9a200449d2d992e7825e.tar.bz2
spark-ce0333f9a008348692bb9a200449d2d992e7825e.zip
[SPARK-4348] [PySpark] [MLlib] rename random.py to rand.py
This PR renames random.py to rand.py to avoid the side effects of conflicting with the random module, but still keeps the same interface as before. ``` >>> from pyspark.mllib.random import RandomRDDs ``` ``` $ pydoc pyspark.mllib.random Help on module random in pyspark.mllib: NAME random - Python package for random data generation. FILE /Users/davies/work/spark/python/pyspark/mllib/rand.py CLASSES __builtin__.object pyspark.mllib.random.RandomRDDs class RandomRDDs(__builtin__.object) | Generator methods for creating RDDs comprised of i.i.d samples from | some distribution. | | Static methods defined here: | | normalRDD(sc, size, numPartitions=None, seed=None) ``` cc mengxr reference link: http://xion.org.pl/2012/05/06/hacking-python-imports/ Author: Davies Liu <davies@databricks.com> Closes #3216 from davies/random and squashes the following commits: 7ac4e8b [Davies Liu] rename random.py to rand.py
Diffstat (limited to 'python')
-rw-r--r--python/pyspark/__init__.py10
-rw-r--r--python/pyspark/mllib/__init__.py34
-rw-r--r--python/pyspark/mllib/feature.py8
-rw-r--r--python/pyspark/mllib/linalg.py4
-rw-r--r--python/pyspark/mllib/rand.py (renamed from python/pyspark/mllib/random.py)0
-rwxr-xr-xpython/run-tests2
6 files changed, 38 insertions, 20 deletions
diff --git a/python/pyspark/__init__.py b/python/pyspark/__init__.py
index e39e6514d7..9556e4718e 100644
--- a/python/pyspark/__init__.py
+++ b/python/pyspark/__init__.py
@@ -37,16 +37,6 @@ Public classes:
"""
-# The following block allows us to import python's random instead of mllib.random for scripts in
-# mllib that depend on top level pyspark packages, which transitively depend on python's random.
-# Since Python's import logic looks for modules in the current package first, we eliminate
-# mllib.random as a candidate for C{import random} by removing the first search path, the script's
-# location, in order to force the loader to look in Python's top-level modules for C{random}.
-import sys
-s = sys.path.pop(0)
-import random
-sys.path.insert(0, s)
-
from pyspark.conf import SparkConf
from pyspark.context import SparkContext
from pyspark.rdd import RDD
diff --git a/python/pyspark/mllib/__init__.py b/python/pyspark/mllib/__init__.py
index 4149f54931..5030a655fc 100644
--- a/python/pyspark/mllib/__init__.py
+++ b/python/pyspark/mllib/__init__.py
@@ -24,3 +24,37 @@ Python bindings for MLlib.
import numpy
if numpy.version.version < '1.4':
raise Exception("MLlib requires NumPy 1.4+")
+
+__all__ = ['classification', 'clustering', 'feature', 'linalg', 'random',
+ 'recommendation', 'regression', 'stat', 'tree', 'util']
+
+import sys
+import rand as random
+random.__name__ = 'random'
+random.RandomRDDs.__module__ = __name__ + '.random'
+
+
+class RandomModuleHook(object):
+ """
+ Hook to import pyspark.mllib.random
+ """
+ fullname = __name__ + '.random'
+
+ def find_module(self, name, path=None):
+ # skip all other modules
+ if not name.startswith(self.fullname):
+ return
+ return self
+
+ def load_module(self, name):
+ if name == self.fullname:
+ return random
+
+ cname = name.rsplit('.', 1)[-1]
+ try:
+ return getattr(random, cname)
+ except AttributeError:
+ raise ImportError
+
+
+sys.meta_path.append(RandomModuleHook())
diff --git a/python/pyspark/mllib/feature.py b/python/pyspark/mllib/feature.py
index 9ec28079ae..8cb992df2d 100644
--- a/python/pyspark/mllib/feature.py
+++ b/python/pyspark/mllib/feature.py
@@ -18,8 +18,11 @@
"""
Python package for feature in MLlib.
"""
+from __future__ import absolute_import
+
import sys
import warnings
+import random
from py4j.protocol import Py4JJavaError
@@ -341,8 +344,6 @@ class Word2Vec(object):
"""
Construct Word2Vec instance
"""
- import random # this can't be on the top because of mllib.random
-
self.vectorSize = 100
self.learningRate = 0.025
self.numPartitions = 1
@@ -411,8 +412,5 @@ def _test():
exit(-1)
if __name__ == "__main__":
- # remove current path from list of search paths to avoid importing mllib.random
- # for C{import random}, which is done in an external dependency of pyspark during doctests.
- import sys
sys.path.pop(0)
_test()
diff --git a/python/pyspark/mllib/linalg.py b/python/pyspark/mllib/linalg.py
index e35202dca0..537b176578 100644
--- a/python/pyspark/mllib/linalg.py
+++ b/python/pyspark/mllib/linalg.py
@@ -614,8 +614,4 @@ def _test():
exit(-1)
if __name__ == "__main__":
- # remove current path from list of search paths to avoid importing mllib.random
- # for C{import random}, which is done in an external dependency of pyspark during doctests.
- import sys
- sys.path.pop(0)
_test()
diff --git a/python/pyspark/mllib/random.py b/python/pyspark/mllib/rand.py
index cb4304f921..cb4304f921 100644
--- a/python/pyspark/mllib/random.py
+++ b/python/pyspark/mllib/rand.py
diff --git a/python/run-tests b/python/run-tests
index a4f0cac059..e66854b44d 100755
--- a/python/run-tests
+++ b/python/run-tests
@@ -72,7 +72,7 @@ function run_mllib_tests() {
run_test "pyspark/mllib/clustering.py"
run_test "pyspark/mllib/feature.py"
run_test "pyspark/mllib/linalg.py"
- run_test "pyspark/mllib/random.py"
+ run_test "pyspark/mllib/rand.py"
run_test "pyspark/mllib/recommendation.py"
run_test "pyspark/mllib/regression.py"
run_test "pyspark/mllib/stat.py"