[SPARK-7899] [PYSPARK] Fix Python 3 pyspark/sql/types module conflict

This PR makes the types module in `pyspark/sql/types` work with pylint static analysis by removing the dynamic renaming of the `pyspark/sql/_types` module to `pyspark/sql/types`.

Tests are now loaded using `$PYSPARK_DRIVER_PYTHON -m module` rather than `$PYSPARK_DRIVER_PYTHON module.py`. The old method adds the location of `module.py` to `sys.path`, so this change prevents accidental use of relative paths in Python.

Author: Michael Nazario <mnazario@palantir.com>

Closes #6439 from mnazario/feature/SPARK-7899 and squashes the following commits:

366ef30 [Michael Nazario] Remove hack on random.py
bb8b04d [Michael Nazario] Make doctests consistent with other tests
6ee4f75 [Michael Nazario] Change test scripts to use "-m"
673528f [Michael Nazario] Move _types back to types
author: Michael Nazario <mnazario@palantir.com> 2015-05-29 14:13:44 -0700
committer: Davies Liu <davies@databricks.com> 2015-05-29 14:13:44 -0700
commit: 1c5b19827a091b5aba69a967600e7ca35ed3bcfd (patch)
tree: d350243fffdced567492b8029773005b512a2abc /python/pyspark
parent: 5f48e5c33bafa376be5741e260a037c66103fdcd (diff)
download: spark-1c5b19827a091b5aba69a967600e7ca35ed3bcfd.tar.gz
spark-1c5b19827a091b5aba69a967600e7ca35ed3bcfd.tar.bz2
spark-1c5b19827a091b5aba69a967600e7ca35ed3bcfd.zip
5 files changed, 4 insertions, 20 deletions
diff --git a/python/pyspark/accumulators.py b/python/pyspark/accumulators.py
index 0d21a13204..adca90ddaf 100644
--- a/python/pyspark/accumulators.py
+++ b/python/pyspark/accumulators.py
@@ -261,3 +261,7 @@ def _start_update_server():
     thread.daemon = True
     thread.start()
     return server
+
+if __name__ == "__main__":
+    import doctest
+    doctest.testmod()
diff --git a/python/pyspark/mllib/__init__.py b/python/pyspark/mllib/__init__.py
index 07507b2ad0..b11aed2c3a 100644
--- a/python/pyspark/mllib/__init__.py
+++ b/python/pyspark/mllib/__init__.py
@@ -28,11 +28,3 @@ if numpy.version.version < '1.4':
 
 __all__ = ['classification', 'clustering', 'feature', 'fpm', 'linalg', 'random',
            'recommendation', 'regression', 'stat', 'tree', 'util']
-
-import sys
-from . import rand as random
-modname = __name__ + '.random'
-random.__name__ = modname
-random.RandomRDDs.__module__ = modname
-sys.modules[modname] = random
-del modname, sys
diff --git a/python/pyspark/mllib/rand.py b/python/pyspark/mllib/random.py
index 06fbc0eb6a..06fbc0eb6a 100644
--- a/python/pyspark/mllib/rand.py
+++ b/python/pyspark/mllib/random.py
diff --git a/python/pyspark/sql/__init__.py b/python/pyspark/sql/__init__.py
index 8fee92ae3a..726d288d97 100644
--- a/python/pyspark/sql/__init__.py
+++ b/python/pyspark/sql/__init__.py
@@ -50,18 +50,6 @@ def since(version):
         return f
     return deco
 
-# fix the module name conflict for Python 3+
-import sys
-from . import _types as types
-modname = __name__ + '.types'
-types.__name__ = modname
-# update the __module__ for all objects, make them picklable
-for v in types.__dict__.values():
-    if hasattr(v, "__module__") and v.__module__.endswith('._types'):
-        v.__module__ = modname
-sys.modules[modname] = types
-del modname, sys
-
 from pyspark.sql.types import Row
 from pyspark.sql.context import SQLContext, HiveContext
 from pyspark.sql.column import Column
diff --git a/python/pyspark/sql/_types.py b/python/pyspark/sql/types.py
index 9e7e9f04bc..9e7e9f04bc 100644
--- a/python/pyspark/sql/_types.py
+++ b/python/pyspark/sql/types.py
author	Michael Nazario <mnazario@palantir.com>	2015-05-29 14:13:44 -0700
committer	Davies Liu <davies@databricks.com>	2015-05-29 14:13:44 -0700
commit	1c5b19827a091b5aba69a967600e7ca35ed3bcfd (patch)
tree	d350243fffdced567492b8029773005b512a2abc /python/pyspark
parent	5f48e5c33bafa376be5741e260a037c66103fdcd (diff)
download	spark-1c5b19827a091b5aba69a967600e7ca35ed3bcfd.tar.gz spark-1c5b19827a091b5aba69a967600e7ca35ed3bcfd.tar.bz2 spark-1c5b19827a091b5aba69a967600e7ca35ed3bcfd.zip