aboutsummaryrefslogtreecommitdiff
path: root/python/pyspark
diff options
context:
space:
mode:
author: Michael Nazario <mnazario@palantir.com> 2015-05-29 14:13:44 -0700
committer: Davies Liu <davies@databricks.com> 2015-05-29 14:13:44 -0700
commit: 1c5b19827a091b5aba69a967600e7ca35ed3bcfd (patch)
tree: d350243fffdced567492b8029773005b512a2abc /python/pyspark
parent: 5f48e5c33bafa376be5741e260a037c66103fdcd (diff)
download: spark-1c5b19827a091b5aba69a967600e7ca35ed3bcfd.tar.gz
spark-1c5b19827a091b5aba69a967600e7ca35ed3bcfd.tar.bz2
spark-1c5b19827a091b5aba69a967600e7ca35ed3bcfd.zip
[SPARK-7899] [PYSPARK] Fix Python 3 pyspark/sql/types module conflict
This PR makes the types module in `pyspark/sql/types` work with pylint static analysis by removing the dynamic naming of the `pyspark/sql/_types` module to `pyspark/sql/types`.

Tests are now loaded using `$PYSPARK_DRIVER_PYTHON -m module` rather than `$PYSPARK_DRIVER_PYTHON module.py`. The old method adds the location of `module.py` to `sys.path`, so this change prevents accidental use of relative paths in Python.

Author: Michael Nazario <mnazario@palantir.com>

Closes #6439 from mnazario/feature/SPARK-7899 and squashes the following commits:

366ef30 [Michael Nazario] Remove hack on random.py
bb8b04d [Michael Nazario] Make doctests consistent with other tests
6ee4f75 [Michael Nazario] Change test scripts to use "-m"
673528f [Michael Nazario] Move _types back to types
Diffstat (limited to 'python/pyspark')
-rw-r--r-- python/pyspark/accumulators.py | 4
-rw-r--r-- python/pyspark/mllib/__init__.py | 8
-rw-r--r-- python/pyspark/mllib/random.py (renamed from python/pyspark/mllib/rand.py) | 0
-rw-r--r-- python/pyspark/sql/__init__.py | 12
-rw-r--r-- python/pyspark/sql/types.py (renamed from python/pyspark/sql/_types.py) | 0
5 files changed, 4 insertions, 20 deletions
diff --git a/python/pyspark/accumulators.py b/python/pyspark/accumulators.py
index 0d21a13204..adca90ddaf 100644
--- a/python/pyspark/accumulators.py
+++ b/python/pyspark/accumulators.py
@@ -261,3 +261,7 @@ def _start_update_server():
thread.daemon = True
thread.start()
return server
+
+if __name__ == "__main__":
+ import doctest
+ doctest.testmod()
diff --git a/python/pyspark/mllib/__init__.py b/python/pyspark/mllib/__init__.py
index 07507b2ad0..b11aed2c3a 100644
--- a/python/pyspark/mllib/__init__.py
+++ b/python/pyspark/mllib/__init__.py
@@ -28,11 +28,3 @@ if numpy.version.version < '1.4':
__all__ = ['classification', 'clustering', 'feature', 'fpm', 'linalg', 'random',
'recommendation', 'regression', 'stat', 'tree', 'util']
-
-import sys
-from . import rand as random
-modname = __name__ + '.random'
-random.__name__ = modname
-random.RandomRDDs.__module__ = modname
-sys.modules[modname] = random
-del modname, sys
diff --git a/python/pyspark/mllib/rand.py b/python/pyspark/mllib/random.py
index 06fbc0eb6a..06fbc0eb6a 100644
--- a/python/pyspark/mllib/rand.py
+++ b/python/pyspark/mllib/random.py
diff --git a/python/pyspark/sql/__init__.py b/python/pyspark/sql/__init__.py
index 8fee92ae3a..726d288d97 100644
--- a/python/pyspark/sql/__init__.py
+++ b/python/pyspark/sql/__init__.py
@@ -50,18 +50,6 @@ def since(version):
return f
return deco
-# fix the module name conflict for Python 3+
-import sys
-from . import _types as types
-modname = __name__ + '.types'
-types.__name__ = modname
-# update the __module__ for all objects, make them picklable
-for v in types.__dict__.values():
- if hasattr(v, "__module__") and v.__module__.endswith('._types'):
- v.__module__ = modname
-sys.modules[modname] = types
-del modname, sys
-
from pyspark.sql.types import Row
from pyspark.sql.context import SQLContext, HiveContext
from pyspark.sql.column import Column
diff --git a/python/pyspark/sql/_types.py b/python/pyspark/sql/types.py
index 9e7e9f04bc..9e7e9f04bc 100644
--- a/python/pyspark/sql/_types.py
+++ b/python/pyspark/sql/types.py