diff options
Diffstat (limited to 'python/pyspark/__init__.py')
-rw-r--r-- | python/pyspark/__init__.py | 10 |
1 files changed, 10 insertions, 0 deletions
diff --git a/python/pyspark/__init__.py b/python/pyspark/__init__.py index 312c75d112..c58555fc9d 100644 --- a/python/pyspark/__init__.py +++ b/python/pyspark/__init__.py @@ -49,6 +49,16 @@ Hive: Main entry point for accessing data stored in Apache Hive.. """ +# The following block allows us to import python's random instead of mllib.random for scripts in +# mllib that depend on top level pyspark packages, which transitively depend on python's random. +# Since Python's import logic looks for modules in the current package first, we eliminate +# mllib.random as a candidate for C{import random} by removing the first search path, the script's +# location, in order to force the loader to look in Python's top-level modules for C{random}. +import sys +s = sys.path.pop(0) +import random +sys.path.insert(0, s) + from pyspark.conf import SparkConf from pyspark.context import SparkContext from pyspark.sql import SQLContext |