diff options
Diffstat (limited to 'python')
-rw-r--r-- | python/pyspark/context.py | 8 | ||||
-rw-r--r-- | python/pyspark/rdd.py | 11 |
2 files changed, 19 insertions, 0 deletions
diff --git a/python/pyspark/context.py b/python/pyspark/context.py index b5c2421b88..23ff8ccf61 100644 --- a/python/pyspark/context.py +++ b/python/pyspark/context.py @@ -230,6 +230,14 @@ class SparkContext(object): else: SparkContext._active_spark_context = instance + def __getnewargs__(self): + # This method is called when attempting to pickle SparkContext, which is always an error: + raise Exception( + "It appears that you are attempting to reference SparkContext from a broadcast " + "variable, action, or transformation. SparkContext can only be used on the driver, " + "not in code that is run on workers. For more information, see SPARK-5063." + ) + def __enter__(self): """ Enable 'with SparkContext(...) as sc: app(sc)' syntax. diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py index bd2ff00c0f..f8b5f18253 100644 --- a/python/pyspark/rdd.py +++ b/python/pyspark/rdd.py @@ -141,6 +141,17 @@ class RDD(object): def __repr__(self): return self._jrdd.toString() + def __getnewargs__(self): + # This method is called when attempting to pickle an RDD, which is always an error: + raise Exception( + "It appears that you are attempting to broadcast an RDD or reference an RDD from an " + "action or transformation. RDD transformations and actions can only be invoked by the " + "driver, not inside of other transformations; for example, " + "rdd1.map(lambda x: rdd2.values.count() * x) is invalid because the values " + "transformation and count action cannot be performed inside of the rdd1.map " + "transformation. For more information, see SPARK-5063." + ) + @property def context(self): """ |