about summary refs log tree commit diff
path: root/python
diff options
context:
space:
mode:
authorMatei Zaharia <matei@eecs.berkeley.edu>2013-01-24 13:05:03 -0800
committerMatei Zaharia <matei@eecs.berkeley.edu>2013-01-24 13:05:03 -0800
commita2f4891d1dd7c6c3ee189853ef78f60ec44907e1 (patch)
tree97ab1760be1f8317aad9f0a9fc8552e3fee94ec7 /python
parent0fe173a3a5afc35c913768d01f5bdc13654f0c64 (diff)
parentb47d054cfc5ef45b92a1c970388722ffa0283e66 (diff)
downloadspark-a2f4891d1dd7c6c3ee189853ef78f60ec44907e1.tar.gz
spark-a2f4891d1dd7c6c3ee189853ef78f60ec44907e1.tar.bz2
spark-a2f4891d1dd7c6c3ee189853ef78f60ec44907e1.zip
Merge pull request #396 from JoshRosen/spark-653
Make PySpark AccumulatorParam an abstract base class
Diffstat (limited to 'python')
-rw-r--r--python/pyspark/accumulators.py | 28
-rw-r--r--python/pyspark/context.py | 15
2 files changed, 29 insertions, 14 deletions
diff --git a/python/pyspark/accumulators.py b/python/pyspark/accumulators.py
index 8011779ddc..61fcbbd376 100644
--- a/python/pyspark/accumulators.py
+++ b/python/pyspark/accumulators.py
@@ -25,7 +25,8 @@
>>> a.value
13
->>> class VectorAccumulatorParam(object):
+>>> from pyspark.accumulators import AccumulatorParam
+>>> class VectorAccumulatorParam(AccumulatorParam):
... def zero(self, value):
... return [0.0] * len(value)
... def addInPlace(self, val1, val2):
@@ -90,8 +91,7 @@ class Accumulator(object):
While C{SparkContext} supports accumulators for primitive data types like C{int} and
C{float}, users can also define accumulators for custom types by providing a custom
- C{AccumulatorParam} object with a C{zero} and C{addInPlace} method. Refer to the doctest
- of this module for an example.
+ L{AccumulatorParam} object. Refer to the doctest of this module for an example.
"""
def __init__(self, aid, value, accum_param):
@@ -134,7 +134,27 @@ class Accumulator(object):
return "Accumulator<id=%i, value=%s>" % (self.aid, self._value)
-class AddingAccumulatorParam(object):
+class AccumulatorParam(object):
+ """
+ Helper object that defines how to accumulate values of a given type.
+ """
+
+ def zero(self, value):
+ """
+ Provide a "zero value" for the type, compatible in dimensions with the
+ provided C{value} (e.g., a zero vector)
+ """
+ raise NotImplementedError
+
+ def addInPlace(self, value1, value2):
+ """
+ Add two values of the accumulator's data type, returning a new value;
+ for efficiency, can also update C{value1} in place and return it.
+ """
+ raise NotImplementedError
+
+
+class AddingAccumulatorParam(AccumulatorParam):
"""
An AccumulatorParam that uses the + operators to add values. Designed for simple types
such as integers, floats, and lists. Requires the zero value for the underlying type
diff --git a/python/pyspark/context.py b/python/pyspark/context.py
index 3e33776af0..783e3dc148 100644
--- a/python/pyspark/context.py
+++ b/python/pyspark/context.py
@@ -165,16 +165,11 @@ class SparkContext(object):
def accumulator(self, value, accum_param=None):
"""
- Create an C{Accumulator} with the given initial value, using a given
- AccumulatorParam helper object to define how to add values of the data
- type if provided. Default AccumulatorParams are used for integers and
- floating-point numbers if you do not provide one. For other types, the
- AccumulatorParam must implement two methods:
- - C{zero(value)}: provide a "zero value" for the type, compatible in
- dimensions with the provided C{value} (e.g., a zero vector).
- - C{addInPlace(val1, val2)}: add two values of the accumulator's data
- type, returning a new value; for efficiency, can also update C{val1}
- in place and return it.
+ Create an L{Accumulator} with the given initial value, using a given
+ L{AccumulatorParam} helper object to define how to add values of the
+ data type if provided. Default AccumulatorParams are used for integers
+ and floating-point numbers if you do not provide one. For other types,
+ a custom AccumulatorParam can be used.
"""
if accum_param == None:
if isinstance(value, int):