aboutsummaryrefslogtreecommitdiff
path: root/python/pyspark/accumulators.py
diff options
context:
space:
mode:
authorJosh Rosen <joshrosen@eecs.berkeley.edu>2013-01-20 15:12:54 -0800
committerJosh Rosen <joshrosen@eecs.berkeley.edu>2013-01-21 22:32:57 -0800
commitc75ae3622eeed068c44b1f823ef4d87d01a720fd (patch)
tree9a985de52fc913bbbe87945e34e8a39ffcebab40 /python/pyspark/accumulators.py
parent7d3e359f2c463681cf0128da2c6692beb13dade9 (diff)
downloadspark-c75ae3622eeed068c44b1f823ef4d87d01a720fd.tar.gz
spark-c75ae3622eeed068c44b1f823ef4d87d01a720fd.tar.bz2
spark-c75ae3622eeed068c44b1f823ef4d87d01a720fd.zip
Make AccumulatorParam an abstract base class.
Diffstat (limited to 'python/pyspark/accumulators.py')
-rw-r--r--python/pyspark/accumulators.py29
1 files changed, 26 insertions, 3 deletions
diff --git a/python/pyspark/accumulators.py b/python/pyspark/accumulators.py
index 8011779ddc..5a9269f9bb 100644
--- a/python/pyspark/accumulators.py
+++ b/python/pyspark/accumulators.py
@@ -61,6 +61,7 @@ Traceback (most recent call last):
Exception:...
"""
+from abc import ABCMeta, abstractmethod
import struct
import SocketServer
import threading
@@ -90,8 +91,7 @@ class Accumulator(object):
While C{SparkContext} supports accumulators for primitive data types like C{int} and
C{float}, users can also define accumulators for custom types by providing a custom
- C{AccumulatorParam} object with a C{zero} and C{addInPlace} method. Refer to the doctest
- of this module for an example.
+ L{AccumulatorParam} object. Refer to the doctest of this module for an example.
"""
def __init__(self, aid, value, accum_param):
@@ -134,7 +134,30 @@ class Accumulator(object):
return "Accumulator<id=%i, value=%s>" % (self.aid, self._value)
-class AddingAccumulatorParam(object):
+class AccumulatorParam(object):
+ """
+ Helper object that defines how to accumulate values of a given type.
+ """
+ __metaclass__ = ABCMeta
+
+ @abstractmethod
+ def zero(self, value):
+ """
+ Provide a "zero value" for the type, compatible in dimensions with the
+ provided C{value} (e.g., a zero vector)
+ """
+ return
+
+ @abstractmethod
+ def addInPlace(self, value1, value2):
+ """
+ Add two values of the accumulator's data type, returning a new value;
+ for efficiency, can also update C{value1} in place and return it.
+ """
+ return
+
+
+class AddingAccumulatorParam(AccumulatorParam):
"""
An AccumulatorParam that uses the + operators to add values. Designed for simple types
such as integers, floats, and lists. Requires the zero value for the underlying type