about summary refs log tree commit diff
path: root/python
diff options
context:
space:
mode:
authorMatei Zaharia <matei@eecs.berkeley.edu>2013-01-24 13:05:03 -0800
committerMatei Zaharia <matei@eecs.berkeley.edu>2013-01-24 13:05:03 -0800
commita2f4891d1dd7c6c3ee189853ef78f60ec44907e1 (patch)
tree97ab1760be1f8317aad9f0a9fc8552e3fee94ec7 /python
parent0fe173a3a5afc35c913768d01f5bdc13654f0c64 (diff)
parentb47d054cfc5ef45b92a1c970388722ffa0283e66 (diff)
downloadspark-a2f4891d1dd7c6c3ee189853ef78f60ec44907e1.tar.gz
spark-a2f4891d1dd7c6c3ee189853ef78f60ec44907e1.tar.bz2
spark-a2f4891d1dd7c6c3ee189853ef78f60ec44907e1.zip
Merge pull request #396 from JoshRosen/spark-653
Make PySpark AccumulatorParam an abstract base class
Diffstat (limited to 'python')
-rw-r--r--python/pyspark/accumulators.py | 28
-rw-r--r--python/pyspark/context.py | 15
2 files changed, 29 insertions, 14 deletions
diff --git a/python/pyspark/accumulators.py b/python/pyspark/accumulators.py
index 8011779ddc..61fcbbd376 100644
--- a/python/pyspark/accumulators.py
+++ b/python/pyspark/accumulators.py
@@ -25,7 +25,8 @@
>>> a.value
13
->>> class VectorAccumulatorParam(object):
+>>> from pyspark.accumulators import AccumulatorParam
+>>> class VectorAccumulatorParam(AccumulatorParam):
... def zero(self, value):
... return [0.0] * len(value)
... def addInPlace(self, val1, val2):
@@ -90,8 +91,7 @@ class Accumulator(object):
While C{SparkContext} supports accumulators for primitive data types like C{int} and
C{float}, users can also define accumulators for custom types by providing a custom
- C{AccumulatorParam} object with a C{zero} and C{addInPlace} method. Refer to the doctest
- of this module for an example.
+ L{AccumulatorParam} object. Refer to the doctest of this module for an example.
"""
def __init__(self, aid, value, accum_param):
@@ -134,7 +134,27 @@ class Accumulator(object):
return "Accumulator<id=%i, value=%s>" % (self.aid, self._value)
-class AddingAccumulatorParam(object):
+class AccumulatorParam(object):
+ """
+ Helper object that defines how to accumulate values of a given type.
+ """
+
+ def zero(self, value):
+ """
+ Provide a "zero value" for the type, compatible in dimensions with the
+ provided C{value} (e.g., a zero vector)
+ """
+ raise NotImplementedError
+
+ def addInPlace(self, value1, value2):
+ """
+ Add two values of the accumulator's data type, returning a new value;
+ for efficiency, can also update C{value1} in place and return it.
+ """
+ raise NotImplementedError
+
+
+class AddingAccumulatorParam(AccumulatorParam):
"""
An AccumulatorParam that uses the + operators to add values. Designed for simple types
such as integers, floats, and lists. Requires the zero value for the underlying type
diff --git a/python/pyspark/context.py b/python/pyspark/context.py
index 3e33776af0..783e3dc148 100644
--- a/python/pyspark/context.py
+++ b/python/pyspark/context.py
@@ -165,16 +165,11 @@ class SparkContext(object):
def accumulator(self, value, accum_param=None):
"""
- Create an C{Accumulator} with the given initial value, using a given
- AccumulatorParam helper object to define how to add values of the data
- type if provided. Default AccumulatorParams are used for integers and
- floating-point numbers if you do not provide one. For other types, the
- AccumulatorParam must implement two methods:
- - C{zero(value)}: provide a "zero value" for the type, compatible in
- dimensions with the provided C{value} (e.g., a zero vector).
- - C{addInPlace(val1, val2)}: add two values of the accumulator's data
- type, returning a new value; for efficiency, can also update C{val1}
- in place and return it.
+ Create an L{Accumulator} with the given initial value, using a given
+ L{AccumulatorParam} helper object to define how to add values of the
+ data type if provided. Default AccumulatorParams are used for integers
+ and floating-point numbers if you do not provide one. For other types,
+ a custom AccumulatorParam can be used.
"""
if accum_param == None:
if isinstance(value, int):