diff options
author | Erik Shilts <erik.shilts@opower.com> | 2015-09-29 13:38:15 -0700 |
---|---|---|
committer | Davies Liu <davies.liu@gmail.com> | 2015-09-29 13:38:15 -0700 |
commit | 7d399c9daa6769ab234890c551e1b3456e0e6e85 (patch) | |
tree | 7248de1bb7f92111fd7cbfb22ff9ffb261bd2758 /python/pyspark/statcounter.py | |
parent | ab41864f91713b450695babd5c1424622cb57a54 (diff) | |
download | spark-7d399c9daa6769ab234890c551e1b3456e0e6e85.tar.gz spark-7d399c9daa6769ab234890c551e1b3456e0e6e85.tar.bz2 spark-7d399c9daa6769ab234890c551e1b3456e0e6e85.zip |
[SPARK-6919] [PYSPARK] Add asDict method to StatCounter
Add method to easily convert a StatCounter instance into a Python dict
https://issues.apache.org/jira/browse/SPARK-6919
Note: This is my original work and the existing Spark license applies.
Author: Erik Shilts <erik.shilts@opower.com>
Closes #5516 from eshilts/statcounter-asdict.
Diffstat (limited to 'python/pyspark/statcounter.py')
-rw-r--r-- | python/pyspark/statcounter.py | 22 |
1 files changed, 22 insertions, 0 deletions
```diff
diff --git a/python/pyspark/statcounter.py b/python/pyspark/statcounter.py
index 0fee3b2096..03ea0b6d33 100644
--- a/python/pyspark/statcounter.py
+++ b/python/pyspark/statcounter.py
@@ -131,6 +131,28 @@ class StatCounter(object):
     def sampleStdev(self):
         return sqrt(self.sampleVariance())
 
+    def asDict(self, sample=False):
+        """Returns the :class:`StatCounter` members as a ``dict``.
+
+        >>> sc.parallelize([1., 2., 3., 4.]).stats().asDict()
+        {'count': 4L,
+         'max': 4.0,
+         'mean': 2.5,
+         'min': 1.0,
+         'stdev': 1.2909944487358056,
+         'sum': 10.0,
+         'variance': 1.6666666666666667}
+        """
+        return {
+            'count': self.count(),
+            'mean': self.mean(),
+            'sum': self.sum(),
+            'min': self.min(),
+            'max': self.max(),
+            'stdev': self.stdev() if sample else self.sampleStdev(),
+            'variance': self.variance() if sample else self.sampleVariance()
+        }
+
     def __repr__(self):
         return ("(count: %s, mean: %s, stdev: %s, max: %s, min: %s)" %
                 (self.count(), self.mean(), self.stdev(), self.max(), self.min()))
```