blob: f017a9b8b4e98537447e524c043bee4adc0dbff9 (
plain) (
tree)
|
|
<?xml version="1.0" encoding="ascii"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<title>pyspark.accumulators</title>
<link rel="stylesheet" href="epydoc.css" type="text/css" />
<script type="text/javascript" src="epydoc.js"></script>
</head>
<body bgcolor="white" text="black" link="blue" vlink="#204080"
alink="#204080">
<!-- ==================== NAVIGATION BAR ==================== -->
<table class="navbar" border="0" width="100%" cellpadding="0"
bgcolor="#a0c0ff" cellspacing="0">
<tr valign="middle">
<!-- Home link -->
<th> <a
href="pyspark-module.html">Home</a> </th>
<!-- Tree link -->
<th> <a
href="module-tree.html">Trees</a> </th>
<!-- Index link -->
<th> <a
href="identifier-index.html">Indices</a> </th>
<!-- Help link -->
<th> <a
href="help.html">Help</a> </th>
<!-- Project homepage -->
<th class="navbar" align="right" width="100%">
<table border="0" cellpadding="0" cellspacing="0">
<tr><th class="navbar" align="center"
><a class="navbar" target="_top" href="http://spark-project.org">PySpark</a></th>
</tr></table></th>
</tr>
</table>
<table width="100%" cellpadding="0" cellspacing="0">
<tr valign="top">
<td width="100%">
<span class="breadcrumbs">
<a href="pyspark-module.html">Package pyspark</a> ::
Module accumulators
</span>
</td>
<td>
<table cellpadding="0" cellspacing="0">
<!-- hide/show private -->
<tr><td align="right"><span class="options"
>[<a href="frames.html" target="_top">frames</a
>] | [<a href="pyspark.accumulators-module.html"
target="_top">no frames</a>]</span></td></tr>
</table>
</td>
</tr>
</table>
<!-- ==================== MODULE DESCRIPTION ==================== -->
<h1 class="epydoc">Module accumulators</h1><p class="nomargin-top"><span class="codelink"><a href="pyspark.accumulators-pysrc.html">source code</a></span></p>
<pre class="py-doctest">
<span class="py-prompt">>>> </span><span class="py-keyword">from</span> pyspark.context <span class="py-keyword">import</span> SparkContext
<span class="py-prompt">>>> </span>sc = SparkContext(<span class="py-string">'local'</span>, <span class="py-string">'test'</span>)
<span class="py-prompt">>>> </span>a = sc.accumulator(1)
<span class="py-prompt">>>> </span>a.value
<span class="py-output">1</span>
<span class="py-output"></span><span class="py-prompt">>>> </span>a.value = 2
<span class="py-prompt">>>> </span>a.value
<span class="py-output">2</span>
<span class="py-output"></span><span class="py-prompt">>>> </span>a += 5
<span class="py-prompt">>>> </span>a.value
<span class="py-output">7</span></pre>
<pre class="py-doctest">
<span class="py-prompt">>>> </span>sc.accumulator(1.0).value
<span class="py-output">1.0</span></pre>
<pre class="py-doctest">
<span class="py-prompt">>>> </span>sc.accumulator(1j).value
<span class="py-output">1j</span></pre>
<pre class="py-doctest">
<span class="py-prompt">>>> </span>rdd = sc.parallelize([1,2,3])
<span class="py-prompt">>>> </span><span class="py-keyword">def</span> <span class="py-defname">f</span>(x):
<span class="py-more">... </span> <span class="py-keyword">global</span> a
<span class="py-more">... </span> a += x
<span class="py-prompt">>>> </span>rdd.foreach(f)
<span class="py-prompt">>>> </span>a.value
<span class="py-output">13</span></pre>
<pre class="py-doctest">
<span class="py-prompt">>>> </span><span class="py-keyword">from</span> pyspark.accumulators <span class="py-keyword">import</span> AccumulatorParam
<span class="py-prompt">>>> </span><span class="py-keyword">class</span> <span class="py-defname">VectorAccumulatorParam</span>(AccumulatorParam):
<span class="py-more">... </span> <span class="py-keyword">def</span> <span class="py-defname">zero</span>(self, value):
<span class="py-more">... </span> return [0.0] * len(value)
<span class="py-more">... </span> <span class="py-keyword">def</span> <span class="py-defname">addInPlace</span>(self, val1, val2):
<span class="py-more">... </span> <span class="py-keyword">for</span> i <span class="py-keyword">in</span> xrange(len(val1)):
<span class="py-more">... </span> val1[i] += val2[i]
<span class="py-more">... </span> return val1
<span class="py-prompt">>>> </span>va = sc.accumulator([1.0, 2.0, 3.0], VectorAccumulatorParam())
<span class="py-prompt">>>> </span>va.value
<span class="py-output">[1.0, 2.0, 3.0]</span>
<span class="py-output"></span><span class="py-prompt">>>> </span><span class="py-keyword">def</span> <span class="py-defname">g</span>(x):
<span class="py-more">... </span> <span class="py-keyword">global</span> va
<span class="py-more">... </span> va += [x] * 3
<span class="py-prompt">>>> </span>rdd.foreach(g)
<span class="py-prompt">>>> </span>va.value
<span class="py-output">[7.0, 8.0, 9.0]</span></pre>
<pre class="py-doctest">
<span class="py-prompt">>>> </span>rdd.map(<span class="py-keyword">lambda</span> x: a.value).collect() <span class="py-comment"># doctest: +IGNORE_EXCEPTION_DETAIL</span>
<span class="py-except">Traceback (most recent call last):</span>
<span class="py-except"> ...</span>
<span class="py-except">Py4JJavaError:...</span></pre>
<pre class="py-doctest">
<span class="py-prompt">>>> </span><span class="py-keyword">def</span> <span class="py-defname">h</span>(x):
<span class="py-more">... </span> <span class="py-keyword">global</span> a
<span class="py-more">... </span> a.value = 7
<span class="py-prompt">>>> </span>rdd.foreach(h) <span class="py-comment"># doctest: +IGNORE_EXCEPTION_DETAIL</span>
<span class="py-except">Traceback (most recent call last):</span>
<span class="py-except"> ...</span>
<span class="py-except">Py4JJavaError:...</span></pre>
<pre class="py-doctest">
<span class="py-prompt">>>> </span>sc.accumulator([1.0, 2.0, 3.0]) <span class="py-comment"># doctest: +IGNORE_EXCEPTION_DETAIL</span>
<span class="py-except">Traceback (most recent call last):</span>
<span class="py-except"> ...</span>
<span class="py-except">Exception:...</span></pre>
<!-- ==================== CLASSES ==================== -->
<a name="section-Classes"></a>
<table class="summary" border="1" cellpadding="3"
cellspacing="0" width="100%" bgcolor="white">
<tr bgcolor="#70b0f0" class="table-header">
<td align="left" colspan="2" class="table-header">
<span class="table-header">Classes</span></td>
</tr>
<tr>
<td width="15%" align="right" valign="top" class="summary">
<span class="summary-type"> </span>
</td><td class="summary">
<a href="pyspark.accumulators.Accumulator-class.html" class="summary-name">Accumulator</a><br />
A shared variable that can be accumulated, i.e., has a commutative
and associative "add" operation.
</td>
</tr>
<tr>
<td width="15%" align="right" valign="top" class="summary">
<span class="summary-type"> </span>
</td><td class="summary">
<a href="pyspark.accumulators.AccumulatorParam-class.html" class="summary-name">AccumulatorParam</a><br />
Helper object that defines how to accumulate values of a given
type.
</td>
</tr>
<tr>
<td width="15%" align="right" valign="top" class="summary">
<span class="summary-type"> </span>
</td><td class="summary">
<a href="pyspark.accumulators.AddingAccumulatorParam-class.html" class="summary-name">AddingAccumulatorParam</a><br />
An AccumulatorParam that uses the + operator to add values.
</td>
</tr>
</table>
<!-- ==================== VARIABLES ==================== -->
<a name="section-Variables"></a>
<table class="summary" border="1" cellpadding="3"
cellspacing="0" width="100%" bgcolor="white">
<tr bgcolor="#70b0f0" class="table-header">
<td align="left" colspan="2" class="table-header">
<span class="table-header">Variables</span></td>
</tr>
<tr>
<td width="15%" align="right" valign="top" class="summary">
<span class="summary-type"> </span>
</td><td class="summary">
<a name="INT_ACCUMULATOR_PARAM"></a><span class="summary-name">INT_ACCUMULATOR_PARAM</span> = <code title="AddingAccumulatorParam(0)">AddingAccumulatorParam(0)</code>
</td>
</tr>
<tr>
<td width="15%" align="right" valign="top" class="summary">
<span class="summary-type"> </span>
</td><td class="summary">
<a name="FLOAT_ACCUMULATOR_PARAM"></a><span class="summary-name">FLOAT_ACCUMULATOR_PARAM</span> = <code title="AddingAccumulatorParam(0.0)">AddingAccumulatorParam(0.0)</code>
</td>
</tr>
<tr>
<td width="15%" align="right" valign="top" class="summary">
<span class="summary-type"> </span>
</td><td class="summary">
<a name="COMPLEX_ACCUMULATOR_PARAM"></a><span class="summary-name">COMPLEX_ACCUMULATOR_PARAM</span> = <code title="AddingAccumulatorParam(0.0j)">AddingAccumulatorParam(0.0j)</code>
</td>
</tr>
</table>
<!-- ==================== NAVIGATION BAR ==================== -->
<table class="navbar" border="0" width="100%" cellpadding="0"
bgcolor="#a0c0ff" cellspacing="0">
<tr valign="middle">
<!-- Home link -->
<th> <a
href="pyspark-module.html">Home</a> </th>
<!-- Tree link -->
<th> <a
href="module-tree.html">Trees</a> </th>
<!-- Index link -->
<th> <a
href="identifier-index.html">Indices</a> </th>
<!-- Help link -->
<th> <a
href="help.html">Help</a> </th>
<!-- Project homepage -->
<th class="navbar" align="right" width="100%">
<table border="0" cellpadding="0" cellspacing="0">
<tr><th class="navbar" align="center"
><a class="navbar" target="_top" href="http://spark-project.org">PySpark</a></th>
</tr></table></th>
</tr>
</table>
<table border="0" cellpadding="0" cellspacing="0" width="100%">
<tr>
<td align="left" class="footer">
Generated by Epydoc 3.0.1 on Tue Feb 26 22:47:39 2013
</td>
<td align="right" class="footer">
<a target="mainFrame" href="http://epydoc.sourceforge.net"
>http://epydoc.sourceforge.net</a>
</td>
</tr>
</table>
<script type="text/javascript">
<!--
// Private objects are initially displayed (because if
// javascript is turned off then we want them to be
// visible); but by default, we want to hide them. So hide
// them unless we have a cookie that says to show them.
checkCookie();
// -->
</script>
</body>
</html>
|