summaryrefslogblamecommitdiff
path: root/site/docs/0.9.0/api/pyspark/pyspark.accumulators-module.html
blob: ff170a047edca97aef73483772ab27f2379ba984 (plain) (tree)









































































































































































































































                                                                                                                                                                                            
                                                         


















                                                                
<?xml version="1.0" encoding="ascii"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
          "DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
  <title>pyspark.accumulators</title>
  <link rel="stylesheet" href="epydoc.css" type="text/css" />
  <script type="text/javascript" src="epydoc.js"></script>
</head>

<body bgcolor="white" text="black" link="blue" vlink="#204080"
      alink="#204080">
<!-- ==================== NAVIGATION BAR ==================== -->
<table class="navbar" border="0" width="100%" cellpadding="0"
       bgcolor="#a0c0ff" cellspacing="0">
  <tr valign="middle">
  <!-- Home link -->
      <th>&nbsp;&nbsp;&nbsp;<a
        href="pyspark-module.html">Home</a>&nbsp;&nbsp;&nbsp;</th>

  <!-- Tree link -->
      <th>&nbsp;&nbsp;&nbsp;<a
        href="module-tree.html">Trees</a>&nbsp;&nbsp;&nbsp;</th>

  <!-- Index link -->
      <th>&nbsp;&nbsp;&nbsp;<a
        href="identifier-index.html">Indices</a>&nbsp;&nbsp;&nbsp;</th>

  <!-- Help link -->
      <th>&nbsp;&nbsp;&nbsp;<a
        href="help.html">Help</a>&nbsp;&nbsp;&nbsp;</th>

  <!-- Project homepage -->
      <th class="navbar" align="right" width="100%">
        <table border="0" cellpadding="0" cellspacing="0">
          <tr><th class="navbar" align="center"
            ><a class="navbar" target="_top" href="http://spark-project.org">PySpark</a></th>
          </tr></table></th>
  </tr>
</table>
<table width="100%" cellpadding="0" cellspacing="0">
  <tr valign="top">
    <td width="100%">
      <span class="breadcrumbs">
        <a href="pyspark-module.html">Package&nbsp;pyspark</a> ::
        Module&nbsp;accumulators
      </span>
    </td>
    <td>
      <table cellpadding="0" cellspacing="0">
        <!-- hide/show private -->
        <tr><td align="right"><span class="options"
            >[<a href="frames.html" target="_top">frames</a
            >&nbsp;|&nbsp;<a href="pyspark.accumulators-module.html"
            target="_top">no&nbsp;frames</a>]</span></td></tr>
      </table>
    </td>
  </tr>
</table>
<!-- ==================== MODULE DESCRIPTION ==================== -->
<h1 class="epydoc">Module accumulators</h1><p class="nomargin-top"><span class="codelink"><a href="pyspark.accumulators-pysrc.html">source&nbsp;code</a></span></p>
<pre class="py-doctest">
<span class="py-prompt">&gt;&gt;&gt; </span><span class="py-keyword">from</span> pyspark.context <span class="py-keyword">import</span> SparkContext
<span class="py-prompt">&gt;&gt;&gt; </span>sc = SparkContext(<span class="py-string">'local'</span>, <span class="py-string">'test'</span>)
<span class="py-prompt">&gt;&gt;&gt; </span>a = sc.accumulator(1)
<span class="py-prompt">&gt;&gt;&gt; </span>a.value
<span class="py-output">1</span>
<span class="py-output"></span><span class="py-prompt">&gt;&gt;&gt; </span>a.value = 2
<span class="py-prompt">&gt;&gt;&gt; </span>a.value
<span class="py-output">2</span>
<span class="py-output"></span><span class="py-prompt">&gt;&gt;&gt; </span>a += 5
<span class="py-prompt">&gt;&gt;&gt; </span>a.value
<span class="py-output">7</span></pre>
<pre class="py-doctest">
<span class="py-prompt">&gt;&gt;&gt; </span>sc.accumulator(1.0).value
<span class="py-output">1.0</span></pre>
<pre class="py-doctest">
<span class="py-prompt">&gt;&gt;&gt; </span>sc.accumulator(1j).value
<span class="py-output">1j</span></pre>
<pre class="py-doctest">
<span class="py-prompt">&gt;&gt;&gt; </span>rdd = sc.parallelize([1,2,3])
<span class="py-prompt">&gt;&gt;&gt; </span><span class="py-keyword">def</span> <span class="py-defname">f</span>(x):
<span class="py-more">... </span>    <span class="py-keyword">global</span> a
<span class="py-more">... </span>    a += x
<span class="py-prompt">&gt;&gt;&gt; </span>rdd.foreach(f)
<span class="py-prompt">&gt;&gt;&gt; </span>a.value
<span class="py-output">13</span></pre>
<pre class="py-doctest">
<span class="py-prompt">&gt;&gt;&gt; </span>b = sc.accumulator(0)
<span class="py-prompt">&gt;&gt;&gt; </span><span class="py-keyword">def</span> <span class="py-defname">g</span>(x):
<span class="py-more">... </span>    b.add(x)
<span class="py-prompt">&gt;&gt;&gt; </span>rdd.foreach(g)
<span class="py-prompt">&gt;&gt;&gt; </span>b.value
<span class="py-output">6</span></pre>
<pre class="py-doctest">
<span class="py-prompt">&gt;&gt;&gt; </span><span class="py-keyword">from</span> pyspark.accumulators <span class="py-keyword">import</span> AccumulatorParam
<span class="py-prompt">&gt;&gt;&gt; </span><span class="py-keyword">class</span> <span class="py-defname">VectorAccumulatorParam</span>(AccumulatorParam):
<span class="py-more">... </span>    <span class="py-keyword">def</span> <span class="py-defname">zero</span>(self, value):
<span class="py-more">... </span>        return [0.0] * len(value)
<span class="py-more">... </span>    <span class="py-keyword">def</span> <span class="py-defname">addInPlace</span>(self, val1, val2):
<span class="py-more">... </span>        <span class="py-keyword">for</span> i <span class="py-keyword">in</span> xrange(len(val1)):
<span class="py-more">... </span>             val1[i] += val2[i]
<span class="py-more">... </span>        return val1
<span class="py-prompt">&gt;&gt;&gt; </span>va = sc.accumulator([1.0, 2.0, 3.0], VectorAccumulatorParam())
<span class="py-prompt">&gt;&gt;&gt; </span>va.value
<span class="py-output">[1.0, 2.0, 3.0]</span>
<span class="py-output"></span><span class="py-prompt">&gt;&gt;&gt; </span><span class="py-keyword">def</span> <span class="py-defname">g</span>(x):
<span class="py-more">... </span>    <span class="py-keyword">global</span> va
<span class="py-more">... </span>    va += [x] * 3
<span class="py-prompt">&gt;&gt;&gt; </span>rdd.foreach(g)
<span class="py-prompt">&gt;&gt;&gt; </span>va.value
<span class="py-output">[7.0, 8.0, 9.0]</span></pre>
<pre class="py-doctest">
<span class="py-prompt">&gt;&gt;&gt; </span>rdd.map(<span class="py-keyword">lambda</span> x: a.value).collect() <span class="py-comment"># doctest: +IGNORE_EXCEPTION_DETAIL</span>
<span class="py-except">Traceback (most recent call last):</span>
<span class="py-except">    ...</span>
<span class="py-except">Py4JJavaError:...</span></pre>
<pre class="py-doctest">
<span class="py-prompt">&gt;&gt;&gt; </span><span class="py-keyword">def</span> <span class="py-defname">h</span>(x):
<span class="py-more">... </span>    <span class="py-keyword">global</span> a
<span class="py-more">... </span>    a.value = 7
<span class="py-prompt">&gt;&gt;&gt; </span>rdd.foreach(h) <span class="py-comment"># doctest: +IGNORE_EXCEPTION_DETAIL</span>
<span class="py-except">Traceback (most recent call last):</span>
<span class="py-except">    ...</span>
<span class="py-except">Py4JJavaError:...</span></pre>
<pre class="py-doctest">
<span class="py-prompt">&gt;&gt;&gt; </span>sc.accumulator([1.0, 2.0, 3.0]) <span class="py-comment"># doctest: +IGNORE_EXCEPTION_DETAIL</span>
<span class="py-except">Traceback (most recent call last):</span>
<span class="py-except">    ...</span>
<span class="py-except">Exception:...</span></pre>

<!-- ==================== CLASSES ==================== -->
<a name="section-Classes"></a>
<table class="summary" border="1" cellpadding="3"
       cellspacing="0" width="100%" bgcolor="white">
<tr bgcolor="#70b0f0" class="table-header">
  <td align="left" colspan="2" class="table-header">
    <span class="table-header">Classes</span></td>
</tr>
<tr>
    <td width="15%" align="right" valign="top" class="summary">
      <span class="summary-type">&nbsp;</span>
    </td><td class="summary">
        <a href="pyspark.accumulators.Accumulator-class.html" class="summary-name">Accumulator</a><br />
      A shared variable that can be accumulated, i.e., has a commutative 
        and associative &quot;add&quot; operation.
    </td>
  </tr>
<tr>
    <td width="15%" align="right" valign="top" class="summary">
      <span class="summary-type">&nbsp;</span>
    </td><td class="summary">
        <a href="pyspark.accumulators.AccumulatorParam-class.html" class="summary-name">AccumulatorParam</a><br />
      Helper object that defines how to accumulate values of a given 
        type.
    </td>
  </tr>
<tr>
    <td width="15%" align="right" valign="top" class="summary">
      <span class="summary-type">&nbsp;</span>
    </td><td class="summary">
        <a href="pyspark.accumulators.AddingAccumulatorParam-class.html" class="summary-name">AddingAccumulatorParam</a><br />
      An AccumulatorParam that uses the + operator to add values.
    </td>
  </tr>
</table>
<!-- ==================== VARIABLES ==================== -->
<a name="section-Variables"></a>
<table class="summary" border="1" cellpadding="3"
       cellspacing="0" width="100%" bgcolor="white">
<tr bgcolor="#70b0f0" class="table-header">
  <td align="left" colspan="2" class="table-header">
    <span class="table-header">Variables</span></td>
</tr>
<tr>
    <td width="15%" align="right" valign="top" class="summary">
      <span class="summary-type">&nbsp;</span>
    </td><td class="summary">
        <a name="pickleSer"></a><span class="summary-name">pickleSer</span> = <code title="PickleSerializer()">PickleSerializer()</code>
    </td>
  </tr>
<tr>
    <td width="15%" align="right" valign="top" class="summary">
      <span class="summary-type">&nbsp;</span>
    </td><td class="summary">
        <a name="INT_ACCUMULATOR_PARAM"></a><span class="summary-name">INT_ACCUMULATOR_PARAM</span> = <code title="AddingAccumulatorParam(0)">AddingAccumulatorParam(0)</code>
    </td>
  </tr>
<tr>
    <td width="15%" align="right" valign="top" class="summary">
      <span class="summary-type">&nbsp;</span>
    </td><td class="summary">
        <a name="FLOAT_ACCUMULATOR_PARAM"></a><span class="summary-name">FLOAT_ACCUMULATOR_PARAM</span> = <code title="AddingAccumulatorParam(0.0)">AddingAccumulatorParam(0.0)</code>
    </td>
  </tr>
<tr>
    <td width="15%" align="right" valign="top" class="summary">
      <span class="summary-type">&nbsp;</span>
    </td><td class="summary">
        <a name="COMPLEX_ACCUMULATOR_PARAM"></a><span class="summary-name">COMPLEX_ACCUMULATOR_PARAM</span> = <code title="AddingAccumulatorParam(0.0j)">AddingAccumulatorParam(0.0j)</code>
    </td>
  </tr>
</table>
<!-- ==================== NAVIGATION BAR ==================== -->
<table class="navbar" border="0" width="100%" cellpadding="0"
       bgcolor="#a0c0ff" cellspacing="0">
  <tr valign="middle">
  <!-- Home link -->
      <th>&nbsp;&nbsp;&nbsp;<a
        href="pyspark-module.html">Home</a>&nbsp;&nbsp;&nbsp;</th>

  <!-- Tree link -->
      <th>&nbsp;&nbsp;&nbsp;<a
        href="module-tree.html">Trees</a>&nbsp;&nbsp;&nbsp;</th>

  <!-- Index link -->
      <th>&nbsp;&nbsp;&nbsp;<a
        href="identifier-index.html">Indices</a>&nbsp;&nbsp;&nbsp;</th>

  <!-- Help link -->
      <th>&nbsp;&nbsp;&nbsp;<a
        href="help.html">Help</a>&nbsp;&nbsp;&nbsp;</th>

  <!-- Project homepage -->
      <th class="navbar" align="right" width="100%">
        <table border="0" cellpadding="0" cellspacing="0">
          <tr><th class="navbar" align="center"
            ><a class="navbar" target="_top" href="http://spark-project.org">PySpark</a></th>
          </tr></table></th>
  </tr>
</table>
<table border="0" cellpadding="0" cellspacing="0" width="100%">
  <tr>
    <td align="left" class="footer">
    Generated by Epydoc 3.0.1 on Sun Mar  2 16:35:00 2014
    </td>
    <td align="right" class="footer">
      <a target="mainFrame" href="http://epydoc.sourceforge.net"
        >http://epydoc.sourceforge.net</a>
    </td>
  </tr>
</table>

<script type="text/javascript">
  <!--
  // Private objects are initially displayed (because if
  // javascript is turned off then we want them to be
  // visible); but by default, we want to hide them.  So hide
  // them unless we have a cookie that says to show them.
  checkCookie();
  // -->
</script>
</body>
</html>