1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
|
<?xml version="1.0" encoding="ascii"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<title>pyspark.accumulators</title>
<link rel="stylesheet" href="epydoc.css" type="text/css" />
<script type="text/javascript" src="epydoc.js"></script>
</head>
<body bgcolor="white" text="black" link="blue" vlink="#204080"
alink="#204080">
<!-- ==================== NAVIGATION BAR ==================== -->
<!-- Navigation strip: four links into the generated documentation
     (Home, Trees, Indices, Help) followed by a right-aligned cell that
     holds the project homepage link, which opens in the top-level frame. -->
<table class="navbar" border="0" width="100%" cellpadding="0" bgcolor="#a0c0ff" cellspacing="0">
  <tr valign="middle">
    <!-- Home link -->
    <th> <a href="pyspark-module.html">Home</a> </th>
    <!-- Tree link -->
    <th> <a href="module-tree.html">Trees</a> </th>
    <!-- Index link -->
    <th> <a href="identifier-index.html">Indices</a> </th>
    <!-- Help link -->
    <th> <a href="help.html">Help</a> </th>
    <!-- Project homepage -->
    <th class="navbar" align="right" width="100%">
      <table border="0" cellpadding="0" cellspacing="0">
        <tr>
          <th class="navbar" align="center"><a class="navbar" target="_top" href="http://spark-project.org">PySpark</a></th>
        </tr>
      </table>
    </th>
  </tr>
</table>
<!-- Page header: a two-cell layout row with the breadcrumb trail in the
     full-width first cell and the frames / no-frames switch in the second,
     followed by the page heading that links back to the module page. -->
<table width="100%" cellpadding="0" cellspacing="0">
  <tr valign="top">
    <td width="100%">
      <span class="breadcrumbs">
        <a href="pyspark-module.html">Package pyspark</a> ::
        Module accumulators
      </span>
    </td>
    <td>
      <table cellpadding="0" cellspacing="0">
        <!-- hide/show private -->
        <tr>
          <td align="right"><span class="options">[<a href="frames.html" target="_top">frames</a>] | <a href="pyspark.accumulators-pysrc.html" target="_top">no frames</a>]</span></td>
        </tr>
      </table>
    </td>
  </tr>
</table>
<h1 class="epydoc">Source Code for <a href="pyspark.accumulators-module.html">Module pyspark.accumulators</a></h1>
<pre class="py-src">
<a name="L1"></a><tt class="py-lineno"> 1</tt> <tt class="py-line"><tt class="py-comment">#</tt> </tt>
<a name="L2"></a><tt class="py-lineno"> 2</tt> <tt class="py-line"><tt class="py-comment"># Licensed to the Apache Software Foundation (ASF) under one or more</tt> </tt>
<a name="L3"></a><tt class="py-lineno"> 3</tt> <tt class="py-line"><tt class="py-comment"># contributor license agreements. See the NOTICE file distributed with</tt> </tt>
<a name="L4"></a><tt class="py-lineno"> 4</tt> <tt class="py-line"><tt class="py-comment"># this work for additional information regarding copyright ownership.</tt> </tt>
<a name="L5"></a><tt class="py-lineno"> 5</tt> <tt class="py-line"><tt class="py-comment"># The ASF licenses this file to You under the Apache License, Version 2.0</tt> </tt>
<a name="L6"></a><tt class="py-lineno"> 6</tt> <tt class="py-line"><tt class="py-comment"># (the "License"); you may not use this file except in compliance with</tt> </tt>
<a name="L7"></a><tt class="py-lineno"> 7</tt> <tt class="py-line"><tt class="py-comment"># the License. You may obtain a copy of the License at</tt> </tt>
<a name="L8"></a><tt class="py-lineno"> 8</tt> <tt class="py-line"><tt class="py-comment">#</tt> </tt>
<a name="L9"></a><tt class="py-lineno"> 9</tt> <tt class="py-line"><tt class="py-comment"># http://www.apache.org/licenses/LICENSE-2.0</tt> </tt>
<a name="L10"></a><tt class="py-lineno"> 10</tt> <tt class="py-line"><tt class="py-comment">#</tt> </tt>
<a name="L11"></a><tt class="py-lineno"> 11</tt> <tt class="py-line"><tt class="py-comment"># Unless required by applicable law or agreed to in writing, software</tt> </tt>
<a name="L12"></a><tt class="py-lineno"> 12</tt> <tt class="py-line"><tt class="py-comment"># distributed under the License is distributed on an "AS IS" BASIS,</tt> </tt>
<a name="L13"></a><tt class="py-lineno"> 13</tt> <tt class="py-line"><tt class="py-comment"># WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.</tt> </tt>
<a name="L14"></a><tt class="py-lineno"> 14</tt> <tt class="py-line"><tt class="py-comment"># See the License for the specific language governing permissions and</tt> </tt>
<a name="L15"></a><tt class="py-lineno"> 15</tt> <tt class="py-line"><tt class="py-comment"># limitations under the License.</tt> </tt>
<a name="L16"></a><tt class="py-lineno"> 16</tt> <tt class="py-line"><tt class="py-comment">#</tt> </tt>
<a name="L17"></a><tt class="py-lineno"> 17</tt> <tt class="py-line"> </tt>
<a name="L18"></a><tt class="py-lineno"> 18</tt> <tt class="py-line"><tt class="py-docstring">"""</tt> </tt>
<a name="L19"></a><tt class="py-lineno"> 19</tt> <tt class="py-line"><tt class="py-docstring">>>> from pyspark.context import SparkContext</tt> </tt>
<a name="L20"></a><tt class="py-lineno"> 20</tt> <tt class="py-line"><tt class="py-docstring">>>> sc = SparkContext('local', 'test')</tt> </tt>
<a name="L21"></a><tt class="py-lineno"> 21</tt> <tt class="py-line"><tt class="py-docstring">>>> a = sc.accumulator(1)</tt> </tt>
<a name="L22"></a><tt class="py-lineno"> 22</tt> <tt class="py-line"><tt class="py-docstring">>>> a.value</tt> </tt>
<a name="L23"></a><tt class="py-lineno"> 23</tt> <tt class="py-line"><tt class="py-docstring">1</tt> </tt>
<a name="L24"></a><tt class="py-lineno"> 24</tt> <tt class="py-line"><tt class="py-docstring">>>> a.value = 2</tt> </tt>
<a name="L25"></a><tt class="py-lineno"> 25</tt> <tt class="py-line"><tt class="py-docstring">>>> a.value</tt> </tt>
<a name="L26"></a><tt class="py-lineno"> 26</tt> <tt class="py-line"><tt class="py-docstring">2</tt> </tt>
<a name="L27"></a><tt class="py-lineno"> 27</tt> <tt class="py-line"><tt class="py-docstring">>>> a += 5</tt> </tt>
<a name="L28"></a><tt class="py-lineno"> 28</tt> <tt class="py-line"><tt class="py-docstring">>>> a.value</tt> </tt>
<a name="L29"></a><tt class="py-lineno"> 29</tt> <tt class="py-line"><tt class="py-docstring">7</tt> </tt>
<a name="L30"></a><tt class="py-lineno"> 30</tt> <tt class="py-line"><tt class="py-docstring"></tt> </tt>
<a name="L31"></a><tt class="py-lineno"> 31</tt> <tt class="py-line"><tt class="py-docstring">>>> sc.accumulator(1.0).value</tt> </tt>
<a name="L32"></a><tt class="py-lineno"> 32</tt> <tt class="py-line"><tt class="py-docstring">1.0</tt> </tt>
<a name="L33"></a><tt class="py-lineno"> 33</tt> <tt class="py-line"><tt class="py-docstring"></tt> </tt>
<a name="L34"></a><tt class="py-lineno"> 34</tt> <tt class="py-line"><tt class="py-docstring">>>> sc.accumulator(1j).value</tt> </tt>
<a name="L35"></a><tt class="py-lineno"> 35</tt> <tt class="py-line"><tt class="py-docstring">1j</tt> </tt>
<a name="L36"></a><tt class="py-lineno"> 36</tt> <tt class="py-line"><tt class="py-docstring"></tt> </tt>
<a name="L37"></a><tt class="py-lineno"> 37</tt> <tt class="py-line"><tt class="py-docstring">>>> rdd = sc.parallelize([1,2,3])</tt> </tt>
<a name="L38"></a><tt class="py-lineno"> 38</tt> <tt class="py-line"><tt class="py-docstring">>>> def f(x):</tt> </tt>
<a name="L39"></a><tt class="py-lineno"> 39</tt> <tt class="py-line"><tt class="py-docstring">... global a</tt> </tt>
<a name="L40"></a><tt class="py-lineno"> 40</tt> <tt class="py-line"><tt class="py-docstring">... a += x</tt> </tt>
<a name="L41"></a><tt class="py-lineno"> 41</tt> <tt class="py-line"><tt class="py-docstring">>>> rdd.foreach(f)</tt> </tt>
<a name="L42"></a><tt class="py-lineno"> 42</tt> <tt class="py-line"><tt class="py-docstring">>>> a.value</tt> </tt>
<a name="L43"></a><tt class="py-lineno"> 43</tt> <tt class="py-line"><tt class="py-docstring">13</tt> </tt>
<a name="L44"></a><tt class="py-lineno"> 44</tt> <tt class="py-line"><tt class="py-docstring"></tt> </tt>
<a name="L45"></a><tt class="py-lineno"> 45</tt> <tt class="py-line"><tt class="py-docstring">>>> b = sc.accumulator(0)</tt> </tt>
<a name="L46"></a><tt class="py-lineno"> 46</tt> <tt class="py-line"><tt class="py-docstring">>>> def g(x):</tt> </tt>
<a name="L47"></a><tt class="py-lineno"> 47</tt> <tt class="py-line"><tt class="py-docstring">... b.add(x)</tt> </tt>
<a name="L48"></a><tt class="py-lineno"> 48</tt> <tt class="py-line"><tt class="py-docstring">>>> rdd.foreach(g)</tt> </tt>
<a name="L49"></a><tt class="py-lineno"> 49</tt> <tt class="py-line"><tt class="py-docstring">>>> b.value</tt> </tt>
<a name="L50"></a><tt class="py-lineno"> 50</tt> <tt class="py-line"><tt class="py-docstring">6</tt> </tt>
<a name="L51"></a><tt class="py-lineno"> 51</tt> <tt class="py-line"><tt class="py-docstring"></tt> </tt>
<a name="L52"></a><tt class="py-lineno"> 52</tt> <tt class="py-line"><tt class="py-docstring">>>> from pyspark.accumulators import AccumulatorParam</tt> </tt>
<a name="L53"></a><tt class="py-lineno"> 53</tt> <tt class="py-line"><tt class="py-docstring">>>> class VectorAccumulatorParam(AccumulatorParam):</tt> </tt>
<a name="L54"></a><tt class="py-lineno"> 54</tt> <tt class="py-line"><tt class="py-docstring">... def zero(self, value):</tt> </tt>
<a name="L55"></a><tt class="py-lineno"> 55</tt> <tt class="py-line"><tt class="py-docstring">... return [0.0] * len(value)</tt> </tt>
<a name="L56"></a><tt class="py-lineno"> 56</tt> <tt class="py-line"><tt class="py-docstring">... def addInPlace(self, val1, val2):</tt> </tt>
<a name="L57"></a><tt class="py-lineno"> 57</tt> <tt class="py-line"><tt class="py-docstring">... for i in xrange(len(val1)):</tt> </tt>
<a name="L58"></a><tt class="py-lineno"> 58</tt> <tt class="py-line"><tt class="py-docstring">... val1[i] += val2[i]</tt> </tt>
<a name="L59"></a><tt class="py-lineno"> 59</tt> <tt class="py-line"><tt class="py-docstring">... return val1</tt> </tt>
<a name="L60"></a><tt class="py-lineno"> 60</tt> <tt class="py-line"><tt class="py-docstring">>>> va = sc.accumulator([1.0, 2.0, 3.0], VectorAccumulatorParam())</tt> </tt>
<a name="L61"></a><tt class="py-lineno"> 61</tt> <tt class="py-line"><tt class="py-docstring">>>> va.value</tt> </tt>
<a name="L62"></a><tt class="py-lineno"> 62</tt> <tt class="py-line"><tt class="py-docstring">[1.0, 2.0, 3.0]</tt> </tt>
<a name="L63"></a><tt class="py-lineno"> 63</tt> <tt class="py-line"><tt class="py-docstring">>>> def g(x):</tt> </tt>
<a name="L64"></a><tt class="py-lineno"> 64</tt> <tt class="py-line"><tt class="py-docstring">... global va</tt> </tt>
<a name="L65"></a><tt class="py-lineno"> 65</tt> <tt class="py-line"><tt class="py-docstring">... va += [x] * 3</tt> </tt>
<a name="L66"></a><tt class="py-lineno"> 66</tt> <tt class="py-line"><tt class="py-docstring">>>> rdd.foreach(g)</tt> </tt>
<a name="L67"></a><tt class="py-lineno"> 67</tt> <tt class="py-line"><tt class="py-docstring">>>> va.value</tt> </tt>
<a name="L68"></a><tt class="py-lineno"> 68</tt> <tt class="py-line"><tt class="py-docstring">[7.0, 8.0, 9.0]</tt> </tt>
<a name="L69"></a><tt class="py-lineno"> 69</tt> <tt class="py-line"><tt class="py-docstring"></tt> </tt>
<a name="L70"></a><tt class="py-lineno"> 70</tt> <tt class="py-line"><tt class="py-docstring">>>> rdd.map(lambda x: a.value).collect() # doctest: +IGNORE_EXCEPTION_DETAIL</tt> </tt>
<a name="L71"></a><tt class="py-lineno"> 71</tt> <tt class="py-line"><tt class="py-docstring">Traceback (most recent call last):</tt> </tt>
<a name="L72"></a><tt class="py-lineno"> 72</tt> <tt class="py-line"><tt class="py-docstring"> ...</tt> </tt>
<a name="L73"></a><tt class="py-lineno"> 73</tt> <tt class="py-line"><tt class="py-docstring">Py4JJavaError:...</tt> </tt>
<a name="L74"></a><tt class="py-lineno"> 74</tt> <tt class="py-line"><tt class="py-docstring"></tt> </tt>
<a name="L75"></a><tt class="py-lineno"> 75</tt> <tt class="py-line"><tt class="py-docstring">>>> def h(x):</tt> </tt>
<a name="L76"></a><tt class="py-lineno"> 76</tt> <tt class="py-line"><tt class="py-docstring">... global a</tt> </tt>
<a name="L77"></a><tt class="py-lineno"> 77</tt> <tt class="py-line"><tt class="py-docstring">... a.value = 7</tt> </tt>
<a name="L78"></a><tt class="py-lineno"> 78</tt> <tt class="py-line"><tt class="py-docstring">>>> rdd.foreach(h) # doctest: +IGNORE_EXCEPTION_DETAIL</tt> </tt>
<a name="L79"></a><tt class="py-lineno"> 79</tt> <tt class="py-line"><tt class="py-docstring">Traceback (most recent call last):</tt> </tt>
<a name="L80"></a><tt class="py-lineno"> 80</tt> <tt class="py-line"><tt class="py-docstring"> ...</tt> </tt>
<a name="L81"></a><tt class="py-lineno"> 81</tt> <tt class="py-line"><tt class="py-docstring">Py4JJavaError:...</tt> </tt>
<a name="L82"></a><tt class="py-lineno"> 82</tt> <tt class="py-line"><tt class="py-docstring"></tt> </tt>
<a name="L83"></a><tt class="py-lineno"> 83</tt> <tt class="py-line"><tt class="py-docstring">>>> sc.accumulator([1.0, 2.0, 3.0]) # doctest: +IGNORE_EXCEPTION_DETAIL</tt> </tt>
<a name="L84"></a><tt class="py-lineno"> 84</tt> <tt class="py-line"><tt class="py-docstring">Traceback (most recent call last):</tt> </tt>
<a name="L85"></a><tt class="py-lineno"> 85</tt> <tt class="py-line"><tt class="py-docstring"> ...</tt> </tt>
<a name="L86"></a><tt class="py-lineno"> 86</tt> <tt class="py-line"><tt class="py-docstring">Exception:...</tt> </tt>
<a name="L87"></a><tt class="py-lineno"> 87</tt> <tt class="py-line"><tt class="py-docstring">"""</tt> </tt>
<a name="L88"></a><tt class="py-lineno"> 88</tt> <tt class="py-line"> </tt>
<a name="L89"></a><tt class="py-lineno"> 89</tt> <tt class="py-line"><tt class="py-keyword">import</tt> <tt class="py-name">struct</tt> </tt>
<a name="L90"></a><tt class="py-lineno"> 90</tt> <tt class="py-line"><tt class="py-keyword">import</tt> <tt class="py-name">SocketServer</tt> </tt>
<a name="L91"></a><tt class="py-lineno"> 91</tt> <tt class="py-line"><tt class="py-keyword">import</tt> <tt class="py-name">threading</tt> </tt>
<a name="L92"></a><tt class="py-lineno"> 92</tt> <tt class="py-line"><tt class="py-keyword">from</tt> <tt id="link-0" class="py-name" targets="Package pyspark=pyspark-module.html"><a title="pyspark" class="py-name" href="#" onclick="return doclink('link-0', 'pyspark', 'link-0');">pyspark</a></tt><tt class="py-op">.</tt><tt class="py-name">cloudpickle</tt> <tt class="py-keyword">import</tt> <tt class="py-name">CloudPickler</tt> </tt>
<a name="L93"></a><tt class="py-lineno"> 93</tt> <tt class="py-line"><tt class="py-keyword">from</tt> <tt id="link-1" class="py-name"><a title="pyspark" class="py-name" href="#" onclick="return doclink('link-1', 'pyspark', 'link-0');">pyspark</a></tt><tt class="py-op">.</tt><tt id="link-2" class="py-name" targets="Module pyspark.serializers=pyspark.serializers-module.html"><a title="pyspark.serializers" class="py-name" href="#" onclick="return doclink('link-2', 'serializers', 'link-2');">serializers</a></tt> <tt class="py-keyword">import</tt> <tt class="py-name">read_int</tt><tt class="py-op">,</tt> <tt id="link-3" class="py-name" targets="Class pyspark.serializers.PickleSerializer=pyspark.serializers.PickleSerializer-class.html"><a title="pyspark.serializers.PickleSerializer" class="py-name" href="#" onclick="return doclink('link-3', 'PickleSerializer', 'link-3');">PickleSerializer</a></tt> </tt>
<a name="L94"></a><tt class="py-lineno"> 94</tt> <tt class="py-line"> </tt>
<a name="L95"></a><tt class="py-lineno"> 95</tt> <tt class="py-line"> </tt>
<a name="L96"></a><tt class="py-lineno"> 96</tt> <tt class="py-line"><tt id="link-4" class="py-name" targets="Variable pyspark.accumulators.pickleSer=pyspark.accumulators-module.html#pickleSer"><a title="pyspark.accumulators.pickleSer" class="py-name" href="#" onclick="return doclink('link-4', 'pickleSer', 'link-4');">pickleSer</a></tt> <tt class="py-op">=</tt> <tt id="link-5" class="py-name"><a title="pyspark.serializers.PickleSerializer" class="py-name" href="#" onclick="return doclink('link-5', 'PickleSerializer', 'link-3');">PickleSerializer</a></tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
<a name="L97"></a><tt class="py-lineno"> 97</tt> <tt class="py-line"> </tt>
<a name="L98"></a><tt class="py-lineno"> 98</tt> <tt class="py-line"><tt class="py-comment"># Holds accumulators registered on the current machine, keyed by ID. This is then used to send</tt> </tt>
<a name="L99"></a><tt class="py-lineno"> 99</tt> <tt class="py-line"><tt class="py-comment"># the local accumulator updates back to the driver program at the end of a task.</tt> </tt>
<a name="L100"></a><tt class="py-lineno">100</tt> <tt class="py-line"><tt id="link-6" class="py-name" targets="Variable pyspark.accumulators._accumulatorRegistry=pyspark.accumulators-module.html#_accumulatorRegistry"><a title="pyspark.accumulators._accumulatorRegistry" class="py-name" href="#" onclick="return doclink('link-6', '_accumulatorRegistry', 'link-6');">_accumulatorRegistry</a></tt> <tt class="py-op">=</tt> <tt class="py-op">{</tt><tt class="py-op">}</tt> </tt>
<!-- Collapsible listing region for _deserialize_accumulator (listing lines 101-108). The "-def" div wraps the lines up to the def header and its toggle link, the hidden "-collapsed" div is an empty placeholder carrying pad/indent attributes, and the "-expanded" div wraps the function body. toggle() is not defined in this file; presumably it comes from epydoc.js (confirm there). This comment stays on the anchor's line because the surrounding pre element preserves line breaks. --><a name="_deserialize_accumulator"></a><div id="_deserialize_accumulator-def"><a name="L101"></a><tt class="py-lineno">101</tt> <tt class="py-line"> </tt>
<a name="L102"></a><tt class="py-lineno">102</tt> <tt class="py-line"> </tt>
<a name="L103"></a><tt class="py-lineno">103</tt> <a class="py-toggle" href="#" id="_deserialize_accumulator-toggle" onclick="return toggle('_deserialize_accumulator');">-</a><tt class="py-line"><tt class="py-keyword">def</tt> <a class="py-def-name" href="pyspark.accumulators-module.html#_deserialize_accumulator">_deserialize_accumulator</a><tt class="py-op">(</tt><tt class="py-param">aid</tt><tt class="py-op">,</tt> <tt class="py-param">zero_value</tt><tt class="py-op">,</tt> <tt class="py-param">accum_param</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
</div><div id="_deserialize_accumulator-collapsed" style="display:none;" pad="+++" indent="++++"></div><div id="_deserialize_accumulator-expanded"><a name="L104"></a><tt class="py-lineno">104</tt> <tt class="py-line"> <tt class="py-keyword">from</tt> <tt id="link-7" class="py-name"><a title="pyspark" class="py-name" href="#" onclick="return doclink('link-7', 'pyspark', 'link-0');">pyspark</a></tt><tt class="py-op">.</tt><tt id="link-8" class="py-name" targets="Module pyspark.accumulators=pyspark.accumulators-module.html"><a title="pyspark.accumulators" class="py-name" href="#" onclick="return doclink('link-8', 'accumulators', 'link-8');">accumulators</a></tt> <tt class="py-keyword">import</tt> <tt id="link-9" class="py-name"><a title="pyspark.accumulators._accumulatorRegistry" class="py-name" href="#" onclick="return doclink('link-9', '_accumulatorRegistry', 'link-6');">_accumulatorRegistry</a></tt> </tt>
<a name="L105"></a><tt class="py-lineno">105</tt> <tt class="py-line"> <tt class="py-name">accum</tt> <tt class="py-op">=</tt> <tt id="link-10" class="py-name" targets="Class pyspark.accumulators.Accumulator=pyspark.accumulators.Accumulator-class.html"><a title="pyspark.accumulators.Accumulator" class="py-name" href="#" onclick="return doclink('link-10', 'Accumulator', 'link-10');">Accumulator</a></tt><tt class="py-op">(</tt><tt class="py-name">aid</tt><tt class="py-op">,</tt> <tt class="py-name">zero_value</tt><tt class="py-op">,</tt> <tt class="py-name">accum_param</tt><tt class="py-op">)</tt> </tt>
<a name="L106"></a><tt class="py-lineno">106</tt> <tt class="py-line"> <tt class="py-name">accum</tt><tt class="py-op">.</tt><tt class="py-name">_deserialized</tt> <tt class="py-op">=</tt> <tt class="py-name">True</tt> </tt>
<a name="L107"></a><tt class="py-lineno">107</tt> <tt class="py-line"> <tt id="link-11" class="py-name"><a title="pyspark.accumulators._accumulatorRegistry" class="py-name" href="#" onclick="return doclink('link-11', '_accumulatorRegistry', 'link-6');">_accumulatorRegistry</a></tt><tt class="py-op">[</tt><tt class="py-name">aid</tt><tt class="py-op">]</tt> <tt class="py-op">=</tt> <tt class="py-name">accum</tt> </tt>
<a name="L108"></a><tt class="py-lineno">108</tt> <tt class="py-line"> <tt class="py-keyword">return</tt> <tt class="py-name">accum</tt> </tt>
</div><a name="L109"></a><tt class="py-lineno">109</tt> <tt class="py-line"> </tt>
<a name="Accumulator"></a><div id="Accumulator-def"><a name="L110"></a><tt class="py-lineno">110</tt> <tt class="py-line"> </tt>
<a name="L111"></a><tt class="py-lineno">111</tt> <a class="py-toggle" href="#" id="Accumulator-toggle" onclick="return toggle('Accumulator');">-</a><tt class="py-line"><tt class="py-keyword">class</tt> <a class="py-def-name" href="pyspark.accumulators.Accumulator-class.html">Accumulator</a><tt class="py-op">(</tt><tt class="py-base-class">object</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
</div><div id="Accumulator-collapsed" style="display:none;" pad="+++" indent="++++"></div><div id="Accumulator-expanded"><a name="L112"></a><tt class="py-lineno">112</tt> <tt class="py-line"> <tt class="py-docstring">"""</tt> </tt>
<a name="L113"></a><tt class="py-lineno">113</tt> <tt class="py-line"><tt class="py-docstring"> A shared variable that can be accumulated, i.e., has a commutative and associative "add"</tt> </tt>
<a name="L114"></a><tt class="py-lineno">114</tt> <tt class="py-line"><tt class="py-docstring"> operation. Worker tasks on a Spark cluster can add values to an Accumulator with the C{+=}</tt> </tt>
<a name="L115"></a><tt class="py-lineno">115</tt> <tt class="py-line"><tt class="py-docstring"> operator, but only the driver program is allowed to access its value, using C{value}.</tt> </tt>
<a name="L116"></a><tt class="py-lineno">116</tt> <tt class="py-line"><tt class="py-docstring"> Updates from the workers get propagated automatically to the driver program.</tt> </tt>
<a name="L117"></a><tt class="py-lineno">117</tt> <tt class="py-line"><tt class="py-docstring"></tt> </tt>
<a name="L118"></a><tt class="py-lineno">118</tt> <tt class="py-line"><tt class="py-docstring"> While C{SparkContext} supports accumulators for primitive data types like C{int} and</tt> </tt>
<a name="L119"></a><tt class="py-lineno">119</tt> <tt class="py-line"><tt class="py-docstring"> C{float}, users can also define accumulators for custom types by providing a custom</tt> </tt>
<a name="L120"></a><tt class="py-lineno">120</tt> <tt class="py-line"><tt class="py-docstring"> L{AccumulatorParam} object. Refer to the doctest of this module for an example.</tt> </tt>
<a name="L121"></a><tt class="py-lineno">121</tt> <tt class="py-line"><tt class="py-docstring"> """</tt> </tt>
<a name="L122"></a><tt class="py-lineno">122</tt> <tt class="py-line"> </tt>
<!-- Collapsible listing region for Accumulator.__init__ (listing lines 123-130): the "-def" div holds the def header with its toggle link, the hidden "-collapsed" div is an empty placeholder, and the "-expanded" div holds the method body. The local name "value" on listing line 128 is cross-linked (link-15) to the Accumulator.value method entry. Kept on the anchor's line because the surrounding pre element preserves line breaks. --><a name="Accumulator.__init__"></a><div id="Accumulator.__init__-def"><a name="L123"></a><tt class="py-lineno">123</tt> <a class="py-toggle" href="#" id="Accumulator.__init__-toggle" onclick="return toggle('Accumulator.__init__');">-</a><tt class="py-line"> <tt class="py-keyword">def</tt> <a class="py-def-name" href="pyspark.accumulators.Accumulator-class.html#__init__">__init__</a><tt class="py-op">(</tt><tt class="py-param">self</tt><tt class="py-op">,</tt> <tt class="py-param">aid</tt><tt class="py-op">,</tt> <tt class="py-param">value</tt><tt class="py-op">,</tt> <tt class="py-param">accum_param</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
</div><div id="Accumulator.__init__-collapsed" style="display:none;" pad="+++" indent="++++++++"></div><div id="Accumulator.__init__-expanded"><a name="L124"></a><tt class="py-lineno">124</tt> <tt class="py-line"> <tt class="py-docstring">"""Create a new Accumulator with a given initial value and AccumulatorParam object"""</tt> </tt>
<a name="L125"></a><tt class="py-lineno">125</tt> <tt class="py-line"> <tt class="py-keyword">from</tt> <tt id="link-12" class="py-name"><a title="pyspark" class="py-name" href="#" onclick="return doclink('link-12', 'pyspark', 'link-0');">pyspark</a></tt><tt class="py-op">.</tt><tt id="link-13" class="py-name"><a title="pyspark.accumulators" class="py-name" href="#" onclick="return doclink('link-13', 'accumulators', 'link-8');">accumulators</a></tt> <tt class="py-keyword">import</tt> <tt id="link-14" class="py-name"><a title="pyspark.accumulators._accumulatorRegistry" class="py-name" href="#" onclick="return doclink('link-14', '_accumulatorRegistry', 'link-6');">_accumulatorRegistry</a></tt> </tt>
<a name="L126"></a><tt class="py-lineno">126</tt> <tt class="py-line"> <tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">aid</tt> <tt class="py-op">=</tt> <tt class="py-name">aid</tt> </tt>
<a name="L127"></a><tt class="py-lineno">127</tt> <tt class="py-line"> <tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">accum_param</tt> <tt class="py-op">=</tt> <tt class="py-name">accum_param</tt> </tt>
<a name="L128"></a><tt class="py-lineno">128</tt> <tt class="py-line"> <tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">_value</tt> <tt class="py-op">=</tt> <tt id="link-15" class="py-name" targets="Method pyspark.accumulators.Accumulator.value()=pyspark.accumulators.Accumulator-class.html#value"><a title="pyspark.accumulators.Accumulator.value" class="py-name" href="#" onclick="return doclink('link-15', 'value', 'link-15');">value</a></tt> </tt>
<a name="L129"></a><tt class="py-lineno">129</tt> <tt class="py-line"> <tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">_deserialized</tt> <tt class="py-op">=</tt> <tt class="py-name">False</tt> </tt>
<a name="L130"></a><tt class="py-lineno">130</tt> <tt class="py-line"> <tt id="link-16" class="py-name"><a title="pyspark.accumulators._accumulatorRegistry" class="py-name" href="#" onclick="return doclink('link-16', '_accumulatorRegistry', 'link-6');">_accumulatorRegistry</a></tt><tt class="py-op">[</tt><tt class="py-name">aid</tt><tt class="py-op">]</tt> <tt class="py-op">=</tt> <tt class="py-name">self</tt> </tt>
</div><a name="L131"></a><tt class="py-lineno">131</tt> <tt class="py-line"> </tt>
<!-- Collapsible listing region for Accumulator.__reduce__ (listing lines 132-135): the "-def" div holds the def header with its toggle link, the hidden "-collapsed" div is an empty placeholder, and the "-expanded" div holds the method body. The title attribute of the link-17 anchor spans two physical lines because it lists both zero() targets separated by a line break. Kept on the anchor's line because the surrounding pre element preserves line breaks. --><a name="Accumulator.__reduce__"></a><div id="Accumulator.__reduce__-def"><a name="L132"></a><tt class="py-lineno">132</tt> <a class="py-toggle" href="#" id="Accumulator.__reduce__-toggle" onclick="return toggle('Accumulator.__reduce__');">-</a><tt class="py-line"> <tt class="py-keyword">def</tt> <a class="py-def-name" href="pyspark.accumulators.Accumulator-class.html#__reduce__">__reduce__</a><tt class="py-op">(</tt><tt class="py-param">self</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
</div><div id="Accumulator.__reduce__-collapsed" style="display:none;" pad="+++" indent="++++++++"></div><div id="Accumulator.__reduce__-expanded"><a name="L133"></a><tt class="py-lineno">133</tt> <tt class="py-line"> <tt class="py-docstring">"""Custom serialization; saves the zero value from our AccumulatorParam"""</tt> </tt>
<a name="L134"></a><tt class="py-lineno">134</tt> <tt class="py-line"> <tt class="py-name">param</tt> <tt class="py-op">=</tt> <tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">accum_param</tt> </tt>
<a name="L135"></a><tt class="py-lineno">135</tt> <tt class="py-line"> <tt class="py-keyword">return</tt> <tt class="py-op">(</tt><tt class="py-name">_deserialize_accumulator</tt><tt class="py-op">,</tt> <tt class="py-op">(</tt><tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">aid</tt><tt class="py-op">,</tt> <tt class="py-name">param</tt><tt class="py-op">.</tt><tt id="link-17" class="py-name" targets="Method pyspark.accumulators.AccumulatorParam.zero()=pyspark.accumulators.AccumulatorParam-class.html#zero,Method pyspark.accumulators.AddingAccumulatorParam.zero()=pyspark.accumulators.AddingAccumulatorParam-class.html#zero"><a title="pyspark.accumulators.AccumulatorParam.zero
pyspark.accumulators.AddingAccumulatorParam.zero" class="py-name" href="#" onclick="return doclink('link-17', 'zero', 'link-17');">zero</a></tt><tt class="py-op">(</tt><tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">_value</tt><tt class="py-op">)</tt><tt class="py-op">,</tt> <tt class="py-name">param</tt><tt class="py-op">)</tt><tt class="py-op">)</tt> </tt>
</div><a name="L136"></a><tt class="py-lineno">136</tt> <tt class="py-line"> </tt>
<!-- Collapsible listing region for the Accumulator.value property getter (listing lines 137-142). The decorator line sits outside the "-def" div; the "-def" div holds the def header with its toggle link, the hidden "-collapsed" div is an empty placeholder, and the "-expanded" div holds the method body. Kept on the first line because the surrounding pre element preserves line breaks. --><a name="L137"></a><tt class="py-lineno">137</tt> <tt class="py-line"> <tt class="py-decorator">@</tt><tt class="py-decorator">property</tt> </tt>
<a name="Accumulator.value"></a><div id="Accumulator.value-def"><a name="L138"></a><tt class="py-lineno">138</tt> <a class="py-toggle" href="#" id="Accumulator.value-toggle" onclick="return toggle('Accumulator.value');">-</a><tt class="py-line"> <tt class="py-keyword">def</tt> <a class="py-def-name" href="pyspark.accumulators.Accumulator-class.html#value">value</a><tt class="py-op">(</tt><tt class="py-param">self</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
</div><div id="Accumulator.value-collapsed" style="display:none;" pad="+++" indent="++++++++"></div><div id="Accumulator.value-expanded"><a name="L139"></a><tt class="py-lineno">139</tt> <tt class="py-line"> <tt class="py-docstring">"""Get the accumulator's value; only usable in driver program"""</tt> </tt>
<a name="L140"></a><tt class="py-lineno">140</tt> <tt class="py-line"> <tt class="py-keyword">if</tt> <tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">_deserialized</tt><tt class="py-op">:</tt> </tt>
<a name="L141"></a><tt class="py-lineno">141</tt> <tt class="py-line"> <tt class="py-keyword">raise</tt> <tt class="py-name">Exception</tt><tt class="py-op">(</tt><tt class="py-string">"Accumulator.value cannot be accessed inside tasks"</tt><tt class="py-op">)</tt> </tt>
<a name="L142"></a><tt class="py-lineno">142</tt> <tt class="py-line"> <tt class="py-keyword">return</tt> <tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">_value</tt> </tt>
</div><a name="L143"></a><tt class="py-lineno">143</tt> <tt class="py-line"> </tt>
<!-- Collapsible listing region for the Accumulator.value property setter (listing lines 144-149). Its anchor name, element ids and toggle key carry a ".setter" suffix so they do not collide with the getter region, which already uses "Accumulator.value"; ids must be unique in a document, and the toggle link passes this key to toggle() (presumably resolved by id in epydoc.js; confirm there). The def-name link still targets the class page's #value entry. Kept on the first line because the surrounding pre element preserves line breaks. --><a name="L144"></a><tt class="py-lineno">144</tt> <tt class="py-line"> <tt class="py-decorator">@</tt><tt class="py-decorator">value</tt><tt class="py-op">.</tt><tt class="py-name">setter</tt> </tt>
<a name="Accumulator.value.setter"></a><div id="Accumulator.value.setter-def"><a name="L145"></a><tt class="py-lineno">145</tt> <a class="py-toggle" href="#" id="Accumulator.value.setter-toggle" onclick="return toggle('Accumulator.value.setter');">-</a><tt class="py-line"> <tt class="py-keyword">def</tt> <a class="py-def-name" href="pyspark.accumulators.Accumulator-class.html#value">value</a><tt class="py-op">(</tt><tt class="py-param">self</tt><tt class="py-op">,</tt> <tt class="py-param">value</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
</div><div id="Accumulator.value.setter-collapsed" style="display:none;" pad="+++" indent="++++++++"></div><div id="Accumulator.value.setter-expanded"><a name="L146"></a><tt class="py-lineno">146</tt> <tt class="py-line"> <tt class="py-docstring">"""Sets the accumulator's value; only usable in driver program"""</tt> </tt>
<a name="L147"></a><tt class="py-lineno">147</tt> <tt class="py-line"> <tt class="py-keyword">if</tt> <tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">_deserialized</tt><tt class="py-op">:</tt> </tt>
<a name="L148"></a><tt class="py-lineno">148</tt> <tt class="py-line"> <tt class="py-keyword">raise</tt> <tt class="py-name">Exception</tt><tt class="py-op">(</tt><tt class="py-string">"Accumulator.value cannot be accessed inside tasks"</tt><tt class="py-op">)</tt> </tt>
<a name="L149"></a><tt class="py-lineno">149</tt> <tt class="py-line"> <tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">_value</tt> <tt class="py-op">=</tt> <tt id="link-18" class="py-name"><a title="pyspark.accumulators.Accumulator.value" class="py-name" href="#" onclick="return doclink('link-18', 'value', 'link-15');">value</a></tt> </tt>
</div><a name="L150"></a><tt class="py-lineno">150</tt> <tt class="py-line"> </tt>
<a name="Accumulator.add"></a><div id="Accumulator.add-def"><a name="L151"></a><tt class="py-lineno">151</tt> <a class="py-toggle" href="#" id="Accumulator.add-toggle" onclick="return toggle('Accumulator.add');">-</a><tt class="py-line"> <tt class="py-keyword">def</tt> <a class="py-def-name" href="pyspark.accumulators.Accumulator-class.html#add">add</a><tt class="py-op">(</tt><tt class="py-param">self</tt><tt class="py-op">,</tt> <tt class="py-param">term</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
</div><div id="Accumulator.add-collapsed" style="display:none;" pad="+++" indent="++++++++"></div><div id="Accumulator.add-expanded"><a name="L152"></a><tt class="py-lineno">152</tt> <tt class="py-line"> <tt class="py-docstring">"""Adds a term to this accumulator's value"""</tt> </tt>
<a name="L153"></a><tt class="py-lineno">153</tt> <tt class="py-line"> <tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">_value</tt> <tt class="py-op">=</tt> <tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">accum_param</tt><tt class="py-op">.</tt><tt id="link-19" class="py-name" targets="Method pyspark.accumulators.AccumulatorParam.addInPlace()=pyspark.accumulators.AccumulatorParam-class.html#addInPlace,Method pyspark.accumulators.AddingAccumulatorParam.addInPlace()=pyspark.accumulators.AddingAccumulatorParam-class.html#addInPlace"><a title="pyspark.accumulators.AccumulatorParam.addInPlace
pyspark.accumulators.AddingAccumulatorParam.addInPlace" class="py-name" href="#" onclick="return doclink('link-19', 'addInPlace', 'link-19');">addInPlace</a></tt><tt class="py-op">(</tt><tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">_value</tt><tt class="py-op">,</tt> <tt class="py-name">term</tt><tt class="py-op">)</tt> </tt>
</div><a name="L154"></a><tt class="py-lineno">154</tt> <tt class="py-line"> </tt>
<a name="Accumulator.__iadd__"></a><div id="Accumulator.__iadd__-def"><a name="L155"></a><tt class="py-lineno">155</tt> <a class="py-toggle" href="#" id="Accumulator.__iadd__-toggle" onclick="return toggle('Accumulator.__iadd__');">-</a><tt class="py-line"> <tt class="py-keyword">def</tt> <a class="py-def-name" href="pyspark.accumulators.Accumulator-class.html#__iadd__">__iadd__</a><tt class="py-op">(</tt><tt class="py-param">self</tt><tt class="py-op">,</tt> <tt class="py-param">term</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
</div><div id="Accumulator.__iadd__-collapsed" style="display:none;" pad="+++" indent="++++++++"></div><div id="Accumulator.__iadd__-expanded"><a name="L156"></a><tt class="py-lineno">156</tt> <tt class="py-line"> <tt class="py-docstring">"""The += operator; adds a term to this accumulator's value"""</tt> </tt>
<a name="L157"></a><tt class="py-lineno">157</tt> <tt class="py-line"> <tt class="py-name">self</tt><tt class="py-op">.</tt><tt id="link-20" class="py-name" targets="Method pyspark.accumulators.Accumulator.add()=pyspark.accumulators.Accumulator-class.html#add"><a title="pyspark.accumulators.Accumulator.add" class="py-name" href="#" onclick="return doclink('link-20', 'add', 'link-20');">add</a></tt><tt class="py-op">(</tt><tt class="py-name">term</tt><tt class="py-op">)</tt> </tt>
<a name="L158"></a><tt class="py-lineno">158</tt> <tt class="py-line"> <tt class="py-keyword">return</tt> <tt class="py-name">self</tt> </tt>
</div><a name="L159"></a><tt class="py-lineno">159</tt> <tt class="py-line"> </tt>
<a name="Accumulator.__str__"></a><div id="Accumulator.__str__-def"><a name="L160"></a><tt class="py-lineno">160</tt> <a class="py-toggle" href="#" id="Accumulator.__str__-toggle" onclick="return toggle('Accumulator.__str__');">-</a><tt class="py-line"> <tt class="py-keyword">def</tt> <a class="py-def-name" href="pyspark.accumulators.Accumulator-class.html#__str__">__str__</a><tt class="py-op">(</tt><tt class="py-param">self</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
</div><div id="Accumulator.__str__-collapsed" style="display:none;" pad="+++" indent="++++++++"></div><div id="Accumulator.__str__-expanded"><a name="L161"></a><tt class="py-lineno">161</tt> <tt class="py-line"> <tt class="py-keyword">return</tt> <tt class="py-name">str</tt><tt class="py-op">(</tt><tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">_value</tt><tt class="py-op">)</tt> </tt>
</div><a name="L162"></a><tt class="py-lineno">162</tt> <tt class="py-line"> </tt>
<a name="Accumulator.__repr__"></a><div id="Accumulator.__repr__-def"><a name="L163"></a><tt class="py-lineno">163</tt> <a class="py-toggle" href="#" id="Accumulator.__repr__-toggle" onclick="return toggle('Accumulator.__repr__');">-</a><tt class="py-line"> <tt class="py-keyword">def</tt> <a class="py-def-name" href="pyspark.accumulators.Accumulator-class.html#__repr__">__repr__</a><tt class="py-op">(</tt><tt class="py-param">self</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
</div><div id="Accumulator.__repr__-collapsed" style="display:none;" pad="+++" indent="++++++++"></div><div id="Accumulator.__repr__-expanded"><a name="L164"></a><tt class="py-lineno">164</tt> <tt class="py-line"> <tt class="py-keyword">return</tt> <tt class="py-string">"Accumulator&lt;id=%i, value=%s&gt;"</tt> <tt class="py-op">%</tt> <tt class="py-op">(</tt><tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">aid</tt><tt class="py-op">,</tt> <tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">_value</tt><tt class="py-op">)</tt> </tt>
</div></div><a name="L165"></a><tt class="py-lineno">165</tt> <tt class="py-line"> </tt>
<a name="AccumulatorParam"></a><div id="AccumulatorParam-def"><a name="L166"></a><tt class="py-lineno">166</tt> <tt class="py-line"> </tt>
<a name="L167"></a><tt class="py-lineno">167</tt> <a class="py-toggle" href="#" id="AccumulatorParam-toggle" onclick="return toggle('AccumulatorParam');">-</a><tt class="py-line"><tt class="py-keyword">class</tt> <a class="py-def-name" href="pyspark.accumulators.AccumulatorParam-class.html">AccumulatorParam</a><tt class="py-op">(</tt><tt class="py-base-class">object</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
</div><div id="AccumulatorParam-collapsed" style="display:none;" pad="+++" indent="++++"></div><div id="AccumulatorParam-expanded"><a name="L168"></a><tt class="py-lineno">168</tt> <tt class="py-line"> <tt class="py-docstring">"""</tt> </tt>
<a name="L169"></a><tt class="py-lineno">169</tt> <tt class="py-line"><tt class="py-docstring"> Helper object that defines how to accumulate values of a given type.</tt> </tt>
<a name="L170"></a><tt class="py-lineno">170</tt> <tt class="py-line"><tt class="py-docstring"> """</tt> </tt>
<a name="L171"></a><tt class="py-lineno">171</tt> <tt class="py-line"> </tt>
<a name="AccumulatorParam.zero"></a><div id="AccumulatorParam.zero-def"><a name="L172"></a><tt class="py-lineno">172</tt> <a class="py-toggle" href="#" id="AccumulatorParam.zero-toggle" onclick="return toggle('AccumulatorParam.zero');">-</a><tt class="py-line"> <tt class="py-keyword">def</tt> <a class="py-def-name" href="pyspark.accumulators.AccumulatorParam-class.html#zero">zero</a><tt class="py-op">(</tt><tt class="py-param">self</tt><tt class="py-op">,</tt> <tt class="py-param">value</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
</div><div id="AccumulatorParam.zero-collapsed" style="display:none;" pad="+++" indent="++++++++"></div><div id="AccumulatorParam.zero-expanded"><a name="L173"></a><tt class="py-lineno">173</tt> <tt class="py-line"> <tt class="py-docstring">"""</tt> </tt>
<a name="L174"></a><tt class="py-lineno">174</tt> <tt class="py-line"><tt class="py-docstring"> Provide a "zero value" for the type, compatible in dimensions with the</tt> </tt>
<a name="L175"></a><tt class="py-lineno">175</tt> <tt class="py-line"><tt class="py-docstring"> provided C{value} (e.g., a zero vector)</tt> </tt>
<a name="L176"></a><tt class="py-lineno">176</tt> <tt class="py-line"><tt class="py-docstring"> """</tt> </tt>
<a name="L177"></a><tt class="py-lineno">177</tt> <tt class="py-line"> <tt class="py-keyword">raise</tt> <tt class="py-name">NotImplementedError</tt> </tt>
</div><a name="L178"></a><tt class="py-lineno">178</tt> <tt class="py-line"> </tt>
<a name="AccumulatorParam.addInPlace"></a><div id="AccumulatorParam.addInPlace-def"><a name="L179"></a><tt class="py-lineno">179</tt> <a class="py-toggle" href="#" id="AccumulatorParam.addInPlace-toggle" onclick="return toggle('AccumulatorParam.addInPlace');">-</a><tt class="py-line"> <tt class="py-keyword">def</tt> <a class="py-def-name" href="pyspark.accumulators.AccumulatorParam-class.html#addInPlace">addInPlace</a><tt class="py-op">(</tt><tt class="py-param">self</tt><tt class="py-op">,</tt> <tt class="py-param">value1</tt><tt class="py-op">,</tt> <tt class="py-param">value2</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
</div><div id="AccumulatorParam.addInPlace-collapsed" style="display:none;" pad="+++" indent="++++++++"></div><div id="AccumulatorParam.addInPlace-expanded"><a name="L180"></a><tt class="py-lineno">180</tt> <tt class="py-line"> <tt class="py-docstring">"""</tt> </tt>
<a name="L181"></a><tt class="py-lineno">181</tt> <tt class="py-line"><tt class="py-docstring"> Add two values of the accumulator's data type, returning a new value;</tt> </tt>
<a name="L182"></a><tt class="py-lineno">182</tt> <tt class="py-line"><tt class="py-docstring"> for efficiency, can also update C{value1} in place and return it.</tt> </tt>
<a name="L183"></a><tt class="py-lineno">183</tt> <tt class="py-line"><tt class="py-docstring"> """</tt> </tt>
<a name="L184"></a><tt class="py-lineno">184</tt> <tt class="py-line"> <tt class="py-keyword">raise</tt> <tt class="py-name">NotImplementedError</tt> </tt>
</div></div><a name="L185"></a><tt class="py-lineno">185</tt> <tt class="py-line"> </tt>
<a name="AddingAccumulatorParam"></a><div id="AddingAccumulatorParam-def"><a name="L186"></a><tt class="py-lineno">186</tt> <tt class="py-line"> </tt>
<a name="L187"></a><tt class="py-lineno">187</tt> <a class="py-toggle" href="#" id="AddingAccumulatorParam-toggle" onclick="return toggle('AddingAccumulatorParam');">-</a><tt class="py-line"><tt class="py-keyword">class</tt> <a class="py-def-name" href="pyspark.accumulators.AddingAccumulatorParam-class.html">AddingAccumulatorParam</a><tt class="py-op">(</tt><tt class="py-base-class">AccumulatorParam</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
</div><div id="AddingAccumulatorParam-collapsed" style="display:none;" pad="+++" indent="++++"></div><div id="AddingAccumulatorParam-expanded"><a name="L188"></a><tt class="py-lineno">188</tt> <tt class="py-line"> <tt class="py-docstring">"""</tt> </tt>
<a name="L189"></a><tt class="py-lineno">189</tt> <tt class="py-line"><tt class="py-docstring"> An AccumulatorParam that uses the + operators to add values. Designed for simple types</tt> </tt>
<a name="L190"></a><tt class="py-lineno">190</tt> <tt class="py-line"><tt class="py-docstring"> such as integers, floats, and lists. Requires the zero value for the underlying type</tt> </tt>
<a name="L191"></a><tt class="py-lineno">191</tt> <tt class="py-line"><tt class="py-docstring"> as a parameter.</tt> </tt>
<a name="L192"></a><tt class="py-lineno">192</tt> <tt class="py-line"><tt class="py-docstring"> """</tt> </tt>
<a name="L193"></a><tt class="py-lineno">193</tt> <tt class="py-line"> </tt>
<a name="AddingAccumulatorParam.__init__"></a><div id="AddingAccumulatorParam.__init__-def"><a name="L194"></a><tt class="py-lineno">194</tt> <a class="py-toggle" href="#" id="AddingAccumulatorParam.__init__-toggle" onclick="return toggle('AddingAccumulatorParam.__init__');">-</a><tt class="py-line"> <tt class="py-keyword">def</tt> <a class="py-def-name" href="pyspark.accumulators.AddingAccumulatorParam-class.html#__init__">__init__</a><tt class="py-op">(</tt><tt class="py-param">self</tt><tt class="py-op">,</tt> <tt class="py-param">zero_value</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
</div><div id="AddingAccumulatorParam.__init__-collapsed" style="display:none;" pad="+++" indent="++++++++"></div><div id="AddingAccumulatorParam.__init__-expanded"><a name="L195"></a><tt class="py-lineno">195</tt> <tt class="py-line"> <tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">zero_value</tt> <tt class="py-op">=</tt> <tt class="py-name">zero_value</tt> </tt>
</div><a name="L196"></a><tt class="py-lineno">196</tt> <tt class="py-line"> </tt>
<a name="AddingAccumulatorParam.zero"></a><div id="AddingAccumulatorParam.zero-def"><a name="L197"></a><tt class="py-lineno">197</tt> <a class="py-toggle" href="#" id="AddingAccumulatorParam.zero-toggle" onclick="return toggle('AddingAccumulatorParam.zero');">-</a><tt class="py-line"> <tt class="py-keyword">def</tt> <a class="py-def-name" href="pyspark.accumulators.AddingAccumulatorParam-class.html#zero">zero</a><tt class="py-op">(</tt><tt class="py-param">self</tt><tt class="py-op">,</tt> <tt class="py-param">value</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
</div><div id="AddingAccumulatorParam.zero-collapsed" style="display:none;" pad="+++" indent="++++++++"></div><div id="AddingAccumulatorParam.zero-expanded"><a name="L198"></a><tt class="py-lineno">198</tt> <tt class="py-line"> <tt class="py-keyword">return</tt> <tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">zero_value</tt> </tt>
</div><a name="L199"></a><tt class="py-lineno">199</tt> <tt class="py-line"> </tt>
<a name="AddingAccumulatorParam.addInPlace"></a><div id="AddingAccumulatorParam.addInPlace-def"><a name="L200"></a><tt class="py-lineno">200</tt> <a class="py-toggle" href="#" id="AddingAccumulatorParam.addInPlace-toggle" onclick="return toggle('AddingAccumulatorParam.addInPlace');">-</a><tt class="py-line"> <tt class="py-keyword">def</tt> <a class="py-def-name" href="pyspark.accumulators.AddingAccumulatorParam-class.html#addInPlace">addInPlace</a><tt class="py-op">(</tt><tt class="py-param">self</tt><tt class="py-op">,</tt> <tt class="py-param">value1</tt><tt class="py-op">,</tt> <tt class="py-param">value2</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
</div><div id="AddingAccumulatorParam.addInPlace-collapsed" style="display:none;" pad="+++" indent="++++++++"></div><div id="AddingAccumulatorParam.addInPlace-expanded"><a name="L201"></a><tt class="py-lineno">201</tt> <tt class="py-line"> <tt class="py-name">value1</tt> <tt class="py-op">+=</tt> <tt class="py-name">value2</tt> </tt>
<a name="L202"></a><tt class="py-lineno">202</tt> <tt class="py-line"> <tt class="py-keyword">return</tt> <tt class="py-name">value1</tt> </tt>
</div></div><a name="L203"></a><tt class="py-lineno">203</tt> <tt class="py-line"> </tt>
<a name="L204"></a><tt class="py-lineno">204</tt> <tt class="py-line"> </tt>
<a name="L205"></a><tt class="py-lineno">205</tt> <tt class="py-line"><tt class="py-comment"># Singleton accumulator params for some standard types</tt> </tt>
<a name="L206"></a><tt class="py-lineno">206</tt> <tt class="py-line"><tt id="link-21" class="py-name" targets="Variable pyspark.accumulators.INT_ACCUMULATOR_PARAM=pyspark.accumulators-module.html#INT_ACCUMULATOR_PARAM"><a title="pyspark.accumulators.INT_ACCUMULATOR_PARAM" class="py-name" href="#" onclick="return doclink('link-21', 'INT_ACCUMULATOR_PARAM', 'link-21');">INT_ACCUMULATOR_PARAM</a></tt> <tt class="py-op">=</tt> <tt id="link-22" class="py-name" targets="Class pyspark.accumulators.AddingAccumulatorParam=pyspark.accumulators.AddingAccumulatorParam-class.html"><a title="pyspark.accumulators.AddingAccumulatorParam" class="py-name" href="#" onclick="return doclink('link-22', 'AddingAccumulatorParam', 'link-22');">AddingAccumulatorParam</a></tt><tt class="py-op">(</tt><tt class="py-number">0</tt><tt class="py-op">)</tt> </tt>
<a name="L207"></a><tt class="py-lineno">207</tt> <tt class="py-line"><tt id="link-23" class="py-name" targets="Variable pyspark.accumulators.FLOAT_ACCUMULATOR_PARAM=pyspark.accumulators-module.html#FLOAT_ACCUMULATOR_PARAM"><a title="pyspark.accumulators.FLOAT_ACCUMULATOR_PARAM" class="py-name" href="#" onclick="return doclink('link-23', 'FLOAT_ACCUMULATOR_PARAM', 'link-23');">FLOAT_ACCUMULATOR_PARAM</a></tt> <tt class="py-op">=</tt> <tt id="link-24" class="py-name"><a title="pyspark.accumulators.AddingAccumulatorParam" class="py-name" href="#" onclick="return doclink('link-24', 'AddingAccumulatorParam', 'link-22');">AddingAccumulatorParam</a></tt><tt class="py-op">(</tt><tt class="py-number">0.0</tt><tt class="py-op">)</tt> </tt>
<a name="L208"></a><tt class="py-lineno">208</tt> <tt class="py-line"><tt id="link-25" class="py-name" targets="Variable pyspark.accumulators.COMPLEX_ACCUMULATOR_PARAM=pyspark.accumulators-module.html#COMPLEX_ACCUMULATOR_PARAM"><a title="pyspark.accumulators.COMPLEX_ACCUMULATOR_PARAM" class="py-name" href="#" onclick="return doclink('link-25', 'COMPLEX_ACCUMULATOR_PARAM', 'link-25');">COMPLEX_ACCUMULATOR_PARAM</a></tt> <tt class="py-op">=</tt> <tt id="link-26" class="py-name"><a title="pyspark.accumulators.AddingAccumulatorParam" class="py-name" href="#" onclick="return doclink('link-26', 'AddingAccumulatorParam', 'link-22');">AddingAccumulatorParam</a></tt><tt class="py-op">(</tt><tt class="py-number">0.0j</tt><tt class="py-op">)</tt> </tt>
<a name="_UpdateRequestHandler"></a><div id="_UpdateRequestHandler-def"><a name="L209"></a><tt class="py-lineno">209</tt> <tt class="py-line"> </tt>
<a name="L210"></a><tt class="py-lineno">210</tt> <tt class="py-line"> </tt>
<a name="L211"></a><tt class="py-lineno">211</tt> <a class="py-toggle" href="#" id="_UpdateRequestHandler-toggle" onclick="return toggle('_UpdateRequestHandler');">-</a><tt class="py-line"><tt class="py-keyword">class</tt> <a class="py-def-name" href="pyspark.accumulators._UpdateRequestHandler-class.html">_UpdateRequestHandler</a><tt class="py-op">(</tt><tt class="py-base-class">SocketServer</tt><tt class="py-op">.</tt><tt class="py-base-class">StreamRequestHandler</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
</div><div id="_UpdateRequestHandler-collapsed" style="display:none;" pad="+++" indent="++++"></div><div id="_UpdateRequestHandler-expanded"><a name="_UpdateRequestHandler.handle"></a><div id="_UpdateRequestHandler.handle-def"><a name="L212"></a><tt class="py-lineno">212</tt> <a class="py-toggle" href="#" id="_UpdateRequestHandler.handle-toggle" onclick="return toggle('_UpdateRequestHandler.handle');">-</a><tt class="py-line"> <tt class="py-keyword">def</tt> <a class="py-def-name" href="pyspark.accumulators._UpdateRequestHandler-class.html#handle">handle</a><tt class="py-op">(</tt><tt class="py-param">self</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
</div><div id="_UpdateRequestHandler.handle-collapsed" style="display:none;" pad="+++" indent="++++++++"></div><div id="_UpdateRequestHandler.handle-expanded"><a name="L213"></a><tt class="py-lineno">213</tt> <tt class="py-line"> <tt class="py-keyword">from</tt> <tt id="link-27" class="py-name"><a title="pyspark" class="py-name" href="#" onclick="return doclink('link-27', 'pyspark', 'link-0');">pyspark</a></tt><tt class="py-op">.</tt><tt id="link-28" class="py-name"><a title="pyspark.accumulators" class="py-name" href="#" onclick="return doclink('link-28', 'accumulators', 'link-8');">accumulators</a></tt> <tt class="py-keyword">import</tt> <tt id="link-29" class="py-name"><a title="pyspark.accumulators._accumulatorRegistry" class="py-name" href="#" onclick="return doclink('link-29', '_accumulatorRegistry', 'link-6');">_accumulatorRegistry</a></tt> </tt>
<a name="L214"></a><tt class="py-lineno">214</tt> <tt class="py-line"> <tt class="py-name">num_updates</tt> <tt class="py-op">=</tt> <tt class="py-name">read_int</tt><tt class="py-op">(</tt><tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">rfile</tt><tt class="py-op">)</tt> </tt>
<a name="L215"></a><tt class="py-lineno">215</tt> <tt class="py-line"> <tt class="py-keyword">for</tt> <tt class="py-name">_</tt> <tt class="py-keyword">in</tt> <tt class="py-name">range</tt><tt class="py-op">(</tt><tt class="py-name">num_updates</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
<a name="L216"></a><tt class="py-lineno">216</tt> <tt class="py-line"> <tt class="py-op">(</tt><tt class="py-name">aid</tt><tt class="py-op">,</tt> <tt class="py-name">update</tt><tt class="py-op">)</tt> <tt class="py-op">=</tt> <tt id="link-30" class="py-name"><a title="pyspark.accumulators.pickleSer" class="py-name" href="#" onclick="return doclink('link-30', 'pickleSer', 'link-4');">pickleSer</a></tt><tt class="py-op">.</tt><tt class="py-name">_read_with_length</tt><tt class="py-op">(</tt><tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">rfile</tt><tt class="py-op">)</tt> </tt>
<a name="L217"></a><tt class="py-lineno">217</tt> <tt class="py-line"> <tt id="link-31" class="py-name"><a title="pyspark.accumulators._accumulatorRegistry" class="py-name" href="#" onclick="return doclink('link-31', '_accumulatorRegistry', 'link-6');">_accumulatorRegistry</a></tt><tt class="py-op">[</tt><tt class="py-name">aid</tt><tt class="py-op">]</tt> <tt class="py-op">+=</tt> <tt class="py-name">update</tt> </tt>
<a name="L218"></a><tt class="py-lineno">218</tt> <tt class="py-line"> <tt class="py-comment"># Write a byte in acknowledgement</tt> </tt>
<a name="L219"></a><tt class="py-lineno">219</tt> <tt class="py-line"> <tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">wfile</tt><tt class="py-op">.</tt><tt class="py-name">write</tt><tt class="py-op">(</tt><tt class="py-name">struct</tt><tt class="py-op">.</tt><tt class="py-name">pack</tt><tt class="py-op">(</tt><tt class="py-string">"!b"</tt><tt class="py-op">,</tt> <tt class="py-number">1</tt><tt class="py-op">)</tt><tt class="py-op">)</tt> </tt>
</div></div><a name="L220"></a><tt class="py-lineno">220</tt> <tt class="py-line"> </tt>
<a name="_start_update_server"></a><div id="_start_update_server-def"><a name="L221"></a><tt class="py-lineno">221</tt> <tt class="py-line"> </tt>
<a name="L222"></a><tt class="py-lineno">222</tt> <a class="py-toggle" href="#" id="_start_update_server-toggle" onclick="return toggle('_start_update_server');">-</a><tt class="py-line"><tt class="py-keyword">def</tt> <a class="py-def-name" href="pyspark.accumulators-module.html#_start_update_server">_start_update_server</a><tt class="py-op">(</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
</div><div id="_start_update_server-collapsed" style="display:none;" pad="+++" indent="++++"></div><div id="_start_update_server-expanded"><a name="L223"></a><tt class="py-lineno">223</tt> <tt class="py-line"> <tt class="py-docstring">"""Start a TCP server to receive accumulator updates in a daemon thread, and returns it"""</tt> </tt>
<a name="L224"></a><tt class="py-lineno">224</tt> <tt class="py-line"> <tt class="py-name">server</tt> <tt class="py-op">=</tt> <tt class="py-name">SocketServer</tt><tt class="py-op">.</tt><tt class="py-name">TCPServer</tt><tt class="py-op">(</tt><tt class="py-op">(</tt><tt class="py-string">"localhost"</tt><tt class="py-op">,</tt> <tt class="py-number">0</tt><tt class="py-op">)</tt><tt class="py-op">,</tt> <tt class="py-name">_UpdateRequestHandler</tt><tt class="py-op">)</tt> </tt>
<a name="L225"></a><tt class="py-lineno">225</tt> <tt class="py-line"> <tt class="py-name">thread</tt> <tt class="py-op">=</tt> <tt class="py-name">threading</tt><tt class="py-op">.</tt><tt class="py-name">Thread</tt><tt class="py-op">(</tt><tt class="py-name">target</tt><tt class="py-op">=</tt><tt class="py-name">server</tt><tt class="py-op">.</tt><tt class="py-name">serve_forever</tt><tt class="py-op">)</tt> </tt>
<a name="L226"></a><tt class="py-lineno">226</tt> <tt class="py-line"> <tt class="py-name">thread</tt><tt class="py-op">.</tt><tt class="py-name">daemon</tt> <tt class="py-op">=</tt> <tt class="py-name">True</tt> </tt>
<a name="L227"></a><tt class="py-lineno">227</tt> <tt class="py-line"> <tt class="py-name">thread</tt><tt class="py-op">.</tt><tt class="py-name">start</tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
<a name="L228"></a><tt class="py-lineno">228</tt> <tt class="py-line"> <tt class="py-keyword">return</tt> <tt class="py-name">server</tt> </tt>
</div><a name="L229"></a><tt class="py-lineno">229</tt> <tt class="py-line"> </tt><script type="text/javascript">
// Runs once this point of the page is parsed, passing the current URL.
// expandto() is not defined in this file; presumably it comes from the
// epydoc.js script referenced in the document head (TODO confirm).
// Written as a plain statement so the call is still present when the page
// is parsed as XML.
expandto(location.href);
</script>
</pre>
<br />
<!-- ==================== NAVIGATION BAR (bottom of page) ==================== -->
<!-- Layout table with one header cell per destination: Home, Trees,
     Indices and Help, followed by a right-aligned cell that links to the
     project homepage. The padding on either side of each link label is
     written with &nbsp; entity references so that the markup stays inside
     the ASCII encoding declared in the XML prolog. -->
<table class="navbar" border="0" width="100%" cellpadding="0"
       bgcolor="#a0c0ff" cellspacing="0">
  <tr valign="middle">
    <!-- Home link -->
    <th>&nbsp;&nbsp;&nbsp;<a
      href="pyspark-module.html">Home</a>&nbsp;&nbsp;&nbsp;</th>
    <!-- Tree link -->
    <th>&nbsp;&nbsp;&nbsp;<a
      href="module-tree.html">Trees</a>&nbsp;&nbsp;&nbsp;</th>
    <!-- Index link -->
    <th>&nbsp;&nbsp;&nbsp;<a
      href="identifier-index.html">Indices</a>&nbsp;&nbsp;&nbsp;</th>
    <!-- Help link -->
    <th>&nbsp;&nbsp;&nbsp;<a
      href="help.html">Help</a>&nbsp;&nbsp;&nbsp;</th>
    <!-- Project homepage: takes the remaining width; target="_top" makes
         the link replace the whole window rather than load inside a frame -->
    <th class="navbar" align="right" width="100%">
      <table border="0" cellpadding="0" cellspacing="0">
        <tr><th class="navbar" align="center"
          ><a class="navbar" target="_top" href="http://spark-project.org">PySpark</a></th>
        </tr></table></th>
  </tr>
</table>
<!-- Page footer: generator stamp on the left, link to the Epydoc site on
     the right. The width is a single percentage value ("100%"); the
     previous "100%%" was not a valid length. -->
<table border="0" cellpadding="0" cellspacing="0" width="100%">
  <tr>
    <td align="left" class="footer">
    Generated by Epydoc 3.0.1 on Thu Jul 17 20:36:18 2014
    </td>
    <td align="right" class="footer">
      <!-- target="mainFrame" names a frame that is not defined in this
           page; presumably it belongs to a frameset wrapper (TODO confirm) -->
      <a target="mainFrame" href="http://epydoc.sourceforge.net"
        >http://epydoc.sourceforge.net</a>
    </td>
  </tr>
</table>
<!-- The script body is written as plain statements, without the legacy
     HTML comment wrapper, so the call is still present when this XHTML
     page is handled by an XML parser. -->
<script type="text/javascript">
  // Private objects are initially displayed (because if
  // javascript is turned off then we want them to be
  // visible); but by default, we want to hide them.  So hide
  // them unless we have a cookie that says to show them.
  // checkCookie() is not defined in this file; presumably it comes from
  // the epydoc.js script referenced in the document head (TODO confirm).
  checkCookie();
</script>
</body>
</html>
|