<h1 class="epydoc">Source Code for <a href="pyspark.context-module.html">Module pyspark.context</a></h1>
<pre class="py-src">
<a name="L1"></a><tt class="py-lineno">  1</tt>  <tt class="py-line"><tt class="py-comment">#</tt> </tt>
<a name="L2"></a><tt class="py-lineno">  2</tt>  <tt class="py-line"><tt class="py-comment"># Licensed to the Apache Software Foundation (ASF) under one or more</tt> </tt>
<a name="L3"></a><tt class="py-lineno">  3</tt>  <tt class="py-line"><tt class="py-comment"># contributor license agreements.  See the NOTICE file distributed with</tt> </tt>
<a name="L4"></a><tt class="py-lineno">  4</tt>  <tt class="py-line"><tt class="py-comment"># this work for additional information regarding copyright ownership.</tt> </tt>
<a name="L5"></a><tt class="py-lineno">  5</tt>  <tt class="py-line"><tt class="py-comment"># The ASF licenses this file to You under the Apache License, Version 2.0</tt> </tt>
<a name="L6"></a><tt class="py-lineno">  6</tt>  <tt class="py-line"><tt class="py-comment"># (the "License"); you may not use this file except in compliance with</tt> </tt>
<a name="L7"></a><tt class="py-lineno">  7</tt>  <tt class="py-line"><tt class="py-comment"># the License.  You may obtain a copy of the License at</tt> </tt>
<a name="L8"></a><tt class="py-lineno">  8</tt>  <tt class="py-line"><tt class="py-comment">#</tt> </tt>
<a name="L9"></a><tt class="py-lineno">  9</tt>  <tt class="py-line"><tt class="py-comment">#    http://www.apache.org/licenses/LICENSE-2.0</tt> </tt>
<a name="L10"></a><tt class="py-lineno"> 10</tt>  <tt class="py-line"><tt class="py-comment">#</tt> </tt>
<a name="L11"></a><tt class="py-lineno"> 11</tt>  <tt class="py-line"><tt class="py-comment"># Unless required by applicable law or agreed to in writing, software</tt> </tt>
<a name="L12"></a><tt class="py-lineno"> 12</tt>  <tt class="py-line"><tt class="py-comment"># distributed under the License is distributed on an "AS IS" BASIS,</tt> </tt>
<a name="L13"></a><tt class="py-lineno"> 13</tt>  <tt class="py-line"><tt class="py-comment"># WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.</tt> </tt>
<a name="L14"></a><tt class="py-lineno"> 14</tt>  <tt class="py-line"><tt class="py-comment"># See the License for the specific language governing permissions and</tt> </tt>
<a name="L15"></a><tt class="py-lineno"> 15</tt>  <tt class="py-line"><tt class="py-comment"># limitations under the License.</tt> </tt>
<a name="L16"></a><tt class="py-lineno"> 16</tt>  <tt class="py-line"><tt class="py-comment">#</tt> </tt>
<a name="L17"></a><tt class="py-lineno"> 17</tt>  <tt class="py-line"> </tt>
<a name="L18"></a><tt class="py-lineno"> 18</tt>  <tt class="py-line"><tt class="py-keyword">import</tt> <tt class="py-name">os</tt> </tt>
<a name="L19"></a><tt class="py-lineno"> 19</tt>  <tt class="py-line"><tt class="py-keyword">import</tt> <tt class="py-name">shutil</tt> </tt>
<a name="L20"></a><tt class="py-lineno"> 20</tt>  <tt class="py-line"><tt class="py-keyword">import</tt> <tt class="py-name">sys</tt> </tt>
<a name="L21"></a><tt class="py-lineno"> 21</tt>  <tt class="py-line"><tt class="py-keyword">from</tt> <tt class="py-name">threading</tt> <tt class="py-keyword">import</tt> <tt class="py-name">Lock</tt> </tt>
<a name="L22"></a><tt class="py-lineno"> 22</tt>  <tt class="py-line"><tt class="py-keyword">from</tt> <tt class="py-name">tempfile</tt> <tt class="py-keyword">import</tt> <tt class="py-name">NamedTemporaryFile</tt> </tt>
<a name="L23"></a><tt class="py-lineno"> 23</tt>  <tt class="py-line"><tt class="py-keyword">from</tt> <tt class="py-name">collections</tt> <tt class="py-keyword">import</tt> <tt class="py-name">namedtuple</tt> </tt>
<a name="L24"></a><tt class="py-lineno"> 24</tt>  <tt class="py-line"> </tt>
<a name="L25"></a><tt class="py-lineno"> 25</tt>  <tt class="py-line"><tt class="py-keyword">from</tt> <tt id="link-0" class="py-name" targets="Package pyspark=pyspark-module.html"><a title="pyspark" class="py-name" href="#" onclick="return doclink('link-0', 'pyspark', 'link-0');">pyspark</a></tt> <tt class="py-keyword">import</tt> <tt id="link-1" class="py-name" targets="Module pyspark.accumulators=pyspark.accumulators-module.html"><a title="pyspark.accumulators" class="py-name" href="#" onclick="return doclink('link-1', 'accumulators', 'link-1');">accumulators</a></tt> </tt>
<a name="L26"></a><tt class="py-lineno"> 26</tt>  <tt class="py-line"><tt class="py-keyword">from</tt> <tt id="link-2" class="py-name"><a title="pyspark" class="py-name" href="#" onclick="return doclink('link-2', 'pyspark', 'link-0');">pyspark</a></tt><tt class="py-op">.</tt><tt id="link-3" class="py-name"><a title="pyspark.accumulators" class="py-name" href="#" onclick="return doclink('link-3', 'accumulators', 'link-1');">accumulators</a></tt> <tt class="py-keyword">import</tt> <tt id="link-4" class="py-name" targets="Class pyspark.accumulators.Accumulator=pyspark.accumulators.Accumulator-class.html"><a title="pyspark.accumulators.Accumulator" class="py-name" href="#" onclick="return doclink('link-4', 'Accumulator', 'link-4');">Accumulator</a></tt> </tt>
<a name="L27"></a><tt class="py-lineno"> 27</tt>  <tt class="py-line"><tt class="py-keyword">from</tt> <tt id="link-5" class="py-name"><a title="pyspark" class="py-name" href="#" onclick="return doclink('link-5', 'pyspark', 'link-0');">pyspark</a></tt><tt class="py-op">.</tt><tt id="link-6" class="py-name" targets="Module pyspark.broadcast=pyspark.broadcast-module.html,Method pyspark.context.SparkContext.broadcast()=pyspark.context.SparkContext-class.html#broadcast"><a title="pyspark.broadcast
pyspark.context.SparkContext.broadcast" class="py-name" href="#" onclick="return doclink('link-6', 'broadcast', 'link-6');">broadcast</a></tt> <tt class="py-keyword">import</tt> <tt id="link-7" class="py-name" targets="Class pyspark.broadcast.Broadcast=pyspark.broadcast.Broadcast-class.html"><a title="pyspark.broadcast.Broadcast" class="py-name" href="#" onclick="return doclink('link-7', 'Broadcast', 'link-7');">Broadcast</a></tt> </tt>
<a name="L28"></a><tt class="py-lineno"> 28</tt>  <tt class="py-line"><tt class="py-keyword">from</tt> <tt id="link-8" class="py-name"><a title="pyspark" class="py-name" href="#" onclick="return doclink('link-8', 'pyspark', 'link-0');">pyspark</a></tt><tt class="py-op">.</tt><tt id="link-9" class="py-name" targets="Module pyspark.conf=pyspark.conf-module.html"><a title="pyspark.conf" class="py-name" href="#" onclick="return doclink('link-9', 'conf', 'link-9');">conf</a></tt> <tt class="py-keyword">import</tt> <tt id="link-10" class="py-name" targets="Class pyspark.conf.SparkConf=pyspark.conf.SparkConf-class.html"><a title="pyspark.conf.SparkConf" class="py-name" href="#" onclick="return doclink('link-10', 'SparkConf', 'link-10');">SparkConf</a></tt> </tt>
<a name="L29"></a><tt class="py-lineno"> 29</tt>  <tt class="py-line"><tt class="py-keyword">from</tt> <tt id="link-11" class="py-name"><a title="pyspark" class="py-name" href="#" onclick="return doclink('link-11', 'pyspark', 'link-0');">pyspark</a></tt><tt class="py-op">.</tt><tt id="link-12" class="py-name" targets="Module pyspark.files=pyspark.files-module.html"><a title="pyspark.files" class="py-name" href="#" onclick="return doclink('link-12', 'files', 'link-12');">files</a></tt> <tt class="py-keyword">import</tt> <tt id="link-13" class="py-name" targets="Class pyspark.files.SparkFiles=pyspark.files.SparkFiles-class.html"><a title="pyspark.files.SparkFiles" class="py-name" href="#" onclick="return doclink('link-13', 'SparkFiles', 'link-13');">SparkFiles</a></tt> </tt>
<a name="L30"></a><tt class="py-lineno"> 30</tt>  <tt class="py-line"><tt class="py-keyword">from</tt> <tt id="link-14" class="py-name"><a title="pyspark" class="py-name" href="#" onclick="return doclink('link-14', 'pyspark', 'link-0');">pyspark</a></tt><tt class="py-op">.</tt><tt class="py-name">java_gateway</tt> <tt class="py-keyword">import</tt> <tt class="py-name">launch_gateway</tt> </tt>
<a name="L31"></a><tt class="py-lineno"> 31</tt>  <tt class="py-line"><tt class="py-keyword">from</tt> <tt id="link-15" class="py-name"><a title="pyspark" class="py-name" href="#" onclick="return doclink('link-15', 'pyspark', 'link-0');">pyspark</a></tt><tt class="py-op">.</tt><tt id="link-16" class="py-name" targets="Module pyspark.serializers=pyspark.serializers-module.html"><a title="pyspark.serializers" class="py-name" href="#" onclick="return doclink('link-16', 'serializers', 'link-16');">serializers</a></tt> <tt class="py-keyword">import</tt> <tt id="link-17" class="py-name" targets="Class pyspark.serializers.PickleSerializer=pyspark.serializers.PickleSerializer-class.html"><a title="pyspark.serializers.PickleSerializer" class="py-name" href="#" onclick="return doclink('link-17', 'PickleSerializer', 'link-17');">PickleSerializer</a></tt><tt class="py-op">,</tt> <tt class="py-name">BatchedSerializer</tt><tt class="py-op">,</tt> <tt class="py-name">UTF8Deserializer</tt><tt class="py-op">,</tt> \ </tt>
<a name="L32"></a><tt class="py-lineno"> 32</tt>  <tt class="py-line">        <tt class="py-name">PairDeserializer</tt> </tt>
<a name="L33"></a><tt class="py-lineno"> 33</tt>  <tt class="py-line"><tt class="py-keyword">from</tt> <tt id="link-18" class="py-name"><a title="pyspark" class="py-name" href="#" onclick="return doclink('link-18', 'pyspark', 'link-0');">pyspark</a></tt><tt class="py-op">.</tt><tt id="link-19" class="py-name" targets="Module pyspark.storagelevel=pyspark.storagelevel-module.html"><a title="pyspark.storagelevel" class="py-name" href="#" onclick="return doclink('link-19', 'storagelevel', 'link-19');">storagelevel</a></tt> <tt class="py-keyword">import</tt> <tt id="link-20" class="py-name" targets="Class pyspark.storagelevel.StorageLevel=pyspark.storagelevel.StorageLevel-class.html"><a title="pyspark.storagelevel.StorageLevel" class="py-name" href="#" onclick="return doclink('link-20', 'StorageLevel', 'link-20');">StorageLevel</a></tt> </tt>
<a name="L34"></a><tt class="py-lineno"> 34</tt>  <tt class="py-line"><tt class="py-keyword">from</tt> <tt id="link-21" class="py-name"><a title="pyspark" class="py-name" href="#" onclick="return doclink('link-21', 'pyspark', 'link-0');">pyspark</a></tt> <tt class="py-keyword">import</tt> <tt id="link-22" class="py-name" targets="Module pyspark.rdd=pyspark.rdd-module.html"><a title="pyspark.rdd" class="py-name" href="#" onclick="return doclink('link-22', 'rdd', 'link-22');">rdd</a></tt> </tt>
<a name="L35"></a><tt class="py-lineno"> 35</tt>  <tt class="py-line"><tt class="py-keyword">from</tt> <tt id="link-23" class="py-name"><a title="pyspark" class="py-name" href="#" onclick="return doclink('link-23', 'pyspark', 'link-0');">pyspark</a></tt><tt class="py-op">.</tt><tt id="link-24" class="py-name"><a title="pyspark.rdd" class="py-name" href="#" onclick="return doclink('link-24', 'rdd', 'link-22');">rdd</a></tt> <tt class="py-keyword">import</tt> <tt id="link-25" class="py-name" targets="Class pyspark.rdd.RDD=pyspark.rdd.RDD-class.html"><a title="pyspark.rdd.RDD" class="py-name" href="#" onclick="return doclink('link-25', 'RDD', 'link-25');">RDD</a></tt> </tt>
<a name="L36"></a><tt class="py-lineno"> 36</tt>  <tt class="py-line"> </tt>
<a name="L37"></a><tt class="py-lineno"> 37</tt>  <tt class="py-line"><tt class="py-keyword">from</tt> <tt class="py-name">py4j</tt><tt class="py-op">.</tt><tt class="py-name">java_collections</tt> <tt class="py-keyword">import</tt> <tt class="py-name">ListConverter</tt> </tt>
<a name="SparkContext"></a><div id="SparkContext-def"><a name="L38"></a><tt class="py-lineno"> 38</tt>  <tt class="py-line"> </tt>
<a name="L39"></a><tt class="py-lineno"> 39</tt>  <tt class="py-line"> </tt>
<a name="L40"></a><tt class="py-lineno"> 40</tt> <a class="py-toggle" href="#" id="SparkContext-toggle" onclick="return toggle('SparkContext');">-</a><tt class="py-line"><tt class="py-keyword">class</tt> <a class="py-def-name" href="pyspark.context.SparkContext-class.html">SparkContext</a><tt class="py-op">(</tt><tt class="py-base-class">object</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
</div><div id="SparkContext-collapsed" style="display:none;" pad="+++" indent="++++"></div><div id="SparkContext-expanded"><a name="L41"></a><tt class="py-lineno"> 41</tt>  <tt class="py-line">    <tt class="py-docstring">"""</tt> </tt>
<a name="L42"></a><tt class="py-lineno"> 42</tt>  <tt class="py-line"><tt class="py-docstring">    Main entry point for Spark functionality. A SparkContext represents the</tt> </tt>
<a name="L43"></a><tt class="py-lineno"> 43</tt>  <tt class="py-line"><tt class="py-docstring">    connection to a Spark cluster, and can be used to create L{RDD}s and</tt> </tt>
<a name="L44"></a><tt class="py-lineno"> 44</tt>  <tt class="py-line"><tt class="py-docstring">    broadcast variables on that cluster.</tt> </tt>
<a name="L45"></a><tt class="py-lineno"> 45</tt>  <tt class="py-line"><tt class="py-docstring">    """</tt> </tt>
<a name="L46"></a><tt class="py-lineno"> 46</tt>  <tt class="py-line"> </tt>
<a name="L47"></a><tt class="py-lineno"> 47</tt>  <tt class="py-line">    <tt id="link-26" class="py-name" targets="Variable pyspark.context.SparkContext._gateway=pyspark.context.SparkContext-class.html#_gateway"><a title="pyspark.context.SparkContext._gateway" class="py-name" href="#" onclick="return doclink('link-26', '_gateway', 'link-26');">_gateway</a></tt> <tt class="py-op">=</tt> <tt class="py-name">None</tt> </tt>
<a name="L48"></a><tt class="py-lineno"> 48</tt>  <tt class="py-line">    <tt id="link-27" class="py-name" targets="Variable pyspark.context.SparkContext._jvm=pyspark.context.SparkContext-class.html#_jvm"><a title="pyspark.context.SparkContext._jvm" class="py-name" href="#" onclick="return doclink('link-27', '_jvm', 'link-27');">_jvm</a></tt> <tt class="py-op">=</tt> <tt class="py-name">None</tt> </tt>
<a name="L49"></a><tt class="py-lineno"> 49</tt>  <tt class="py-line">    <tt id="link-28" class="py-name" targets="Variable pyspark.context.SparkContext._writeToFile=pyspark.context.SparkContext-class.html#_writeToFile"><a title="pyspark.context.SparkContext._writeToFile" class="py-name" href="#" onclick="return doclink('link-28', '_writeToFile', 'link-28');">_writeToFile</a></tt> <tt class="py-op">=</tt> <tt class="py-name">None</tt> </tt>
<a name="L50"></a><tt class="py-lineno"> 50</tt>  <tt class="py-line">    <tt id="link-29" class="py-name" targets="Variable pyspark.context.SparkContext._next_accum_id=pyspark.context.SparkContext-class.html#_next_accum_id"><a title="pyspark.context.SparkContext._next_accum_id" class="py-name" href="#" onclick="return doclink('link-29', '_next_accum_id', 'link-29');">_next_accum_id</a></tt> <tt class="py-op">=</tt> <tt class="py-number">0</tt> </tt>
<a name="L51"></a><tt class="py-lineno"> 51</tt>  <tt class="py-line">    <tt id="link-30" class="py-name" targets="Variable pyspark.context.SparkContext._active_spark_context=pyspark.context.SparkContext-class.html#_active_spark_context"><a title="pyspark.context.SparkContext._active_spark_context" class="py-name" href="#" onclick="return doclink('link-30', '_active_spark_context', 'link-30');">_active_spark_context</a></tt> <tt class="py-op">=</tt> <tt class="py-name">None</tt> </tt>
<a name="L52"></a><tt class="py-lineno"> 52</tt>  <tt class="py-line">    <tt id="link-31" class="py-name" targets="Variable pyspark.context.SparkContext._lock=pyspark.context.SparkContext-class.html#_lock"><a title="pyspark.context.SparkContext._lock" class="py-name" href="#" onclick="return doclink('link-31', '_lock', 'link-31');">_lock</a></tt> <tt class="py-op">=</tt> <tt class="py-name">Lock</tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
<a name="L53"></a><tt class="py-lineno"> 53</tt>  <tt class="py-line">    <tt id="link-32" class="py-name" targets="Variable pyspark.context.SparkContext._python_includes=pyspark.context.SparkContext-class.html#_python_includes"><a title="pyspark.context.SparkContext._python_includes" class="py-name" href="#" onclick="return doclink('link-32', '_python_includes', 'link-32');">_python_includes</a></tt> <tt class="py-op">=</tt> <tt class="py-name">None</tt> <tt class="py-comment"># zip and egg files that need to be added to PYTHONPATH</tt> </tt>
<a name="L54"></a><tt class="py-lineno"> 54</tt>  <tt class="py-line"> </tt>
<a name="L55"></a><tt class="py-lineno"> 55</tt>  <tt class="py-line"> </tt>
<a name="SparkContext.__init__"></a><div id="SparkContext.__init__-def"><a name="L56"></a><tt class="py-lineno"> 56</tt> <a class="py-toggle" href="#" id="SparkContext.__init__-toggle" onclick="return toggle('SparkContext.__init__');">-</a><tt class="py-line">    <tt class="py-keyword">def</tt> <a class="py-def-name" href="pyspark.context.SparkContext-class.html#__init__">__init__</a><tt class="py-op">(</tt><tt class="py-param">self</tt><tt class="py-op">,</tt> <tt class="py-param">master</tt><tt class="py-op">=</tt><tt class="py-name">None</tt><tt class="py-op">,</tt> <tt class="py-param">appName</tt><tt class="py-op">=</tt><tt class="py-name">None</tt><tt class="py-op">,</tt> <tt class="py-param">sparkHome</tt><tt class="py-op">=</tt><tt class="py-name">None</tt><tt class="py-op">,</tt> <tt class="py-param">pyFiles</tt><tt class="py-op">=</tt><tt class="py-name">None</tt><tt class="py-op">,</tt> </tt>
<a name="L57"></a><tt class="py-lineno"> 57</tt>  <tt class="py-line">        <tt class="py-param">environment</tt><tt class="py-op">=</tt><tt class="py-name">None</tt><tt class="py-op">,</tt> <tt class="py-param">batchSize</tt><tt class="py-op">=</tt><tt class="py-number">1024</tt><tt class="py-op">,</tt> <tt class="py-param">serializer</tt><tt class="py-op">=</tt><tt id="link-33" class="py-name"><a title="pyspark.serializers.PickleSerializer" class="py-name" href="#" onclick="return doclink('link-33', 'PickleSerializer', 'link-17');">PickleSerializer</a></tt><tt class="py-op">(</tt><tt class="py-op">)</tt><tt class="py-op">,</tt> <tt class="py-param">conf</tt><tt class="py-op">=</tt><tt class="py-name">None</tt><tt class="py-op">,</tt> </tt>
<a name="L58"></a><tt class="py-lineno"> 58</tt>  <tt class="py-line">        <tt class="py-param">gateway</tt><tt class="py-op">=</tt><tt class="py-name">None</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
</div><div id="SparkContext.__init__-collapsed" style="display:none;" pad="+++" indent="++++++++"></div><div id="SparkContext.__init__-expanded"><a name="L59"></a><tt class="py-lineno"> 59</tt>  <tt class="py-line">        <tt class="py-docstring">"""</tt> </tt>
<a name="L60"></a><tt class="py-lineno"> 60</tt>  <tt class="py-line"><tt class="py-docstring">        Create a new SparkContext. At least the master and app name should be set,</tt> </tt>
<a name="L61"></a><tt class="py-lineno"> 61</tt>  <tt class="py-line"><tt class="py-docstring">        either through the named parameters here or through C{conf}.</tt> </tt>
<a name="L62"></a><tt class="py-lineno"> 62</tt>  <tt class="py-line"><tt class="py-docstring"></tt> </tt>
<a name="L63"></a><tt class="py-lineno"> 63</tt>  <tt class="py-line"><tt class="py-docstring">        @param master: Cluster URL to connect to</tt> </tt>
<a name="L64"></a><tt class="py-lineno"> 64</tt>  <tt class="py-line"><tt class="py-docstring">               (e.g. mesos://host:port, spark://host:port, local[4]).</tt> </tt>
<a name="L65"></a><tt class="py-lineno"> 65</tt>  <tt class="py-line"><tt class="py-docstring">        @param appName: A name for your job, to display on the cluster web UI.</tt> </tt>
<a name="L66"></a><tt class="py-lineno"> 66</tt>  <tt class="py-line"><tt class="py-docstring">        @param sparkHome: Location where Spark is installed on cluster nodes.</tt> </tt>
<a name="L67"></a><tt class="py-lineno"> 67</tt>  <tt class="py-line"><tt class="py-docstring">        @param pyFiles: Collection of .zip or .py files to send to the cluster</tt> </tt>
<a name="L68"></a><tt class="py-lineno"> 68</tt>  <tt class="py-line"><tt class="py-docstring">               and add to PYTHONPATH.  These can be paths on the local file</tt> </tt>
<a name="L69"></a><tt class="py-lineno"> 69</tt>  <tt class="py-line"><tt class="py-docstring">               system or HDFS, HTTP, HTTPS, or FTP URLs.</tt> </tt>
<a name="L70"></a><tt class="py-lineno"> 70</tt>  <tt class="py-line"><tt class="py-docstring">        @param environment: A dictionary of environment variables to set on</tt> </tt>
<a name="L71"></a><tt class="py-lineno"> 71</tt>  <tt class="py-line"><tt class="py-docstring">               worker nodes.</tt> </tt>
<a name="L72"></a><tt class="py-lineno"> 72</tt>  <tt class="py-line"><tt class="py-docstring">        @param batchSize: The number of Python objects represented as a single</tt> </tt>
<a name="L73"></a><tt class="py-lineno"> 73</tt>  <tt class="py-line"><tt class="py-docstring">               Java object.  Set 1 to disable batching or -1 to use an</tt> </tt>
<a name="L74"></a><tt class="py-lineno"> 74</tt>  <tt class="py-line"><tt class="py-docstring">               unlimited batch size.</tt> </tt>
<a name="L75"></a><tt class="py-lineno"> 75</tt>  <tt class="py-line"><tt class="py-docstring">        @param serializer: The serializer for RDDs.</tt> </tt>
<a name="L76"></a><tt class="py-lineno"> 76</tt>  <tt class="py-line"><tt class="py-docstring">        @param conf: A L{SparkConf} object setting Spark properties.</tt> </tt>
<a name="L77"></a><tt class="py-lineno"> 77</tt>  <tt class="py-line"><tt class="py-docstring">        @param gateway: Use an existing gateway and JVM, otherwise a new JVM</tt> </tt>
<a name="L78"></a><tt class="py-lineno"> 78</tt>  <tt class="py-line"><tt class="py-docstring">               will be instatiated.</tt> </tt>
<a name="L79"></a><tt class="py-lineno"> 79</tt>  <tt class="py-line"><tt class="py-docstring"></tt> </tt>
<a name="L80"></a><tt class="py-lineno"> 80</tt>  <tt class="py-line"><tt class="py-docstring"></tt> </tt>
<a name="L81"></a><tt class="py-lineno"> 81</tt>  <tt class="py-line"><tt class="py-docstring">        &gt;&gt;&gt; from pyspark.context import SparkContext</tt> </tt>
<a name="L82"></a><tt class="py-lineno"> 82</tt>  <tt class="py-line"><tt class="py-docstring">        &gt;&gt;&gt; sc = SparkContext('local', 'test')</tt> </tt>
<a name="L83"></a><tt class="py-lineno"> 83</tt>  <tt class="py-line"><tt class="py-docstring"></tt> </tt>
<a name="L84"></a><tt class="py-lineno"> 84</tt>  <tt class="py-line"><tt class="py-docstring">        &gt;&gt;&gt; sc2 = SparkContext('local', 'test2') # doctest: +IGNORE_EXCEPTION_DETAIL</tt> </tt>
<a name="L85"></a><tt class="py-lineno"> 85</tt>  <tt class="py-line"><tt class="py-docstring">        Traceback (most recent call last):</tt> </tt>
<a name="L86"></a><tt class="py-lineno"> 86</tt>  <tt class="py-line"><tt class="py-docstring">            ...</tt> </tt>
<a name="L87"></a><tt class="py-lineno"> 87</tt>  <tt class="py-line"><tt class="py-docstring">        ValueError:...</tt> </tt>
<a name="L88"></a><tt class="py-lineno"> 88</tt>  <tt class="py-line"><tt class="py-docstring">        """</tt> </tt>
<a name="L89"></a><tt class="py-lineno"> 89</tt>  <tt class="py-line">        <tt class="py-keyword">if</tt> <tt id="link-34" class="py-name"><a title="pyspark.rdd" class="py-name" href="#" onclick="return doclink('link-34', 'rdd', 'link-22');">rdd</a></tt><tt class="py-op">.</tt><tt class="py-name">_extract_concise_traceback</tt><tt class="py-op">(</tt><tt class="py-op">)</tt> <tt class="py-keyword">is</tt> <tt class="py-keyword">not</tt> <tt class="py-name">None</tt><tt class="py-op">:</tt> </tt>
<a name="L90"></a><tt class="py-lineno"> 90</tt>  <tt class="py-line">            <tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">_callsite</tt> <tt class="py-op">=</tt> <tt id="link-35" class="py-name"><a title="pyspark.rdd" class="py-name" href="#" onclick="return doclink('link-35', 'rdd', 'link-22');">rdd</a></tt><tt class="py-op">.</tt><tt class="py-name">_extract_concise_traceback</tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
<a name="L91"></a><tt class="py-lineno"> 91</tt>  <tt class="py-line">        <tt class="py-keyword">else</tt><tt class="py-op">:</tt> </tt>
<a name="L92"></a><tt class="py-lineno"> 92</tt>  <tt class="py-line">            <tt class="py-name">tempNamedTuple</tt> <tt class="py-op">=</tt> <tt class="py-name">namedtuple</tt><tt class="py-op">(</tt><tt class="py-string">"Callsite"</tt><tt class="py-op">,</tt> <tt class="py-string">"function file linenum"</tt><tt class="py-op">)</tt> </tt>
<a name="L93"></a><tt class="py-lineno"> 93</tt>  <tt class="py-line">            <tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">_callsite</tt> <tt class="py-op">=</tt> <tt class="py-name">tempNamedTuple</tt><tt class="py-op">(</tt><tt class="py-name">function</tt><tt class="py-op">=</tt><tt class="py-name">None</tt><tt class="py-op">,</tt> <tt class="py-name">file</tt><tt class="py-op">=</tt><tt class="py-name">None</tt><tt class="py-op">,</tt> <tt class="py-name">linenum</tt><tt class="py-op">=</tt><tt class="py-name">None</tt><tt class="py-op">)</tt> </tt>
<a name="L94"></a><tt class="py-lineno"> 94</tt>  <tt class="py-line">        <tt id="link-36" class="py-name" targets="Class pyspark.context.SparkContext=pyspark.context.SparkContext-class.html"><a title="pyspark.context.SparkContext" class="py-name" href="#" onclick="return doclink('link-36', 'SparkContext', 'link-36');">SparkContext</a></tt><tt class="py-op">.</tt><tt class="py-name">_ensure_initialized</tt><tt class="py-op">(</tt><tt class="py-name">self</tt><tt class="py-op">,</tt> <tt class="py-name">gateway</tt><tt class="py-op">=</tt><tt class="py-name">gateway</tt><tt class="py-op">)</tt> </tt>
<a name="L95"></a><tt class="py-lineno"> 95</tt>  <tt class="py-line"> </tt>
<a name="L96"></a><tt class="py-lineno"> 96</tt>  <tt class="py-line">        <tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">environment</tt> <tt class="py-op">=</tt> <tt class="py-name">environment</tt> <tt class="py-keyword">or</tt> <tt class="py-op">{</tt><tt class="py-op">}</tt> </tt>
<a name="L97"></a><tt class="py-lineno"> 97</tt>  <tt class="py-line">        <tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">_conf</tt> <tt class="py-op">=</tt> <tt id="link-37" class="py-name"><a title="pyspark.conf" class="py-name" href="#" onclick="return doclink('link-37', 'conf', 'link-9');">conf</a></tt> <tt class="py-keyword">or</tt> <tt id="link-38" class="py-name"><a title="pyspark.conf.SparkConf" class="py-name" href="#" onclick="return doclink('link-38', 'SparkConf', 'link-10');">SparkConf</a></tt><tt class="py-op">(</tt><tt id="link-39" class="py-name"><a title="pyspark.context.SparkContext._jvm" class="py-name" href="#" onclick="return doclink('link-39', '_jvm', 'link-27');">_jvm</a></tt><tt class="py-op">=</tt><tt class="py-name">self</tt><tt class="py-op">.</tt><tt id="link-40" class="py-name"><a title="pyspark.context.SparkContext._jvm" class="py-name" href="#" onclick="return doclink('link-40', '_jvm', 'link-27');">_jvm</a></tt><tt class="py-op">)</tt> </tt>
<a name="L98"></a><tt class="py-lineno"> 98</tt>  <tt class="py-line">        <tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">_batchSize</tt> <tt class="py-op">=</tt> <tt class="py-name">batchSize</tt>  <tt class="py-comment"># -1 represents an unlimited batch size</tt> </tt>
<a name="L99"></a><tt class="py-lineno"> 99</tt>  <tt class="py-line">        <tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">_unbatched_serializer</tt> <tt class="py-op">=</tt> <tt class="py-name">serializer</tt> </tt>
<a name="L100"></a><tt class="py-lineno">100</tt>  <tt class="py-line">        <tt class="py-keyword">if</tt> <tt class="py-name">batchSize</tt> <tt class="py-op">==</tt> <tt class="py-number">1</tt><tt class="py-op">:</tt> </tt>
<a name="L101"></a><tt class="py-lineno">101</tt>  <tt class="py-line">            <tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">serializer</tt> <tt class="py-op">=</tt> <tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">_unbatched_serializer</tt> </tt>
<a name="L102"></a><tt class="py-lineno">102</tt>  <tt class="py-line">        <tt class="py-keyword">else</tt><tt class="py-op">:</tt> </tt>
<a name="L103"></a><tt class="py-lineno">103</tt>  <tt class="py-line">            <tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">serializer</tt> <tt class="py-op">=</tt> <tt class="py-name">BatchedSerializer</tt><tt class="py-op">(</tt><tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">_unbatched_serializer</tt><tt class="py-op">,</tt> </tt>
<a name="L104"></a><tt class="py-lineno">104</tt>  <tt class="py-line">                                                <tt class="py-name">batchSize</tt><tt class="py-op">)</tt> </tt>
<a name="L105"></a><tt class="py-lineno">105</tt>  <tt class="py-line"> </tt>
<a name="L106"></a><tt class="py-lineno">106</tt>  <tt class="py-line">        <tt class="py-comment"># Set any parameters passed directly to us on the conf</tt> </tt>
<a name="L107"></a><tt class="py-lineno">107</tt>  <tt class="py-line">        <tt class="py-keyword">if</tt> <tt class="py-name">master</tt><tt class="py-op">:</tt> </tt>
<a name="L108"></a><tt class="py-lineno">108</tt>  <tt class="py-line">            <tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">_conf</tt><tt class="py-op">.</tt><tt id="link-41" class="py-name" targets="Method pyspark.conf.SparkConf.setMaster()=pyspark.conf.SparkConf-class.html#setMaster"><a title="pyspark.conf.SparkConf.setMaster" class="py-name" href="#" onclick="return doclink('link-41', 'setMaster', 'link-41');">setMaster</a></tt><tt class="py-op">(</tt><tt class="py-name">master</tt><tt class="py-op">)</tt> </tt>
<a name="L109"></a><tt class="py-lineno">109</tt>  <tt class="py-line">        <tt class="py-keyword">if</tt> <tt class="py-name">appName</tt><tt class="py-op">:</tt> </tt>
<a name="L110"></a><tt class="py-lineno">110</tt>  <tt class="py-line">            <tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">_conf</tt><tt class="py-op">.</tt><tt id="link-42" class="py-name" targets="Method pyspark.conf.SparkConf.setAppName()=pyspark.conf.SparkConf-class.html#setAppName"><a title="pyspark.conf.SparkConf.setAppName" class="py-name" href="#" onclick="return doclink('link-42', 'setAppName', 'link-42');">setAppName</a></tt><tt class="py-op">(</tt><tt class="py-name">appName</tt><tt class="py-op">)</tt> </tt>
<a name="L111"></a><tt class="py-lineno">111</tt>  <tt class="py-line">        <tt class="py-keyword">if</tt> <tt class="py-name">sparkHome</tt><tt class="py-op">:</tt> </tt>
<a name="L112"></a><tt class="py-lineno">112</tt>  <tt class="py-line">            <tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">_conf</tt><tt class="py-op">.</tt><tt id="link-43" class="py-name" targets="Method pyspark.conf.SparkConf.setSparkHome()=pyspark.conf.SparkConf-class.html#setSparkHome"><a title="pyspark.conf.SparkConf.setSparkHome" class="py-name" href="#" onclick="return doclink('link-43', 'setSparkHome', 'link-43');">setSparkHome</a></tt><tt class="py-op">(</tt><tt class="py-name">sparkHome</tt><tt class="py-op">)</tt> </tt>
<a name="L113"></a><tt class="py-lineno">113</tt>  <tt class="py-line">        <tt class="py-keyword">if</tt> <tt class="py-name">environment</tt><tt class="py-op">:</tt> </tt>
<a name="L114"></a><tt class="py-lineno">114</tt>  <tt class="py-line">            <tt class="py-keyword">for</tt> <tt class="py-name">key</tt><tt class="py-op">,</tt> <tt id="link-44" class="py-name" targets="Method pyspark.accumulators.Accumulator.value()=pyspark.accumulators.Accumulator-class.html#value"><a title="pyspark.accumulators.Accumulator.value" class="py-name" href="#" onclick="return doclink('link-44', 'value', 'link-44');">value</a></tt> <tt class="py-keyword">in</tt> <tt class="py-name">environment</tt><tt class="py-op">.</tt><tt class="py-name">iteritems</tt><tt class="py-op">(</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
<a name="L115"></a><tt class="py-lineno">115</tt>  <tt class="py-line">                <tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">_conf</tt><tt class="py-op">.</tt><tt id="link-45" class="py-name" targets="Method pyspark.conf.SparkConf.setExecutorEnv()=pyspark.conf.SparkConf-class.html#setExecutorEnv"><a title="pyspark.conf.SparkConf.setExecutorEnv" class="py-name" href="#" onclick="return doclink('link-45', 'setExecutorEnv', 'link-45');">setExecutorEnv</a></tt><tt class="py-op">(</tt><tt class="py-name">key</tt><tt class="py-op">,</tt> <tt id="link-46" class="py-name"><a title="pyspark.accumulators.Accumulator.value" class="py-name" href="#" onclick="return doclink('link-46', 'value', 'link-44');">value</a></tt><tt class="py-op">)</tt> </tt>
<a name="L116"></a><tt class="py-lineno">116</tt>  <tt class="py-line"> </tt>
<a name="L117"></a><tt class="py-lineno">117</tt>  <tt class="py-line">        <tt class="py-comment"># Check that we have at least the required parameters</tt> </tt>
<a name="L118"></a><tt class="py-lineno">118</tt>  <tt class="py-line">        <tt class="py-keyword">if</tt> <tt class="py-keyword">not</tt> <tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">_conf</tt><tt class="py-op">.</tt><tt id="link-47" class="py-name" targets="Method pyspark.conf.SparkConf.contains()=pyspark.conf.SparkConf-class.html#contains"><a title="pyspark.conf.SparkConf.contains" class="py-name" href="#" onclick="return doclink('link-47', 'contains', 'link-47');">contains</a></tt><tt class="py-op">(</tt><tt class="py-string">"spark.master"</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
<a name="L119"></a><tt class="py-lineno">119</tt>  <tt class="py-line">            <tt class="py-keyword">raise</tt> <tt class="py-name">Exception</tt><tt class="py-op">(</tt><tt class="py-string">"A master URL must be set in your configuration"</tt><tt class="py-op">)</tt> </tt>
<a name="L120"></a><tt class="py-lineno">120</tt>  <tt class="py-line">        <tt class="py-keyword">if</tt> <tt class="py-keyword">not</tt> <tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">_conf</tt><tt class="py-op">.</tt><tt id="link-48" class="py-name"><a title="pyspark.conf.SparkConf.contains" class="py-name" href="#" onclick="return doclink('link-48', 'contains', 'link-47');">contains</a></tt><tt class="py-op">(</tt><tt class="py-string">"spark.app.name"</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
<a name="L121"></a><tt class="py-lineno">121</tt>  <tt class="py-line">            <tt class="py-keyword">raise</tt> <tt class="py-name">Exception</tt><tt class="py-op">(</tt><tt class="py-string">"An application name must be set in your configuration"</tt><tt class="py-op">)</tt> </tt>
<a name="L122"></a><tt class="py-lineno">122</tt>  <tt class="py-line"> </tt>
<a name="L123"></a><tt class="py-lineno">123</tt>  <tt class="py-line">        <tt class="py-comment"># Read back our properties from the conf in case we loaded some of them from</tt> </tt>
<a name="L124"></a><tt class="py-lineno">124</tt>  <tt class="py-line">        <tt class="py-comment"># the classpath or an external config file</tt> </tt>
<a name="L125"></a><tt class="py-lineno">125</tt>  <tt class="py-line">        <tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">master</tt> <tt class="py-op">=</tt> <tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">_conf</tt><tt class="py-op">.</tt><tt id="link-49" class="py-name" targets="Method pyspark.conf.SparkConf.get()=pyspark.conf.SparkConf-class.html#get,Class Method pyspark.files.SparkFiles.get()=pyspark.files.SparkFiles-class.html#get"><a title="pyspark.conf.SparkConf.get
pyspark.files.SparkFiles.get" class="py-name" href="#" onclick="return doclink('link-49', 'get', 'link-49');">get</a></tt><tt class="py-op">(</tt><tt class="py-string">"spark.master"</tt><tt class="py-op">)</tt> </tt>
<a name="L126"></a><tt class="py-lineno">126</tt>  <tt class="py-line">        <tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">appName</tt> <tt class="py-op">=</tt> <tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">_conf</tt><tt class="py-op">.</tt><tt id="link-50" class="py-name"><a title="pyspark.conf.SparkConf.get
pyspark.files.SparkFiles.get" class="py-name" href="#" onclick="return doclink('link-50', 'get', 'link-49');">get</a></tt><tt class="py-op">(</tt><tt class="py-string">"spark.app.name"</tt><tt class="py-op">)</tt> </tt>
<a name="L127"></a><tt class="py-lineno">127</tt>  <tt class="py-line">        <tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">sparkHome</tt> <tt class="py-op">=</tt> <tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">_conf</tt><tt class="py-op">.</tt><tt id="link-51" class="py-name"><a title="pyspark.conf.SparkConf.get
pyspark.files.SparkFiles.get" class="py-name" href="#" onclick="return doclink('link-51', 'get', 'link-49');">get</a></tt><tt class="py-op">(</tt><tt class="py-string">"spark.home"</tt><tt class="py-op">,</tt> <tt class="py-name">None</tt><tt class="py-op">)</tt> </tt>
<a name="L128"></a><tt class="py-lineno">128</tt>  <tt class="py-line">        <tt class="py-keyword">for</tt> <tt class="py-op">(</tt><tt class="py-name">k</tt><tt class="py-op">,</tt> <tt class="py-name">v</tt><tt class="py-op">)</tt> <tt class="py-keyword">in</tt> <tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">_conf</tt><tt class="py-op">.</tt><tt id="link-52" class="py-name" targets="Method pyspark.conf.SparkConf.getAll()=pyspark.conf.SparkConf-class.html#getAll"><a title="pyspark.conf.SparkConf.getAll" class="py-name" href="#" onclick="return doclink('link-52', 'getAll', 'link-52');">getAll</a></tt><tt class="py-op">(</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
<a name="L129"></a><tt class="py-lineno">129</tt>  <tt class="py-line">            <tt class="py-keyword">if</tt> <tt class="py-name">k</tt><tt class="py-op">.</tt><tt class="py-name">startswith</tt><tt class="py-op">(</tt><tt class="py-string">"spark.executorEnv."</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
<a name="L130"></a><tt class="py-lineno">130</tt>  <tt class="py-line">                <tt class="py-name">varName</tt> <tt class="py-op">=</tt> <tt class="py-name">k</tt><tt class="py-op">[</tt><tt class="py-name">len</tt><tt class="py-op">(</tt><tt class="py-string">"spark.executorEnv."</tt><tt class="py-op">)</tt><tt class="py-op">:</tt><tt class="py-op">]</tt> </tt>
<a name="L131"></a><tt class="py-lineno">131</tt>  <tt class="py-line">                <tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">environment</tt><tt class="py-op">[</tt><tt class="py-name">varName</tt><tt class="py-op">]</tt> <tt class="py-op">=</tt> <tt class="py-name">v</tt> </tt>
<a name="L132"></a><tt class="py-lineno">132</tt>  <tt class="py-line"> </tt>
<a name="L133"></a><tt class="py-lineno">133</tt>  <tt class="py-line">        <tt class="py-comment"># Create the Java SparkContext through Py4J</tt> </tt>
<a name="L134"></a><tt class="py-lineno">134</tt>  <tt class="py-line">        <tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">_jsc</tt> <tt class="py-op">=</tt> <tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">_initialize_context</tt><tt class="py-op">(</tt><tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">_conf</tt><tt class="py-op">.</tt><tt class="py-name">_jconf</tt><tt class="py-op">)</tt> </tt>
<a name="L135"></a><tt class="py-lineno">135</tt>  <tt class="py-line"> </tt>
<a name="L136"></a><tt class="py-lineno">136</tt>  <tt class="py-line">        <tt class="py-comment"># Create a single Accumulator in Java that we'll send all our updates through;</tt> </tt>
<a name="L137"></a><tt class="py-lineno">137</tt>  <tt class="py-line">        <tt class="py-comment"># they will be passed back to us through a TCP server</tt> </tt>
<a name="L138"></a><tt class="py-lineno">138</tt>  <tt class="py-line">        <tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">_accumulatorServer</tt> <tt class="py-op">=</tt> <tt id="link-53" class="py-name"><a title="pyspark.accumulators" class="py-name" href="#" onclick="return doclink('link-53', 'accumulators', 'link-1');">accumulators</a></tt><tt class="py-op">.</tt><tt class="py-name">_start_update_server</tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
<a name="L139"></a><tt class="py-lineno">139</tt>  <tt class="py-line">        <tt class="py-op">(</tt><tt class="py-name">host</tt><tt class="py-op">,</tt> <tt class="py-name">port</tt><tt class="py-op">)</tt> <tt class="py-op">=</tt> <tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">_accumulatorServer</tt><tt class="py-op">.</tt><tt class="py-name">server_address</tt> </tt>
<a name="L140"></a><tt class="py-lineno">140</tt>  <tt class="py-line">        <tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">_javaAccumulator</tt> <tt class="py-op">=</tt> <tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">_jsc</tt><tt class="py-op">.</tt><tt id="link-54" class="py-name" targets="Method pyspark.context.SparkContext.accumulator()=pyspark.context.SparkContext-class.html#accumulator"><a title="pyspark.context.SparkContext.accumulator" class="py-name" href="#" onclick="return doclink('link-54', 'accumulator', 'link-54');">accumulator</a></tt><tt class="py-op">(</tt> </tt>
<a name="L141"></a><tt class="py-lineno">141</tt>  <tt class="py-line">                <tt class="py-name">self</tt><tt class="py-op">.</tt><tt id="link-55" class="py-name"><a title="pyspark.context.SparkContext._jvm" class="py-name" href="#" onclick="return doclink('link-55', '_jvm', 'link-27');">_jvm</a></tt><tt class="py-op">.</tt><tt class="py-name">java</tt><tt class="py-op">.</tt><tt id="link-56" class="py-name" targets="Module pyspark.mllib.util=pyspark.mllib.util-module.html"><a title="pyspark.mllib.util" class="py-name" href="#" onclick="return doclink('link-56', 'util', 'link-56');">util</a></tt><tt class="py-op">.</tt><tt class="py-name">ArrayList</tt><tt class="py-op">(</tt><tt class="py-op">)</tt><tt class="py-op">,</tt> </tt>
<a name="L142"></a><tt class="py-lineno">142</tt>  <tt class="py-line">                <tt class="py-name">self</tt><tt class="py-op">.</tt><tt id="link-57" class="py-name"><a title="pyspark.context.SparkContext._jvm" class="py-name" href="#" onclick="return doclink('link-57', '_jvm', 'link-27');">_jvm</a></tt><tt class="py-op">.</tt><tt class="py-name">PythonAccumulatorParam</tt><tt class="py-op">(</tt><tt class="py-name">host</tt><tt class="py-op">,</tt> <tt class="py-name">port</tt><tt class="py-op">)</tt><tt class="py-op">)</tt> </tt>
<a name="L143"></a><tt class="py-lineno">143</tt>  <tt class="py-line"> </tt>
<a name="L144"></a><tt class="py-lineno">144</tt>  <tt class="py-line">        <tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">pythonExec</tt> <tt class="py-op">=</tt> <tt class="py-name">os</tt><tt class="py-op">.</tt><tt class="py-name">environ</tt><tt class="py-op">.</tt><tt id="link-58" class="py-name"><a title="pyspark.conf.SparkConf.get
pyspark.files.SparkFiles.get" class="py-name" href="#" onclick="return doclink('link-58', 'get', 'link-49');">get</a></tt><tt class="py-op">(</tt><tt class="py-string">"PYSPARK_PYTHON"</tt><tt class="py-op">,</tt> <tt class="py-string">'python'</tt><tt class="py-op">)</tt> </tt>
<a name="L145"></a><tt class="py-lineno">145</tt>  <tt class="py-line"> </tt>
<a name="L146"></a><tt class="py-lineno">146</tt>  <tt class="py-line">        <tt class="py-comment"># Broadcast's __reduce__ method stores Broadcast instances here.</tt> </tt>
<a name="L147"></a><tt class="py-lineno">147</tt>  <tt class="py-line">        <tt class="py-comment"># This allows other code to determine which Broadcast instances have</tt> </tt>
<a name="L148"></a><tt class="py-lineno">148</tt>  <tt class="py-line">        <tt class="py-comment"># been pickled, so it can determine which Java broadcast objects to</tt> </tt>
<a name="L149"></a><tt class="py-lineno">149</tt>  <tt class="py-line">        <tt class="py-comment"># send.</tt> </tt>
<a name="L150"></a><tt class="py-lineno">150</tt>  <tt class="py-line">        <tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">_pickled_broadcast_vars</tt> <tt class="py-op">=</tt> <tt id="link-59" class="py-name" targets="Method pyspark.conf.SparkConf.set()=pyspark.conf.SparkConf-class.html#set"><a title="pyspark.conf.SparkConf.set" class="py-name" href="#" onclick="return doclink('link-59', 'set', 'link-59');">set</a></tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
<a name="L151"></a><tt class="py-lineno">151</tt>  <tt class="py-line"> </tt>
<a name="L152"></a><tt class="py-lineno">152</tt>  <tt class="py-line">        <tt id="link-60" class="py-name"><a title="pyspark.files.SparkFiles" class="py-name" href="#" onclick="return doclink('link-60', 'SparkFiles', 'link-13');">SparkFiles</a></tt><tt class="py-op">.</tt><tt id="link-61" class="py-name" targets="Variable pyspark.files.SparkFiles._sc=pyspark.files.SparkFiles-class.html#_sc"><a title="pyspark.files.SparkFiles._sc" class="py-name" href="#" onclick="return doclink('link-61', '_sc', 'link-61');">_sc</a></tt> <tt class="py-op">=</tt> <tt class="py-name">self</tt> </tt>
<a name="L153"></a><tt class="py-lineno">153</tt>  <tt class="py-line">        <tt class="py-name">root_dir</tt> <tt class="py-op">=</tt> <tt id="link-62" class="py-name"><a title="pyspark.files.SparkFiles" class="py-name" href="#" onclick="return doclink('link-62', 'SparkFiles', 'link-13');">SparkFiles</a></tt><tt class="py-op">.</tt><tt id="link-63" class="py-name" targets="Class Method pyspark.files.SparkFiles.getRootDirectory()=pyspark.files.SparkFiles-class.html#getRootDirectory"><a title="pyspark.files.SparkFiles.getRootDirectory" class="py-name" href="#" onclick="return doclink('link-63', 'getRootDirectory', 'link-63');">getRootDirectory</a></tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
<a name="L154"></a><tt class="py-lineno">154</tt>  <tt class="py-line">        <tt class="py-name">sys</tt><tt class="py-op">.</tt><tt class="py-name">path</tt><tt class="py-op">.</tt><tt class="py-name">append</tt><tt class="py-op">(</tt><tt class="py-name">root_dir</tt><tt class="py-op">)</tt> </tt>
<a name="L155"></a><tt class="py-lineno">155</tt>  <tt class="py-line"> </tt>
<a name="L156"></a><tt class="py-lineno">156</tt>  <tt class="py-line">        <tt class="py-comment"># Deploy any code dependencies specified in the constructor</tt> </tt>
<a name="L157"></a><tt class="py-lineno">157</tt>  <tt class="py-line">        <tt class="py-name">self</tt><tt class="py-op">.</tt><tt id="link-64" class="py-name"><a title="pyspark.context.SparkContext._python_includes" class="py-name" href="#" onclick="return doclink('link-64', '_python_includes', 'link-32');">_python_includes</a></tt> <tt class="py-op">=</tt> <tt class="py-name">list</tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
<a name="L158"></a><tt class="py-lineno">158</tt>  <tt class="py-line">        <tt class="py-keyword">for</tt> <tt class="py-name">path</tt> <tt class="py-keyword">in</tt> <tt class="py-op">(</tt><tt class="py-name">pyFiles</tt> <tt class="py-keyword">or</tt> <tt class="py-op">[</tt><tt class="py-op">]</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
<a name="L159"></a><tt class="py-lineno">159</tt>  <tt class="py-line">            <tt class="py-name">self</tt><tt class="py-op">.</tt><tt id="link-65" class="py-name" targets="Method pyspark.context.SparkContext.addPyFile()=pyspark.context.SparkContext-class.html#addPyFile"><a title="pyspark.context.SparkContext.addPyFile" class="py-name" href="#" onclick="return doclink('link-65', 'addPyFile', 'link-65');">addPyFile</a></tt><tt class="py-op">(</tt><tt class="py-name">path</tt><tt class="py-op">)</tt> </tt>
<a name="L160"></a><tt class="py-lineno">160</tt>  <tt class="py-line"> </tt>
<a name="L161"></a><tt class="py-lineno">161</tt>  <tt class="py-line">        <tt class="py-comment"># Deploy code dependencies set by spark-submit; these will already have been added</tt> </tt>
<a name="L162"></a><tt class="py-lineno">162</tt>  <tt class="py-line">        <tt class="py-comment"># with SparkContext.addFile, so we just need to add them to the PYTHONPATH</tt> </tt>
<a name="L163"></a><tt class="py-lineno">163</tt>  <tt class="py-line">        <tt class="py-keyword">for</tt> <tt class="py-name">path</tt> <tt class="py-keyword">in</tt> <tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">_conf</tt><tt class="py-op">.</tt><tt id="link-66" class="py-name"><a title="pyspark.conf.SparkConf.get
pyspark.files.SparkFiles.get" class="py-name" href="#" onclick="return doclink('link-66', 'get', 'link-49');">get</a></tt><tt class="py-op">(</tt><tt class="py-string">"spark.submit.pyFiles"</tt><tt class="py-op">,</tt> <tt class="py-string">""</tt><tt class="py-op">)</tt><tt class="py-op">.</tt><tt class="py-name">split</tt><tt class="py-op">(</tt><tt class="py-string">","</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
<a name="L164"></a><tt class="py-lineno">164</tt>  <tt class="py-line">            <tt class="py-keyword">if</tt> <tt class="py-name">path</tt> <tt class="py-op">!=</tt> <tt class="py-string">""</tt><tt class="py-op">:</tt> </tt>
<a name="L165"></a><tt class="py-lineno">165</tt>  <tt class="py-line">                <tt class="py-op">(</tt><tt class="py-name">dirname</tt><tt class="py-op">,</tt> <tt class="py-name">filename</tt><tt class="py-op">)</tt> <tt class="py-op">=</tt> <tt class="py-name">os</tt><tt class="py-op">.</tt><tt class="py-name">path</tt><tt class="py-op">.</tt><tt class="py-name">split</tt><tt class="py-op">(</tt><tt class="py-name">path</tt><tt class="py-op">)</tt> </tt>
<a name="L166"></a><tt class="py-lineno">166</tt>  <tt class="py-line">                <tt class="py-name">self</tt><tt class="py-op">.</tt><tt id="link-67" class="py-name"><a title="pyspark.context.SparkContext._python_includes" class="py-name" href="#" onclick="return doclink('link-67', '_python_includes', 'link-32');">_python_includes</a></tt><tt class="py-op">.</tt><tt class="py-name">append</tt><tt class="py-op">(</tt><tt class="py-name">filename</tt><tt class="py-op">)</tt> </tt>
<a name="L167"></a><tt class="py-lineno">167</tt>  <tt class="py-line">                <tt class="py-name">sys</tt><tt class="py-op">.</tt><tt class="py-name">path</tt><tt class="py-op">.</tt><tt class="py-name">append</tt><tt class="py-op">(</tt><tt class="py-name">path</tt><tt class="py-op">)</tt> </tt>
<a name="L168"></a><tt class="py-lineno">168</tt>  <tt class="py-line">                <tt class="py-keyword">if</tt> <tt class="py-keyword">not</tt> <tt class="py-name">dirname</tt> <tt class="py-keyword">in</tt> <tt class="py-name">sys</tt><tt class="py-op">.</tt><tt class="py-name">path</tt><tt class="py-op">:</tt> </tt>
<a name="L169"></a><tt class="py-lineno">169</tt>  <tt class="py-line">                    <tt class="py-name">sys</tt><tt class="py-op">.</tt><tt class="py-name">path</tt><tt class="py-op">.</tt><tt class="py-name">append</tt><tt class="py-op">(</tt><tt class="py-name">dirname</tt><tt class="py-op">)</tt> </tt>
<a name="L170"></a><tt class="py-lineno">170</tt>  <tt class="py-line"> </tt>
<a name="L171"></a><tt class="py-lineno">171</tt>  <tt class="py-line">        <tt class="py-comment"># Create a temporary directory inside spark.local.dir:</tt> </tt>
<a name="L172"></a><tt class="py-lineno">172</tt>  <tt class="py-line">        <tt class="py-name">local_dir</tt> <tt class="py-op">=</tt> <tt class="py-name">self</tt><tt class="py-op">.</tt><tt id="link-68" class="py-name"><a title="pyspark.context.SparkContext._jvm" class="py-name" href="#" onclick="return doclink('link-68', '_jvm', 'link-27');">_jvm</a></tt><tt class="py-op">.</tt><tt class="py-name">org</tt><tt class="py-op">.</tt><tt class="py-name">apache</tt><tt class="py-op">.</tt><tt class="py-name">spark</tt><tt class="py-op">.</tt><tt id="link-69" class="py-name"><a title="pyspark.mllib.util" class="py-name" href="#" onclick="return doclink('link-69', 'util', 'link-56');">util</a></tt><tt class="py-op">.</tt><tt class="py-name">Utils</tt><tt class="py-op">.</tt><tt class="py-name">getLocalDir</tt><tt class="py-op">(</tt><tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">_jsc</tt><tt class="py-op">.</tt><tt class="py-name">sc</tt><tt class="py-op">(</tt><tt class="py-op">)</tt><tt class="py-op">.</tt><tt id="link-70" class="py-name"><a title="pyspark.conf" class="py-name" href="#" onclick="return doclink('link-70', 'conf', 'link-9');">conf</a></tt><tt class="py-op">(</tt><tt class="py-op">)</tt><tt class="py-op">)</tt> </tt>
<a name="L173"></a><tt class="py-lineno">173</tt>  <tt class="py-line">        <tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">_temp_dir</tt> <tt class="py-op">=</tt> \ </tt>
<a name="L174"></a><tt class="py-lineno">174</tt>  <tt class="py-line">            <tt class="py-name">self</tt><tt class="py-op">.</tt><tt id="link-71" class="py-name"><a title="pyspark.context.SparkContext._jvm" class="py-name" href="#" onclick="return doclink('link-71', '_jvm', 'link-27');">_jvm</a></tt><tt class="py-op">.</tt><tt class="py-name">org</tt><tt class="py-op">.</tt><tt class="py-name">apache</tt><tt class="py-op">.</tt><tt class="py-name">spark</tt><tt class="py-op">.</tt><tt id="link-72" class="py-name"><a title="pyspark.mllib.util" class="py-name" href="#" onclick="return doclink('link-72', 'util', 'link-56');">util</a></tt><tt class="py-op">.</tt><tt class="py-name">Utils</tt><tt class="py-op">.</tt><tt class="py-name">createTempDir</tt><tt class="py-op">(</tt><tt class="py-name">local_dir</tt><tt class="py-op">)</tt><tt class="py-op">.</tt><tt class="py-name">getAbsolutePath</tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
</div><a name="L175"></a><tt class="py-lineno">175</tt>  <tt class="py-line"> </tt>
<a name="SparkContext._initialize_context"></a><div id="SparkContext._initialize_context-def"><a name="L176"></a><tt class="py-lineno">176</tt> <a class="py-toggle" href="#" id="SparkContext._initialize_context-toggle" onclick="return toggle('SparkContext._initialize_context');">-</a><tt class="py-line">    <tt class="py-keyword">def</tt> <a class="py-def-name" href="pyspark.context.SparkContext-class.html#_initialize_context">_initialize_context</a><tt class="py-op">(</tt><tt class="py-param">self</tt><tt class="py-op">,</tt> <tt class="py-param">jconf</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
</div><div id="SparkContext._initialize_context-collapsed" style="display:none;" pad="+++" indent="++++++++"></div><div id="SparkContext._initialize_context-expanded"><a name="L177"></a><tt class="py-lineno">177</tt>  <tt class="py-line">        <tt class="py-docstring">"""</tt> </tt>
<a name="L178"></a><tt class="py-lineno">178</tt>  <tt class="py-line"><tt class="py-docstring">        Initialize SparkContext in function to allow subclass specific initialization</tt> </tt>
<a name="L179"></a><tt class="py-lineno">179</tt>  <tt class="py-line"><tt class="py-docstring">        """</tt> </tt>
<a name="L180"></a><tt class="py-lineno">180</tt>  <tt class="py-line">        <tt class="py-keyword">return</tt> <tt class="py-name">self</tt><tt class="py-op">.</tt><tt id="link-73" class="py-name"><a title="pyspark.context.SparkContext._jvm" class="py-name" href="#" onclick="return doclink('link-73', '_jvm', 'link-27');">_jvm</a></tt><tt class="py-op">.</tt><tt class="py-name">JavaSparkContext</tt><tt class="py-op">(</tt><tt class="py-name">jconf</tt><tt class="py-op">)</tt> </tt>
</div><a name="L181"></a><tt class="py-lineno">181</tt>  <tt class="py-line"> </tt>
<a name="L182"></a><tt class="py-lineno">182</tt>  <tt class="py-line">    <tt class="py-decorator">@</tt><tt class="py-decorator">classmethod</tt> </tt>
<a name="SparkContext._ensure_initialized"></a><div id="SparkContext._ensure_initialized-def"><a name="L183"></a><tt class="py-lineno">183</tt> <a class="py-toggle" href="#" id="SparkContext._ensure_initialized-toggle" onclick="return toggle('SparkContext._ensure_initialized');">-</a><tt class="py-line">    <tt class="py-keyword">def</tt> <a class="py-def-name" href="pyspark.context.SparkContext-class.html#_ensure_initialized">_ensure_initialized</a><tt class="py-op">(</tt><tt class="py-param">cls</tt><tt class="py-op">,</tt> <tt class="py-param">instance</tt><tt class="py-op">=</tt><tt class="py-name">None</tt><tt class="py-op">,</tt> <tt class="py-param">gateway</tt><tt class="py-op">=</tt><tt class="py-name">None</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
</div><div id="SparkContext._ensure_initialized-collapsed" style="display:none;" pad="+++" indent="++++++++"></div><div id="SparkContext._ensure_initialized-expanded"><a name="L184"></a><tt class="py-lineno">184</tt>  <tt class="py-line">        <tt class="py-docstring">"""</tt> </tt>
<a name="L185"></a><tt class="py-lineno">185</tt>  <tt class="py-line"><tt class="py-docstring">        Checks whether a SparkContext is initialized or not.</tt> </tt>
<a name="L186"></a><tt class="py-lineno">186</tt>  <tt class="py-line"><tt class="py-docstring">        Throws error if a SparkContext is already running.</tt> </tt>
<a name="L187"></a><tt class="py-lineno">187</tt>  <tt class="py-line"><tt class="py-docstring">        """</tt> </tt>
<a name="L188"></a><tt class="py-lineno">188</tt>  <tt class="py-line">        <tt class="py-keyword">with</tt> <tt id="link-74" class="py-name"><a title="pyspark.context.SparkContext" class="py-name" href="#" onclick="return doclink('link-74', 'SparkContext', 'link-36');">SparkContext</a></tt><tt class="py-op">.</tt><tt id="link-75" class="py-name"><a title="pyspark.context.SparkContext._lock" class="py-name" href="#" onclick="return doclink('link-75', '_lock', 'link-31');">_lock</a></tt><tt class="py-op">:</tt> </tt>
<a name="L189"></a><tt class="py-lineno">189</tt>  <tt class="py-line">            <tt class="py-keyword">if</tt> <tt class="py-keyword">not</tt> <tt id="link-76" class="py-name"><a title="pyspark.context.SparkContext" class="py-name" href="#" onclick="return doclink('link-76', 'SparkContext', 'link-36');">SparkContext</a></tt><tt class="py-op">.</tt><tt id="link-77" class="py-name"><a title="pyspark.context.SparkContext._gateway" class="py-name" href="#" onclick="return doclink('link-77', '_gateway', 'link-26');">_gateway</a></tt><tt class="py-op">:</tt> </tt>
<a name="L190"></a><tt class="py-lineno">190</tt>  <tt class="py-line">                <tt id="link-78" class="py-name"><a title="pyspark.context.SparkContext" class="py-name" href="#" onclick="return doclink('link-78', 'SparkContext', 'link-36');">SparkContext</a></tt><tt class="py-op">.</tt><tt id="link-79" class="py-name"><a title="pyspark.context.SparkContext._gateway" class="py-name" href="#" onclick="return doclink('link-79', '_gateway', 'link-26');">_gateway</a></tt> <tt class="py-op">=</tt> <tt class="py-name">gateway</tt> <tt class="py-keyword">or</tt> <tt class="py-name">launch_gateway</tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
<a name="L191"></a><tt class="py-lineno">191</tt>  <tt class="py-line">                <tt id="link-80" class="py-name"><a title="pyspark.context.SparkContext" class="py-name" href="#" onclick="return doclink('link-80', 'SparkContext', 'link-36');">SparkContext</a></tt><tt class="py-op">.</tt><tt id="link-81" class="py-name"><a title="pyspark.context.SparkContext._jvm" class="py-name" href="#" onclick="return doclink('link-81', '_jvm', 'link-27');">_jvm</a></tt> <tt class="py-op">=</tt> <tt id="link-82" class="py-name"><a title="pyspark.context.SparkContext" class="py-name" href="#" onclick="return doclink('link-82', 'SparkContext', 'link-36');">SparkContext</a></tt><tt class="py-op">.</tt><tt id="link-83" class="py-name"><a title="pyspark.context.SparkContext._gateway" class="py-name" href="#" onclick="return doclink('link-83', '_gateway', 'link-26');">_gateway</a></tt><tt class="py-op">.</tt><tt class="py-name">jvm</tt> </tt>
<a name="L192"></a><tt class="py-lineno">192</tt>  <tt class="py-line">                <tt id="link-84" class="py-name"><a title="pyspark.context.SparkContext" class="py-name" href="#" onclick="return doclink('link-84', 'SparkContext', 'link-36');">SparkContext</a></tt><tt class="py-op">.</tt><tt id="link-85" class="py-name"><a title="pyspark.context.SparkContext._writeToFile" class="py-name" href="#" onclick="return doclink('link-85', '_writeToFile', 'link-28');">_writeToFile</a></tt> <tt class="py-op">=</tt> <tt id="link-86" class="py-name"><a title="pyspark.context.SparkContext" class="py-name" href="#" onclick="return doclink('link-86', 'SparkContext', 'link-36');">SparkContext</a></tt><tt class="py-op">.</tt><tt id="link-87" class="py-name"><a title="pyspark.context.SparkContext._jvm" class="py-name" href="#" onclick="return doclink('link-87', '_jvm', 'link-27');">_jvm</a></tt><tt class="py-op">.</tt><tt class="py-name">PythonRDD</tt><tt class="py-op">.</tt><tt class="py-name">writeToFile</tt> </tt>
<a name="L193"></a><tt class="py-lineno">193</tt>  <tt class="py-line"> </tt>
<a name="L194"></a><tt class="py-lineno">194</tt>  <tt class="py-line">            <tt class="py-keyword">if</tt> <tt class="py-name">instance</tt><tt class="py-op">:</tt> </tt>
<a name="L195"></a><tt class="py-lineno">195</tt>  <tt class="py-line">                <tt class="py-keyword">if</tt> <tt id="link-88" class="py-name"><a title="pyspark.context.SparkContext" class="py-name" href="#" onclick="return doclink('link-88', 'SparkContext', 'link-36');">SparkContext</a></tt><tt class="py-op">.</tt><tt id="link-89" class="py-name"><a title="pyspark.context.SparkContext._active_spark_context" class="py-name" href="#" onclick="return doclink('link-89', '_active_spark_context', 'link-30');">_active_spark_context</a></tt> <tt class="py-keyword">and</tt> <tt id="link-90" class="py-name"><a title="pyspark.context.SparkContext" class="py-name" href="#" onclick="return doclink('link-90', 'SparkContext', 'link-36');">SparkContext</a></tt><tt class="py-op">.</tt><tt id="link-91" class="py-name"><a title="pyspark.context.SparkContext._active_spark_context" class="py-name" href="#" onclick="return doclink('link-91', '_active_spark_context', 'link-30');">_active_spark_context</a></tt> <tt class="py-op">!=</tt> <tt class="py-name">instance</tt><tt class="py-op">:</tt> </tt>
<a name="L196"></a><tt class="py-lineno">196</tt>  <tt class="py-line">                    <tt class="py-name">currentMaster</tt> <tt class="py-op">=</tt> <tt id="link-92" class="py-name"><a title="pyspark.context.SparkContext" class="py-name" href="#" onclick="return doclink('link-92', 'SparkContext', 'link-36');">SparkContext</a></tt><tt class="py-op">.</tt><tt id="link-93" class="py-name"><a title="pyspark.context.SparkContext._active_spark_context" class="py-name" href="#" onclick="return doclink('link-93', '_active_spark_context', 'link-30');">_active_spark_context</a></tt><tt class="py-op">.</tt><tt class="py-name">master</tt> </tt>
<a name="L197"></a><tt class="py-lineno">197</tt>  <tt class="py-line">                    <tt class="py-name">currentAppName</tt> <tt class="py-op">=</tt> <tt id="link-94" class="py-name"><a title="pyspark.context.SparkContext" class="py-name" href="#" onclick="return doclink('link-94', 'SparkContext', 'link-36');">SparkContext</a></tt><tt class="py-op">.</tt><tt id="link-95" class="py-name"><a title="pyspark.context.SparkContext._active_spark_context" class="py-name" href="#" onclick="return doclink('link-95', '_active_spark_context', 'link-30');">_active_spark_context</a></tt><tt class="py-op">.</tt><tt class="py-name">appName</tt> </tt>
<a name="L198"></a><tt class="py-lineno">198</tt>  <tt class="py-line">                    <tt class="py-name">callsite</tt> <tt class="py-op">=</tt> <tt id="link-96" class="py-name"><a title="pyspark.context.SparkContext" class="py-name" href="#" onclick="return doclink('link-96', 'SparkContext', 'link-36');">SparkContext</a></tt><tt class="py-op">.</tt><tt id="link-97" class="py-name"><a title="pyspark.context.SparkContext._active_spark_context" class="py-name" href="#" onclick="return doclink('link-97', '_active_spark_context', 'link-30');">_active_spark_context</a></tt><tt class="py-op">.</tt><tt class="py-name">_callsite</tt> </tt>
<a name="L199"></a><tt class="py-lineno">199</tt>  <tt class="py-line"> </tt>
<a name="L200"></a><tt class="py-lineno">200</tt>  <tt class="py-line">                    <tt class="py-comment"># Raise error if there is already a running Spark context</tt> </tt>
<a name="L201"></a><tt class="py-lineno">201</tt>  <tt class="py-line">                    <tt class="py-keyword">raise</tt> <tt class="py-name">ValueError</tt><tt class="py-op">(</tt><tt class="py-string">"Cannot run multiple SparkContexts at once; existing SparkContext(app=%s, master=%s)"</tt> \ </tt>
<a name="L202"></a><tt class="py-lineno">202</tt>  <tt class="py-line">                        <tt class="py-string">" created by %s at %s:%s "</tt> \ </tt>
<a name="L203"></a><tt class="py-lineno">203</tt>  <tt class="py-line">                        <tt class="py-op">%</tt> <tt class="py-op">(</tt><tt class="py-name">currentAppName</tt><tt class="py-op">,</tt> <tt class="py-name">currentMaster</tt><tt class="py-op">,</tt> <tt class="py-name">callsite</tt><tt class="py-op">.</tt><tt class="py-name">function</tt><tt class="py-op">,</tt> <tt class="py-name">callsite</tt><tt class="py-op">.</tt><tt class="py-name">file</tt><tt class="py-op">,</tt> <tt class="py-name">callsite</tt><tt class="py-op">.</tt><tt class="py-name">linenum</tt><tt class="py-op">)</tt><tt class="py-op">)</tt> </tt>
<a name="L204"></a><tt class="py-lineno">204</tt>  <tt class="py-line">                <tt class="py-keyword">else</tt><tt class="py-op">:</tt> </tt>
<a name="L205"></a><tt class="py-lineno">205</tt>  <tt class="py-line">                    <tt id="link-98" class="py-name"><a title="pyspark.context.SparkContext" class="py-name" href="#" onclick="return doclink('link-98', 'SparkContext', 'link-36');">SparkContext</a></tt><tt class="py-op">.</tt><tt id="link-99" class="py-name"><a title="pyspark.context.SparkContext._active_spark_context" class="py-name" href="#" onclick="return doclink('link-99', '_active_spark_context', 'link-30');">_active_spark_context</a></tt> <tt class="py-op">=</tt> <tt class="py-name">instance</tt> </tt>
</div><a name="L206"></a><tt class="py-lineno">206</tt>  <tt class="py-line"> </tt>
<a name="L207"></a><tt class="py-lineno">207</tt>  <tt class="py-line">    <tt class="py-decorator">@</tt><tt class="py-decorator">classmethod</tt> </tt>
<a name="SparkContext.setSystemProperty"></a><div id="SparkContext.setSystemProperty-def"><a name="L208"></a><tt class="py-lineno">208</tt> <a class="py-toggle" href="#" id="SparkContext.setSystemProperty-toggle" onclick="return toggle('SparkContext.setSystemProperty');">-</a><tt class="py-line">    <tt class="py-keyword">def</tt> <a class="py-def-name" href="pyspark.context.SparkContext-class.html#setSystemProperty">setSystemProperty</a><tt class="py-op">(</tt><tt class="py-param">cls</tt><tt class="py-op">,</tt> <tt class="py-param">key</tt><tt class="py-op">,</tt> <tt class="py-param">value</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
</div><div id="SparkContext.setSystemProperty-collapsed" style="display:none;" pad="+++" indent="++++++++"></div><div id="SparkContext.setSystemProperty-expanded"><a name="L209"></a><tt class="py-lineno">209</tt>  <tt class="py-line">        <tt class="py-docstring">"""</tt> </tt>
<a name="L210"></a><tt class="py-lineno">210</tt>  <tt class="py-line"><tt class="py-docstring">        Set a Java system property, such as spark.executor.memory. This must</tt> </tt>
<a name="L211"></a><tt class="py-lineno">211</tt>  <tt class="py-line"><tt class="py-docstring">        must be invoked before instantiating SparkContext.</tt> </tt>
<a name="L212"></a><tt class="py-lineno">212</tt>  <tt class="py-line"><tt class="py-docstring">        """</tt> </tt>
<a name="L213"></a><tt class="py-lineno">213</tt>  <tt class="py-line">        <tt id="link-100" class="py-name"><a title="pyspark.context.SparkContext" class="py-name" href="#" onclick="return doclink('link-100', 'SparkContext', 'link-36');">SparkContext</a></tt><tt class="py-op">.</tt><tt class="py-name">_ensure_initialized</tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
<a name="L214"></a><tt class="py-lineno">214</tt>  <tt class="py-line">        <tt id="link-101" class="py-name"><a title="pyspark.context.SparkContext" class="py-name" href="#" onclick="return doclink('link-101', 'SparkContext', 'link-36');">SparkContext</a></tt><tt class="py-op">.</tt><tt id="link-102" class="py-name"><a title="pyspark.context.SparkContext._jvm" class="py-name" href="#" onclick="return doclink('link-102', '_jvm', 'link-27');">_jvm</a></tt><tt class="py-op">.</tt><tt class="py-name">java</tt><tt class="py-op">.</tt><tt class="py-name">lang</tt><tt class="py-op">.</tt><tt class="py-name">System</tt><tt class="py-op">.</tt><tt class="py-name">setProperty</tt><tt class="py-op">(</tt><tt class="py-name">key</tt><tt class="py-op">,</tt> <tt id="link-103" class="py-name"><a title="pyspark.accumulators.Accumulator.value" class="py-name" href="#" onclick="return doclink('link-103', 'value', 'link-44');">value</a></tt><tt class="py-op">)</tt> </tt>
</div><a name="L215"></a><tt class="py-lineno">215</tt>  <tt class="py-line"> </tt>
<a name="L216"></a><tt class="py-lineno">216</tt>  <tt class="py-line">    <tt class="py-decorator">@</tt><tt class="py-decorator">property</tt> </tt>
<a name="SparkContext.defaultParallelism"></a><div id="SparkContext.defaultParallelism-def"><a name="L217"></a><tt class="py-lineno">217</tt> <a class="py-toggle" href="#" id="SparkContext.defaultParallelism-toggle" onclick="return toggle('SparkContext.defaultParallelism');">-</a><tt class="py-line">    <tt class="py-keyword">def</tt> <a class="py-def-name" href="pyspark.context.SparkContext-class.html#defaultParallelism">defaultParallelism</a><tt class="py-op">(</tt><tt class="py-param">self</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
</div><div id="SparkContext.defaultParallelism-collapsed" style="display:none;" pad="+++" indent="++++++++"></div><div id="SparkContext.defaultParallelism-expanded"><a name="L218"></a><tt class="py-lineno">218</tt>  <tt class="py-line">        <tt class="py-docstring">"""</tt> </tt>
<a name="L219"></a><tt class="py-lineno">219</tt>  <tt class="py-line"><tt class="py-docstring">        Default level of parallelism to use when not given by user (e.g. for</tt> </tt>
<a name="L220"></a><tt class="py-lineno">220</tt>  <tt class="py-line"><tt class="py-docstring">        reduce tasks)</tt> </tt>
<a name="L221"></a><tt class="py-lineno">221</tt>  <tt class="py-line"><tt class="py-docstring">        """</tt> </tt>
<a name="L222"></a><tt class="py-lineno">222</tt>  <tt class="py-line">        <tt class="py-keyword">return</tt> <tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">_jsc</tt><tt class="py-op">.</tt><tt class="py-name">sc</tt><tt class="py-op">(</tt><tt class="py-op">)</tt><tt class="py-op">.</tt><tt id="link-104" class="py-name" targets="Method pyspark.context.SparkContext.defaultParallelism()=pyspark.context.SparkContext-class.html#defaultParallelism"><a title="pyspark.context.SparkContext.defaultParallelism" class="py-name" href="#" onclick="return doclink('link-104', 'defaultParallelism', 'link-104');">defaultParallelism</a></tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
</div><a name="L223"></a><tt class="py-lineno">223</tt>  <tt class="py-line"> </tt>
<a name="L224"></a><tt class="py-lineno">224</tt>  <tt class="py-line">    <tt class="py-decorator">@</tt><tt class="py-decorator">property</tt> </tt>
<a name="SparkContext.defaultMinPartitions"></a><div id="SparkContext.defaultMinPartitions-def"><a name="L225"></a><tt class="py-lineno">225</tt> <a class="py-toggle" href="#" id="SparkContext.defaultMinPartitions-toggle" onclick="return toggle('SparkContext.defaultMinPartitions');">-</a><tt class="py-line">    <tt class="py-keyword">def</tt> <a class="py-def-name" href="pyspark.context.SparkContext-class.html#defaultMinPartitions">defaultMinPartitions</a><tt class="py-op">(</tt><tt class="py-param">self</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
</div><div id="SparkContext.defaultMinPartitions-collapsed" style="display:none;" pad="+++" indent="++++++++"></div><div id="SparkContext.defaultMinPartitions-expanded"><a name="L226"></a><tt class="py-lineno">226</tt>  <tt class="py-line">        <tt class="py-docstring">"""</tt> </tt>
<a name="L227"></a><tt class="py-lineno">227</tt>  <tt class="py-line"><tt class="py-docstring">        Default min number of partitions for Hadoop RDDs when not given by user</tt> </tt>
<a name="L228"></a><tt class="py-lineno">228</tt>  <tt class="py-line"><tt class="py-docstring">        """</tt> </tt>
<a name="L229"></a><tt class="py-lineno">229</tt>  <tt class="py-line">        <tt class="py-keyword">return</tt> <tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">_jsc</tt><tt class="py-op">.</tt><tt class="py-name">sc</tt><tt class="py-op">(</tt><tt class="py-op">)</tt><tt class="py-op">.</tt><tt id="link-105" class="py-name" targets="Method pyspark.context.SparkContext.defaultMinPartitions()=pyspark.context.SparkContext-class.html#defaultMinPartitions"><a title="pyspark.context.SparkContext.defaultMinPartitions" class="py-name" href="#" onclick="return doclink('link-105', 'defaultMinPartitions', 'link-105');">defaultMinPartitions</a></tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
</div><a name="L230"></a><tt class="py-lineno">230</tt>  <tt class="py-line"> </tt>
<a name="SparkContext.__del__"></a><div id="SparkContext.__del__-def"><a name="L231"></a><tt class="py-lineno">231</tt> <a class="py-toggle" href="#" id="SparkContext.__del__-toggle" onclick="return toggle('SparkContext.__del__');">-</a><tt class="py-line">    <tt class="py-keyword">def</tt> <a class="py-def-name" href="pyspark.context.SparkContext-class.html#__del__">__del__</a><tt class="py-op">(</tt><tt class="py-param">self</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
</div><div id="SparkContext.__del__-collapsed" style="display:none;" pad="+++" indent="++++++++"></div><div id="SparkContext.__del__-expanded"><a name="L232"></a><tt class="py-lineno">232</tt>  <tt class="py-line">        <tt class="py-name">self</tt><tt class="py-op">.</tt><tt id="link-106" class="py-name" targets="Method pyspark.context.SparkContext.stop()=pyspark.context.SparkContext-class.html#stop"><a title="pyspark.context.SparkContext.stop" class="py-name" href="#" onclick="return doclink('link-106', 'stop', 'link-106');">stop</a></tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
</div><a name="L233"></a><tt class="py-lineno">233</tt>  <tt class="py-line"> </tt>
<a name="SparkContext.stop"></a><div id="SparkContext.stop-def"><a name="L234"></a><tt class="py-lineno">234</tt> <a class="py-toggle" href="#" id="SparkContext.stop-toggle" onclick="return toggle('SparkContext.stop');">-</a><tt class="py-line">    <tt class="py-keyword">def</tt> <a class="py-def-name" href="pyspark.context.SparkContext-class.html#stop">stop</a><tt class="py-op">(</tt><tt class="py-param">self</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
</div><div id="SparkContext.stop-collapsed" style="display:none;" pad="+++" indent="++++++++"></div><div id="SparkContext.stop-expanded"><a name="L235"></a><tt class="py-lineno">235</tt>  <tt class="py-line">        <tt class="py-docstring">"""</tt> </tt>
<a name="L236"></a><tt class="py-lineno">236</tt>  <tt class="py-line"><tt class="py-docstring">        Shut down the SparkContext.</tt> </tt>
<a name="L237"></a><tt class="py-lineno">237</tt>  <tt class="py-line"><tt class="py-docstring">        """</tt> </tt>
<a name="L238"></a><tt class="py-lineno">238</tt>  <tt class="py-line">        <tt class="py-keyword">if</tt> <tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">_jsc</tt><tt class="py-op">:</tt> </tt>
<a name="L239"></a><tt class="py-lineno">239</tt>  <tt class="py-line">            <tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">_jsc</tt><tt class="py-op">.</tt><tt id="link-107" class="py-name"><a title="pyspark.context.SparkContext.stop" class="py-name" href="#" onclick="return doclink('link-107', 'stop', 'link-106');">stop</a></tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
<a name="L240"></a><tt class="py-lineno">240</tt>  <tt class="py-line">            <tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">_jsc</tt> <tt class="py-op">=</tt> <tt class="py-name">None</tt> </tt>
<a name="L241"></a><tt class="py-lineno">241</tt>  <tt class="py-line">        <tt class="py-keyword">if</tt> <tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">_accumulatorServer</tt><tt class="py-op">:</tt> </tt>
<a name="L242"></a><tt class="py-lineno">242</tt>  <tt class="py-line">            <tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">_accumulatorServer</tt><tt class="py-op">.</tt><tt class="py-name">shutdown</tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
<a name="L243"></a><tt class="py-lineno">243</tt>  <tt class="py-line">            <tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">_accumulatorServer</tt> <tt class="py-op">=</tt> <tt class="py-name">None</tt> </tt>
<a name="L244"></a><tt class="py-lineno">244</tt>  <tt class="py-line">        <tt class="py-keyword">with</tt> <tt id="link-108" class="py-name"><a title="pyspark.context.SparkContext" class="py-name" href="#" onclick="return doclink('link-108', 'SparkContext', 'link-36');">SparkContext</a></tt><tt class="py-op">.</tt><tt id="link-109" class="py-name"><a title="pyspark.context.SparkContext._lock" class="py-name" href="#" onclick="return doclink('link-109', '_lock', 'link-31');">_lock</a></tt><tt class="py-op">:</tt> </tt>
<a name="L245"></a><tt class="py-lineno">245</tt>  <tt class="py-line">            <tt id="link-110" class="py-name"><a title="pyspark.context.SparkContext" class="py-name" href="#" onclick="return doclink('link-110', 'SparkContext', 'link-36');">SparkContext</a></tt><tt class="py-op">.</tt><tt id="link-111" class="py-name"><a title="pyspark.context.SparkContext._active_spark_context" class="py-name" href="#" onclick="return doclink('link-111', '_active_spark_context', 'link-30');">_active_spark_context</a></tt> <tt class="py-op">=</tt> <tt class="py-name">None</tt> </tt>
</div><a name="L246"></a><tt class="py-lineno">246</tt>  <tt class="py-line"> </tt>
<a name="SparkContext.parallelize"></a><div id="SparkContext.parallelize-def"><a name="L247"></a><tt class="py-lineno">247</tt> <a class="py-toggle" href="#" id="SparkContext.parallelize-toggle" onclick="return toggle('SparkContext.parallelize');">-</a><tt class="py-line">    <tt class="py-keyword">def</tt> <a class="py-def-name" href="pyspark.context.SparkContext-class.html#parallelize">parallelize</a><tt class="py-op">(</tt><tt class="py-param">self</tt><tt class="py-op">,</tt> <tt class="py-param">c</tt><tt class="py-op">,</tt> <tt class="py-param">numSlices</tt><tt class="py-op">=</tt><tt class="py-name">None</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
</div><div id="SparkContext.parallelize-collapsed" style="display:none;" pad="+++" indent="++++++++"></div><div id="SparkContext.parallelize-expanded"><a name="L248"></a><tt class="py-lineno">248</tt>  <tt class="py-line">        <tt class="py-docstring">"""</tt> </tt>
<a name="L249"></a><tt class="py-lineno">249</tt>  <tt class="py-line"><tt class="py-docstring">        Distribute a local Python collection to form an RDD.</tt> </tt>
<a name="L250"></a><tt class="py-lineno">250</tt>  <tt class="py-line"><tt class="py-docstring"></tt> </tt>
<a name="L251"></a><tt class="py-lineno">251</tt>  <tt class="py-line"><tt class="py-docstring">        &gt;&gt;&gt; sc.parallelize(range(5), 5).glom().collect()</tt> </tt>
<a name="L252"></a><tt class="py-lineno">252</tt>  <tt class="py-line"><tt class="py-docstring">        [[0], [1], [2], [3], [4]]</tt> </tt>
<a name="L253"></a><tt class="py-lineno">253</tt>  <tt class="py-line"><tt class="py-docstring">        """</tt> </tt>
<a name="L254"></a><tt class="py-lineno">254</tt>  <tt class="py-line">        <tt class="py-name">numSlices</tt> <tt class="py-op">=</tt> <tt class="py-name">numSlices</tt> <tt class="py-keyword">or</tt> <tt class="py-name">self</tt><tt class="py-op">.</tt><tt id="link-112" class="py-name"><a title="pyspark.context.SparkContext.defaultParallelism" class="py-name" href="#" onclick="return doclink('link-112', 'defaultParallelism', 'link-104');">defaultParallelism</a></tt> </tt>
<a name="L255"></a><tt class="py-lineno">255</tt>  <tt class="py-line">        <tt class="py-comment"># Calling the Java parallelize() method with an ArrayList is too slow,</tt> </tt>
<a name="L256"></a><tt class="py-lineno">256</tt>  <tt class="py-line">        <tt class="py-comment"># because it sends O(n) Py4J commands.  As an alternative, serialized</tt> </tt>
<a name="L257"></a><tt class="py-lineno">257</tt>  <tt class="py-line">        <tt class="py-comment"># objects are written to a file and loaded through textFile().</tt> </tt>
<a name="L258"></a><tt class="py-lineno">258</tt>  <tt class="py-line">        <tt class="py-name">tempFile</tt> <tt class="py-op">=</tt> <tt class="py-name">NamedTemporaryFile</tt><tt class="py-op">(</tt><tt class="py-name">delete</tt><tt class="py-op">=</tt><tt class="py-name">False</tt><tt class="py-op">,</tt> <tt class="py-name">dir</tt><tt class="py-op">=</tt><tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">_temp_dir</tt><tt class="py-op">)</tt> </tt>
<a name="L259"></a><tt class="py-lineno">259</tt>  <tt class="py-line">        <tt class="py-comment"># Make sure we distribute data evenly if it's smaller than self.batchSize</tt> </tt>
<a name="L260"></a><tt class="py-lineno">260</tt>  <tt class="py-line">        <tt class="py-keyword">if</tt> <tt class="py-string">"__len__"</tt> <tt class="py-keyword">not</tt> <tt class="py-keyword">in</tt> <tt class="py-name">dir</tt><tt class="py-op">(</tt><tt class="py-name">c</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
<a name="L261"></a><tt class="py-lineno">261</tt>  <tt class="py-line">            <tt class="py-name">c</tt> <tt class="py-op">=</tt> <tt class="py-name">list</tt><tt class="py-op">(</tt><tt class="py-name">c</tt><tt class="py-op">)</tt>    <tt class="py-comment"># Make it a list so we can compute its length</tt> </tt>
<a name="L262"></a><tt class="py-lineno">262</tt>  <tt class="py-line">        <tt class="py-name">batchSize</tt> <tt class="py-op">=</tt> <tt id="link-113" class="py-name" targets="Method pyspark.rdd.RDD.min()=pyspark.rdd.RDD-class.html#min,Method pyspark.statcounter.StatCounter.min()=pyspark.statcounter.StatCounter-class.html#min"><a title="pyspark.rdd.RDD.min
pyspark.statcounter.StatCounter.min" class="py-name" href="#" onclick="return doclink('link-113', 'min', 'link-113');">min</a></tt><tt class="py-op">(</tt><tt class="py-name">len</tt><tt class="py-op">(</tt><tt class="py-name">c</tt><tt class="py-op">)</tt> <tt class="py-op">//</tt> <tt class="py-name">numSlices</tt><tt class="py-op">,</tt> <tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">_batchSize</tt><tt class="py-op">)</tt> </tt>
<a name="L263"></a><tt class="py-lineno">263</tt>  <tt class="py-line">        <tt class="py-keyword">if</tt> <tt class="py-name">batchSize</tt> <tt class="py-op">&gt;</tt> <tt class="py-number">1</tt><tt class="py-op">:</tt> </tt>
<a name="L264"></a><tt class="py-lineno">264</tt>  <tt class="py-line">            <tt class="py-name">serializer</tt> <tt class="py-op">=</tt> <tt class="py-name">BatchedSerializer</tt><tt class="py-op">(</tt><tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">_unbatched_serializer</tt><tt class="py-op">,</tt> </tt>
<a name="L265"></a><tt class="py-lineno">265</tt>  <tt class="py-line">                                           <tt class="py-name">batchSize</tt><tt class="py-op">)</tt> </tt>
<a name="L266"></a><tt class="py-lineno">266</tt>  <tt class="py-line">        <tt class="py-keyword">else</tt><tt class="py-op">:</tt> </tt>
<a name="L267"></a><tt class="py-lineno">267</tt>  <tt class="py-line">            <tt class="py-name">serializer</tt> <tt class="py-op">=</tt> <tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">_unbatched_serializer</tt> </tt>
<a name="L268"></a><tt class="py-lineno">268</tt>  <tt class="py-line">        <tt class="py-name">serializer</tt><tt class="py-op">.</tt><tt class="py-name">dump_stream</tt><tt class="py-op">(</tt><tt class="py-name">c</tt><tt class="py-op">,</tt> <tt class="py-name">tempFile</tt><tt class="py-op">)</tt> </tt>
<a name="L269"></a><tt class="py-lineno">269</tt>  <tt class="py-line">        <tt class="py-name">tempFile</tt><tt class="py-op">.</tt><tt class="py-name">close</tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
<a name="L270"></a><tt class="py-lineno">270</tt>  <tt class="py-line">        <tt class="py-name">readRDDFromFile</tt> <tt class="py-op">=</tt> <tt class="py-name">self</tt><tt class="py-op">.</tt><tt id="link-114" class="py-name"><a title="pyspark.context.SparkContext._jvm" class="py-name" href="#" onclick="return doclink('link-114', '_jvm', 'link-27');">_jvm</a></tt><tt class="py-op">.</tt><tt class="py-name">PythonRDD</tt><tt class="py-op">.</tt><tt class="py-name">readRDDFromFile</tt> </tt>
<a name="L271"></a><tt class="py-lineno">271</tt>  <tt class="py-line">        <tt class="py-name">jrdd</tt> <tt class="py-op">=</tt> <tt class="py-name">readRDDFromFile</tt><tt class="py-op">(</tt><tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">_jsc</tt><tt class="py-op">,</tt> <tt class="py-name">tempFile</tt><tt class="py-op">.</tt><tt id="link-115" class="py-name" targets="Method pyspark.rdd.RDD.name()=pyspark.rdd.RDD-class.html#name"><a title="pyspark.rdd.RDD.name" class="py-name" href="#" onclick="return doclink('link-115', 'name', 'link-115');">name</a></tt><tt class="py-op">,</tt> <tt class="py-name">numSlices</tt><tt class="py-op">)</tt> </tt>
<a name="L272"></a><tt class="py-lineno">272</tt>  <tt class="py-line">        <tt class="py-keyword">return</tt> <tt id="link-116" class="py-name"><a title="pyspark.rdd.RDD" class="py-name" href="#" onclick="return doclink('link-116', 'RDD', 'link-25');">RDD</a></tt><tt class="py-op">(</tt><tt class="py-name">jrdd</tt><tt class="py-op">,</tt> <tt class="py-name">self</tt><tt class="py-op">,</tt> <tt class="py-name">serializer</tt><tt class="py-op">)</tt> </tt>
</div><a name="L273"></a><tt class="py-lineno">273</tt>  <tt class="py-line"> </tt>
<a name="SparkContext.textFile"></a><div id="SparkContext.textFile-def"><a name="L274"></a><tt class="py-lineno">274</tt> <a class="py-toggle" href="#" id="SparkContext.textFile-toggle" onclick="return toggle('SparkContext.textFile');">-</a><tt class="py-line">    <tt class="py-keyword">def</tt> <a class="py-def-name" href="pyspark.context.SparkContext-class.html#textFile">textFile</a><tt class="py-op">(</tt><tt class="py-param">self</tt><tt class="py-op">,</tt> <tt class="py-param">name</tt><tt class="py-op">,</tt> <tt class="py-param">minPartitions</tt><tt class="py-op">=</tt><tt class="py-name">None</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
</div><div id="SparkContext.textFile-collapsed" style="display:none;" pad="+++" indent="++++++++"></div><div id="SparkContext.textFile-expanded"><a name="L275"></a><tt class="py-lineno">275</tt>  <tt class="py-line">        <tt class="py-docstring">"""</tt> </tt>
<a name="L276"></a><tt class="py-lineno">276</tt>  <tt class="py-line"><tt class="py-docstring">        Read a text file from HDFS, a local file system (available on all</tt> </tt>
<a name="L277"></a><tt class="py-lineno">277</tt>  <tt class="py-line"><tt class="py-docstring">        nodes), or any Hadoop-supported file system URI, and return it as an</tt> </tt>
<a name="L278"></a><tt class="py-lineno">278</tt>  <tt class="py-line"><tt class="py-docstring">        RDD of Strings.</tt> </tt>
<a name="L279"></a><tt class="py-lineno">279</tt>  <tt class="py-line"><tt class="py-docstring">        </tt> </tt>
<a name="L280"></a><tt class="py-lineno">280</tt>  <tt class="py-line"><tt class="py-docstring">        &gt;&gt;&gt; path = os.path.join(tempdir, "sample-text.txt")</tt> </tt>
<a name="L281"></a><tt class="py-lineno">281</tt>  <tt class="py-line"><tt class="py-docstring">        &gt;&gt;&gt; with open(path, "w") as testFile:</tt> </tt>
<a name="L282"></a><tt class="py-lineno">282</tt>  <tt class="py-line"><tt class="py-docstring">        ...    testFile.write("Hello world!")</tt> </tt>
<a name="L283"></a><tt class="py-lineno">283</tt>  <tt class="py-line"><tt class="py-docstring">        &gt;&gt;&gt; textFile = sc.textFile(path)</tt> </tt>
<a name="L284"></a><tt class="py-lineno">284</tt>  <tt class="py-line"><tt class="py-docstring">        &gt;&gt;&gt; textFile.collect()</tt> </tt>
<a name="L285"></a><tt class="py-lineno">285</tt>  <tt class="py-line"><tt class="py-docstring">        [u'Hello world!']</tt> </tt>
<a name="L286"></a><tt class="py-lineno">286</tt>  <tt class="py-line"><tt class="py-docstring">        """</tt> </tt>
<a name="L287"></a><tt class="py-lineno">287</tt>  <tt class="py-line">        <tt class="py-name">minPartitions</tt> <tt class="py-op">=</tt> <tt class="py-name">minPartitions</tt> <tt class="py-keyword">or</tt> <tt id="link-117" class="py-name"><a title="pyspark.rdd.RDD.min
pyspark.statcounter.StatCounter.min" class="py-name" href="#" onclick="return doclink('link-117', 'min', 'link-113');">min</a></tt><tt class="py-op">(</tt><tt class="py-name">self</tt><tt class="py-op">.</tt><tt id="link-118" class="py-name"><a title="pyspark.context.SparkContext.defaultParallelism" class="py-name" href="#" onclick="return doclink('link-118', 'defaultParallelism', 'link-104');">defaultParallelism</a></tt><tt class="py-op">,</tt> <tt class="py-number">2</tt><tt class="py-op">)</tt> </tt>
<a name="L288"></a><tt class="py-lineno">288</tt>  <tt class="py-line">        <tt class="py-keyword">return</tt> <tt id="link-119" class="py-name"><a title="pyspark.rdd.RDD" class="py-name" href="#" onclick="return doclink('link-119', 'RDD', 'link-25');">RDD</a></tt><tt class="py-op">(</tt><tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">_jsc</tt><tt class="py-op">.</tt><tt id="link-120" class="py-name" targets="Method pyspark.context.SparkContext.textFile()=pyspark.context.SparkContext-class.html#textFile"><a title="pyspark.context.SparkContext.textFile" class="py-name" href="#" onclick="return doclink('link-120', 'textFile', 'link-120');">textFile</a></tt><tt class="py-op">(</tt><tt id="link-121" class="py-name"><a title="pyspark.rdd.RDD.name" class="py-name" href="#" onclick="return doclink('link-121', 'name', 'link-115');">name</a></tt><tt class="py-op">,</tt> <tt class="py-name">minPartitions</tt><tt class="py-op">)</tt><tt class="py-op">,</tt> <tt class="py-name">self</tt><tt class="py-op">,</tt> </tt>
<a name="L289"></a><tt class="py-lineno">289</tt>  <tt class="py-line">                   <tt class="py-name">UTF8Deserializer</tt><tt class="py-op">(</tt><tt class="py-op">)</tt><tt class="py-op">)</tt> </tt>
</div><a name="L290"></a><tt class="py-lineno">290</tt>  <tt class="py-line"> </tt>
<a name="SparkContext.wholeTextFiles"></a><div id="SparkContext.wholeTextFiles-def"><a name="L291"></a><tt class="py-lineno">291</tt> <a class="py-toggle" href="#" id="SparkContext.wholeTextFiles-toggle" onclick="return toggle('SparkContext.wholeTextFiles');">-</a><tt class="py-line">    <tt class="py-keyword">def</tt> <a class="py-def-name" href="pyspark.context.SparkContext-class.html#wholeTextFiles">wholeTextFiles</a><tt class="py-op">(</tt><tt class="py-param">self</tt><tt class="py-op">,</tt> <tt class="py-param">path</tt><tt class="py-op">,</tt> <tt class="py-param">minPartitions</tt><tt class="py-op">=</tt><tt class="py-name">None</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
</div><div id="SparkContext.wholeTextFiles-collapsed" style="display:none;" pad="+++" indent="++++++++"></div><div id="SparkContext.wholeTextFiles-expanded"><a name="L292"></a><tt class="py-lineno">292</tt>  <tt class="py-line">        <tt class="py-docstring">"""</tt> </tt>
<a name="L293"></a><tt class="py-lineno">293</tt>  <tt class="py-line"><tt class="py-docstring">        Read a directory of text files from HDFS, a local file system</tt> </tt>
<a name="L294"></a><tt class="py-lineno">294</tt>  <tt class="py-line"><tt class="py-docstring">        (available on all nodes), or any  Hadoop-supported file system</tt> </tt>
<a name="L295"></a><tt class="py-lineno">295</tt>  <tt class="py-line"><tt class="py-docstring">        URI. Each file is read as a single record and returned in a</tt> </tt>
<a name="L296"></a><tt class="py-lineno">296</tt>  <tt class="py-line"><tt class="py-docstring">        key-value pair, where the key is the path of each file, the</tt> </tt>
<a name="L297"></a><tt class="py-lineno">297</tt>  <tt class="py-line"><tt class="py-docstring">        value is the content of each file.</tt> </tt>
<a name="L298"></a><tt class="py-lineno">298</tt>  <tt class="py-line"><tt class="py-docstring"></tt> </tt>
<a name="L299"></a><tt class="py-lineno">299</tt>  <tt class="py-line"><tt class="py-docstring">        For example, if you have the following files::</tt> </tt>
<a name="L300"></a><tt class="py-lineno">300</tt>  <tt class="py-line"><tt class="py-docstring"></tt> </tt>
<a name="L301"></a><tt class="py-lineno">301</tt>  <tt class="py-line"><tt class="py-docstring">          hdfs://a-hdfs-path/part-00000</tt> </tt>
<a name="L302"></a><tt class="py-lineno">302</tt>  <tt class="py-line"><tt class="py-docstring">          hdfs://a-hdfs-path/part-00001</tt> </tt>
<a name="L303"></a><tt class="py-lineno">303</tt>  <tt class="py-line"><tt class="py-docstring">          ...</tt> </tt>
<a name="L304"></a><tt class="py-lineno">304</tt>  <tt class="py-line"><tt class="py-docstring">          hdfs://a-hdfs-path/part-nnnnn</tt> </tt>
<a name="L305"></a><tt class="py-lineno">305</tt>  <tt class="py-line"><tt class="py-docstring"></tt> </tt>
<a name="L306"></a><tt class="py-lineno">306</tt>  <tt class="py-line"><tt class="py-docstring">        Do C{rdd = sparkContext.wholeTextFiles("hdfs://a-hdfs-path")},</tt> </tt>
<a name="L307"></a><tt class="py-lineno">307</tt>  <tt class="py-line"><tt class="py-docstring">        then C{rdd} contains::</tt> </tt>
<a name="L308"></a><tt class="py-lineno">308</tt>  <tt class="py-line"><tt class="py-docstring"></tt> </tt>
<a name="L309"></a><tt class="py-lineno">309</tt>  <tt class="py-line"><tt class="py-docstring">          (a-hdfs-path/part-00000, its content)</tt> </tt>
<a name="L310"></a><tt class="py-lineno">310</tt>  <tt class="py-line"><tt class="py-docstring">          (a-hdfs-path/part-00001, its content)</tt> </tt>
<a name="L311"></a><tt class="py-lineno">311</tt>  <tt class="py-line"><tt class="py-docstring">          ...</tt> </tt>
<a name="L312"></a><tt class="py-lineno">312</tt>  <tt class="py-line"><tt class="py-docstring">          (a-hdfs-path/part-nnnnn, its content)</tt> </tt>
<a name="L313"></a><tt class="py-lineno">313</tt>  <tt class="py-line"><tt class="py-docstring"></tt> </tt>
<a name="L314"></a><tt class="py-lineno">314</tt>  <tt class="py-line"><tt class="py-docstring">        NOTE: Small files are preferred, as each file will be loaded</tt> </tt>
<a name="L315"></a><tt class="py-lineno">315</tt>  <tt class="py-line"><tt class="py-docstring">        fully in memory.</tt> </tt>
<a name="L316"></a><tt class="py-lineno">316</tt>  <tt class="py-line"><tt class="py-docstring"></tt> </tt>
<a name="L317"></a><tt class="py-lineno">317</tt>  <tt class="py-line"><tt class="py-docstring">        &gt;&gt;&gt; dirPath = os.path.join(tempdir, "files")</tt> </tt>
<a name="L318"></a><tt class="py-lineno">318</tt>  <tt class="py-line"><tt class="py-docstring">        &gt;&gt;&gt; os.mkdir(dirPath)</tt> </tt>
<a name="L319"></a><tt class="py-lineno">319</tt>  <tt class="py-line"><tt class="py-docstring">        &gt;&gt;&gt; with open(os.path.join(dirPath, "1.txt"), "w") as file1:</tt> </tt>
<a name="L320"></a><tt class="py-lineno">320</tt>  <tt class="py-line"><tt class="py-docstring">        ...    file1.write("1")</tt> </tt>
<a name="L321"></a><tt class="py-lineno">321</tt>  <tt class="py-line"><tt class="py-docstring">        &gt;&gt;&gt; with open(os.path.join(dirPath, "2.txt"), "w") as file2:</tt> </tt>
<a name="L322"></a><tt class="py-lineno">322</tt>  <tt class="py-line"><tt class="py-docstring">        ...    file2.write("2")</tt> </tt>
<a name="L323"></a><tt class="py-lineno">323</tt>  <tt class="py-line"><tt class="py-docstring">        &gt;&gt;&gt; textFiles = sc.wholeTextFiles(dirPath)</tt> </tt>
<a name="L324"></a><tt class="py-lineno">324</tt>  <tt class="py-line"><tt class="py-docstring">        &gt;&gt;&gt; sorted(textFiles.collect())</tt> </tt>
<a name="L325"></a><tt class="py-lineno">325</tt>  <tt class="py-line"><tt class="py-docstring">        [(u'.../1.txt', u'1'), (u'.../2.txt', u'2')]</tt> </tt>
<a name="L326"></a><tt class="py-lineno">326</tt>  <tt class="py-line"><tt class="py-docstring">        """</tt> </tt>
<a name="L327"></a><tt class="py-lineno">327</tt>  <tt class="py-line">        <tt class="py-name">minPartitions</tt> <tt class="py-op">=</tt> <tt class="py-name">minPartitions</tt> <tt class="py-keyword">or</tt> <tt class="py-name">self</tt><tt class="py-op">.</tt><tt id="link-122" class="py-name"><a title="pyspark.context.SparkContext.defaultMinPartitions" class="py-name" href="#" onclick="return doclink('link-122', 'defaultMinPartitions', 'link-105');">defaultMinPartitions</a></tt> </tt>
<a name="L328"></a><tt class="py-lineno">328</tt>  <tt class="py-line">        <tt class="py-keyword">return</tt> <tt id="link-123" class="py-name"><a title="pyspark.rdd.RDD" class="py-name" href="#" onclick="return doclink('link-123', 'RDD', 'link-25');">RDD</a></tt><tt class="py-op">(</tt><tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">_jsc</tt><tt class="py-op">.</tt><tt id="link-124" class="py-name" targets="Method pyspark.context.SparkContext.wholeTextFiles()=pyspark.context.SparkContext-class.html#wholeTextFiles"><a title="pyspark.context.SparkContext.wholeTextFiles" class="py-name" href="#" onclick="return doclink('link-124', 'wholeTextFiles', 'link-124');">wholeTextFiles</a></tt><tt class="py-op">(</tt><tt class="py-name">path</tt><tt class="py-op">,</tt> <tt class="py-name">minPartitions</tt><tt class="py-op">)</tt><tt class="py-op">,</tt> <tt class="py-name">self</tt><tt class="py-op">,</tt> </tt>
<a name="L329"></a><tt class="py-lineno">329</tt>  <tt class="py-line">                   <tt class="py-name">PairDeserializer</tt><tt class="py-op">(</tt><tt class="py-name">UTF8Deserializer</tt><tt class="py-op">(</tt><tt class="py-op">)</tt><tt class="py-op">,</tt> <tt class="py-name">UTF8Deserializer</tt><tt class="py-op">(</tt><tt class="py-op">)</tt><tt class="py-op">)</tt><tt class="py-op">)</tt> </tt>
</div><a name="L330"></a><tt class="py-lineno">330</tt>  <tt class="py-line"> </tt>
<a name="SparkContext._checkpointFile"></a><div id="SparkContext._checkpointFile-def"><a name="L331"></a><tt class="py-lineno">331</tt> <a class="py-toggle" href="#" id="SparkContext._checkpointFile-toggle" onclick="return toggle('SparkContext._checkpointFile');">-</a><tt class="py-line">    <tt class="py-keyword">def</tt> <a class="py-def-name" href="pyspark.context.SparkContext-class.html#_checkpointFile">_checkpointFile</a><tt class="py-op">(</tt><tt class="py-param">self</tt><tt class="py-op">,</tt> <tt class="py-param">name</tt><tt class="py-op">,</tt> <tt class="py-param">input_deserializer</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
</div><div id="SparkContext._checkpointFile-collapsed" style="display:none;" pad="+++" indent="++++++++"></div><div id="SparkContext._checkpointFile-expanded"><a name="L332"></a><tt class="py-lineno">332</tt>  <tt class="py-line">        <tt class="py-name">jrdd</tt> <tt class="py-op">=</tt> <tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">_jsc</tt><tt class="py-op">.</tt><tt class="py-name">checkpointFile</tt><tt class="py-op">(</tt><tt id="link-125" class="py-name"><a title="pyspark.rdd.RDD.name" class="py-name" href="#" onclick="return doclink('link-125', 'name', 'link-115');">name</a></tt><tt class="py-op">)</tt> </tt>
<a name="L333"></a><tt class="py-lineno">333</tt>  <tt class="py-line">        <tt class="py-keyword">return</tt> <tt id="link-126" class="py-name"><a title="pyspark.rdd.RDD" class="py-name" href="#" onclick="return doclink('link-126', 'RDD', 'link-25');">RDD</a></tt><tt class="py-op">(</tt><tt class="py-name">jrdd</tt><tt class="py-op">,</tt> <tt class="py-name">self</tt><tt class="py-op">,</tt> <tt class="py-name">input_deserializer</tt><tt class="py-op">)</tt> </tt>
</div><a name="L334"></a><tt class="py-lineno">334</tt>  <tt class="py-line"> </tt>
<a name="SparkContext.union"></a><div id="SparkContext.union-def"><a name="L335"></a><tt class="py-lineno">335</tt> <a class="py-toggle" href="#" id="SparkContext.union-toggle" onclick="return toggle('SparkContext.union');">-</a><tt class="py-line">    <tt class="py-keyword">def</tt> <a class="py-def-name" href="pyspark.context.SparkContext-class.html#union">union</a><tt class="py-op">(</tt><tt class="py-param">self</tt><tt class="py-op">,</tt> <tt class="py-param">rdds</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
</div><div id="SparkContext.union-collapsed" style="display:none;" pad="+++" indent="++++++++"></div><div id="SparkContext.union-expanded"><a name="L336"></a><tt class="py-lineno">336</tt>  <tt class="py-line">        <tt class="py-docstring">"""</tt> </tt>
<a name="L337"></a><tt class="py-lineno">337</tt>  <tt class="py-line"><tt class="py-docstring">        Build the union of a list of RDDs.</tt> </tt>
<a name="L338"></a><tt class="py-lineno">338</tt>  <tt class="py-line"><tt class="py-docstring"></tt> </tt>
<a name="L339"></a><tt class="py-lineno">339</tt>  <tt class="py-line"><tt class="py-docstring">        This supports unions() of RDDs with different serialized formats,</tt> </tt>
<a name="L340"></a><tt class="py-lineno">340</tt>  <tt class="py-line"><tt class="py-docstring">        although this forces them to be reserialized using the default</tt> </tt>
<a name="L341"></a><tt class="py-lineno">341</tt>  <tt class="py-line"><tt class="py-docstring">        serializer:</tt> </tt>
<a name="L342"></a><tt class="py-lineno">342</tt>  <tt class="py-line"><tt class="py-docstring"></tt> </tt>
<a name="L343"></a><tt class="py-lineno">343</tt>  <tt class="py-line"><tt class="py-docstring">        &gt;&gt;&gt; path = os.path.join(tempdir, "union-text.txt")</tt> </tt>
<a name="L344"></a><tt class="py-lineno">344</tt>  <tt class="py-line"><tt class="py-docstring">        &gt;&gt;&gt; with open(path, "w") as testFile:</tt> </tt>
<a name="L345"></a><tt class="py-lineno">345</tt>  <tt class="py-line"><tt class="py-docstring">        ...    testFile.write("Hello")</tt> </tt>
<a name="L346"></a><tt class="py-lineno">346</tt>  <tt class="py-line"><tt class="py-docstring">        &gt;&gt;&gt; textFile = sc.textFile(path)</tt> </tt>
<a name="L347"></a><tt class="py-lineno">347</tt>  <tt class="py-line"><tt class="py-docstring">        &gt;&gt;&gt; textFile.collect()</tt> </tt>
<a name="L348"></a><tt class="py-lineno">348</tt>  <tt class="py-line"><tt class="py-docstring">        [u'Hello']</tt> </tt>
<a name="L349"></a><tt class="py-lineno">349</tt>  <tt class="py-line"><tt class="py-docstring">        &gt;&gt;&gt; parallelized = sc.parallelize(["World!"])</tt> </tt>
<a name="L350"></a><tt class="py-lineno">350</tt>  <tt class="py-line"><tt class="py-docstring">        &gt;&gt;&gt; sorted(sc.union([textFile, parallelized]).collect())</tt> </tt>
<a name="L351"></a><tt class="py-lineno">351</tt>  <tt class="py-line"><tt class="py-docstring">        [u'Hello', 'World!']</tt> </tt>
<a name="L352"></a><tt class="py-lineno">352</tt>  <tt class="py-line"><tt class="py-docstring">        """</tt> </tt>
<a name="L353"></a><tt class="py-lineno">353</tt>  <tt class="py-line">        <tt class="py-name">first_jrdd_deserializer</tt> <tt class="py-op">=</tt> <tt class="py-name">rdds</tt><tt class="py-op">[</tt><tt class="py-number">0</tt><tt class="py-op">]</tt><tt class="py-op">.</tt><tt class="py-name">_jrdd_deserializer</tt> </tt>
<a name="L354"></a><tt class="py-lineno">354</tt>  <tt class="py-line">        <tt class="py-keyword">if</tt> <tt class="py-name">any</tt><tt class="py-op">(</tt><tt class="py-name">x</tt><tt class="py-op">.</tt><tt class="py-name">_jrdd_deserializer</tt> <tt class="py-op">!=</tt> <tt class="py-name">first_jrdd_deserializer</tt> <tt class="py-keyword">for</tt> <tt class="py-name">x</tt> <tt class="py-keyword">in</tt> <tt class="py-name">rdds</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
<a name="L355"></a><tt class="py-lineno">355</tt>  <tt class="py-line">            <tt class="py-name">rdds</tt> <tt class="py-op">=</tt> <tt class="py-op">[</tt><tt class="py-name">x</tt><tt class="py-op">.</tt><tt class="py-name">_reserialize</tt><tt class="py-op">(</tt><tt class="py-op">)</tt> <tt class="py-keyword">for</tt> <tt class="py-name">x</tt> <tt class="py-keyword">in</tt> <tt class="py-name">rdds</tt><tt class="py-op">]</tt> </tt>
<a name="L356"></a><tt class="py-lineno">356</tt>  <tt class="py-line">        <tt id="link-127" class="py-name" targets="Method pyspark.rdd.RDD.first()=pyspark.rdd.RDD-class.html#first"><a title="pyspark.rdd.RDD.first" class="py-name" href="#" onclick="return doclink('link-127', 'first', 'link-127');">first</a></tt> <tt class="py-op">=</tt> <tt class="py-name">rdds</tt><tt class="py-op">[</tt><tt class="py-number">0</tt><tt class="py-op">]</tt><tt class="py-op">.</tt><tt class="py-name">_jrdd</tt> </tt>
<a name="L357"></a><tt class="py-lineno">357</tt>  <tt class="py-line">        <tt class="py-name">rest</tt> <tt class="py-op">=</tt> <tt class="py-op">[</tt><tt class="py-name">x</tt><tt class="py-op">.</tt><tt class="py-name">_jrdd</tt> <tt class="py-keyword">for</tt> <tt class="py-name">x</tt> <tt class="py-keyword">in</tt> <tt class="py-name">rdds</tt><tt class="py-op">[</tt><tt class="py-number">1</tt><tt class="py-op">:</tt><tt class="py-op">]</tt><tt class="py-op">]</tt> </tt>
<a name="L358"></a><tt class="py-lineno">358</tt>  <tt class="py-line">        <tt class="py-name">rest</tt> <tt class="py-op">=</tt> <tt class="py-name">ListConverter</tt><tt class="py-op">(</tt><tt class="py-op">)</tt><tt class="py-op">.</tt><tt class="py-name">convert</tt><tt class="py-op">(</tt><tt class="py-name">rest</tt><tt class="py-op">,</tt> <tt class="py-name">self</tt><tt class="py-op">.</tt><tt id="link-128" class="py-name"><a title="pyspark.context.SparkContext._gateway" class="py-name" href="#" onclick="return doclink('link-128', '_gateway', 'link-26');">_gateway</a></tt><tt class="py-op">.</tt><tt class="py-name">_gateway_client</tt><tt class="py-op">)</tt> </tt>
<a name="L359"></a><tt class="py-lineno">359</tt>  <tt class="py-line">        <tt class="py-keyword">return</tt> <tt id="link-129" class="py-name"><a title="pyspark.rdd.RDD" class="py-name" href="#" onclick="return doclink('link-129', 'RDD', 'link-25');">RDD</a></tt><tt class="py-op">(</tt><tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">_jsc</tt><tt class="py-op">.</tt><tt id="link-130" class="py-name" targets="Method pyspark.context.SparkContext.union()=pyspark.context.SparkContext-class.html#union,Method pyspark.rdd.RDD.union()=pyspark.rdd.RDD-class.html#union"><a title="pyspark.context.SparkContext.union
pyspark.rdd.RDD.union" class="py-name" href="#" onclick="return doclink('link-130', 'union', 'link-130');">union</a></tt><tt class="py-op">(</tt><tt id="link-131" class="py-name"><a title="pyspark.rdd.RDD.first" class="py-name" href="#" onclick="return doclink('link-131', 'first', 'link-127');">first</a></tt><tt class="py-op">,</tt> <tt class="py-name">rest</tt><tt class="py-op">)</tt><tt class="py-op">,</tt> <tt class="py-name">self</tt><tt class="py-op">,</tt> </tt>
<a name="L360"></a><tt class="py-lineno">360</tt>  <tt class="py-line">                   <tt class="py-name">rdds</tt><tt class="py-op">[</tt><tt class="py-number">0</tt><tt class="py-op">]</tt><tt class="py-op">.</tt><tt class="py-name">_jrdd_deserializer</tt><tt class="py-op">)</tt> </tt>
</div><a name="L361"></a><tt class="py-lineno">361</tt>  <tt class="py-line"> </tt>
<a name="SparkContext.broadcast"></a><div id="SparkContext.broadcast-def"><a name="L362"></a><tt class="py-lineno">362</tt> <a class="py-toggle" href="#" id="SparkContext.broadcast-toggle" onclick="return toggle('SparkContext.broadcast');">-</a><tt class="py-line">    <tt class="py-keyword">def</tt> <a class="py-def-name" href="pyspark.context.SparkContext-class.html#broadcast">broadcast</a><tt class="py-op">(</tt><tt class="py-param">self</tt><tt class="py-op">,</tt> <tt class="py-param">value</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
</div><div id="SparkContext.broadcast-collapsed" style="display:none;" pad="+++" indent="++++++++"></div><div id="SparkContext.broadcast-expanded"><a name="L363"></a><tt class="py-lineno">363</tt>  <tt class="py-line">        <tt class="py-docstring">"""</tt> </tt>
<a name="L364"></a><tt class="py-lineno">364</tt>  <tt class="py-line"><tt class="py-docstring">        Broadcast a read-only variable to the cluster, returning a</tt> </tt>
<a name="L365"></a><tt class="py-lineno">365</tt>  <tt class="py-line"><tt class="py-docstring">        L{Broadcast&lt;pyspark.broadcast.Broadcast&gt;}</tt> </tt>
<a name="L366"></a><tt class="py-lineno">366</tt>  <tt class="py-line"><tt class="py-docstring">        object for reading it in distributed functions. The variable will be</tt> </tt>
<a name="L367"></a><tt class="py-lineno">367</tt>  <tt class="py-line"><tt class="py-docstring">        sent to each cluster only once.</tt> </tt>
<a name="L368"></a><tt class="py-lineno">368</tt>  <tt class="py-line"><tt class="py-docstring">        """</tt> </tt>
<a name="L369"></a><tt class="py-lineno">369</tt>  <tt class="py-line">        <tt id="link-132" class="py-name" targets="Variable pyspark.accumulators.pickleSer=pyspark.accumulators-module.html#pickleSer"><a title="pyspark.accumulators.pickleSer" class="py-name" href="#" onclick="return doclink('link-132', 'pickleSer', 'link-132');">pickleSer</a></tt> <tt class="py-op">=</tt> <tt id="link-133" class="py-name"><a title="pyspark.serializers.PickleSerializer" class="py-name" href="#" onclick="return doclink('link-133', 'PickleSerializer', 'link-17');">PickleSerializer</a></tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
<a name="L370"></a><tt class="py-lineno">370</tt>  <tt class="py-line">        <tt class="py-name">pickled</tt> <tt class="py-op">=</tt> <tt id="link-134" class="py-name"><a title="pyspark.accumulators.pickleSer" class="py-name" href="#" onclick="return doclink('link-134', 'pickleSer', 'link-132');">pickleSer</a></tt><tt class="py-op">.</tt><tt id="link-135" class="py-name" targets="Variable pyspark.serializers.MarshalSerializer.dumps=pyspark.serializers.MarshalSerializer-class.html#dumps,Method pyspark.serializers.PickleSerializer.dumps()=pyspark.serializers.PickleSerializer-class.html#dumps"><a title="pyspark.serializers.MarshalSerializer.dumps
pyspark.serializers.PickleSerializer.dumps" class="py-name" href="#" onclick="return doclink('link-135', 'dumps', 'link-135');">dumps</a></tt><tt class="py-op">(</tt><tt id="link-136" class="py-name"><a title="pyspark.accumulators.Accumulator.value" class="py-name" href="#" onclick="return doclink('link-136', 'value', 'link-44');">value</a></tt><tt class="py-op">)</tt> </tt>
<a name="L371"></a><tt class="py-lineno">371</tt>  <tt class="py-line">        <tt class="py-name">jbroadcast</tt> <tt class="py-op">=</tt> <tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">_jsc</tt><tt class="py-op">.</tt><tt id="link-137" class="py-name"><a title="pyspark.broadcast
pyspark.context.SparkContext.broadcast" class="py-name" href="#" onclick="return doclink('link-137', 'broadcast', 'link-6');">broadcast</a></tt><tt class="py-op">(</tt><tt class="py-name">bytearray</tt><tt class="py-op">(</tt><tt class="py-name">pickled</tt><tt class="py-op">)</tt><tt class="py-op">)</tt> </tt>
<a name="L372"></a><tt class="py-lineno">372</tt>  <tt class="py-line">        <tt class="py-keyword">return</tt> <tt id="link-138" class="py-name"><a title="pyspark.broadcast.Broadcast" class="py-name" href="#" onclick="return doclink('link-138', 'Broadcast', 'link-7');">Broadcast</a></tt><tt class="py-op">(</tt><tt class="py-name">jbroadcast</tt><tt class="py-op">.</tt><tt id="link-139" class="py-name" targets="Method pyspark.rdd.RDD.id()=pyspark.rdd.RDD-class.html#id"><a title="pyspark.rdd.RDD.id" class="py-name" href="#" onclick="return doclink('link-139', 'id', 'link-139');">id</a></tt><tt class="py-op">(</tt><tt class="py-op">)</tt><tt class="py-op">,</tt> <tt id="link-140" class="py-name"><a title="pyspark.accumulators.Accumulator.value" class="py-name" href="#" onclick="return doclink('link-140', 'value', 'link-44');">value</a></tt><tt class="py-op">,</tt> <tt class="py-name">jbroadcast</tt><tt class="py-op">,</tt> </tt>
<a name="L373"></a><tt class="py-lineno">373</tt>  <tt class="py-line">                         <tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">_pickled_broadcast_vars</tt><tt class="py-op">)</tt> </tt>
</div><a name="L374"></a><tt class="py-lineno">374</tt>  <tt class="py-line"> </tt>
<a name="SparkContext.accumulator"></a><div id="SparkContext.accumulator-def"><a name="L375"></a><tt class="py-lineno">375</tt> <a class="py-toggle" href="#" id="SparkContext.accumulator-toggle" onclick="return toggle('SparkContext.accumulator');">-</a><tt class="py-line">    <tt class="py-keyword">def</tt> <a class="py-def-name" href="pyspark.context.SparkContext-class.html#accumulator">accumulator</a><tt class="py-op">(</tt><tt class="py-param">self</tt><tt class="py-op">,</tt> <tt class="py-param">value</tt><tt class="py-op">,</tt> <tt class="py-param">accum_param</tt><tt class="py-op">=</tt><tt class="py-name">None</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
</div><div id="SparkContext.accumulator-collapsed" style="display:none;" pad="+++" indent="++++++++"></div><div id="SparkContext.accumulator-expanded"><a name="L376"></a><tt class="py-lineno">376</tt>  <tt class="py-line">        <tt class="py-docstring">"""</tt> </tt>
<a name="L377"></a><tt class="py-lineno">377</tt>  <tt class="py-line"><tt class="py-docstring">        Create an L{Accumulator} with the given initial value, using a given</tt> </tt>
<a name="L378"></a><tt class="py-lineno">378</tt>  <tt class="py-line"><tt class="py-docstring">        L{AccumulatorParam} helper object to define how to add values of the</tt> </tt>
<a name="L379"></a><tt class="py-lineno">379</tt>  <tt class="py-line"><tt class="py-docstring">        data type if provided. Default AccumulatorParams are used for integers</tt> </tt>
<a name="L380"></a><tt class="py-lineno">380</tt>  <tt class="py-line"><tt class="py-docstring">        and floating-point numbers if you do not provide one. For other types,</tt> </tt>
<a name="L381"></a><tt class="py-lineno">381</tt>  <tt class="py-line"><tt class="py-docstring">        a custom AccumulatorParam can be used.</tt> </tt>
<a name="L382"></a><tt class="py-lineno">382</tt>  <tt class="py-line"><tt class="py-docstring">        """</tt> </tt>
<a name="L383"></a><tt class="py-lineno">383</tt>  <tt class="py-line">        <tt class="py-keyword">if</tt> <tt class="py-name">accum_param</tt> <tt class="py-keyword">is</tt> <tt class="py-name">None</tt><tt class="py-op">:</tt> </tt>
<a name="L384"></a><tt class="py-lineno">384</tt>  <tt class="py-line">            <tt class="py-keyword">if</tt> <tt class="py-name">isinstance</tt><tt class="py-op">(</tt><tt id="link-141" class="py-name"><a title="pyspark.accumulators.Accumulator.value" class="py-name" href="#" onclick="return doclink('link-141', 'value', 'link-44');">value</a></tt><tt class="py-op">,</tt> <tt class="py-name">int</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
<a name="L385"></a><tt class="py-lineno">385</tt>  <tt class="py-line">                <tt class="py-name">accum_param</tt> <tt class="py-op">=</tt> <tt id="link-142" class="py-name"><a title="pyspark.accumulators" class="py-name" href="#" onclick="return doclink('link-142', 'accumulators', 'link-1');">accumulators</a></tt><tt class="py-op">.</tt><tt id="link-143" class="py-name" targets="Variable pyspark.accumulators.INT_ACCUMULATOR_PARAM=pyspark.accumulators-module.html#INT_ACCUMULATOR_PARAM"><a title="pyspark.accumulators.INT_ACCUMULATOR_PARAM" class="py-name" href="#" onclick="return doclink('link-143', 'INT_ACCUMULATOR_PARAM', 'link-143');">INT_ACCUMULATOR_PARAM</a></tt> </tt>
<a name="L386"></a><tt class="py-lineno">386</tt>  <tt class="py-line">            <tt class="py-keyword">elif</tt> <tt class="py-name">isinstance</tt><tt class="py-op">(</tt><tt id="link-144" class="py-name"><a title="pyspark.accumulators.Accumulator.value" class="py-name" href="#" onclick="return doclink('link-144', 'value', 'link-44');">value</a></tt><tt class="py-op">,</tt> <tt class="py-name">float</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
<a name="L387"></a><tt class="py-lineno">387</tt>  <tt class="py-line">                <tt class="py-name">accum_param</tt> <tt class="py-op">=</tt> <tt id="link-145" class="py-name"><a title="pyspark.accumulators" class="py-name" href="#" onclick="return doclink('link-145', 'accumulators', 'link-1');">accumulators</a></tt><tt class="py-op">.</tt><tt id="link-146" class="py-name" targets="Variable pyspark.accumulators.FLOAT_ACCUMULATOR_PARAM=pyspark.accumulators-module.html#FLOAT_ACCUMULATOR_PARAM"><a title="pyspark.accumulators.FLOAT_ACCUMULATOR_PARAM" class="py-name" href="#" onclick="return doclink('link-146', 'FLOAT_ACCUMULATOR_PARAM', 'link-146');">FLOAT_ACCUMULATOR_PARAM</a></tt> </tt>
<a name="L388"></a><tt class="py-lineno">388</tt>  <tt class="py-line">            <tt class="py-keyword">elif</tt> <tt class="py-name">isinstance</tt><tt class="py-op">(</tt><tt id="link-147" class="py-name"><a title="pyspark.accumulators.Accumulator.value" class="py-name" href="#" onclick="return doclink('link-147', 'value', 'link-44');">value</a></tt><tt class="py-op">,</tt> <tt class="py-name">complex</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
<a name="L389"></a><tt class="py-lineno">389</tt>  <tt class="py-line">                <tt class="py-name">accum_param</tt> <tt class="py-op">=</tt> <tt id="link-148" class="py-name"><a title="pyspark.accumulators" class="py-name" href="#" onclick="return doclink('link-148', 'accumulators', 'link-1');">accumulators</a></tt><tt class="py-op">.</tt><tt id="link-149" class="py-name" targets="Variable pyspark.accumulators.COMPLEX_ACCUMULATOR_PARAM=pyspark.accumulators-module.html#COMPLEX_ACCUMULATOR_PARAM"><a title="pyspark.accumulators.COMPLEX_ACCUMULATOR_PARAM" class="py-name" href="#" onclick="return doclink('link-149', 'COMPLEX_ACCUMULATOR_PARAM', 'link-149');">COMPLEX_ACCUMULATOR_PARAM</a></tt> </tt>
<a name="L390"></a><tt class="py-lineno">390</tt>  <tt class="py-line">            <tt class="py-keyword">else</tt><tt class="py-op">:</tt> </tt>
<a name="L391"></a><tt class="py-lineno">391</tt>  <tt class="py-line">                <tt class="py-keyword">raise</tt> <tt class="py-name">Exception</tt><tt class="py-op">(</tt><tt class="py-string">"No default accumulator param for type %s"</tt> <tt class="py-op">%</tt> <tt class="py-name">type</tt><tt class="py-op">(</tt><tt id="link-150" class="py-name"><a title="pyspark.accumulators.Accumulator.value" class="py-name" href="#" onclick="return doclink('link-150', 'value', 'link-44');">value</a></tt><tt class="py-op">)</tt><tt class="py-op">)</tt> </tt>
<a name="L392"></a><tt class="py-lineno">392</tt>  <tt class="py-line">        <tt id="link-151" class="py-name"><a title="pyspark.context.SparkContext" class="py-name" href="#" onclick="return doclink('link-151', 'SparkContext', 'link-36');">SparkContext</a></tt><tt class="py-op">.</tt><tt id="link-152" class="py-name"><a title="pyspark.context.SparkContext._next_accum_id" class="py-name" href="#" onclick="return doclink('link-152', '_next_accum_id', 'link-29');">_next_accum_id</a></tt> <tt class="py-op">+=</tt> <tt class="py-number">1</tt> </tt>
<a name="L393"></a><tt class="py-lineno">393</tt>  <tt class="py-line">        <tt class="py-keyword">return</tt> <tt id="link-153" class="py-name"><a title="pyspark.accumulators.Accumulator" class="py-name" href="#" onclick="return doclink('link-153', 'Accumulator', 'link-4');">Accumulator</a></tt><tt class="py-op">(</tt><tt id="link-154" class="py-name"><a title="pyspark.context.SparkContext" class="py-name" href="#" onclick="return doclink('link-154', 'SparkContext', 'link-36');">SparkContext</a></tt><tt class="py-op">.</tt><tt id="link-155" class="py-name"><a title="pyspark.context.SparkContext._next_accum_id" class="py-name" href="#" onclick="return doclink('link-155', '_next_accum_id', 'link-29');">_next_accum_id</a></tt> <tt class="py-op">-</tt> <tt class="py-number">1</tt><tt class="py-op">,</tt> <tt id="link-156" class="py-name"><a title="pyspark.accumulators.Accumulator.value" class="py-name" href="#" onclick="return doclink('link-156', 'value', 'link-44');">value</a></tt><tt class="py-op">,</tt> <tt class="py-name">accum_param</tt><tt class="py-op">)</tt> </tt>
</div><a name="L394"></a><tt class="py-lineno">394</tt>  <tt class="py-line"> </tt>
<a name="SparkContext.addFile"></a><div id="SparkContext.addFile-def"><a name="L395"></a><tt class="py-lineno">395</tt> <a class="py-toggle" href="#" id="SparkContext.addFile-toggle" onclick="return toggle('SparkContext.addFile');">-</a><tt class="py-line">    <tt class="py-keyword">def</tt> <a class="py-def-name" href="pyspark.context.SparkContext-class.html#addFile">addFile</a><tt class="py-op">(</tt><tt class="py-param">self</tt><tt class="py-op">,</tt> <tt class="py-param">path</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
</div><div id="SparkContext.addFile-collapsed" style="display:none;" pad="+++" indent="++++++++"></div><div id="SparkContext.addFile-expanded"><a name="L396"></a><tt class="py-lineno">396</tt>  <tt class="py-line">        <tt class="py-docstring">"""</tt> </tt>
<a name="L397"></a><tt class="py-lineno">397</tt>  <tt class="py-line"><tt class="py-docstring">        Add a file to be downloaded with this Spark job on every node.</tt> </tt>
<a name="L398"></a><tt class="py-lineno">398</tt>  <tt class="py-line"><tt class="py-docstring">        The C{path} passed can be either a local file, a file in HDFS</tt> </tt>
<a name="L399"></a><tt class="py-lineno">399</tt>  <tt class="py-line"><tt class="py-docstring">        (or other Hadoop-supported filesystems), or an HTTP, HTTPS or</tt> </tt>
<a name="L400"></a><tt class="py-lineno">400</tt>  <tt class="py-line"><tt class="py-docstring">        FTP URI.</tt> </tt>
<a name="L401"></a><tt class="py-lineno">401</tt>  <tt class="py-line"><tt class="py-docstring"></tt> </tt>
<a name="L402"></a><tt class="py-lineno">402</tt>  <tt class="py-line"><tt class="py-docstring">        To access the file in Spark jobs, use</tt> </tt>
<a name="L403"></a><tt class="py-lineno">403</tt>  <tt class="py-line"><tt class="py-docstring">        L{SparkFiles.get(path)&lt;pyspark.files.SparkFiles.get&gt;} to find its</tt> </tt>
<a name="L404"></a><tt class="py-lineno">404</tt>  <tt class="py-line"><tt class="py-docstring">        download location.</tt> </tt>
<a name="L405"></a><tt class="py-lineno">405</tt>  <tt class="py-line"><tt class="py-docstring"></tt> </tt>
<a name="L406"></a><tt class="py-lineno">406</tt>  <tt class="py-line"><tt class="py-docstring">        &gt;&gt;&gt; from pyspark import SparkFiles</tt> </tt>
<a name="L407"></a><tt class="py-lineno">407</tt>  <tt class="py-line"><tt class="py-docstring">        &gt;&gt;&gt; path = os.path.join(tempdir, "test.txt")</tt> </tt>
<a name="L408"></a><tt class="py-lineno">408</tt>  <tt class="py-line"><tt class="py-docstring">        &gt;&gt;&gt; with open(path, "w") as testFile:</tt> </tt>
<a name="L409"></a><tt class="py-lineno">409</tt>  <tt class="py-line"><tt class="py-docstring">        ...    testFile.write("100")</tt> </tt>
<a name="L410"></a><tt class="py-lineno">410</tt>  <tt class="py-line"><tt class="py-docstring">        &gt;&gt;&gt; sc.addFile(path)</tt> </tt>
<a name="L411"></a><tt class="py-lineno">411</tt>  <tt class="py-line"><tt class="py-docstring">        &gt;&gt;&gt; def func(iterator):</tt> </tt>
<a name="L412"></a><tt class="py-lineno">412</tt>  <tt class="py-line"><tt class="py-docstring">        ...    with open(SparkFiles.get("test.txt")) as testFile:</tt> </tt>
<a name="L413"></a><tt class="py-lineno">413</tt>  <tt class="py-line"><tt class="py-docstring">        ...        fileVal = int(testFile.readline())</tt> </tt>
<a name="L414"></a><tt class="py-lineno">414</tt>  <tt class="py-line"><tt class="py-docstring">        ...        return [x * 100 for x in iterator]</tt> </tt>
<a name="L415"></a><tt class="py-lineno">415</tt>  <tt class="py-line"><tt class="py-docstring">        &gt;&gt;&gt; sc.parallelize([1, 2, 3, 4]).mapPartitions(func).collect()</tt> </tt>
<a name="L416"></a><tt class="py-lineno">416</tt>  <tt class="py-line"><tt class="py-docstring">        [100, 200, 300, 400]</tt> </tt>
<a name="L417"></a><tt class="py-lineno">417</tt>  <tt class="py-line"><tt class="py-docstring">        """</tt> </tt>
<a name="L418"></a><tt class="py-lineno">418</tt>  <tt class="py-line">        <tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">_jsc</tt><tt class="py-op">.</tt><tt class="py-name">sc</tt><tt class="py-op">(</tt><tt class="py-op">)</tt><tt class="py-op">.</tt><tt id="link-157" class="py-name" targets="Method pyspark.context.SparkContext.addFile()=pyspark.context.SparkContext-class.html#addFile"><a title="pyspark.context.SparkContext.addFile" class="py-name" href="#" onclick="return doclink('link-157', 'addFile', 'link-157');">addFile</a></tt><tt class="py-op">(</tt><tt class="py-name">path</tt><tt class="py-op">)</tt> </tt>
</div><a name="L419"></a><tt class="py-lineno">419</tt>  <tt class="py-line"> </tt>
<a name="SparkContext.clearFiles"></a><div id="SparkContext.clearFiles-def"><a name="L420"></a><tt class="py-lineno">420</tt> <a class="py-toggle" href="#" id="SparkContext.clearFiles-toggle" onclick="return toggle('SparkContext.clearFiles');">-</a><tt class="py-line">    <tt class="py-keyword">def</tt> <a class="py-def-name" href="pyspark.context.SparkContext-class.html#clearFiles">clearFiles</a><tt class="py-op">(</tt><tt class="py-param">self</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
</div><div id="SparkContext.clearFiles-collapsed" style="display:none;" pad="+++" indent="++++++++"></div><div id="SparkContext.clearFiles-expanded"><a name="L421"></a><tt class="py-lineno">421</tt>  <tt class="py-line">        <tt class="py-docstring">"""</tt> </tt>
<a name="L422"></a><tt class="py-lineno">422</tt>  <tt class="py-line"><tt class="py-docstring">        Clear the job's list of files added by L{addFile} or L{addPyFile} so</tt> </tt>
<a name="L423"></a><tt class="py-lineno">423</tt>  <tt class="py-line"><tt class="py-docstring">        that they do not get downloaded to any new nodes.</tt> </tt>
<a name="L424"></a><tt class="py-lineno">424</tt>  <tt class="py-line"><tt class="py-docstring">        """</tt> </tt>
<a name="L425"></a><tt class="py-lineno">425</tt>  <tt class="py-line">        <tt class="py-comment"># TODO: remove added .py or .zip files from the PYTHONPATH?</tt> </tt>
<a name="L426"></a><tt class="py-lineno">426</tt>  <tt class="py-line">        <tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">_jsc</tt><tt class="py-op">.</tt><tt class="py-name">sc</tt><tt class="py-op">(</tt><tt class="py-op">)</tt><tt class="py-op">.</tt><tt id="link-158" class="py-name" targets="Method pyspark.context.SparkContext.clearFiles()=pyspark.context.SparkContext-class.html#clearFiles"><a title="pyspark.context.SparkContext.clearFiles" class="py-name" href="#" onclick="return doclink('link-158', 'clearFiles', 'link-158');">clearFiles</a></tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
</div><a name="L427"></a><tt class="py-lineno">427</tt>  <tt class="py-line"> </tt>
<a name="SparkContext.addPyFile"></a><div id="SparkContext.addPyFile-def"><a name="L428"></a><tt class="py-lineno">428</tt> <a class="py-toggle" href="#" id="SparkContext.addPyFile-toggle" onclick="return toggle('SparkContext.addPyFile');">-</a><tt class="py-line">    <tt class="py-keyword">def</tt> <a class="py-def-name" href="pyspark.context.SparkContext-class.html#addPyFile">addPyFile</a><tt class="py-op">(</tt><tt class="py-param">self</tt><tt class="py-op">,</tt> <tt class="py-param">path</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
</div><div id="SparkContext.addPyFile-collapsed" style="display:none;" pad="+++" indent="++++++++"></div><div id="SparkContext.addPyFile-expanded"><a name="L429"></a><tt class="py-lineno">429</tt>  <tt class="py-line">        <tt class="py-docstring">"""</tt> </tt>
<a name="L430"></a><tt class="py-lineno">430</tt>  <tt class="py-line"><tt class="py-docstring">        Add a .py or .zip dependency for all tasks to be executed on this</tt> </tt>
<a name="L431"></a><tt class="py-lineno">431</tt>  <tt class="py-line"><tt class="py-docstring">        SparkContext in the future.  The C{path} passed can be either a local</tt> </tt>
<a name="L432"></a><tt class="py-lineno">432</tt>  <tt class="py-line"><tt class="py-docstring">        file, a file in HDFS (or other Hadoop-supported filesystems), or an</tt> </tt>
<a name="L433"></a><tt class="py-lineno">433</tt>  <tt class="py-line"><tt class="py-docstring">        HTTP, HTTPS or FTP URI.</tt> </tt>
<a name="L434"></a><tt class="py-lineno">434</tt>  <tt class="py-line"><tt class="py-docstring">        """</tt> </tt>
<a name="L435"></a><tt class="py-lineno">435</tt>  <tt class="py-line">        <tt class="py-name">self</tt><tt class="py-op">.</tt><tt id="link-159" class="py-name"><a title="pyspark.context.SparkContext.addFile" class="py-name" href="#" onclick="return doclink('link-159', 'addFile', 'link-157');">addFile</a></tt><tt class="py-op">(</tt><tt class="py-name">path</tt><tt class="py-op">)</tt> </tt>
<a name="L436"></a><tt class="py-lineno">436</tt>  <tt class="py-line">        <tt class="py-op">(</tt><tt class="py-name">dirname</tt><tt class="py-op">,</tt> <tt class="py-name">filename</tt><tt class="py-op">)</tt> <tt class="py-op">=</tt> <tt class="py-name">os</tt><tt class="py-op">.</tt><tt class="py-name">path</tt><tt class="py-op">.</tt><tt class="py-name">split</tt><tt class="py-op">(</tt><tt class="py-name">path</tt><tt class="py-op">)</tt> <tt class="py-comment"># dirname may be directory or HDFS/S3 prefix</tt> </tt>
<a name="L437"></a><tt class="py-lineno">437</tt>  <tt class="py-line"> </tt>
<a name="L438"></a><tt class="py-lineno">438</tt>  <tt class="py-line">        <tt class="py-keyword">if</tt> <tt class="py-name">filename</tt><tt class="py-op">.</tt><tt class="py-name">endswith</tt><tt class="py-op">(</tt><tt class="py-string">'.zip'</tt><tt class="py-op">)</tt> <tt class="py-keyword">or</tt> <tt class="py-name">filename</tt><tt class="py-op">.</tt><tt class="py-name">endswith</tt><tt class="py-op">(</tt><tt class="py-string">'.ZIP'</tt><tt class="py-op">)</tt> <tt class="py-keyword">or</tt> <tt class="py-name">filename</tt><tt class="py-op">.</tt><tt class="py-name">endswith</tt><tt class="py-op">(</tt><tt class="py-string">'.egg'</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
<a name="L439"></a><tt class="py-lineno">439</tt>  <tt class="py-line">            <tt class="py-name">self</tt><tt class="py-op">.</tt><tt id="link-160" class="py-name"><a title="pyspark.context.SparkContext._python_includes" class="py-name" href="#" onclick="return doclink('link-160', '_python_includes', 'link-32');">_python_includes</a></tt><tt class="py-op">.</tt><tt class="py-name">append</tt><tt class="py-op">(</tt><tt class="py-name">filename</tt><tt class="py-op">)</tt> </tt>
<a name="L440"></a><tt class="py-lineno">440</tt>  <tt class="py-line">            <tt class="py-name">sys</tt><tt class="py-op">.</tt><tt class="py-name">path</tt><tt class="py-op">.</tt><tt class="py-name">append</tt><tt class="py-op">(</tt><tt class="py-name">os</tt><tt class="py-op">.</tt><tt class="py-name">path</tt><tt class="py-op">.</tt><tt id="link-161" class="py-name" targets="Method pyspark.rdd.RDD.join()=pyspark.rdd.RDD-class.html#join"><a title="pyspark.rdd.RDD.join" class="py-name" href="#" onclick="return doclink('link-161', 'join', 'link-161');">join</a></tt><tt class="py-op">(</tt><tt id="link-162" class="py-name"><a title="pyspark.files.SparkFiles" class="py-name" href="#" onclick="return doclink('link-162', 'SparkFiles', 'link-13');">SparkFiles</a></tt><tt class="py-op">.</tt><tt id="link-163" class="py-name"><a title="pyspark.files.SparkFiles.getRootDirectory" class="py-name" href="#" onclick="return doclink('link-163', 'getRootDirectory', 'link-63');">getRootDirectory</a></tt><tt class="py-op">(</tt><tt class="py-op">)</tt><tt class="py-op">,</tt> <tt class="py-name">filename</tt><tt class="py-op">)</tt><tt class="py-op">)</tt> <tt class="py-comment"># for tests in local mode</tt> </tt>
</div><a name="L441"></a><tt class="py-lineno">441</tt>  <tt class="py-line"> </tt>
<a name="SparkContext.setCheckpointDir"></a><div id="SparkContext.setCheckpointDir-def"><a name="L442"></a><tt class="py-lineno">442</tt> <a class="py-toggle" href="#" id="SparkContext.setCheckpointDir-toggle" onclick="return toggle('SparkContext.setCheckpointDir');">-</a><tt class="py-line">    <tt class="py-keyword">def</tt> <a class="py-def-name" href="pyspark.context.SparkContext-class.html#setCheckpointDir">setCheckpointDir</a><tt class="py-op">(</tt><tt class="py-param">self</tt><tt class="py-op">,</tt> <tt class="py-param">dirName</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
</div><div id="SparkContext.setCheckpointDir-collapsed" style="display:none;" pad="+++" indent="++++++++"></div><div id="SparkContext.setCheckpointDir-expanded"><a name="L443"></a><tt class="py-lineno">443</tt>  <tt class="py-line">        <tt class="py-docstring">"""</tt> </tt>
<a name="L444"></a><tt class="py-lineno">444</tt>  <tt class="py-line"><tt class="py-docstring">        Set the directory under which RDDs are going to be checkpointed. The</tt> </tt>
<a name="L445"></a><tt class="py-lineno">445</tt>  <tt class="py-line"><tt class="py-docstring">        directory must be a HDFS path if running on a cluster.</tt> </tt>
<a name="L446"></a><tt class="py-lineno">446</tt>  <tt class="py-line"><tt class="py-docstring">        """</tt> </tt>
<a name="L447"></a><tt class="py-lineno">447</tt>  <tt class="py-line">        <tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">_jsc</tt><tt class="py-op">.</tt><tt class="py-name">sc</tt><tt class="py-op">(</tt><tt class="py-op">)</tt><tt class="py-op">.</tt><tt id="link-164" class="py-name" targets="Method pyspark.context.SparkContext.setCheckpointDir()=pyspark.context.SparkContext-class.html#setCheckpointDir"><a title="pyspark.context.SparkContext.setCheckpointDir" class="py-name" href="#" onclick="return doclink('link-164', 'setCheckpointDir', 'link-164');">setCheckpointDir</a></tt><tt class="py-op">(</tt><tt class="py-name">dirName</tt><tt class="py-op">)</tt> </tt>
</div><a name="L448"></a><tt class="py-lineno">448</tt>  <tt class="py-line"> </tt>
<a name="SparkContext._getJavaStorageLevel"></a><div id="SparkContext._getJavaStorageLevel-def"><a name="L449"></a><tt class="py-lineno">449</tt> <a class="py-toggle" href="#" id="SparkContext._getJavaStorageLevel-toggle" onclick="return toggle('SparkContext._getJavaStorageLevel');">-</a><tt class="py-line">    <tt class="py-keyword">def</tt> <a class="py-def-name" href="pyspark.context.SparkContext-class.html#_getJavaStorageLevel">_getJavaStorageLevel</a><tt class="py-op">(</tt><tt class="py-param">self</tt><tt class="py-op">,</tt> <tt class="py-param">storageLevel</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
</div><div id="SparkContext._getJavaStorageLevel-collapsed" style="display:none;" pad="+++" indent="++++++++"></div><div id="SparkContext._getJavaStorageLevel-expanded"><a name="L450"></a><tt class="py-lineno">450</tt>  <tt class="py-line">        <tt class="py-docstring">"""</tt> </tt>
<a name="L451"></a><tt class="py-lineno">451</tt>  <tt class="py-line"><tt class="py-docstring">        Returns a Java StorageLevel based on a pyspark.StorageLevel.</tt> </tt>
<a name="L452"></a><tt class="py-lineno">452</tt>  <tt class="py-line"><tt class="py-docstring">        """</tt> </tt>
<a name="L453"></a><tt class="py-lineno">453</tt>  <tt class="py-line">        <tt class="py-keyword">if</tt> <tt class="py-keyword">not</tt> <tt class="py-name">isinstance</tt><tt class="py-op">(</tt><tt class="py-name">storageLevel</tt><tt class="py-op">,</tt> <tt id="link-165" class="py-name"><a title="pyspark.storagelevel.StorageLevel" class="py-name" href="#" onclick="return doclink('link-165', 'StorageLevel', 'link-20');">StorageLevel</a></tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
<a name="L454"></a><tt class="py-lineno">454</tt>  <tt class="py-line">            <tt class="py-keyword">raise</tt> <tt class="py-name">Exception</tt><tt class="py-op">(</tt><tt class="py-string">"storageLevel must be of type pyspark.StorageLevel"</tt><tt class="py-op">)</tt> </tt>
<a name="L455"></a><tt class="py-lineno">455</tt>  <tt class="py-line"> </tt>
<a name="L456"></a><tt class="py-lineno">456</tt>  <tt class="py-line">        <tt class="py-name">newStorageLevel</tt> <tt class="py-op">=</tt> <tt class="py-name">self</tt><tt class="py-op">.</tt><tt id="link-166" class="py-name"><a title="pyspark.context.SparkContext._jvm" class="py-name" href="#" onclick="return doclink('link-166', '_jvm', 'link-27');">_jvm</a></tt><tt class="py-op">.</tt><tt class="py-name">org</tt><tt class="py-op">.</tt><tt class="py-name">apache</tt><tt class="py-op">.</tt><tt class="py-name">spark</tt><tt class="py-op">.</tt><tt class="py-name">storage</tt><tt class="py-op">.</tt><tt id="link-167" class="py-name"><a title="pyspark.storagelevel.StorageLevel" class="py-name" href="#" onclick="return doclink('link-167', 'StorageLevel', 'link-20');">StorageLevel</a></tt> </tt>
<a name="L457"></a><tt class="py-lineno">457</tt>  <tt class="py-line">        <tt class="py-keyword">return</tt> <tt class="py-name">newStorageLevel</tt><tt class="py-op">(</tt><tt class="py-name">storageLevel</tt><tt class="py-op">.</tt><tt class="py-name">useDisk</tt><tt class="py-op">,</tt> </tt>
<a name="L458"></a><tt class="py-lineno">458</tt>  <tt class="py-line">                               <tt class="py-name">storageLevel</tt><tt class="py-op">.</tt><tt class="py-name">useMemory</tt><tt class="py-op">,</tt> </tt>
<a name="L459"></a><tt class="py-lineno">459</tt>  <tt class="py-line">                               <tt class="py-name">storageLevel</tt><tt class="py-op">.</tt><tt class="py-name">useOffHeap</tt><tt class="py-op">,</tt> </tt>
<a name="L460"></a><tt class="py-lineno">460</tt>  <tt class="py-line">                               <tt class="py-name">storageLevel</tt><tt class="py-op">.</tt><tt class="py-name">deserialized</tt><tt class="py-op">,</tt> </tt>
<a name="L461"></a><tt class="py-lineno">461</tt>  <tt class="py-line">                               <tt class="py-name">storageLevel</tt><tt class="py-op">.</tt><tt class="py-name">replication</tt><tt class="py-op">)</tt> </tt>
</div><a name="L462"></a><tt class="py-lineno">462</tt>  <tt class="py-line"> </tt>
<a name="SparkContext.setJobGroup"></a><div id="SparkContext.setJobGroup-def"><a name="L463"></a><tt class="py-lineno">463</tt> <a class="py-toggle" href="#" id="SparkContext.setJobGroup-toggle" onclick="return toggle('SparkContext.setJobGroup');">-</a><tt class="py-line">    <tt class="py-keyword">def</tt> <a class="py-def-name" href="pyspark.context.SparkContext-class.html#setJobGroup">setJobGroup</a><tt class="py-op">(</tt><tt class="py-param">self</tt><tt class="py-op">,</tt> <tt class="py-param">groupId</tt><tt class="py-op">,</tt> <tt class="py-param">description</tt><tt class="py-op">,</tt> <tt class="py-param">interruptOnCancel</tt><tt class="py-op">=</tt><tt class="py-name">False</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
</div><div id="SparkContext.setJobGroup-collapsed" style="display:none;" pad="+++" indent="++++++++"></div><div id="SparkContext.setJobGroup-expanded"><a name="L464"></a><tt class="py-lineno">464</tt>  <tt class="py-line">        <tt class="py-docstring">"""</tt> </tt>
<a name="L465"></a><tt class="py-lineno">465</tt>  <tt class="py-line"><tt class="py-docstring">        Assigns a group ID to all the jobs started by this thread until the group ID is set to a</tt> </tt>
<a name="L466"></a><tt class="py-lineno">466</tt>  <tt class="py-line"><tt class="py-docstring">        different value or cleared.</tt> </tt>
<a name="L467"></a><tt class="py-lineno">467</tt>  <tt class="py-line"><tt class="py-docstring"></tt> </tt>
<a name="L468"></a><tt class="py-lineno">468</tt>  <tt class="py-line"><tt class="py-docstring">        Often, a unit of execution in an application consists of multiple Spark actions or jobs.</tt> </tt>
<a name="L469"></a><tt class="py-lineno">469</tt>  <tt class="py-line"><tt class="py-docstring">        Application programmers can use this method to group all those jobs together and give a</tt> </tt>
<a name="L470"></a><tt class="py-lineno">470</tt>  <tt class="py-line"><tt class="py-docstring">        group description. Once set, the Spark web UI will associate such jobs with this group.</tt> </tt>
<a name="L471"></a><tt class="py-lineno">471</tt>  <tt class="py-line"><tt class="py-docstring"></tt> </tt>
<a name="L472"></a><tt class="py-lineno">472</tt>  <tt class="py-line"><tt class="py-docstring">        The application can use L{SparkContext.cancelJobGroup} to cancel all</tt> </tt>
<a name="L473"></a><tt class="py-lineno">473</tt>  <tt class="py-line"><tt class="py-docstring">        running jobs in this group.</tt> </tt>
<a name="L474"></a><tt class="py-lineno">474</tt>  <tt class="py-line"><tt class="py-docstring"></tt> </tt>
<a name="L475"></a><tt class="py-lineno">475</tt>  <tt class="py-line"><tt class="py-docstring">        &gt;&gt;&gt; import thread, threading</tt> </tt>
<a name="L476"></a><tt class="py-lineno">476</tt>  <tt class="py-line"><tt class="py-docstring">        &gt;&gt;&gt; from time import sleep</tt> </tt>
<a name="L477"></a><tt class="py-lineno">477</tt>  <tt class="py-line"><tt class="py-docstring">        &gt;&gt;&gt; result = "Not Set"</tt> </tt>
<a name="L478"></a><tt class="py-lineno">478</tt>  <tt class="py-line"><tt class="py-docstring">        &gt;&gt;&gt; lock = threading.Lock()</tt> </tt>
<a name="L479"></a><tt class="py-lineno">479</tt>  <tt class="py-line"><tt class="py-docstring">        &gt;&gt;&gt; def map_func(x):</tt> </tt>
<a name="L480"></a><tt class="py-lineno">480</tt>  <tt class="py-line"><tt class="py-docstring">        ...     sleep(100)</tt> </tt>
<a name="L481"></a><tt class="py-lineno">481</tt>  <tt class="py-line"><tt class="py-docstring">        ...     raise Exception("Task should have been cancelled")</tt> </tt>
<a name="L482"></a><tt class="py-lineno">482</tt>  <tt class="py-line"><tt class="py-docstring">        &gt;&gt;&gt; def start_job(x):</tt> </tt>
<a name="L483"></a><tt class="py-lineno">483</tt>  <tt class="py-line"><tt class="py-docstring">        ...     global result</tt> </tt>
<a name="L484"></a><tt class="py-lineno">484</tt>  <tt class="py-line"><tt class="py-docstring">        ...     try:</tt> </tt>
<a name="L485"></a><tt class="py-lineno">485</tt>  <tt class="py-line"><tt class="py-docstring">        ...         sc.setJobGroup("job_to_cancel", "some description")</tt> </tt>
<a name="L486"></a><tt class="py-lineno">486</tt>  <tt class="py-line"><tt class="py-docstring">        ...         result = sc.parallelize(range(x)).map(map_func).collect()</tt> </tt>
<a name="L487"></a><tt class="py-lineno">487</tt>  <tt class="py-line"><tt class="py-docstring">        ...     except Exception as e:</tt> </tt>
<a name="L488"></a><tt class="py-lineno">488</tt>  <tt class="py-line"><tt class="py-docstring">        ...         result = "Cancelled"</tt> </tt>
<a name="L489"></a><tt class="py-lineno">489</tt>  <tt class="py-line"><tt class="py-docstring">        ...     lock.release()</tt> </tt>
<a name="L490"></a><tt class="py-lineno">490</tt>  <tt class="py-line"><tt class="py-docstring">        &gt;&gt;&gt; def stop_job():</tt> </tt>
<a name="L491"></a><tt class="py-lineno">491</tt>  <tt class="py-line"><tt class="py-docstring">        ...     sleep(5)</tt> </tt>
<a name="L492"></a><tt class="py-lineno">492</tt>  <tt class="py-line"><tt class="py-docstring">        ...     sc.cancelJobGroup("job_to_cancel")</tt> </tt>
<a name="L493"></a><tt class="py-lineno">493</tt>  <tt class="py-line"><tt class="py-docstring">        &gt;&gt;&gt; supress = lock.acquire()</tt> </tt>
<a name="L494"></a><tt class="py-lineno">494</tt>  <tt class="py-line"><tt class="py-docstring">        &gt;&gt;&gt; supress = thread.start_new_thread(start_job, (10,))</tt> </tt>
<a name="L495"></a><tt class="py-lineno">495</tt>  <tt class="py-line"><tt class="py-docstring">        &gt;&gt;&gt; supress = thread.start_new_thread(stop_job, tuple())</tt> </tt>
<a name="L496"></a><tt class="py-lineno">496</tt>  <tt class="py-line"><tt class="py-docstring">        &gt;&gt;&gt; supress = lock.acquire()</tt> </tt>
<a name="L497"></a><tt class="py-lineno">497</tt>  <tt class="py-line"><tt class="py-docstring">        &gt;&gt;&gt; print result</tt> </tt>
<a name="L498"></a><tt class="py-lineno">498</tt>  <tt class="py-line"><tt class="py-docstring">        Cancelled</tt> </tt>
<a name="L499"></a><tt class="py-lineno">499</tt>  <tt class="py-line"><tt class="py-docstring"></tt> </tt>
<a name="L500"></a><tt class="py-lineno">500</tt>  <tt class="py-line"><tt class="py-docstring">        If interruptOnCancel is set to true for the job group, then job cancellation will result</tt> </tt>
<a name="L501"></a><tt class="py-lineno">501</tt>  <tt class="py-line"><tt class="py-docstring">        in Thread.interrupt() being called on the job's executor threads. This is useful to help ensure</tt> </tt>
<a name="L502"></a><tt class="py-lineno">502</tt>  <tt class="py-line"><tt class="py-docstring">        that the tasks are actually stopped in a timely manner, but is off by default due to HDFS-1208,</tt> </tt>
<a name="L503"></a><tt class="py-lineno">503</tt>  <tt class="py-line"><tt class="py-docstring">        where HDFS may respond to Thread.interrupt() by marking nodes as dead.</tt> </tt>
<a name="L504"></a><tt class="py-lineno">504</tt>  <tt class="py-line"><tt class="py-docstring">        """</tt> </tt>
<a name="L505"></a><tt class="py-lineno">505</tt>  <tt class="py-line">        <tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">_jsc</tt><tt class="py-op">.</tt><tt id="link-168" class="py-name" targets="Method pyspark.context.SparkContext.setJobGroup()=pyspark.context.SparkContext-class.html#setJobGroup"><a title="pyspark.context.SparkContext.setJobGroup" class="py-name" href="#" onclick="return doclink('link-168', 'setJobGroup', 'link-168');">setJobGroup</a></tt><tt class="py-op">(</tt><tt class="py-name">groupId</tt><tt class="py-op">,</tt> <tt class="py-name">description</tt><tt class="py-op">,</tt> <tt class="py-name">interruptOnCancel</tt><tt class="py-op">)</tt> </tt>
</div><a name="L506"></a><tt class="py-lineno">506</tt>  <tt class="py-line"> </tt>
<a name="SparkContext.setLocalProperty"></a><div id="SparkContext.setLocalProperty-def"><a name="L507"></a><tt class="py-lineno">507</tt> <a class="py-toggle" href="#" id="SparkContext.setLocalProperty-toggle" onclick="return toggle('SparkContext.setLocalProperty');">-</a><tt class="py-line">    <tt class="py-keyword">def</tt> <a class="py-def-name" href="pyspark.context.SparkContext-class.html#setLocalProperty">setLocalProperty</a><tt class="py-op">(</tt><tt class="py-param">self</tt><tt class="py-op">,</tt> <tt class="py-param">key</tt><tt class="py-op">,</tt> <tt class="py-param">value</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
</div><div id="SparkContext.setLocalProperty-collapsed" style="display:none;" pad="+++" indent="++++++++"></div><div id="SparkContext.setLocalProperty-expanded"><a name="L508"></a><tt class="py-lineno">508</tt>  <tt class="py-line">        <tt class="py-docstring">"""</tt> </tt>
<a name="L509"></a><tt class="py-lineno">509</tt>  <tt class="py-line"><tt class="py-docstring">        Set a local property that affects jobs submitted from this thread, such as the</tt> </tt>
<a name="L510"></a><tt class="py-lineno">510</tt>  <tt class="py-line"><tt class="py-docstring">        Spark fair scheduler pool.</tt> </tt>
<a name="L511"></a><tt class="py-lineno">511</tt>  <tt class="py-line"><tt class="py-docstring">        """</tt> </tt>
<a name="L512"></a><tt class="py-lineno">512</tt>  <tt class="py-line">        <tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">_jsc</tt><tt class="py-op">.</tt><tt id="link-169" class="py-name" targets="Method pyspark.context.SparkContext.setLocalProperty()=pyspark.context.SparkContext-class.html#setLocalProperty"><a title="pyspark.context.SparkContext.setLocalProperty" class="py-name" href="#" onclick="return doclink('link-169', 'setLocalProperty', 'link-169');">setLocalProperty</a></tt><tt class="py-op">(</tt><tt class="py-name">key</tt><tt class="py-op">,</tt> <tt id="link-170" class="py-name"><a title="pyspark.accumulators.Accumulator.value" class="py-name" href="#" onclick="return doclink('link-170', 'value', 'link-44');">value</a></tt><tt class="py-op">)</tt> </tt>
</div><a name="L513"></a><tt class="py-lineno">513</tt>  <tt class="py-line"> </tt>
<a name="SparkContext.getLocalProperty"></a><div id="SparkContext.getLocalProperty-def"><a name="L514"></a><tt class="py-lineno">514</tt> <a class="py-toggle" href="#" id="SparkContext.getLocalProperty-toggle" onclick="return toggle('SparkContext.getLocalProperty');">-</a><tt class="py-line">    <tt class="py-keyword">def</tt> <a class="py-def-name" href="pyspark.context.SparkContext-class.html#getLocalProperty">getLocalProperty</a><tt class="py-op">(</tt><tt class="py-param">self</tt><tt class="py-op">,</tt> <tt class="py-param">key</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
</div><div id="SparkContext.getLocalProperty-collapsed" style="display:none;" pad="+++" indent="++++++++"></div><div id="SparkContext.getLocalProperty-expanded"><a name="L515"></a><tt class="py-lineno">515</tt>  <tt class="py-line">        <tt class="py-docstring">"""</tt> </tt>
<a name="L516"></a><tt class="py-lineno">516</tt>  <tt class="py-line"><tt class="py-docstring">        Get a local property set in this thread, or null if it is missing. See</tt> </tt>
<a name="L517"></a><tt class="py-lineno">517</tt>  <tt class="py-line"><tt class="py-docstring">        L{setLocalProperty}</tt> </tt>
<a name="L518"></a><tt class="py-lineno">518</tt>  <tt class="py-line"><tt class="py-docstring">        """</tt> </tt>
<a name="L519"></a><tt class="py-lineno">519</tt>  <tt class="py-line">        <tt class="py-keyword">return</tt> <tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">_jsc</tt><tt class="py-op">.</tt><tt id="link-171" class="py-name" targets="Method pyspark.context.SparkContext.getLocalProperty()=pyspark.context.SparkContext-class.html#getLocalProperty"><a title="pyspark.context.SparkContext.getLocalProperty" class="py-name" href="#" onclick="return doclink('link-171', 'getLocalProperty', 'link-171');">getLocalProperty</a></tt><tt class="py-op">(</tt><tt class="py-name">key</tt><tt class="py-op">)</tt> </tt>
</div><a name="L520"></a><tt class="py-lineno">520</tt>  <tt class="py-line"> </tt>
<a name="SparkContext.sparkUser"></a><div id="SparkContext.sparkUser-def"><a name="L521"></a><tt class="py-lineno">521</tt> <a class="py-toggle" href="#" id="SparkContext.sparkUser-toggle" onclick="return toggle('SparkContext.sparkUser');">-</a><tt class="py-line">    <tt class="py-keyword">def</tt> <a class="py-def-name" href="pyspark.context.SparkContext-class.html#sparkUser">sparkUser</a><tt class="py-op">(</tt><tt class="py-param">self</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
</div><div id="SparkContext.sparkUser-collapsed" style="display:none;" pad="+++" indent="++++++++"></div><div id="SparkContext.sparkUser-expanded"><a name="L522"></a><tt class="py-lineno">522</tt>  <tt class="py-line">        <tt class="py-docstring">"""</tt> </tt>
<a name="L523"></a><tt class="py-lineno">523</tt>  <tt class="py-line"><tt class="py-docstring">        Get SPARK_USER for user who is running SparkContext.</tt> </tt>
<a name="L524"></a><tt class="py-lineno">524</tt>  <tt class="py-line"><tt class="py-docstring">        """</tt> </tt>
<a name="L525"></a><tt class="py-lineno">525</tt>  <tt class="py-line">        <tt class="py-keyword">return</tt> <tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">_jsc</tt><tt class="py-op">.</tt><tt class="py-name">sc</tt><tt class="py-op">(</tt><tt class="py-op">)</tt><tt class="py-op">.</tt><tt id="link-172" class="py-name" targets="Method pyspark.context.SparkContext.sparkUser()=pyspark.context.SparkContext-class.html#sparkUser"><a title="pyspark.context.SparkContext.sparkUser" class="py-name" href="#" onclick="return doclink('link-172', 'sparkUser', 'link-172');">sparkUser</a></tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
</div><a name="L526"></a><tt class="py-lineno">526</tt>  <tt class="py-line"> </tt>
<a name="SparkContext.cancelJobGroup"></a><div id="SparkContext.cancelJobGroup-def"><a name="L527"></a><tt class="py-lineno">527</tt> <a class="py-toggle" href="#" id="SparkContext.cancelJobGroup-toggle" onclick="return toggle('SparkContext.cancelJobGroup');">-</a><tt class="py-line">    <tt class="py-keyword">def</tt> <a class="py-def-name" href="pyspark.context.SparkContext-class.html#cancelJobGroup">cancelJobGroup</a><tt class="py-op">(</tt><tt class="py-param">self</tt><tt class="py-op">,</tt> <tt class="py-param">groupId</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
</div><div id="SparkContext.cancelJobGroup-collapsed" style="display:none;" pad="+++" indent="++++++++"></div><div id="SparkContext.cancelJobGroup-expanded"><a name="L528"></a><tt class="py-lineno">528</tt>  <tt class="py-line">        <tt class="py-docstring">"""</tt> </tt>
<a name="L529"></a><tt class="py-lineno">529</tt>  <tt class="py-line"><tt class="py-docstring">        Cancel active jobs for the specified group. See L{SparkContext.setJobGroup}</tt> </tt>
<a name="L530"></a><tt class="py-lineno">530</tt>  <tt class="py-line"><tt class="py-docstring">        for more information.</tt> </tt>
<a name="L531"></a><tt class="py-lineno">531</tt>  <tt class="py-line"><tt class="py-docstring">        """</tt> </tt>
<a name="L532"></a><tt class="py-lineno">532</tt>  <tt class="py-line">        <tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">_jsc</tt><tt class="py-op">.</tt><tt class="py-name">sc</tt><tt class="py-op">(</tt><tt class="py-op">)</tt><tt class="py-op">.</tt><tt id="link-173" class="py-name" targets="Method pyspark.context.SparkContext.cancelJobGroup()=pyspark.context.SparkContext-class.html#cancelJobGroup"><a title="pyspark.context.SparkContext.cancelJobGroup" class="py-name" href="#" onclick="return doclink('link-173', 'cancelJobGroup', 'link-173');">cancelJobGroup</a></tt><tt class="py-op">(</tt><tt class="py-name">groupId</tt><tt class="py-op">)</tt> </tt>
</div><a name="L533"></a><tt class="py-lineno">533</tt>  <tt class="py-line"> </tt>
<a name="SparkContext.cancelAllJobs"></a><div id="SparkContext.cancelAllJobs-def"><a name="L534"></a><tt class="py-lineno">534</tt> <a class="py-toggle" href="#" id="SparkContext.cancelAllJobs-toggle" onclick="return toggle('SparkContext.cancelAllJobs');">-</a><tt class="py-line">    <tt class="py-keyword">def</tt> <a class="py-def-name" href="pyspark.context.SparkContext-class.html#cancelAllJobs">cancelAllJobs</a><tt class="py-op">(</tt><tt class="py-param">self</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
</div><div id="SparkContext.cancelAllJobs-collapsed" style="display:none;" pad="+++" indent="++++++++"></div><div id="SparkContext.cancelAllJobs-expanded"><a name="L535"></a><tt class="py-lineno">535</tt>  <tt class="py-line">        <tt class="py-docstring">"""</tt> </tt>
<a name="L536"></a><tt class="py-lineno">536</tt>  <tt class="py-line"><tt class="py-docstring">        Cancel all jobs that have been scheduled or are running.</tt> </tt>
<a name="L537"></a><tt class="py-lineno">537</tt>  <tt class="py-line"><tt class="py-docstring">        """</tt> </tt>
<a name="L538"></a><tt class="py-lineno">538</tt>  <tt class="py-line">        <tt class="py-name">self</tt><tt class="py-op">.</tt><tt class="py-name">_jsc</tt><tt class="py-op">.</tt><tt class="py-name">sc</tt><tt class="py-op">(</tt><tt class="py-op">)</tt><tt class="py-op">.</tt><tt id="link-174" class="py-name" targets="Method pyspark.context.SparkContext.cancelAllJobs()=pyspark.context.SparkContext-class.html#cancelAllJobs"><a title="pyspark.context.SparkContext.cancelAllJobs" class="py-name" href="#" onclick="return doclink('link-174', 'cancelAllJobs', 'link-174');">cancelAllJobs</a></tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
</div></div><a name="L539"></a><tt class="py-lineno">539</tt>  <tt class="py-line"> </tt>
<a name="_test"></a><div id="_test-def"><a name="L540"></a><tt class="py-lineno">540</tt> <a class="py-toggle" href="#" id="_test-toggle" onclick="return toggle('_test');">-</a><tt class="py-line"><tt class="py-keyword">def</tt> <a class="py-def-name" href="pyspark.context-module.html#_test">_test</a><tt class="py-op">(</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
</div><div id="_test-collapsed" style="display:none;" pad="+++" indent="++++"></div><div id="_test-expanded"><a name="L541"></a><tt class="py-lineno">541</tt>  <tt class="py-line">    <tt class="py-keyword">import</tt> <tt class="py-name">atexit</tt> </tt>
<a name="L542"></a><tt class="py-lineno">542</tt>  <tt class="py-line">    <tt class="py-keyword">import</tt> <tt class="py-name">doctest</tt> </tt>
<a name="L543"></a><tt class="py-lineno">543</tt>  <tt class="py-line">    <tt class="py-keyword">import</tt> <tt class="py-name">tempfile</tt> </tt>
<a name="L544"></a><tt class="py-lineno">544</tt>  <tt class="py-line">    <tt class="py-name">globs</tt> <tt class="py-op">=</tt> <tt class="py-name">globals</tt><tt class="py-op">(</tt><tt class="py-op">)</tt><tt class="py-op">.</tt><tt id="link-175" class="py-name" targets="Method pyspark.statcounter.StatCounter.copy()=pyspark.statcounter.StatCounter-class.html#copy"><a title="pyspark.statcounter.StatCounter.copy" class="py-name" href="#" onclick="return doclink('link-175', 'copy', 'link-175');">copy</a></tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
<a name="L545"></a><tt class="py-lineno">545</tt>  <tt class="py-line">    <tt class="py-name">globs</tt><tt class="py-op">[</tt><tt class="py-string">'sc'</tt><tt class="py-op">]</tt> <tt class="py-op">=</tt> <tt id="link-176" class="py-name"><a title="pyspark.context.SparkContext" class="py-name" href="#" onclick="return doclink('link-176', 'SparkContext', 'link-36');">SparkContext</a></tt><tt class="py-op">(</tt><tt class="py-string">'local[4]'</tt><tt class="py-op">,</tt> <tt class="py-string">'PythonTest'</tt><tt class="py-op">,</tt> <tt class="py-name">batchSize</tt><tt class="py-op">=</tt><tt class="py-number">2</tt><tt class="py-op">)</tt> </tt>
<a name="L546"></a><tt class="py-lineno">546</tt>  <tt class="py-line">    <tt class="py-name">globs</tt><tt class="py-op">[</tt><tt class="py-string">'tempdir'</tt><tt class="py-op">]</tt> <tt class="py-op">=</tt> <tt class="py-name">tempfile</tt><tt class="py-op">.</tt><tt class="py-name">mkdtemp</tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
<a name="L547"></a><tt class="py-lineno">547</tt>  <tt class="py-line">    <tt class="py-name">atexit</tt><tt class="py-op">.</tt><tt class="py-name">register</tt><tt class="py-op">(</tt><tt class="py-keyword">lambda</tt><tt class="py-op">:</tt> <tt class="py-name">shutil</tt><tt class="py-op">.</tt><tt class="py-name">rmtree</tt><tt class="py-op">(</tt><tt class="py-name">globs</tt><tt class="py-op">[</tt><tt class="py-string">'tempdir'</tt><tt class="py-op">]</tt><tt class="py-op">)</tt><tt class="py-op">)</tt> </tt>
<a name="L548"></a><tt class="py-lineno">548</tt>  <tt class="py-line">    <tt class="py-op">(</tt><tt class="py-name">failure_count</tt><tt class="py-op">,</tt> <tt class="py-name">test_count</tt><tt class="py-op">)</tt> <tt class="py-op">=</tt> <tt class="py-name">doctest</tt><tt class="py-op">.</tt><tt class="py-name">testmod</tt><tt class="py-op">(</tt><tt class="py-name">globs</tt><tt class="py-op">=</tt><tt class="py-name">globs</tt><tt class="py-op">,</tt> <tt class="py-name">optionflags</tt><tt class="py-op">=</tt><tt class="py-name">doctest</tt><tt class="py-op">.</tt><tt class="py-name">ELLIPSIS</tt><tt class="py-op">)</tt> </tt>
<a name="L549"></a><tt class="py-lineno">549</tt>  <tt class="py-line">    <tt class="py-name">globs</tt><tt class="py-op">[</tt><tt class="py-string">'sc'</tt><tt class="py-op">]</tt><tt class="py-op">.</tt><tt id="link-177" class="py-name"><a title="pyspark.context.SparkContext.stop" class="py-name" href="#" onclick="return doclink('link-177', 'stop', 'link-106');">stop</a></tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
<a name="L550"></a><tt class="py-lineno">550</tt>  <tt class="py-line">    <tt class="py-keyword">if</tt> <tt class="py-name">failure_count</tt><tt class="py-op">:</tt> </tt>
<a name="L551"></a><tt class="py-lineno">551</tt>  <tt class="py-line">        <tt class="py-name">exit</tt><tt class="py-op">(</tt><tt class="py-op">-</tt><tt class="py-number">1</tt><tt class="py-op">)</tt> </tt>
</div><a name="L552"></a><tt class="py-lineno">552</tt>  <tt class="py-line"> </tt>
<a name="L553"></a><tt class="py-lineno">553</tt>  <tt class="py-line"> </tt>
<a name="L554"></a><tt class="py-lineno">554</tt>  <tt class="py-line"><tt class="py-keyword">if</tt> <tt class="py-name">__name__</tt> <tt class="py-op">==</tt> <tt class="py-string">"__main__"</tt><tt class="py-op">:</tt> </tt>
<a name="L555"></a><tt class="py-lineno">555</tt>  <tt class="py-line">    <tt class="py-name">_test</tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
<a name="L556"></a><tt class="py-lineno">556</tt>  <tt class="py-line"> </tt><script type="text/javascript">
<!--
expandto(location.href);
// -->
</script>
</pre>
<br />
<table border="0" cellpadding="0" cellspacing="0" width="100%%">
  <tr>
    <td align="left" class="footer">
    Generated by Epydoc 3.0.1 on Fri Jul  4 18:52:26 2014
    </td>
    <td align="right" class="footer">
      <a target="mainFrame" href="http://epydoc.sourceforge.net"
        >http://epydoc.sourceforge.net</a>
    </td>
  </tr>
</table>

<script type="text/javascript">
  <!--
  // Private objects are initially displayed (because if
  // javascript is turned off then we want them to be
  // visible); but by default, we want to hide them.  So hide
  // them unless we have a cookie that says to show them.
  checkCookie();
  // -->
</script>
</body>
</html>