summaryrefslogblamecommitdiff
path: root/site/docs/0.9.0/api/pyspark/pyspark-pysrc.html
blob: bff088b91d81f719df07362c31a4fcfa698c37c6 (plain) (tree)























































































































































                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                 
                                                         


















                                                                
<?xml version="1.0" encoding="ascii"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
          "DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
  <title>pyspark</title>
  <link rel="stylesheet" href="epydoc.css" type="text/css" />
  <script type="text/javascript" src="epydoc.js"></script>
</head>

<body bgcolor="white" text="black" link="blue" vlink="#204080"
      alink="#204080">
<!-- ==================== NAVIGATION BAR ==================== -->
<table class="navbar" border="0" width="100%" cellpadding="0"
       bgcolor="#a0c0ff" cellspacing="0">
  <tr valign="middle">
  <!-- Home link -->
      <th bgcolor="#70b0f0" class="navbar-select"
          >&nbsp;&nbsp;&nbsp;Home&nbsp;&nbsp;&nbsp;</th>

  <!-- Tree link -->
      <th>&nbsp;&nbsp;&nbsp;<a
        href="module-tree.html">Trees</a>&nbsp;&nbsp;&nbsp;</th>

  <!-- Index link -->
      <th>&nbsp;&nbsp;&nbsp;<a
        href="identifier-index.html">Indices</a>&nbsp;&nbsp;&nbsp;</th>

  <!-- Help link -->
      <th>&nbsp;&nbsp;&nbsp;<a
        href="help.html">Help</a>&nbsp;&nbsp;&nbsp;</th>

  <!-- Project homepage -->
      <th class="navbar" align="right" width="100%">
        <table border="0" cellpadding="0" cellspacing="0">
          <tr><th class="navbar" align="center"
            ><a class="navbar" target="_top" href="http://spark-project.org">PySpark</a></th>
          </tr></table></th>
  </tr>
</table>
<table width="100%" cellpadding="0" cellspacing="0">
  <tr valign="top">
    <td width="100%">
      <span class="breadcrumbs">
        Package&nbsp;pyspark
      </span>
    </td>
    <td>
      <table cellpadding="0" cellspacing="0">
        <!-- hide/show private -->
        <tr><td align="right"><span class="options"
            >[<a href="frames.html" target="_top">frames</a
            >]&nbsp;|&nbsp;<a href="pyspark-pysrc.html"
            target="_top">no&nbsp;frames</a>]</span></td></tr>
      </table>
    </td>
  </tr>
</table>
<h1 class="epydoc">Source Code for <a href="pyspark-module.html">Package pyspark</a></h1>
<pre class="py-src">
<a name="L1"></a><tt class="py-lineno"> 1</tt>  <tt class="py-line"><tt class="py-comment">#</tt> </tt>
<a name="L2"></a><tt class="py-lineno"> 2</tt>  <tt class="py-line"><tt class="py-comment"># Licensed to the Apache Software Foundation (ASF) under one or more</tt> </tt>
<a name="L3"></a><tt class="py-lineno"> 3</tt>  <tt class="py-line"><tt class="py-comment"># contributor license agreements.  See the NOTICE file distributed with</tt> </tt>
<a name="L4"></a><tt class="py-lineno"> 4</tt>  <tt class="py-line"><tt class="py-comment"># this work for additional information regarding copyright ownership.</tt> </tt>
<a name="L5"></a><tt class="py-lineno"> 5</tt>  <tt class="py-line"><tt class="py-comment"># The ASF licenses this file to You under the Apache License, Version 2.0</tt> </tt>
<a name="L6"></a><tt class="py-lineno"> 6</tt>  <tt class="py-line"><tt class="py-comment"># (the "License"); you may not use this file except in compliance with</tt> </tt>
<a name="L7"></a><tt class="py-lineno"> 7</tt>  <tt class="py-line"><tt class="py-comment"># the License.  You may obtain a copy of the License at</tt> </tt>
<a name="L8"></a><tt class="py-lineno"> 8</tt>  <tt class="py-line"><tt class="py-comment">#</tt> </tt>
<a name="L9"></a><tt class="py-lineno"> 9</tt>  <tt class="py-line"><tt class="py-comment">#    http://www.apache.org/licenses/LICENSE-2.0</tt> </tt>
<a name="L10"></a><tt class="py-lineno">10</tt>  <tt class="py-line"><tt class="py-comment">#</tt> </tt>
<a name="L11"></a><tt class="py-lineno">11</tt>  <tt class="py-line"><tt class="py-comment"># Unless required by applicable law or agreed to in writing, software</tt> </tt>
<a name="L12"></a><tt class="py-lineno">12</tt>  <tt class="py-line"><tt class="py-comment"># distributed under the License is distributed on an "AS IS" BASIS,</tt> </tt>
<a name="L13"></a><tt class="py-lineno">13</tt>  <tt class="py-line"><tt class="py-comment"># WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.</tt> </tt>
<a name="L14"></a><tt class="py-lineno">14</tt>  <tt class="py-line"><tt class="py-comment"># See the License for the specific language governing permissions and</tt> </tt>
<a name="L15"></a><tt class="py-lineno">15</tt>  <tt class="py-line"><tt class="py-comment"># limitations under the License.</tt> </tt>
<a name="L16"></a><tt class="py-lineno">16</tt>  <tt class="py-line"><tt class="py-comment">#</tt> </tt>
<a name="L17"></a><tt class="py-lineno">17</tt>  <tt class="py-line"> </tt>
<a name="L18"></a><tt class="py-lineno">18</tt>  <tt class="py-line"><tt class="py-docstring">"""</tt> </tt>
<a name="L19"></a><tt class="py-lineno">19</tt>  <tt class="py-line"><tt class="py-docstring">PySpark is the Python API for Spark.</tt> </tt>
<a name="L20"></a><tt class="py-lineno">20</tt>  <tt class="py-line"><tt class="py-docstring"></tt> </tt>
<a name="L21"></a><tt class="py-lineno">21</tt>  <tt class="py-line"><tt class="py-docstring">Public classes:</tt> </tt>
<a name="L22"></a><tt class="py-lineno">22</tt>  <tt class="py-line"><tt class="py-docstring"></tt> </tt>
<a name="L23"></a><tt class="py-lineno">23</tt>  <tt class="py-line"><tt class="py-docstring">  - L{SparkContext&lt;pyspark.context.SparkContext&gt;}</tt> </tt>
<a name="L24"></a><tt class="py-lineno">24</tt>  <tt class="py-line"><tt class="py-docstring">      Main entry point for Spark functionality.</tt> </tt>
<a name="L25"></a><tt class="py-lineno">25</tt>  <tt class="py-line"><tt class="py-docstring">  - L{RDD&lt;pyspark.rdd.RDD&gt;}</tt> </tt>
<a name="L26"></a><tt class="py-lineno">26</tt>  <tt class="py-line"><tt class="py-docstring">      A Resilient Distributed Dataset (RDD), the basic abstraction in Spark.</tt> </tt>
<a name="L27"></a><tt class="py-lineno">27</tt>  <tt class="py-line"><tt class="py-docstring">  - L{Broadcast&lt;pyspark.broadcast.Broadcast&gt;}</tt> </tt>
<a name="L28"></a><tt class="py-lineno">28</tt>  <tt class="py-line"><tt class="py-docstring">      A broadcast variable that gets reused across tasks.</tt> </tt>
<a name="L29"></a><tt class="py-lineno">29</tt>  <tt class="py-line"><tt class="py-docstring">  - L{Accumulator&lt;pyspark.accumulators.Accumulator&gt;}</tt> </tt>
<a name="L30"></a><tt class="py-lineno">30</tt>  <tt class="py-line"><tt class="py-docstring">      An "add-only" shared variable that tasks can only add values to.</tt> </tt>
<a name="L31"></a><tt class="py-lineno">31</tt>  <tt class="py-line"><tt class="py-docstring">  - L{SparkConf&lt;pyspark.conf.SparkConf&gt;}</tt> </tt>
<a name="L32"></a><tt class="py-lineno">32</tt>  <tt class="py-line"><tt class="py-docstring">      For configuring Spark.</tt> </tt>
<a name="L33"></a><tt class="py-lineno">33</tt>  <tt class="py-line"><tt class="py-docstring">  - L{SparkFiles&lt;pyspark.files.SparkFiles&gt;}</tt> </tt>
<a name="L34"></a><tt class="py-lineno">34</tt>  <tt class="py-line"><tt class="py-docstring">      Access files shipped with jobs.</tt> </tt>
<a name="L35"></a><tt class="py-lineno">35</tt>  <tt class="py-line"><tt class="py-docstring">  - L{StorageLevel&lt;pyspark.storagelevel.StorageLevel&gt;}</tt> </tt>
<a name="L36"></a><tt class="py-lineno">36</tt>  <tt class="py-line"><tt class="py-docstring">      Finer-grained cache persistence levels.</tt> </tt>
<a name="L37"></a><tt class="py-lineno">37</tt>  <tt class="py-line"><tt class="py-docstring">"""</tt> </tt>
<a name="L38"></a><tt class="py-lineno">38</tt>  <tt class="py-line"> </tt>
<a name="L39"></a><tt class="py-lineno">39</tt>  <tt class="py-line"> </tt>
<a name="L40"></a><tt class="py-lineno">40</tt>  <tt class="py-line"> </tt>
<a name="L41"></a><tt class="py-lineno">41</tt>  <tt class="py-line"><tt class="py-keyword">import</tt> <tt class="py-name">sys</tt> </tt>
<a name="L42"></a><tt class="py-lineno">42</tt>  <tt class="py-line"><tt class="py-keyword">import</tt> <tt class="py-name">os</tt> </tt>
<a name="L43"></a><tt class="py-lineno">43</tt>  <tt class="py-line"><tt class="py-name">sys</tt><tt class="py-op">.</tt><tt class="py-name">path</tt><tt class="py-op">.</tt><tt class="py-name">insert</tt><tt class="py-op">(</tt><tt class="py-number">0</tt><tt class="py-op">,</tt> <tt class="py-name">os</tt><tt class="py-op">.</tt><tt class="py-name">path</tt><tt class="py-op">.</tt><tt id="link-0" class="py-name" targets="Method pyspark.rdd.RDD.join()=pyspark.rdd.RDD-class.html#join"><a title="pyspark.rdd.RDD.join" class="py-name" href="#" onclick="return doclink('link-0', 'join', 'link-0');">join</a></tt><tt class="py-op">(</tt><tt class="py-name">os</tt><tt class="py-op">.</tt><tt class="py-name">environ</tt><tt class="py-op">[</tt><tt class="py-string">"SPARK_HOME"</tt><tt class="py-op">]</tt><tt class="py-op">,</tt> <tt class="py-string">"python/lib/py4j-0.8.1-src.zip"</tt><tt class="py-op">)</tt><tt class="py-op">)</tt> </tt>
<a name="L44"></a><tt class="py-lineno">44</tt>  <tt class="py-line"> </tt>
<a name="L45"></a><tt class="py-lineno">45</tt>  <tt class="py-line"> </tt>
<a name="L46"></a><tt class="py-lineno">46</tt>  <tt class="py-line"><tt class="py-keyword">from</tt> <tt id="link-1" class="py-name" targets="Package pyspark=pyspark-module.html"><a title="pyspark" class="py-name" href="#" onclick="return doclink('link-1', 'pyspark', 'link-1');">pyspark</a></tt><tt class="py-op">.</tt><tt id="link-2" class="py-name" targets="Module pyspark.conf=pyspark.conf-module.html"><a title="pyspark.conf" class="py-name" href="#" onclick="return doclink('link-2', 'conf', 'link-2');">conf</a></tt> <tt class="py-keyword">import</tt> <tt id="link-3" class="py-name" targets="Class pyspark.conf.SparkConf=pyspark.conf.SparkConf-class.html"><a title="pyspark.conf.SparkConf" class="py-name" href="#" onclick="return doclink('link-3', 'SparkConf', 'link-3');">SparkConf</a></tt> </tt>
<a name="L47"></a><tt class="py-lineno">47</tt>  <tt class="py-line"><tt class="py-keyword">from</tt> <tt id="link-4" class="py-name"><a title="pyspark" class="py-name" href="#" onclick="return doclink('link-4', 'pyspark', 'link-1');">pyspark</a></tt><tt class="py-op">.</tt><tt id="link-5" class="py-name" targets="Module pyspark.context=pyspark.context-module.html,Method pyspark.rdd.RDD.context()=pyspark.rdd.RDD-class.html#context"><a title="pyspark.context
pyspark.rdd.RDD.context" class="py-name" href="#" onclick="return doclink('link-5', 'context', 'link-5');">context</a></tt> <tt class="py-keyword">import</tt> <tt id="link-6" class="py-name" targets="Class pyspark.context.SparkContext=pyspark.context.SparkContext-class.html"><a title="pyspark.context.SparkContext" class="py-name" href="#" onclick="return doclink('link-6', 'SparkContext', 'link-6');">SparkContext</a></tt> </tt>
<a name="L48"></a><tt class="py-lineno">48</tt>  <tt class="py-line"><tt class="py-keyword">from</tt> <tt id="link-7" class="py-name"><a title="pyspark" class="py-name" href="#" onclick="return doclink('link-7', 'pyspark', 'link-1');">pyspark</a></tt><tt class="py-op">.</tt><tt id="link-8" class="py-name" targets="Module pyspark.rdd=pyspark.rdd-module.html"><a title="pyspark.rdd" class="py-name" href="#" onclick="return doclink('link-8', 'rdd', 'link-8');">rdd</a></tt> <tt class="py-keyword">import</tt> <tt id="link-9" class="py-name" targets="Class pyspark.rdd.RDD=pyspark.rdd.RDD-class.html"><a title="pyspark.rdd.RDD" class="py-name" href="#" onclick="return doclink('link-9', 'RDD', 'link-9');">RDD</a></tt> </tt>
<a name="L49"></a><tt class="py-lineno">49</tt>  <tt class="py-line"><tt class="py-keyword">from</tt> <tt id="link-10" class="py-name"><a title="pyspark" class="py-name" href="#" onclick="return doclink('link-10', 'pyspark', 'link-1');">pyspark</a></tt><tt class="py-op">.</tt><tt id="link-11" class="py-name" targets="Module pyspark.files=pyspark.files-module.html"><a title="pyspark.files" class="py-name" href="#" onclick="return doclink('link-11', 'files', 'link-11');">files</a></tt> <tt class="py-keyword">import</tt> <tt id="link-12" class="py-name" targets="Class pyspark.files.SparkFiles=pyspark.files.SparkFiles-class.html"><a title="pyspark.files.SparkFiles" class="py-name" href="#" onclick="return doclink('link-12', 'SparkFiles', 'link-12');">SparkFiles</a></tt> </tt>
<a name="L50"></a><tt class="py-lineno">50</tt>  <tt class="py-line"><tt class="py-keyword">from</tt> <tt id="link-13" class="py-name"><a title="pyspark" class="py-name" href="#" onclick="return doclink('link-13', 'pyspark', 'link-1');">pyspark</a></tt><tt class="py-op">.</tt><tt id="link-14" class="py-name" targets="Module pyspark.storagelevel=pyspark.storagelevel-module.html"><a title="pyspark.storagelevel" class="py-name" href="#" onclick="return doclink('link-14', 'storagelevel', 'link-14');">storagelevel</a></tt> <tt class="py-keyword">import</tt> <tt id="link-15" class="py-name" targets="Class pyspark.storagelevel.StorageLevel=pyspark.storagelevel.StorageLevel-class.html"><a title="pyspark.storagelevel.StorageLevel" class="py-name" href="#" onclick="return doclink('link-15', 'StorageLevel', 'link-15');">StorageLevel</a></tt> </tt>
<a name="L51"></a><tt class="py-lineno">51</tt>  <tt class="py-line"> </tt>
<a name="L52"></a><tt class="py-lineno">52</tt>  <tt class="py-line"> </tt>
<a name="L53"></a><tt class="py-lineno">53</tt>  <tt class="py-line"><tt class="py-name">__all__</tt> <tt class="py-op">=</tt> <tt class="py-op">[</tt><tt class="py-string">"SparkConf"</tt><tt class="py-op">,</tt> <tt class="py-string">"SparkContext"</tt><tt class="py-op">,</tt> <tt class="py-string">"RDD"</tt><tt class="py-op">,</tt> <tt class="py-string">"SparkFiles"</tt><tt class="py-op">,</tt> <tt class="py-string">"StorageLevel"</tt><tt class="py-op">]</tt> </tt>
<a name="L54"></a><tt class="py-lineno">54</tt>  <tt class="py-line"> </tt><script type="text/javascript">
<!--
expandto(location.href);
// -->
</script>
</pre>
<br />
<!-- ==================== NAVIGATION BAR ==================== -->
<table class="navbar" border="0" width="100%" cellpadding="0"
       bgcolor="#a0c0ff" cellspacing="0">
  <tr valign="middle">
  <!-- Home link -->
      <th bgcolor="#70b0f0" class="navbar-select"
          >&nbsp;&nbsp;&nbsp;Home&nbsp;&nbsp;&nbsp;</th>

  <!-- Tree link -->
      <th>&nbsp;&nbsp;&nbsp;<a
        href="module-tree.html">Trees</a>&nbsp;&nbsp;&nbsp;</th>

  <!-- Index link -->
      <th>&nbsp;&nbsp;&nbsp;<a
        href="identifier-index.html">Indices</a>&nbsp;&nbsp;&nbsp;</th>

  <!-- Help link -->
      <th>&nbsp;&nbsp;&nbsp;<a
        href="help.html">Help</a>&nbsp;&nbsp;&nbsp;</th>

  <!-- Project homepage -->
      <th class="navbar" align="right" width="100%">
        <table border="0" cellpadding="0" cellspacing="0">
          <tr><th class="navbar" align="center"
            ><a class="navbar" target="_top" href="http://spark-project.org">PySpark</a></th>
          </tr></table></th>
  </tr>
</table>
<table border="0" cellpadding="0" cellspacing="0" width="100%%">
  <tr>
    <td align="left" class="footer">
    Generated by Epydoc 3.0.1 on Sun Mar  2 16:35:00 2014
    </td>
    <td align="right" class="footer">
      <a target="mainFrame" href="http://epydoc.sourceforge.net"
        >http://epydoc.sourceforge.net</a>
    </td>
  </tr>
</table>

<script type="text/javascript">
  <!--
  // Private objects are initially displayed (because if
  // javascript is turned off then we want them to be
  // visible); but by default, we want to hide them.  So hide
  // them unless we have a cookie that says to show them.
  checkCookie();
  // -->
</script>
</body>
</html>