summaryrefslogblamecommitdiff
path: root/site/releases/spark-release-0-7-3.html
blob: 33b92a9854d6c721006521d06d28ec563da1de02 (plain) (tree)
1
2
3
4
5
6
7
8
9
10
11
12
13
14













                                            
                                                  

                                                       



                                       



                                                                             









                                                                                                                     
 









                                                              
           












































































                                                                                                                                                                           
                                                                                                                                                                                                                                                                                                                                                                                                                                                                               









































                                                                                                                                                                                                                                                                                                                                                                                            





                                                                                                                                              









                           
<!DOCTYPE html>
<!--[if IE 6]>
<html id="ie6" dir="ltr" lang="en-US">
<![endif]-->
<!--[if IE 7]>
<html id="ie7" dir="ltr" lang="en-US">
<![endif]-->
<!--[if IE 8]>
<html id="ie8" dir="ltr" lang="en-US">
<![endif]-->
<!--[if !(IE 6) & !(IE 7) & !(IE 8)  ]><!-->
<html dir="ltr" lang="en-US">
<!--<![endif]-->
<head>
  <link rel="shortcut icon" href="/favicon.ico" />
  <meta charset="UTF-8" />
  <meta name="viewport" content="width=device-width" />
  <title>
     Spark Release 0.7.3 | Apache Spark
    
  </title>

  <link rel="stylesheet" type="text/css" media="all" href="/css/style.css" />
  <link rel="stylesheet" href="/css/pygments-default.css">

  <script type="text/javascript">
  <!-- Google Analytics initialization -->
  // Tracker command queue; an already-defined _gaq is kept instead of being replaced.
  var _gaq = _gaq || [];
  _gaq.push(['_setAccount', 'UA-32518208-2']);
  _gaq.push(['_trackPageview']);

  // Build an async script element for ga.js and insert it ahead of the first
  // script element currently in the document.
  (function() {
    var loader = document.createElement('script');
    loader.type = 'text/javascript';
    loader.async = true;

    // Pick the host prefix that matches the protocol the page was loaded over.
    var hostPrefix;
    if ('https:' == document.location.protocol) {
      hostPrefix = 'https://ssl';
    } else {
      hostPrefix = 'http://www';
    }
    loader.src = hostPrefix + '.google-analytics.com/ga.js';

    var firstScript = document.getElementsByTagName('script')[0];
    firstScript.parentNode.insertBefore(loader, firstScript);
  })();

  <!-- Adds slight delay to links to allow async reporting -->
  function trackOutboundLink(link, category, action) {
    // Queues a '_trackEvent' command built from category and action, then
    // navigates to link.href after a 100 ms pause.
    var followLink = function() {
      document.location.href = link.href;
    };

    try {
      _gaq.push(['_trackEvent', category, action]);
    } catch (err) {
      // Intentionally ignored: a failure to queue the event must not stop
      // the navigation scheduled below.
    }

    setTimeout(followLink, 100);
  }
  </script>

  <link rel='canonical' href='/releases/spark-release-0-7-3.html' />

  <style type="text/css">
    #site-title,
    #site-description {
      position: absolute !important;
      clip: rect(1px 1px 1px 1px); /* IE6, IE7 */
      clip: rect(1px, 1px, 1px, 1px);
    }
  </style>
  <style type="text/css" id="custom-background-css">
    body.custom-background { background-color: #f1f1f1; }
  </style>
</head>

<!--body class="page singular"-->
<body class="singular">
<div id="page" class="hfeed">

  <header id="branding" role="banner">
  <hgroup>
    <h1 id="site-title"><span><a href="/" title="Spark" rel="home">Spark</a></span></h1>
    <h2 id="site-description">Lightning-Fast Cluster Computing</h2>
  </hgroup>

  <a href="/">
    <img src="/images/spark-project-header1.png" width="1000" height="220" alt="Spark: Lightning-Fast Cluster Computing" title="Spark: Lightning-Fast Cluster Computing" />
  </a>

  <nav id="access" role="navigation">
    <h3 class="assistive-text">Main menu</h3>
    <div class="menu-main-menu-container">
      <ul id="menu-main-menu" class="menu">
        
        <li class="menu-item menu-item-type-post_type menu-item-object-page ">
          <a href="/index.html">Home</a>
        </li>
        
        <li class="menu-item menu-item-type-post_type menu-item-object-page ">
          <a href="/downloads.html">Downloads</a>
        </li>
        
        <li class="menu-item menu-item-type-post_type menu-item-object-page ">
          <a href="/documentation.html">Documentation</a>
        </li>
        
        <li class="menu-item menu-item-type-post_type menu-item-object-page ">
          <a href="/examples.html">Examples</a>
        </li>
        
        <li class="menu-item menu-item-type-post_type menu-item-object-page ">
          <a href="/mailing-lists.html">Mailing Lists</a>
        </li>
        
        <li class="menu-item menu-item-type-post_type menu-item-object-page ">
          <a href="/research.html">Research</a>
        </li>
        
        <li class="menu-item menu-item-type-post_type menu-item-object-page ">
          <a href="/faq.html">FAQ</a>
        </li>
        
      </ul></div>
  </nav><!-- #access -->
</header><!-- #branding -->



  <div id="main">
    <div id="primary">
      <div id="content" role="main">
        
          <article class="page type-page status-publish hentry">
            <h2>Spark Release 0.7.3</h2>


<p>Spark 0.7.3 is a maintenance release with several bug fixes, performance fixes, and new features. You can download it as a <a href="http://spark-project.org/download/spark-0.7.3-sources.tgz">source package</a> (4 MB tar.gz) or get prebuilt packages for <a href="http://spark-project.org/download/spark-0.7.3-prebuilt-hadoop1.tgz">Hadoop 1 / CDH3</a> or for <a href="http://spark-project.org/download/spark-0.7.3-prebuilt-cdh4.tgz">CDH 4</a> (61 MB tar.gz).</p>

<p>We recommend that all users update to this maintenance release.</p>

<p>The improvements in this release include:</p>

<ul>
  <li><b>New "add JARs" functionality in Spark shell:</b> Users of <code>spark-shell</code> can now set the <code>ADD_JARS</code> environment variable to add a list of JARs to their clusters; these will also be sent to workers.</li>
  <li><b>Windows fixes:</b> Spark standalone clusters now properly kill executors when a job ends or fails. In addition, adding JAR paths with backslashes will now work correctly.</li>
  <li><b>Streaming API fixes:</b> The Kafka and Twitter APIs for Spark Streaming have been updated. In the Twitter case, this is to deal with the username/password authentication method being disabled by Twitter, while in the Kafka case, it is to allow receiving messages other than strings. Note that these are breaking API changes as the Streaming API is still in alpha.</li>
  <li><b>Python performance:</b> Spark's mechanism for spawning Python VMs has been improved to do so faster when the JVM has a large heap size, speeding up the Python API.</li>
  <li><b>Mesos fixes:</b> JARs added to your job will now be on the classpath when deserializing task results in Mesos.</li>
  <li><b>Error reporting:</b> Better error reporting for non-serializable exceptions and overly large task results.</li>
  <li><b>Examples:</b> Added an example of stateful stream processing with <code>updateStateByKey</code>.</li>
  <li><b>Build:</b> Spark Streaming no longer depends on the Twitter4J repo, which should allow it to build in China.</li>
  <li><b>Bug fixes</b> in <code>foldByKey</code>, streaming <code>count</code>, statistics methods, documentation, and web UI.</li>
</ul>

<p>The following people contributed to this release:</p>

<ul>
  <li>Charles Reiss (Mesos)</li>
  <li>Christoph Grothaus (Windows spawn fixes)</li>
  <li>Christopher Nguyen (bug fixes)</li>
  <li>James Phillpotts (Twitter input stream)</li>
  <li>Jey Kottalam (Python performance)</li>
  <li>Josh Rosen (usability)</li>
  <li>Konstantin Boudnik (build)</li>
  <li>Mark Hamstra (build)</li>
  <li>Matei Zaharia (Windows, docs, ADD_JARS, Python, streaming)</li>
  <li>Patrick Wendell (usability)</li>
  <li>Tathagata Das (streaming fixes)</li>
  <li>Jerry Shao (bug fixes)</li>
  <li>S. Kumar (examples)</li>
  <li>Sean McNamara (Kafka input streams, streaming fixes)</li>
</ul>

          </article><!-- #post -->
        
      </div><!-- #content -->
      
      <footer id="colophon" role="contentinfo">
  <div id="site-generator">
    <p style="padding-top: 0; padding-bottom: 15px;">
      Apache Spark is an effort undergoing incubation at The Apache Software Foundation.
      <a href="http://incubator.apache.org/" style="border: none;">
        <img style="vertical-align: middle; border: none;" src="/images/incubator-logo.png" alt="Apache Incubator" title="Apache Incubator" />
      </a>  
    </p>
  </div>
</footer><!-- #colophon -->

    </div><!-- #primary -->
  </div><!-- #main -->
</div><!-- #page -->


</body>
</html>