summaryrefslogblamecommitdiff
path: root/site/releases/spark-release-1-4-1.html
blob: 3c47124d4adf757a179b3282e754559102b9974f (plain) (tree)



















                                                                        


                                                          










































                                                                                                                     
                                                 



























                                                                                                              
                                                         



                                                                 
                                                                                                                            






                                                                   
                                                                           
                                                                                       

















                                                                                                                












                                                                                              










                                                   


                                                                                
                                                                                
                                                        
        


                                                                                


                                                                                                                              










                                                                                                      
                                                       



                                                               
                                                                                                               






                                      
                                                                                                                                                                                                                                                                                                                     



                                                                                      











                                                                                                                                                                                                                                                                                                                                                                                                                         

















                                                                                                                                                   





                                                                                                                                                     










































































































                                                                                                                                                        
                                                                                                  






                                                                     
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="utf-8">
  <meta http-equiv="X-UA-Compatible" content="IE=edge">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">

  <title>
     Spark Release 1.4.1 | Apache Spark
    
  </title>

  

  

  <!-- Bootstrap core CSS -->
  <link href="/css/cerulean.min.css" rel="stylesheet">
  <link href="/css/custom.css" rel="stylesheet">

  <!-- Code highlighter CSS -->
  <link href="/css/pygments-default.css" rel="stylesheet">

  <script type="text/javascript">
  // Google Analytics (legacy ga.js) initialization.
  // Reuse an existing `_gaq` command queue if one is already defined on the
  // page; otherwise start with an empty array.
  var _gaq = _gaq || [];
  // Queue the account ID and a page-view command.
  _gaq.push(['_setAccount', 'UA-32518208-2']);
  _gaq.push(['_trackPageview']);
  (function() {
    // Create a script element that loads ga.js without blocking the page.
    var ga = document.createElement('script');
    ga.type = 'text/javascript';
    ga.async = true;
    // Use the SSL host when the page itself is served over HTTPS.
    ga.src = ('https:' === document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
    // Insert the loader immediately before the first script element in the document.
    var s = document.getElementsByTagName('script')[0];
    s.parentNode.insertBefore(ga, s);
  })();

  /**
   * Queues a Google Analytics event for a link click, then navigates to the
   * link's target after a slight delay to allow the async report to be sent.
   *
   * @param link     Object whose `href` is navigated to
   *                 (NOTE(review): presumably the clicked anchor element; confirm against callers).
   * @param category Event category passed to the `_trackEvent` command.
   * @param action   Event action passed to the `_trackEvent` command.
   */
  function trackOutboundLink(link, category, action) {
    try {
      _gaq.push(['_trackEvent', category , action]);
    } catch (err) {
      // Deliberately ignored: tracking is best-effort and a failure to queue
      // the event must not prevent the navigation below.
    }

    // Follow the link 100 ms later.
    setTimeout(function() {
      document.location.href = link.href;
    }, 100);
  }
  </script>

  <!-- HTML5 shim and Respond.js IE8 support of HTML5 elements and media queries -->
  <!--[if lt IE 9]>
  <script src="https://oss.maxcdn.com/libs/html5shiv/3.7.0/html5shiv.js"></script>
  <script src="https://oss.maxcdn.com/libs/respond.js/1.3.0/respond.min.js"></script>
  <![endif]-->
</head>

<body>

<script src="https://code.jquery.com/jquery.js"></script>
<script src="//netdna.bootstrapcdn.com/bootstrap/3.0.3/js/bootstrap.min.js"></script>
<script src="/js/lang-tabs.js"></script>
<script src="/js/downloads.js"></script>

<div class="container" style="max-width: 1200px;">

<div class="masthead">
  
    <p class="lead">
      <!-- The logo is the only content of the home link, so its alt text is the link's accessible name. -->
      <a href="/">
      <img src="/images/spark-logo-trademark.png" alt="Apache Spark"
        style="height:100px; width:auto; vertical-align: bottom; margin-top: 20px;"></a><span class="tagline">
          Lightning-fast cluster computing
      </span>
    </p>
  
</div>

<nav class="navbar navbar-default" role="navigation">
  <!-- Brand and toggle get grouped for better mobile display -->
  <div class="navbar-header">
    <button type="button" class="navbar-toggle" data-toggle="collapse"
            data-target="#navbar-collapse-1">
      <span class="sr-only">Toggle navigation</span>
      <span class="icon-bar"></span>
      <span class="icon-bar"></span>
      <span class="icon-bar"></span>
    </button>
  </div>

  <!-- Collect the nav links, forms, and other content for toggling -->
  <div class="collapse navbar-collapse" id="navbar-collapse-1">
    <ul class="nav navbar-nav">
      <li><a href="/downloads.html">Download</a></li>
      <li class="dropdown">
        <a href="#" class="dropdown-toggle" data-toggle="dropdown">
          Libraries <b class="caret"></b>
        </a>
        <ul class="dropdown-menu">
          <li><a href="/sql/">SQL and DataFrames</a></li>
          <li><a href="/streaming/">Spark Streaming</a></li>
          <li><a href="/mllib/">MLlib (machine learning)</a></li>
          <li><a href="/graphx/">GraphX (graph)</a></li>
          <li class="divider"></li>
          <li><a href="https://cwiki.apache.org/confluence/display/SPARK/Third+Party+Projects">Third-Party Packages</a></li>
        </ul>
      </li>
      <li class="dropdown">
        <a href="#" class="dropdown-toggle" data-toggle="dropdown">
          Documentation <b class="caret"></b>
        </a>
        <ul class="dropdown-menu">
          <li><a href="/docs/latest/">Latest Release (Spark 2.0.0)</a></li>
          <li><a href="/documentation.html">Older Versions and Other Resources</a></li>
        </ul>
      </li>
      <li><a href="/examples.html">Examples</a></li>
      <li class="dropdown">
        <a href="/community.html" class="dropdown-toggle" data-toggle="dropdown">
          Community <b class="caret"></b>
        </a>
        <ul class="dropdown-menu">
          <li><a href="/community.html">Mailing Lists</a></li>
          <li><a href="/community.html#events">Events and Meetups</a></li>
          <li><a href="/community.html#history">Project History</a></li>
          <li><a href="https://cwiki.apache.org/confluence/display/SPARK/Powered+By+Spark">Powered By</a></li>
          <li><a href="https://cwiki.apache.org/confluence/display/SPARK/Committers">Project Committers</a></li>
          <li><a href="https://issues.apache.org/jira/browse/SPARK">Issue Tracker</a></li>
        </ul>
      </li>
      <li><a href="/faq.html">FAQ</a></li>
    </ul>
    <ul class="nav navbar-nav navbar-right">
      <li class="dropdown">
        <a href="http://www.apache.org/" class="dropdown-toggle" data-toggle="dropdown">
          Apache Software Foundation <b class="caret"></b></a>
        <ul class="dropdown-menu">
          <li><a href="http://www.apache.org/">Apache Homepage</a></li>
          <li><a href="http://www.apache.org/licenses/">License</a></li>
          <li><a href="http://www.apache.org/foundation/sponsorship.html">Sponsorship</a></li>
          <li><a href="http://www.apache.org/foundation/thanks.html">Thanks</a></li>
          <li><a href="http://www.apache.org/security/">Security</a></li>
        </ul>
      </li>
    </ul>
  </div>
  <!-- /.navbar-collapse -->
</nav>


<div class="row">
  <div class="col-md-3 col-md-push-9">
    <div class="news" style="margin-bottom: 20px;">
      <h5>Latest News</h5>
      <ul class="list-unstyled">
        
          <li><a href="/news/spark-2-0-1-released.html">Spark 2.0.1 released</a>
          <span class="small">(Oct 03, 2016)</span></li>
        
          <li><a href="/news/spark-2-0-0-released.html">Spark 2.0.0 released</a>
          <span class="small">(Jul 26, 2016)</span></li>
        
          <li><a href="/news/spark-1-6-2-released.html">Spark 1.6.2 released</a>
          <span class="small">(Jun 25, 2016)</span></li>
        
          <li><a href="/news/submit-talks-to-spark-summit-eu-2016.html">Call for Presentations for Spark Summit EU is Open</a>
          <span class="small">(Jun 16, 2016)</span></li>
        
      </ul>
      <p class="small" style="text-align: right;"><a href="/news/index.html">Archive</a></p>
    </div>
    <div class="hidden-xs hidden-sm">
      <a href="/downloads.html" class="btn btn-success btn-lg btn-block" style="margin-bottom: 30px;">
        Download Spark
      </a>
      <p style="font-size: 16px; font-weight: 500; color: #555;">
        Built-in Libraries:
      </p>
      <ul class="list-none">
        <li><a href="/sql/">SQL and DataFrames</a></li>
        <li><a href="/streaming/">Spark Streaming</a></li>
        <li><a href="/mllib/">MLlib (machine learning)</a></li>
        <li><a href="/graphx/">GraphX (graph)</a></li>
      </ul>
      <a href="https://cwiki.apache.org/confluence/display/SPARK/Third+Party+Projects">Third-Party Packages</a>
    </div>
  </div>

  <div class="col-md-9 col-md-pull-3">
    <h2>Spark Release 1.4.1</h2>


<p>Spark 1.4.1 is a maintenance release containing stability fixes. This release is based on the <a href="https://github.com/apache/spark/tree/branch-1.4">branch-1.4</a> maintenance branch of Spark. We recommend that all 1.4.0 users upgrade to this stable release. 85 developers contributed to this release.</p>

<p>To download Spark 1.4.1 visit the <a href="/downloads.html">downloads</a> page.</p>

<h3 id="fixes">Fixes</h3>
<p>Spark 1.4.1 contains several bug fixes in Spark&#8217;s DataFrame and data source support and assorted fixes in other components. Some of the more important fixes are highlighted below. You can visit the <a href="https://issues.apache.org/jira/issues/?jql=project%20%3D%20SPARK%20AND%20fixVersion%20%3D%201.4.1%20ORDER%20BY%20priority%2C%20component">Spark issue tracker</a> for the full list of fixes.</p>

<h4 id="data-sources-and-dataframes">Data Sources and DataFrames</h4>

<ul>
  <li><a href="https://issues.apache.org/jira/browse/SPARK-8804">SPARK-8804</a>: Order of UTF8String is not consistent with String if there is any non-ascii character in it</li>
  <li><a href="https://issues.apache.org/jira/browse/SPARK-8406">SPARK-8406</a>: Race condition when writing Parquet files</li>
  <li><a href="https://issues.apache.org/jira/browse/SPARK-8329">SPARK-8329</a>: DataSource options parser no longer accepts &#8216;_&#8217;</li>
  <li><a href="https://issues.apache.org/jira/browse/SPARK-8368">SPARK-8368</a>: ClassNotFoundException in closure for map</li>
  <li><a href="https://issues.apache.org/jira/browse/SPARK-8470">SPARK-8470</a>: MissingRequirementError for ScalaReflection on user classes</li>
  <li><a href="https://issues.apache.org/jira/browse/SPARK-8358">SPARK-8358</a>: DataFrame explode with alias and * fails</li>
</ul>

<h4 id="mllib">MLlib</h4>

<ul>
  <li><a href="https://issues.apache.org/jira/browse/SPARK-8151">SPARK-8151</a>: Pipeline components should correctly implement copy</li>
  <li><a href="https://issues.apache.org/jira/browse/SPARK-8468">SPARK-8468</a>: Some metrics in RegressionEvaluator should have negative sign</li>
  <li><a href="https://issues.apache.org/jira/browse/SPARK-8736">SPARK-8736</a>: GBTRegressionModel shouldn’t threshold predictions</li>
  <li><a href="https://issues.apache.org/jira/browse/SPARK-8563">SPARK-8563</a>: IndexedRowMatrix.computeSVD() yields the U with wrong numCols</li>
</ul>

<h4 id="pyspark">PySpark</h4>

<ul>
  <li><a href="https://issues.apache.org/jira/browse/SPARK-8202">SPARK-8202</a>: Infinite loop during external sort</li>
  <li><a href="https://issues.apache.org/jira/browse/SPARK-8573">SPARK-8573</a>: Trigger exceptions when invalid operators are used</li>
  <li><a href="https://issues.apache.org/jira/browse/SPARK-8766">SPARK-8766</a>: Support non ASCII characters in columns</li>
</ul>

<h4 id="sparkr">SparkR</h4>
<ul>
  <li><a href="https://issues.apache.org/jira/browse/SPARK-8506">SPARK-8506</a>: Support for Spark packages when initializing SparkR</li>
  <li><a href="https://issues.apache.org/jira/browse/SPARK-8085">SPARK-8085</a>: Support for user defined schemas when reading from data sources</li>
</ul>

<h3 id="contributors">Contributors</h3>
<p>The following developers contributed to this release:</p>

<ul>
  <li>Adam Roberts &#8211; Bug fixes in Core and tests</li>
  <li>Ai He &#8211; Improvements in PySpark</li>
  <li>Alok Singh &#8211; Bug fixes in Core</li>
  <li>Amey Chaugule &#8211; Improvement in SQL</li>
  <li>Andrew Or &#8211; Bug fixes in Core, tests, Shuffle, spark submit, and SQL; improvement in Web UI and tests</li>
  <li>Animesh Baranawal &#8211; Improvement in SQL</li>
  <li>Ben Fradet &#8211; Bug fixes in Streaming and Web UI</li>
  <li>Brian Lockwood &#8211; Documentation in Core</li>
  <li>Burak Yavuz &#8211; Test in SQL; improvements in spark submit; bug fixes in spark submit and SQL; improvement in SQL</li>
  <li>Carson Wang &#8211; Bug fixes in Core and Web UI</li>
  <li>Cheng Hao &#8211; Bug fixes in SQL</li>
  <li>Cheng Lian &#8211; Documentation in Core; bug fixes in SQL</li>
  <li>Cheolsoo Park &#8211; Improvement in SQL</li>
  <li>Chris Freeman &#8211; Bug fixes in SparkR</li>
  <li>Christian Kadner &#8211; Bug fixes in SQL</li>
  <li>Cody Koeninger &#8211; Improvements in Streaming</li>
  <li>DB Tsai &#8211; New features in MLlib</li>
  <li>Daniel Darabos &#8211; Bug fixes in EC2</li>
  <li>Daoyuan Wang &#8211; Bug fixes in SQL</li>
  <li>Davies Liu &#8211; Bug fixes in PySpark; improvement in SQL and PySpark</li>
  <li>Devaraj K &#8211; Bug fixes in YARN</li>
  <li>Dibyendu Bhattacharya &#8211; Bug fixes in Streaming and Web UI</li>
  <li>Favio Vazquez &#8211; Documentation in Core and MLlib</li>
  <li>Hari Shreedharan &#8211; Bug fixes in Streaming and Core</li>
  <li>Holden Karau &#8211; Documentation in Core; bug fixes in SparkR, Shuffle, and MLlib</li>
  <li>Hossein Falaki &#8211; Improvements and bug fixes in SparkR</li>
  <li>Huang Zhaowei &#8211; Bug fixes in Streaming and YARN</li>
  <li>Jean Lyn &#8211; Bug fixes in SQL</li>
  <li>Joseph K. Bradley &#8211; Improvements in MLlib; bug fixes in MLlib; improvement in Core and MLlib</li>
  <li>Josh Rosen &#8211; New features in SQL; bug fixes in scheduler and SQL; improvement in Core</li>
  <li>Kevin Conor &#8211; Bug fixes in PySpark</li>
  <li>Kousuke Saruta &#8211; Documentation in Core</li>
  <li>Lars Francke &#8211; Documentation in Core</li>
  <li>Lee &#8211; Bug fixes in MLlib</li>
  <li>Liang-Chi Hsieh &#8211; Improvements in SQL; bug fixes in MLlib and SQL</li>
  <li>Lianhui Wang &#8211; Bug fixes in Shuffle</li>
  <li>Luca Martinetti &#8211; Documentation in Core</li>
  <li>Manoj Kumar &#8211; Improvements in SQL; bug fixes in MLlib and PySpark</li>
  <li>Marcelo Vanzin &#8211; Improvements in Core; bug fixes in Core and Web UI</li>
  <li>Mark Smith &#8211; Bug fixes in EC2</li>
  <li>Michael Armbrust &#8211; Bug fixes in SQL</li>
  <li>Mike Dusenberry &#8211; Improvements in PySpark, Core, SQL, and Streaming</li>
  <li>Moussa Taifi &#8211; Documentation in Core</li>
  <li>Nathan Howell &#8211; Bug fixes in SQL</li>
  <li>Navis Ryu &#8211; Bug fixes in SQL</li>
  <li>Neelesh Srinivas Salian &#8211; Documentation in Core and YARN</li>
  <li>Oleksiy Dyagilev &#8211; Bug fixes in Core and MLlib</li>
  <li>Paavo Parkkinen &#8211; Bug fixes in Streaming and MLlib</li>
  <li>Patrick Wendell &#8211; Improvements in Core</li>
  <li>Punya Biswal &#8211; Bug fixes in Core</li>
  <li>Qian Huang &#8211; New features in SparkR</li>
  <li>Radek Ostrowski &#8211; Documentation in Core</li>
  <li>Ram Sriharsha &#8211; New features in MLlib and PySpark</li>
  <li>Rekha Joshi &#8211; Improvements in Web UI</li>
  <li>Reynold Xin &#8211; Bug fixes and improvement in SQL</li>
  <li>Rosstin Murphy &#8211; Documentation in Core</li>
  <li>Ryan Williams &#8211; Bug fixes in Core</li>
  <li>Saisai Shao &#8211; Bug fixes in Core and Streaming</li>
  <li>Scott Taylor &#8211; Test in PySpark</li>
  <li>Sean Owen &#8211; Improvements and bug fixes in Core</li>
  <li>Sephiroth Lin &#8211; Improvements in YARN</li>
  <li>Shiming Fei &#8211; Bug fixes in Core</li>
  <li>Shivaram Venkataraman &#8211; Improvements in Core, EC2, and SparkR; bug fixes in SparkR and EC2</li>
  <li>Shixiong Zhu &#8211; Test in Streaming, tests, and Core; improvements in Streaming and Core; bug fixes in Core, tests, PySpark, and Streaming</li>
  <li>Simon Hafner &#8211; Bug fixes in EC2</li>
  <li>Sun Rui &#8211; Bug fixes in Core and SparkR</li>
  <li>Tao Li &#8211; Bug fixes in YARN</li>
  <li>Tathagata Das &#8211; Improvements in Core and Streaming; bug fixes in Core</li>
  <li>Ted Blackman &#8211; Bug fixes in PySpark</li>
  <li>Ted Yu &#8211; Bug fixes in SQL</li>
  <li>Tim Ellison &#8211; Bug fixes in Core</li>
  <li>Timothy Chen &#8211; Bug fixes in Web UI and Mesos</li>
  <li>Tingjun Xu &#8211; Improvements in Core</li>
  <li>Tom Graves &#8211; Bug fixes in Core</li>
  <li>Vinod K C &#8211; Improvements and bug fixes in SQL</li>
  <li>Vyacheslav Baranov &#8211; Bug fixes in Core</li>
  <li>Wang Tao &#8211; Bug fixes in Core and YARN</li>
  <li>Wenchen Fan &#8211; Improvement in SQL</li>
  <li>Xiangrui Meng &#8211; Bug fixes in MLlib</li>
  <li>Yanbo Liang &#8211; Improvements in Core, MLlib, and PySpark</li>
  <li>Yin Huai &#8211; Improvements in Core, tests, and SQL; bug fixes in tests and SQL; improvement in tests and SQL</li>
  <li>Yu ISHIKAWA &#8211; Improvements in PySpark; improvement in SparkR</li>
  <li>Yuhao Yang &#8211; Bug fixes in MLlib</li>
  <li>Yuming Wang &#8211; GitHub integration in Web UI</li>
  <li>Yuri Saito &#8211; Bug fixes in PySpark</li>
</ul>

<p><em>Thanks to everyone who contributed!</em></p>



<p>
<br/>
<a href="/news/">Spark News Archive</a>
</p>

  </div>
</div>



<footer class="small">
  <hr>
  Apache Spark, Spark, Apache, and the Spark logo are <a href="/trademarks.html">trademarks</a> of
  <a href="http://www.apache.org">The Apache Software Foundation</a>.
</footer>

</div>

</body>
</html>