summaryrefslogtreecommitdiff
path: root/site/releases/spark-release-1-4-1.html
blob: 2223a3dd638b18e77a63eb54a4465f57b0cb957d (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="utf-8">
  <meta http-equiv="X-UA-Compatible" content="IE=edge">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">

  <title>
     Spark Release 1.4.1 | Apache Spark
    
  </title>

  

  

  <!-- Bootstrap core CSS -->
  <link href="/css/cerulean.min.css" rel="stylesheet">
  <link href="/css/custom.css" rel="stylesheet">

  <!-- Code highlighter CSS -->
  <link href="/css/pygments-default.css" rel="stylesheet">

  <script type="text/javascript">
  // Google Analytics initialization.
  // Reuse an existing command queue if one is already defined, otherwise start
  // with an empty array; the commands below are pushed onto it.
  var _gaq = _gaq || [];
  _gaq.push(['_setAccount', 'UA-32518208-2']);
  _gaq.push(['_trackPageview']);

  // Build a <script> element pointing at ga.js and insert it before the first
  // <script> element in the document.
  (function() {
    var onHttps = document.location.protocol === 'https:';

    var loader = document.createElement('script');
    loader.type = 'text/javascript';
    loader.async = true;
    // Host prefix is chosen from the protocol of the current page.
    loader.src = (onHttps ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';

    var firstScript = document.getElementsByTagName('script')[0];
    firstScript.parentNode.insertBefore(loader, firstScript);
  })();

  // Adds slight delay to links to allow async reporting
  /**
   * Queue an analytics event for a link, then navigate to the link's target
   * after a short delay.
   *
   * A `_trackEvent` command carrying `category` and `action` is pushed onto the
   * global `_gaq` queue. Any error thrown while pushing is ignored, so the
   * navigation is scheduled regardless.
   *
   * @param {{href: string}} link     Object whose `href` is assigned to
   *                                  `document.location.href` when the timer fires.
   * @param {string}         category Event category passed to `_trackEvent`.
   * @param {string}         action   Event action passed to `_trackEvent`.
   */
  function trackOutboundLink(link, category, action) {
    var NAVIGATION_DELAY_MS = 100;

    var followLink = function() {
      document.location.href = link.href;
    };

    try {
      _gaq.push(['_trackEvent', category , action]);
    } catch (ignored) {
      // Best effort only: a reporting failure must not prevent navigation.
    }

    setTimeout(followLink, NAVIGATION_DELAY_MS);
  }
  </script>

  <!-- HTML5 shim and Respond.js IE8 support of HTML5 elements and media queries -->
  <!--[if lt IE 9]>
  <script src="https://oss.maxcdn.com/libs/html5shiv/3.7.0/html5shiv.js"></script>
  <script src="https://oss.maxcdn.com/libs/respond.js/1.3.0/respond.min.js"></script>
  <![endif]-->
</head>

<body>

<!-- Third-party libraries (jQuery, then the Bootstrap plugins that require it),
     followed by the site's own scripts. All remote URLs use an explicit
     https: scheme so the page also works when opened from file://. -->
<script src="https://code.jquery.com/jquery.js"></script>
<script src="https://netdna.bootstrapcdn.com/bootstrap/3.0.3/js/bootstrap.min.js"></script>
<script src="/js/lang-tabs.js"></script>
<script src="/js/downloads.js"></script>

<div class="container" style="max-width: 1200px;">

<!-- Masthead: logo linking to the site root, followed by the tagline.
     The image is the only content of the link, so its alt text names the
     link destination. -->
<div class="masthead">
  
    <p class="lead">
      <a href="/">
      <img src="/images/spark-logo-trademark.png" alt="Apache Spark home"
        style="height:100px; width:auto; vertical-align: bottom; margin-top: 20px;"></a><span class="tagline">
          Lightning-fast cluster computing
      </span>
    </p>
  
</div>

<nav class="navbar navbar-default" role="navigation">
  <!-- Brand and toggle get grouped for better mobile display -->
  <div class="navbar-header">
    <button type="button" class="navbar-toggle" data-toggle="collapse"
            data-target="#navbar-collapse-1">
      <span class="sr-only">Toggle navigation</span>
      <span class="icon-bar"></span>
      <span class="icon-bar"></span>
      <span class="icon-bar"></span>
    </button>
  </div>

  <!-- Collect the nav links, forms, and other content for toggling -->
  <div class="collapse navbar-collapse" id="navbar-collapse-1">
    <ul class="nav navbar-nav">
      <li><a href="/downloads.html">Download</a></li>
      <li class="dropdown">
        <a href="#" class="dropdown-toggle" data-toggle="dropdown">
          Libraries <b class="caret"></b>
        </a>
        <ul class="dropdown-menu">
          <li><a href="/sql/">SQL and DataFrames</a></li>
          <li><a href="/streaming/">Spark Streaming</a></li>
          <li><a href="/mllib/">MLlib (machine learning)</a></li>
          <li><a href="/graphx/">GraphX (graph)</a></li>
          <li class="divider"></li>
          <li><a href="https://cwiki.apache.org/confluence/display/SPARK/Third+Party+Projects">Third-Party Packages</a></li>
        </ul>
      </li>
      <li class="dropdown">
        <a href="#" class="dropdown-toggle" data-toggle="dropdown">
          Documentation <b class="caret"></b>
        </a>
        <ul class="dropdown-menu">
          <li><a href="/docs/latest/">Latest Release (Spark 2.0.0)</a></li>
          <li><a href="/documentation.html">Older Versions and Other Resources</a></li>
        </ul>
      </li>
      <li><a href="/examples.html">Examples</a></li>
      <li class="dropdown">
        <a href="/community.html" class="dropdown-toggle" data-toggle="dropdown">
          Community <b class="caret"></b>
        </a>
        <ul class="dropdown-menu">
          <li><a href="/community.html">Mailing Lists</a></li>
          <li><a href="/community.html#events">Events and Meetups</a></li>
          <li><a href="/community.html#history">Project History</a></li>
          <li><a href="https://cwiki.apache.org/confluence/display/SPARK/Powered+By+Spark">Powered By</a></li>
          <li><a href="https://cwiki.apache.org/confluence/display/SPARK/Committers">Project Committers</a></li>
          <li><a href="https://issues.apache.org/jira/browse/SPARK">Issue Tracker</a></li>
        </ul>
      </li>
      <li><a href="/faq.html">FAQ</a></li>
    </ul>
    <!-- Right-aligned dropdown with Apache Software Foundation links (HTTPS). -->
    <ul class="nav navbar-nav navbar-right">
      <li class="dropdown">
        <a href="https://www.apache.org/" class="dropdown-toggle" data-toggle="dropdown">
          Apache Software Foundation <b class="caret"></b></a>
        <ul class="dropdown-menu">
          <li><a href="https://www.apache.org/">Apache Homepage</a></li>
          <li><a href="https://www.apache.org/licenses/">License</a></li>
          <li><a href="https://www.apache.org/foundation/sponsorship.html">Sponsorship</a></li>
          <li><a href="https://www.apache.org/foundation/thanks.html">Thanks</a></li>
          <li><a href="https://www.apache.org/security/">Security</a></li>
        </ul>
      </li>
    </ul>
  </div>
  <!-- /.navbar-collapse -->
</nav>


<div class="row">
  <div class="col-md-3 col-md-push-9">
    <div class="news" style="margin-bottom: 20px;">
      <h5>Latest News</h5>
      <ul class="list-unstyled">
        
          <li><a href="/news/spark-2-0-0-released.html">Spark 2.0.0 released</a>
          <span class="small">(Jul 26, 2016)</span></li>
        
          <li><a href="/news/spark-1-6-2-released.html">Spark 1.6.2 released</a>
          <span class="small">(Jun 25, 2016)</span></li>
        
          <li><a href="/news/submit-talks-to-spark-summit-eu-2016.html">Call for Presentations for Spark Summit EU is Open</a>
          <span class="small">(Jun 16, 2016)</span></li>
        
          <li><a href="/news/spark-2.0.0-preview.html">Preview release of Spark 2.0</a>
          <span class="small">(May 26, 2016)</span></li>
        
      </ul>
      <p class="small" style="text-align: right;"><a href="/news/index.html">Archive</a></p>
    </div>
    <div class="hidden-xs hidden-sm">
      <a href="/downloads.html" class="btn btn-success btn-lg btn-block" style="margin-bottom: 30px;">
        Download Spark
      </a>
      <p style="font-size: 16px; font-weight: 500; color: #555;">
        Built-in Libraries:
      </p>
      <ul class="list-none">
        <li><a href="/sql/">SQL and DataFrames</a></li>
        <li><a href="/streaming/">Spark Streaming</a></li>
        <li><a href="/mllib/">MLlib (machine learning)</a></li>
        <li><a href="/graphx/">GraphX (graph)</a></li>
      </ul>
      <a href="https://cwiki.apache.org/confluence/display/SPARK/Third+Party+Projects">Third-Party Packages</a>
    </div>
  </div>

  <div class="col-md-9 col-md-pull-3">
    <h2>Spark Release 1.4.1</h2>


<p>Spark 1.4.1 is a maintenance release containing stability fixes. This release is based on the <a href="https://github.com/apache/spark/tree/branch-1.4">branch-1.4</a> maintenance branch of Spark. We recommend that all 1.4.0 users upgrade to this stable release. 85 developers contributed to this release.</p>

<p>To download Spark 1.4.1 visit the <a href="/downloads.html">downloads</a> page.</p>

<h3 id="fixes">Fixes</h3>
<p>Spark 1.4.1 contains several bug fixes in Spark&#8217;s DataFrame and data source support and assorted fixes in other components. Some of the more important fixes are highlighted below. You can visit the <a href="https://issues.apache.org/jira/issues/?jql=project%20%3D%20SPARK%20AND%20fixVersion%20%3D%201.4.1%20ORDER%20BY%20priority%2C%20component">Spark issue tracker</a> for the full list of fixes.</p>

<h4 id="data-sources-and-dataframes">Data Sources and DataFrames</h4>

<ul>
  <li><a href="https://issues.apache.org/jira/browse/SPARK-8804">SPARK-8804</a>: Order of UTF8String is not consistent with String if there is any non-ascii character in it</li>
  <li><a href="https://issues.apache.org/jira/browse/SPARK-8406">SPARK-8406</a>: Race condition when writing Parquet files</li>
  <li><a href="https://issues.apache.org/jira/browse/SPARK-8329">SPARK-8329</a>: DataSource options parser no longer accepts &#8216;_&#8217;</li>
  <li><a href="https://issues.apache.org/jira/browse/SPARK-8368">SPARK-8368</a>: ClassNotFoundException in closure for map</li>
  <li><a href="https://issues.apache.org/jira/browse/SPARK-8470">SPARK-8470</a>: MissingRequirementError for ScalaReflection on user classes</li>
  <li><a href="https://issues.apache.org/jira/browse/SPARK-8358">SPARK-8358</a>: DataFrame explode with alias and * fails</li>
</ul>

<h4 id="mllib">MLlib</h4>

<ul>
  <li><a href="https://issues.apache.org/jira/browse/SPARK-8151">SPARK-8151</a>: Pipeline components should correctly implement copy</li>
  <li><a href="https://issues.apache.org/jira/browse/SPARK-8468">SPARK-8468</a>: Some metrics in RegressionEvaluator should have negative sign</li>
  <li><a href="https://issues.apache.org/jira/browse/SPARK-8736">SPARK-8736</a>: GBTRegressionModel shouldn’t threshold predictions</li>
  <li><a href="https://issues.apache.org/jira/browse/SPARK-8563">SPARK-8563</a>: IndexedRowMatrix.computeSVD() yields the U with wrong numCols</li>
</ul>

<h4 id="pyspark">PySpark</h4>

<ul>
  <li><a href="https://issues.apache.org/jira/browse/SPARK-8202">SPARK-8202</a>: Infinite loop during external sort</li>
  <li><a href="https://issues.apache.org/jira/browse/SPARK-8573">SPARK-8573</a>: Trigger exceptions when invalid operators are used</li>
  <li><a href="https://issues.apache.org/jira/browse/SPARK-8766">SPARK-8766</a>: Support non ASCII characters in columns</li>
</ul>

<h4 id="sparkr">SparkR</h4>
<ul>
  <li><a href="https://issues.apache.org/jira/browse/SPARK-8506">SPARK-8506</a>: Support for Spark packages when initializing SparkR</li>
  <li><a href="https://issues.apache.org/jira/browse/SPARK-8085">SPARK-8085</a>: Support for user defined schemas when reading from data sources</li>
</ul>

<h3 id="contributors">Contributors</h3>
<p>The following developers contributed to this release:</p>

<ul>
  <li>Adam Roberts &#8211; Bug fixes in Core and tests</li>
  <li>Ai He &#8211; Improvements in PySpark</li>
  <li>Alok Singh &#8211; Bug fixes in Core</li>
  <li>Amey Chaugule &#8211; Improvement in SQL</li>
  <li>Andrew Or &#8211; Bug fixes in Core, tests, Shuffle, spark submit, and SQL; improvement in Web UI and tests</li>
  <li>Animesh Baranawal &#8211; Improvement in SQL</li>
  <li>Ben Fradet &#8211; Bug fixes in Streaming and Web UI</li>
  <li>Brian Lockwood &#8211; Documentation in Core</li>
  <li>Burak Yavuz &#8211; Test in SQL; improvements in spark submit; bug fixes in spark submit and SQL; improvement in SQL</li>
  <li>Carson Wang &#8211; Bug fixes in Core and Web UI</li>
  <li>Cheng Hao &#8211; Bug fixes in SQL</li>
  <li>Cheng Lian &#8211; Documentation in Core; bug fixes in SQL</li>
  <li>Cheolsoo Park &#8211; Improvement in SQL</li>
  <li>Chris Freeman &#8211; Bug fixes in SparkR</li>
  <li>Christian Kadner &#8211; Bug fixes in SQL</li>
  <li>Cody Koeninger &#8211; Improvements in Streaming</li>
  <li>DB Tsai &#8211; New features in MLlib</li>
  <li>Daniel Darabos &#8211; Bug fixes in EC2</li>
  <li>Daoyuan Wang &#8211; Bug fixes in SQL</li>
  <li>Davies Liu &#8211; Bug fixes in PySpark; improvement in SQL and PySpark</li>
  <li>Devaraj K &#8211; Bug fixes in YARN</li>
  <li>Dibyendu Bhattacharya &#8211; Bug fixes in Streaming and Web UI</li>
  <li>Favio Vazquez &#8211; Documentation in Core and MLlib</li>
  <li>Hari Shreedharan &#8211; Bug fixes in Streaming and Core</li>
  <li>Holden Karau &#8211; Documentation in Core; bug fixes in sparkr, Shuffle, and MLlib</li>
  <li>Hossein Falaki &#8211; Improvements and bug fixes in sparkr</li>
  <li>Huang Zhaowei &#8211; Bug fixes in Streaming and YARN</li>
  <li>Jean Lyn &#8211; Bug fixes in SQL</li>
  <li>Joseph K. Bradley &#8211; Improvements in MLlib; bug fixes in MLlib; improvement in Core and MLlib</li>
  <li>Josh Rosen &#8211; New features in SQL; bug fixes in scheduler and SQL; improvement in Core</li>
  <li>Kevin Conor &#8211; Bug fixes in PySpark</li>
  <li>Kousuke Saruta &#8211; Documentation in Core</li>
  <li>Lars Francke &#8211; Documentation in Core</li>
  <li>Lee &#8211; Bug fixes in MLlib</li>
  <li>Liang-Chi Hsieh &#8211; Improvements in SQL; bug fixes in MLlib and SQL</li>
  <li>Lianhui Wang &#8211; Bug fixes in Shuffle</li>
  <li>Luca Martinetti &#8211; Documentation in Core</li>
  <li>Manoj Kumar &#8211; Improvements in SQL; bug fixes in MLlib and PySpark</li>
  <li>Marcelo Vanzin &#8211; Improvements in Core; bug fixes in Core and Web UI</li>
  <li>Mark Smith &#8211; Bug fixes in EC2</li>
  <li>Michael Armbrust &#8211; Bug fixes in SQL</li>
  <li>Mike Dusenberry &#8211; Improvements in PySpark, Core, SQL, and Streaming</li>
  <li>Moussa Taifi &#8211; Documentation in Core</li>
  <li>Nathan Howell &#8211; Bug fixes in SQL</li>
  <li>Navis Ryu &#8211; Bug fixes in SQL</li>
  <li>Neelesh Srinivas Salian &#8211; Documentation in Core and YARN</li>
  <li>Oleksiy Dyagilev &#8211; Bug fixes in Core and MLlib</li>
  <li>Paavo Parkkinen &#8211; Bug fixes in Streaming and MLlib</li>
  <li>Patrick Wendell &#8211; Improvements in Core</li>
  <li>Punya Biswal &#8211; Bug fixes in Core</li>
  <li>Qian Huang &#8211; New features in sparkr</li>
  <li>Radek Ostrowski &#8211; Documentation in Core</li>
  <li>Ram Sriharsha &#8211; New features in MLlib and PySpark</li>
  <li>Rekha Joshi &#8211; Improvements in Web UI</li>
  <li>Reynold Xin &#8211; Bug fixes and improvement in SQL</li>
  <li>Rosstin Murphy &#8211; Documentation in Core</li>
  <li>Ryan Williams &#8211; Bug fixes in Core</li>
  <li>Saisai Shao &#8211; Bug fixes in Core and Streaming</li>
  <li>Scott Taylor &#8211; Test in PySpark</li>
  <li>Sean Owen &#8211; Improvements and bug fixes in Core</li>
  <li>Sephiroth Lin &#8211; Improvements in YARN</li>
  <li>Shiming Fei &#8211; Bug fixes in Core</li>
  <li>Shivaram Venkataraman &#8211; Improvements in Core, EC2, and sparkr; bug fixes in sparkr and EC2</li>
  <li>Shixiong Zhu &#8211; Test in Streaming, tests, and Core; improvements in Streaming and Core; bug fixes in Core, tests, PySpark, and Streaming</li>
  <li>Simon Hafner &#8211; Bug fixes in EC2</li>
  <li>Sun Rui &#8211; Bug fixes in Core and sparkr</li>
  <li>Tao Li &#8211; Bug fixes in YARN</li>
  <li>Tathagata Das &#8211; Improvements in Core and Streaming; bug fixes in Core</li>
  <li>Ted Blackman &#8211; Bug fixes in PySpark</li>
  <li>Ted Yu &#8211; Bug fixes in SQL</li>
  <li>Tim Ellison &#8211; Bug fixes in Core</li>
  <li>Timothy Chen &#8211; Bug fixes in Web UI and Mesos</li>
  <li>Tingjun Xu &#8211; Improvements in Core</li>
  <li>Tom Graves &#8211; Bug fixes in Core</li>
  <li>Vinod K C &#8211; Improvements and bug fixes in SQL</li>
  <li>Vyacheslav Baranov &#8211; Bug fixes in Core</li>
  <li>Wang Tao &#8211; Bug fixes in Core and YARN</li>
  <li>Wenchen Fan &#8211; Improvement in SQL</li>
  <li>Xiangrui Meng &#8211; Bug fixes in MLlib</li>
  <li>Yanbo Liang &#8211; Improvements in Core, MLlib, and PySpark</li>
  <li>Yin Huai &#8211; Improvements in Core, tests, and SQL; bug fixes in tests and SQL; improvement in tests and SQL</li>
  <li>Yu ISHIKAWA &#8211; Improvements in PySpark; improvement in sparkr</li>
  <li>Yuhao Yang &#8211; Bug fixes in MLlib</li>
  <li>Yuming Wang &#8211; GitHub integration in Web UI</li>
  <li>Yuri Saito &#8211; Bug fixes in PySpark</li>
</ul>

<p><em>Thanks to everyone who contributed!</em></p>



<p>
<br/>
<a href="/news/">Spark News Archive</a>
</p>

  </div>
</div>



<!-- Site footer: trademark attribution, linking to the ASF over HTTPS. -->
<footer class="small">
  <hr>
  Apache Spark, Spark, Apache, and the Spark logo are <a href="/trademarks.html">trademarks</a> of
  <a href="https://www.apache.org/">The Apache Software Foundation</a>.
</footer>

</div>

</body>
</html>