summaryrefslogtreecommitdiff
path: root/site/releases/spark-release-1-4-0.html
blob: 55d051c81ff10515ba5590fe860cd157c208bcd5 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="utf-8">
  <meta http-equiv="X-UA-Compatible" content="IE=edge">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">

  <title>
     Spark Release 1.4.0 | Apache Spark
    
  </title>

  

  

  <!-- Bootstrap core CSS -->
  <link href="/css/cerulean.min.css" rel="stylesheet">
  <link href="/css/custom.css" rel="stylesheet">

  <!-- Code highlighter CSS -->
  <link href="/css/pygments-default.css" rel="stylesheet">

  <script type="text/javascript">
  // Google Analytics initialization
  // Command queue shared with the analytics library: reuse an existing global
  // `_gaq` if one is already defined, otherwise start with an empty array.
  var _gaq = _gaq || [];
  // Queue two commands: set the account id, then record a page view.
  // NOTE(review): command names are presumably interpreted by ga.js once it loads.
  _gaq.push(['_setAccount', 'UA-32518208-2']);
  _gaq.push(['_trackPageview']);
  // Build a <script> element for ga.js and inject it into the page.
  (function() {
    // Mark the element async before assigning its source.
    var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
    // Choose the host by the page's protocol: https://ssl... on https pages, http://www... otherwise.
    ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
    // Insert the loader immediately before the first <script> element in the document.
    var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
  })();

  // Adds slight delay to links to allow async reporting
  /**
   * Queues an analytics event for a link and then navigates to that link.
   *
   * @param {{href: string}} link - Object whose `href` is the navigation target.
   * @param {string} category - Event category pushed onto the `_gaq` queue.
   * @param {string} action - Event action pushed onto the `_gaq` queue.
   */
  function trackOutboundLink(link, category, action) {
    var redirectDelayMs = 100;

    // `link.href` is read when the timer fires, not when the link is clicked.
    var followLink = function() {
      document.location.href = link.href;
    };

    try {
      _gaq.push(['_trackEvent', category, action]);
    } catch (ignored) {
      // Best effort: a failure to queue the event must not block navigation.
    }

    setTimeout(followLink, redirectDelayMs);
  }
  </script>

  <!-- HTML5 shim and Respond.js IE8 support of HTML5 elements and media queries -->
  <!--[if lt IE 9]>
  <script src="https://oss.maxcdn.com/libs/html5shiv/3.7.0/html5shiv.js"></script>
  <script src="https://oss.maxcdn.com/libs/respond.js/1.3.0/respond.min.js"></script>
  <![endif]-->
</head>

<body>

<script src="https://code.jquery.com/jquery.js"></script>
<script src="//netdna.bootstrapcdn.com/bootstrap/3.0.3/js/bootstrap.min.js"></script>
<script src="/js/lang-tabs.js"></script>
<script src="/js/downloads.js"></script>

<div class="container" style="max-width: 1200px;">

<div class="masthead">
  
    <p class="lead">
      <a href="/">
      <img src="/images/spark-logo-trademark.png" alt="Apache Spark"
        style="height:100px; width:auto; vertical-align: bottom; margin-top: 20px;"></a><span class="tagline">
          Lightning-fast cluster computing
      </span>
    </p>
  
</div>

<nav class="navbar navbar-default" role="navigation">
  <!-- Brand and toggle get grouped for better mobile display -->
  <div class="navbar-header">
    <button type="button" class="navbar-toggle" data-toggle="collapse"
            data-target="#navbar-collapse-1">
      <span class="sr-only">Toggle navigation</span>
      <span class="icon-bar"></span>
      <span class="icon-bar"></span>
      <span class="icon-bar"></span>
    </button>
  </div>

  <!-- Collect the nav links, forms, and other content for toggling -->
  <div class="collapse navbar-collapse" id="navbar-collapse-1">
    <ul class="nav navbar-nav">
      <li><a href="/downloads.html">Download</a></li>
      <li class="dropdown">
        <a href="#" class="dropdown-toggle" data-toggle="dropdown">
          Libraries <b class="caret"></b>
        </a>
        <ul class="dropdown-menu">
          <li><a href="/sql/">SQL and DataFrames</a></li>
          <li><a href="/streaming/">Spark Streaming</a></li>
          <li><a href="/mllib/">MLlib (machine learning)</a></li>
          <li><a href="/graphx/">GraphX (graph)</a></li>
          <li class="divider"></li>
          <li><a href="http://spark-packages.org">Third-Party Packages</a></li>
        </ul>
      </li>
      <li class="dropdown">
        <a href="#" class="dropdown-toggle" data-toggle="dropdown">
          Documentation <b class="caret"></b>
        </a>
        <ul class="dropdown-menu">
          <li><a href="/docs/latest/">Latest Release (Spark 2.0.0)</a></li>
          <li><a href="/documentation.html">Older Versions and Other Resources</a></li>
        </ul>
      </li>
      <li><a href="/examples.html">Examples</a></li>
      <li class="dropdown">
        <a href="/community.html" class="dropdown-toggle" data-toggle="dropdown">
          Community <b class="caret"></b>
        </a>
        <ul class="dropdown-menu">
          <li><a href="/community.html">Mailing Lists</a></li>
          <li><a href="/community.html#events">Events and Meetups</a></li>
          <li><a href="/community.html#history">Project History</a></li>
          <li><a href="https://cwiki.apache.org/confluence/display/SPARK/Powered+By+Spark">Powered By</a></li>
          <li><a href="https://cwiki.apache.org/confluence/display/SPARK/Committers">Project Committers</a></li>
          <li><a href="https://issues.apache.org/jira/browse/SPARK">Issue Tracker</a></li>
        </ul>
      </li>
      <li><a href="/faq.html">FAQ</a></li>
    </ul>
    <ul class="nav navbar-nav navbar-right">
      <li class="dropdown">
        <a href="http://www.apache.org/" class="dropdown-toggle" data-toggle="dropdown">
          Apache Software Foundation <b class="caret"></b></a>
        <ul class="dropdown-menu">
          <li><a href="http://www.apache.org/">Apache Homepage</a></li>
          <li><a href="http://www.apache.org/licenses/">License</a></li>
          <li><a href="http://www.apache.org/foundation/sponsorship.html">Sponsorship</a></li>
          <li><a href="http://www.apache.org/foundation/thanks.html">Thanks</a></li>
          <li><a href="http://www.apache.org/security/">Security</a></li>
        </ul>
      </li>
    </ul>
  </div>
  <!-- /.navbar-collapse -->
</nav>


<div class="row">
  <div class="col-md-3 col-md-push-9">
    <div class="news" style="margin-bottom: 20px;">
      <h5>Latest News</h5>
      <ul class="list-unstyled">
        
          <li><a href="/news/spark-2-0-0-released.html">Spark 2.0.0 released</a>
          <span class="small">(Jul 26, 2016)</span></li>
        
          <li><a href="/news/spark-1-6-2-released.html">Spark 1.6.2 released</a>
          <span class="small">(Jun 25, 2016)</span></li>
        
          <li><a href="/news/submit-talks-to-spark-summit-eu-2016.html">Call for Presentations for Spark Summit EU is Open</a>
          <span class="small">(Jun 16, 2016)</span></li>
        
          <li><a href="/news/spark-2.0.0-preview.html">Preview release of Spark 2.0</a>
          <span class="small">(May 26, 2016)</span></li>
        
      </ul>
      <p class="small" style="text-align: right;"><a href="/news/index.html">Archive</a></p>
    </div>
    <div class="hidden-xs hidden-sm">
      <a href="/downloads.html" class="btn btn-success btn-lg btn-block" style="margin-bottom: 30px;">
        Download Spark
      </a>
      <p style="font-size: 16px; font-weight: 500; color: #555;">
        Built-in Libraries:
      </p>
      <ul class="list-none">
        <li><a href="/sql/">SQL and DataFrames</a></li>
        <li><a href="/streaming/">Spark Streaming</a></li>
        <li><a href="/mllib/">MLlib (machine learning)</a></li>
        <li><a href="/graphx/">GraphX (graph)</a></li>
      </ul>
      <a href="http://spark-packages.org">Third-Party Packages</a>
    </div>
  </div>

  <div class="col-md-9 col-md-pull-3">
    <h2>Spark Release 1.4.0</h2>


<p>Spark 1.4.0 is the fifth release on the 1.X line. This release brings an R API to Spark. It also brings usability improvements in Spark’s core engine and expansion of MLlib and Spark Streaming. Spark 1.4 represents the work of more than 210 contributors from more than 70 institutions in more than 1000 individual patches.</p>

<p>To download Spark 1.4 visit the <a href="/downloads.html">downloads</a> page.</p>

<h3 id="sparkr">SparkR</h3>
<p>Spark 1.4 is the first release to package SparkR, an R binding for Spark based
on Spark&#8217;s new DataFrame API. SparkR gives R users access to Spark&#8217;s scale-out
parallel runtime along with all of Spark&#8217;s input and output formats. It also
supports calling directly into Spark SQL. 
The <a href="/docs/1.4.0/sparkr.html">R programming guide</a> has more information on how to get up and running with
SparkR.</p>

<h3 id="spark-core">Spark Core</h3>

<p>Spark Core adds a variety of improvements focused on operations, performance, and compatibility:</p>

<ul>
  <li><a href="https://issues.apache.org/jira/browse/SPARK-6942">SPARK-6942</a>: Visualization for Spark DAGs and operational monitoring</li>
  <li><a href="https://issues.apache.org/jira/browse/SPARK-4897">SPARK-4897</a>: Python 3 support</li>
  <li><a href="https://issues.apache.org/jira/browse/SPARK-3644">SPARK-3644</a>: A REST API for application information</li>
  <li><a href="https://issues.apache.org/jira/browse/SPARK-4550">SPARK-4550</a>: Serialized shuffle outputs for improved performance</li>
  <li><a href="https://issues.apache.org/jira/browse/SPARK-7081">SPARK-7081</a>: Initial performance improvements in project Tungsten</li>
  <li><a href="https://issues.apache.org/jira/browse/SPARK-3074">SPARK-3074</a>: External spilling for Python groupByKey operations</li>
  <li><a href="https://issues.apache.org/jira/browse/SPARK-3674">SPARK-3674</a>: YARN support for Spark EC2 and <a href="https://issues.apache.org/jira/browse/SPARK-5342">SPARK-5342</a>: Security for long running YARN applications</li>
  <li><a href="https://issues.apache.org/jira/browse/SPARK-2691">SPARK-2691</a>: Docker support in Mesos and <a href="https://issues.apache.org/jira/browse/SPARK-6338">SPARK-6338</a>: Cluster mode in Mesos</li>
</ul>

<h3 id="dataframe-api-and-spark-sql">DataFrame API and Spark SQL</h3>
<p>The DataFrame API sees major extensions in Spark 1.4 (see <a href="https://issues.apache.org/jira/issues/?jql=parent%20%3D%20SPARK-6116%20and%20fixVersion%20%3D%201.4.0">this link</a> for a full list) with
a focus on analytic and mathematical functions. Spark SQL introduces new operational utilities along with support for ORCFile.</p>

<ul>
  <li><a href="https://issues.apache.org/jira/browse/SPARK-2883">SPARK-2883</a>: Support for ORCFile format</li>
  <li><a href="https://issues.apache.org/jira/browse/SPARK-2213">SPARK-2213</a>: Sort-merge joins to optimize very large joins</li>
  <li><a href="https://issues.apache.org/jira/browse/SPARK-5100">SPARK-5100</a>: Dedicated UI for the SQL JDBC server</li>
  <li><a href="https://issues.apache.org/jira/browse/SPARK-6829">SPARK-6829</a>: Mathematical functions in DataFrames</li>
  <li><a href="https://issues.apache.org/jira/browse/SPARK-8299">SPARK-8299</a>: Improved error message reporting for DataFrame and SQL</li>
  <li><a href="https://issues.apache.org/jira/browse/SPARK-1442">SPARK-1442</a>: Window functions in Spark SQL and DataFrames</li>
  <li><a href="https://issues.apache.org/jira/browse/SPARK-6231">SPARK-6231</a> / <a href="https://issues.apache.org/jira/browse/SPARK-7059">SPARK-7059</a>: Improved API support for self joins</li>
  <li><a href="https://issues.apache.org/jira/browse/SPARK-5947">SPARK-5947</a>: Partitioning support in Spark’s data source API</li>
  <li><a href="https://issues.apache.org/jira/browse/SPARK-7320">SPARK-7320</a>: Rollup and cube functions</li>
  <li><a href="https://issues.apache.org/jira/browse/SPARK-6117">SPARK-6117</a>: Summary and descriptive statistics</li>
</ul>

<h3 id="spark-mlmllib">Spark ML/MLlib</h3>
<p>Spark&#8217;s ML pipelines API graduates from alpha in this release, with new transformers and improved
Python coverage. MLlib also adds several new algorithms.</p>

<ul>
  <li><a href="https://issues.apache.org/jira/browse/SPARK-5884">SPARK-5884</a>: A variety of feature transformers for ML pipelines</li>
  <li><a href="https://issues.apache.org/jira/browse/SPARK-7381">SPARK-7381</a>: Python API for ML pipelines</li>
  <li><a href="https://issues.apache.org/jira/browse/SPARK-5854">SPARK-5854</a>: Personalized PageRank for GraphX</li>
  <li><a href="https://issues.apache.org/jira/browse/SPARK-6113">SPARK-6113</a>: Stabilize DecisionTree and ensembles APIs</li>
  <li><a href="https://issues.apache.org/jira/browse/SPARK-7262">SPARK-7262</a>: Binary LogisticRegression with L1/L2 (elastic net)</li>
  <li><a href="https://issues.apache.org/jira/browse/SPARK-7015">SPARK-7015</a>: OneVsRest multiclass to binary reduction</li>
  <li><a href="https://issues.apache.org/jira/browse/SPARK-4588">SPARK-4588</a>: Add API for feature attributes</li>
  <li><a href="https://issues.apache.org/jira/browse/SPARK-1406">SPARK-1406</a>: PMML model evaluation support via MLlib</li>
  <li><a href="https://issues.apache.org/jira/browse/SPARK-5995">SPARK-5995</a>: Make ML Prediction Developer APIs public</li>
  <li><a href="https://issues.apache.org/jira/browse/SPARK-3066">SPARK-3066</a>: Support recommendAll in matrix factorization model</li>
  <li><a href="https://issues.apache.org/jira/browse/SPARK-4894">SPARK-4894</a>: Bernoulli naive Bayes</li>
  <li><a href="https://issues.apache.org/jira/browse/SPARK-5563">SPARK-5563</a>: LDA with online variational inference</li>
</ul>

<h3 id="spark-streaming">Spark Streaming</h3>
<p>Spark Streaming adds visual instrumentation graphs and significantly improved debugging information in the UI. It also enhances support for both Kafka and Kinesis.</p>

<ul>
  <li><a href="https://issues.apache.org/jira/browse/SPARK-7602">SPARK-7602</a>: Visualization and monitoring in the streaming UI including batch drill down (<a href="https://issues.apache.org/jira/browse/SPARK-6796">SPARK-6796</a>, <a href="https://issues.apache.org/jira/browse/SPARK-6862">SPARK-6862</a>)</li>
  <li><a href="https://issues.apache.org/jira/browse/SPARK-7621">SPARK-7621</a>: Better error reporting for Kafka</li>
  <li><a href="https://issues.apache.org/jira/browse/SPARK-2808">SPARK-2808</a>: Support for Kafka 0.8.2.1 and Kafka with Scala 2.11</li>
  <li><a href="https://issues.apache.org/jira/browse/SPARK-5946">SPARK-5946</a>: Python API for Kafka direct mode</li>
  <li><a href="https://issues.apache.org/jira/browse/SPARK-7111">SPARK-7111</a>: Input rate tracking for Kafka</li>
  <li><a href="https://issues.apache.org/jira/browse/SPARK-5960">SPARK-5960</a>: Support for transferring AWS credentials to Kinesis</li>
  <li><a href="https://issues.apache.org/jira/browse/SPARK-7056">SPARK-7056</a>: A pluggable interface for write ahead logs</li>
</ul>

<h3 id="known-issues">Known Issues</h3>
<p>This release has a few known issues, which will be addressed in Spark 1.4.1.</p>

<ul>
  <li>Python sortBy()/sortByKey() can hang if a single partition is larger than worker memory <a href="https://issues.apache.org/jira/browse/SPARK-8202">SPARK-8202</a></li>
  <li>Unintended behavior change of JSON schema inference <a href="https://issues.apache.org/jira/browse/SPARK-8093">SPARK-8093</a></li>
  <li>Some ML pipeline components do not correctly implement copy <a href="https://issues.apache.org/jira/browse/SPARK-8151">SPARK-8151</a></li>
  <li>Spark-ec2 branch pointer is wrong <a href="https://issues.apache.org/jira/browse/SPARK-8310">SPARK-8310</a></li>
</ul>

<h3 id="credits">Credits</h3>

<h4 id="test-partners">Test Partners</h4>

<p>Thanks to the following organizations, who helped benchmark or integration test release candidates: <br /> Intel, Palantir, Cloudera, Mesosphere, Huawei, Shopify, Netflix, Yahoo, UC Berkeley and Databricks.</p>

<h4 id="contributors">Contributors</h4>
<ul>
  <li>Aaron Davidson &#8211; Bug fixes in Core, Shuffle, and YARN</li>
  <li>Aaron Josephs &#8211; New features in Core</li>
  <li>Adam Budde &#8211; Bug fixes in SQL</li>
  <li>Ai He &#8211; Improvements in MLlib</li>
  <li>Andrew Or &#8211; Bug fixes in Core</li>
  <li>Andrew Or &#8211; Improvements in Core and YARN; bug fixes in Core, Web UI, Streaming, tests, and SQL; improvement in Streaming, Web UI, Core, and SQL</li>
  <li>Andrey Zagrebin &#8211; Improvement in SQL</li>
  <li>Antonio Piccolboni &#8211; New features in SparkR</li>
  <li>Arsenii Krasikov &#8211; Bug fixes in Core</li>
  <li>Ashutosh Raina &#8211; New features in SparkR</li>
  <li>Ashwin Shankar &#8211; Bug fixes in YARN</li>
  <li>Augustin Borsu &#8211; New features in MLlib</li>
  <li>Ben Fradet &#8211; Documentation in Core and Streaming</li>
  <li>Benedikt Linse &#8211; Documentation in Core</li>
  <li>Bill Chambers &#8211; Documentation in Core</li>
  <li>Brennon York &#8211; Improvements in Project Infra, Core, GraphX, and tests; bug fixes in Core</li>
  <li>Bryan Cutler &#8211; Bug fixes in Core</li>
  <li>Burak Yavuz &#8211; Test in spark submit; improvements in Core and Streaming; new features in MLlib and PySpark; bug fixes in Core, tests, and spark submit; improvement in SQL, MLlib, and PySpark</li>
  <li>Calvin Jia &#8211; Improvements and documentation in Core</li>
  <li>Chen Song &#8211; Bug fixes and improvement in SQL</li>
  <li>Cheng Chang &#8211; New features in EC2</li>
  <li>Cheng Hao &#8211; Improvements, new features, bug fixes, and improvement in SQL</li>
  <li>Cheng Lian &#8211; Bug fixes in SQL</li>
  <li>Cheng Lian &#8211; Improvements in Core and SQL; documentation in Core and SQL; bug fixes in Core and SQL; improvement in SQL</li>
  <li>Cheolsoo Park &#8211; Wish in YARN; improvements in Core and spark submit; bug fixes in Core</li>
  <li>Chris Freeman &#8211; New features in SparkR</li>
  <li>Chet Mancini &#8211; Improvements in Core and SQL</li>
  <li>Chris Heller &#8211; New features in Mesos</li>
  <li>Christophe Preaud &#8211; Documentation in Core and YARN</li>
  <li>Cody Koeninger &#8211; Bug fixes in Streaming; improvement in Core</li>
  <li>DB Tsai &#8211; Improvements, new features, and bug fixes in MLlib</li>
  <li>DEBORAH SIEGEL &#8211; Documentation in Core</li>
  <li>Dan McClary &#8211; New features in GraphX</li>
  <li>Dan Putler &#8211; New features in SparkR</li>
  <li>Daoyuan Wang &#8211; Improvements in tests and SQL; new features in SQL; bug fixes in SQL; improvement in MLlib and SQL</li>
  <li>David McGuire &#8211; Bug fixes in Streaming</li>
  <li>Davies Liu &#8211; Improvements in SQL and PySpark; new features in Core and SparkR; bug fixes in Streaming, tests, PySpark, SparkR, and SQL; improvement in Core and SQL</li>
  <li>Davies Liu &#8211; New features in SparkR</li>
  <li>Dean Chen &#8211; Improvements in Core; new features in YARN; bug fixes in Core and YARN</li>
  <li>Debasish Das &#8211; New features in MLlib</li>
  <li>Deborah Siegel &#8211; Improvements in Core</li>
  <li>Doing Done &#8211; Improvements in SQL; bug fixes in Core and SQL</li>
  <li>Dong Xu &#8211; Bug fixes in SQL</li>
  <li>Doug Balog &#8211; Bug fixes in spark submit, YARN, and SQL</li>
  <li>Edward T &#8211; New features in SparkR</li>
  <li>Elisey Zanko &#8211; Bug fixes in MLlib and PySpark</li>
  <li>Emre Sevinc &#8211; Improvements in Streaming</li>
  <li>Eric Chiang &#8211; Documentation in Core</li>
  <li>Erik Van Oosten &#8211; Bug fixes in Core</li>
  <li>Evan Jones &#8211; Bug fixes in Core</li>
  <li>Evan Yu &#8211; Bug fixes in Core</li>
  <li>Evert Lammerts &#8211; New features in SparkR</li>
  <li>Favio Vazquez &#8211; Build fixes in Core; documentation in Core and MLlib</li>
  <li>Felix Cheung &#8211; SparkR Documentation</li>
  <li>Florian Verhein &#8211; Improvements and new features in EC2</li>
  <li>Gaurav Nanda &#8211; Documentation in Core</li>
  <li>Glenn Weidner &#8211; Documentation in MLlib and PySpark</li>
  <li>Guancheng (G.C.) Chen &#8211; Improvements in Core</li>
  <li>Guancheng Chen &#8211; Improvements in Core</li>
  <li>Guo Wei &#8211; Bug fixes and window function feature in SQL</li>
  <li>GuoQiang Li &#8211; New features in Core; bug fixes in Core and YARN</li>
  <li>Haiyang Sea &#8211; Improvements in SQL</li>
  <li>Hangchen Yu &#8211; Documentation in GraphX</li>
  <li>Hao Lin &#8211; Improvements and new features in SparkR</li>
  <li>Hari Shreedharan &#8211; Test in Streaming and tests; new features in YARN; bug fixes in Web UI</li>
  <li>Harihar Nahak &#8211; New features in SparkR</li>
  <li>Holden Karau &#8211; Improvements in Core, MLlib, and PySpark; bug fixes in PySpark</li>
  <li>Hossein Falaki &#8211; SparkR Documentation</li>
  <li>Hong Shen &#8211; Bug fixes in Core and YARN</li>
  <li>Hrishikesh Subramonian &#8211; Improvements in MLlib and PySpark</li>
  <li>Hung Lin &#8211; Bug fixes in scheduler</li>
  <li>Ilya Ganelin &#8211; Improvements in Core; new features in Core; bug fixes in Core and Shuffle; improvement in Core</li>
  <li>Imran Rashid &#8211; Improvements in Web UI; bug fixes in Core and Web UI</li>
  <li>Isaias Barroso &#8211; Bug fixes in Core</li>
  <li>Iulian Dragos &#8211; Bug fixes in Core and SQL; improvement in Core, Shuffle, and Mesos</li>
  <li>Jacek Lewandowski &#8211; Bug fixes in Core</li>
  <li>Jacky Li &#8211; Improvements in SQL</li>
  <li>Jaonary Rabarisoa &#8211; Improvements in MLlib</li>
  <li>Jayson Sunshine &#8211; Documentation in Core</li>
  <li>Jean Lyn &#8211; Bug fixes in SQL</li>
  <li>Jeff Harrison &#8211; Improvements in SparkR</li>
  <li>Jeremy A. Lucas &#8211; Improvements in Streaming</li>
  <li>Jeremy Freeman &#8211; Bug fixes in Streaming and MLlib</li>
  <li>Jim Carroll &#8211; Bug fixes in MLlib</li>
  <li>Jin Adachi &#8211; Bug fixes in SQL</li>
  <li>Jongyoul Lee &#8211; Improvements in Core and Mesos; bug fixes in Core</li>
  <li>Joseph K. Bradley &#8211; Improvements in MLlib; documentation in PySpark, Core, SQL, MLlib, and Streaming; new features in MLlib; bug fixes in Java API, Core, MLlib, and PySpark; improvement in MLlib and PySpark</li>
  <li>Josh Rosen &#8211; Improvements in Core and SQL; new features in Core, Shuffle, and SQL; bug fixes in Core, tests, Shuffle, Streaming, scheduler, SQL, and Java API; improvement in Core and Shuffle</li>
  <li>Judy Nash &#8211; Bug fixes in Windows and spark submit</li>
  <li>Judy Nash &#8211; Improvements in Core</li>
  <li>Juliet Hougland &#8211; Improvements in MLlib</li>
  <li>June He &#8211; Bug fixes in Core and tests</li>
  <li>Kai Sasaki &#8211; Documentation in Core and MLlib; improvements in MLlib and PySpark; bug fixes in MLlib and PySpark; improvement in MLlib and PySpark</li>
  <li>Kalle Jepsen &#8211; Improvements in PySpark and SQL; bug fixes in PySpark; improvement in PySpark</li>
  <li>Kamil Smuga &#8211; Bug fixes in Core and PySpark</li>
  <li>Kay Ousterhout &#8211; Improvements in Core, Web UI, and Shuffle; bug fixes in Project Infra, Core, Web UI, and tests</li>
  <li>Kevin (Sangwoo) Kim &#8211; Bug fixes in Core</li>
  <li>Kirill A. Korinskiy &#8211; New features in MLlib</li>
  <li>Kousuke Saruta &#8211; Improvements in Streaming, Web UI, and tests; bug fixes in Web UI, scheduler, tests, and YARN; improvement in Web UI</li>
  <li>LCY Vincent &#8211; Documentation in Core</li>
  <li>Leah McGuire &#8211; Improvements and new features in MLlib</li>
  <li>Lev Khomich &#8211; Improvements in Core</li>
  <li>Liang-Chi Hsieh &#8211; Improvements in MLlib and SQL; improvement in MLlib; new features in SQL; bug fixes in Core, Shuffle, PySpark, MLlib, SQL, and spark submit; documentation in Core and MLlib</li>
  <li>Liangliang Gu &#8211; Improvements in Core and Web UI; bug fixes in Web UI</li>
  <li>Lianhui Wang &#8211; Improvements in GraphX; bug fixes in PySpark</li>
  <li>Liu Chang &#8211; Improvements in EC2</li>
  <li>Lomig Megard &#8211; Documentation in Core</li>
  <li>Madhukara Phatak &#8211; Documentation in SQL</li>
  <li>Manoj Kumar &#8211; Improvements in MLlib; new features in SQL, MLlib, and PySpark; bug fixes in Streaming, MLlib, and SQL; improvement in MLlib and PySpark</li>
  <li>Marcelo Vanzin &#8211; Improvements in Core; bug fixes in Core, tests, Shuffle, YARN, Streaming, and spark submit; improvement in Core</li>
  <li>Mark Bittmann &#8211; Bug fixes in MLlib</li>
  <li>Marko Bonaci &#8211; Documentation in Core</li>
  <li>Masaru Dobashi &#8211; Documentation in Core</li>
  <li>Masayoshi TSUZUKI &#8211; Bug fixes in Windows and Core</li>
  <li>Matei Zaharia &#8211; Improvement in Web UI</li>
  <li>Matt Aasted &#8211; Bug fixes in EC2</li>
  <li>Matt Massie &#8211; New features in SparkR</li>
  <li>Matt Wise &#8211; Documentation in Core</li>
  <li>Matthew Cheah &#8211; Improvements and new features in Core</li>
  <li>Matthew Goodman &#8211; Bug fixes in EC2 and PySpark</li>
  <li>Max Seiden &#8211; Bug fixes in SQL</li>
  <li>Meethu Mathew &#8211; Bug fixes in MLlib and PySpark</li>
  <li>Michael Armbrust &#8211; Documentation in Core; new features in SQL; improvements in SQL; bug fixes in SQL; improvement in Core and SQL</li>
  <li>Michael Griffiths &#8211; Bug fixes in Windows and Core</li>
  <li>Michael Malak &#8211; Bug fixes in GraphX</li>
  <li>Michael Nazario &#8211; Bug fixes in tests and PySpark</li>
  <li>Michelangelo D&#8217;Agostino &#8211; Bug fixes in EC2</li>
  <li>Michelle Casbon &#8211; Improvements in Project Infra</li>
  <li>Miguel Peralvo &#8211; Improvements in EC2</li>
  <li>Mike Dusenberry &#8211; Improvements in Core and MLlib; documentation in Core; bug fixes in Core and MLlib</li>
  <li>Milan Straka &#8211; Bug fixes in Core and PySpark</li>
  <li>Misha Chernetsov &#8211; Improvements in Core and SQL</li>
  <li>Mridul Muralidharan &#8211; Improvements in Core and Shuffle</li>
  <li>Nan Zhu &#8211; Improvements in Core and tests; bug fixes in Core and SQL</li>
  <li>Nathan Howell &#8211; Improvements and new features in SQL</li>
  <li>Nathan Kronenfeld &#8211; Bug fixes in Core</li>
  <li>Nathan McCarthy &#8211; Bug fixes in Core</li>
  <li>Nicholas Chammas &#8211; Improvements in Core and EC2; bug fixes in EC2</li>
  <li>Nishkam Ravi &#8211; Improvements in Core; documentation in Core; bug fixes in Core and YARN</li>
  <li>Nobuyuki Kuromatsu &#8211; Bug fixes in MLlib</li>
  <li>Octavian Geagla &#8211; Improvements in MLlib; documentation in Java API, Core, and MLlib</li>
  <li>Oleg Sidorkin &#8211; Bug fixes in SQL</li>
  <li>Oleksii Kostyliev &#8211; Bug fixes in Core</li>
  <li>Olivier Girardot &#8211; Improvements in Java API and SQL; bug fixes in Core; improvement in PySpark and SQL</li>
  <li>Omede Firouz &#8211; Improvements in MLlib; new features in MLlib and PySpark</li>
  <li>Oscar Olmedo &#8211; New features in SparkR</li>
  <li>Pankaj Arora &#8211; Bug fixes in Core</li>
  <li>Patrick Wendell &#8211; Test in spark submit; improvements in Core and Shuffle; bug fixes in tests and SQL</li>
  <li>Pei-Lun Lee &#8211; Improvements and bug fixes in SQL</li>
  <li>Peter Parente &#8211; Improvements in Core</li>
  <li>Peter Rudenko &#8211; Documentation in Core</li>
  <li>Pierre Borckmans &#8211; Documentation in Core and EC2</li>
  <li>Prabeesh K &#8211; Improvements in Streaming</li>
  <li>Pradeep Chanumolu &#8211; Improvements in Core</li>
  <li>Prashant Sharma &#8211; Improvements and bug fixes in Core</li>
  <li>Punya Biswal &#8211; Improvements in SQL; bug fixes in Core</li>
  <li>Punyashloka Biswal &#8211; Build fixes in Core</li>
  <li>Qian Huang &#8211; New features and improvement in SparkR</li>
  <li>Qiping Li &#8211; Bug fixes in Core</li>
  <li>Rajendra Gokhale (rvgcentos) &#8211; Improvements in Core</li>
  <li>Rakesh Chalasani &#8211; Improvement in SQL</li>
  <li>Ram Sriharsha &#8211; Improvements in Core, MLlib, and PySpark; new features in MLlib; documentation in Core and MLlib</li>
  <li>Rekha Joshi &#8211; Improvements in SparkR</li>
  <li>Rene Treffer &#8211; Improvements in SQL</li>
  <li>Rex Xiong &#8211; Improvements in Core</li>
  <li>Reynold Xin &#8211; Improvements in Project Infra, Core, tests, PySpark, and SQL; documentation in Core; bug fixes in Core and MLlib; improvement in Project Infra, Core, GraphX, and SQL</li>
  <li>Reza Zadeh &#8211; Improvements in MLlib</li>
  <li>Ryan Hafen &#8211; New features in SparkR</li>
  <li>Ryan Williams &#8211; Improvements in Core</li>
  <li>Saisai Shao &#8211; Test in Streaming and tests; improvements in Core, PySpark, YARN, and Streaming; new features in Web UI; bug fixes in Web UI and YARN; improvement in Streaming</li>
  <li>Saleem Ansari &#8211; Documentation in Core and MLlib</li>
  <li>Sandy Ryza &#8211; Improvements in Core, Shuffle, and MLlib; documentation in Core and MLlib; bug fixes in Core and YARN; improvement in MLlib</li>
  <li>Santiago M. Mola &#8211; Improvements in SQL; bug fixes in SQL; documentation in Core</li>
  <li>Sasaki Toru &#8211; Improvements in Core and GraphX</li>
  <li>Sean Owen &#8211; Documentation in Core; improvements in Core, tests, MLlib, Streaming, SQL, and Web UI; bug fixes in Project Infra, Core, tests, Windows, SQL, GraphX, and Web UI; improvement in Core</li>
  <li>Sephiroth Lin &#8211; Improvements in SparkR, Core, scheduler, YARN, and PySpark; bug fixes in SQL</li>
  <li>Shekhar Bansal &#8211; Improvements in YARN; bug fixes in Web UI</li>
  <li>Sheng Li &#8211; Bug fixes in SQL</li>
  <li>Shiti Saxena &#8211; Improvement in SQL</li>
  <li>Shivaram Venkataraman &#8211; Improvements in SparkR and EC2; new features in Core and SparkR; bug fixes in SparkR; improvement in SparkR</li>
  <li>Shixiong Zhu &#8211; Test in Streaming, tests, and Core; improvement in Streaming, Web UI, and Core; improvements in Streaming, Web UI, and Core; bug fixes in Core, tests, MLlib, YARN, Streaming, scheduler, and Web UI; documentation in Core and Streaming</li>
  <li>Shuai Zheng &#8211; Bug fixes in SQL</li>
  <li>Shuo Xiang &#8211; New features in Core; bug fixes in MLlib</li>
  <li>Stephen Boesch &#8211; Bug fixes in MLlib</li>
  <li>Stephen Haberman &#8211; Bug fixes in Core</li>
  <li>Steve Loughran &#8211; Improvements in Core, Web UI, and SQL; bug fixes in Core and YARN</li>
  <li>Steven She &#8211; Bug fixes in Core</li>
  <li>Su Yan &#8211; Bug fixes in Core</li>
  <li>Sun Rui &#8211; Improvements in SparkR; new features in SparkR and SQL; bug fixes in SparkR; improvement in SparkR</li>
  <li>Taka Shinagawa &#8211; Documentation in Core</li>
  <li>Takeshi YAMAMURO &#8211; Improvements in GraphX and SQL</li>
  <li>Tathagata Das &#8211; Test in Streaming and tests; improvements in Streaming and Core; new features in Streaming and SQL; bug fixes in Project Infra, Streaming, and Core</li>
  <li>Ted Yu &#8211; Improvements in Core; bug fixes in Core and PySpark</li>
  <li>Theodore Vasiloudis &#8211; Improvements in Core; bug fixes in Core and EC2</li>
  <li>Thomas Graves &#8211; Bug fixes in Core</li>
  <li>Tijo Thomas &#8211; Improvements in Core; bug fixes in Core and SQL</li>
  <li>Tim Ellison &#8211; Bug fixes in Core</li>
  <li>Timothy Chen &#8211; Improvements in spark submit and Mesos; bug fixes in spark submit and Mesos</li>
  <li>Tingjun Xu &#8211; Improvements in Streaming</li>
  <li>Todd Gao &#8211; SparkR</li>
  <li>Venkata Ramana Gollamudi &#8211; Improvements and bug fixes in SQL</li>
  <li>Vidmantas Zemleris &#8211; Improvements in SQL</li>
  <li>Vincenzo Selvaggio &#8211; Documentation and new features in MLlib</li>
  <li>Vinod K C &#8211; Improvements in Shuffle and scheduler; bug fixes in Core and SQL</li>
  <li>Vinod KC &#8211; Bug fixes in Core and SQL</li>
  <li>Volodymyr Lyubinets &#8211; Improvements and bug fixes in SQL</li>
  <li>Vyacheslav Baranov &#8211; Bug fixes in SQL</li>
  <li>Wang Fei &#8211; Improvements, new features, and bug fixes in SQL</li>
  <li>Wang Tao &#8211; Improvements in Core, YARN, and SQL; new features in spark submit; bug fixes in Core, spark submit, and SQL</li>
  <li>Wenchen Fan &#8211; Improvements in Core; documentation in Core; bug fixes in SQL; improvement in SQL</li>
  <li>Wesley Miao &#8211; Bug fixes in Streaming</li>
  <li>Xiangrui Meng &#8211; New features in SQL, MLlib, and PySpark; umbrella in MLlib; documentation in PySpark, Core, SQL, MLlib, and Streaming; improvement in Core, SQL, MLlib, and PySpark; build fixes in GraphX and MLlib; improvements in Core, SQL, MLlib, and PySpark; bug fixes in Java API, Web UI, SQL, MLlib, and PySpark</li>
  <li>Xu Kun &#8211; New features in Core</li>
  <li>Xusen Yin &#8211; Documentation in Core and MLlib; improvement in MLlib</li>
  <li>Yadong Qi &#8211; Improvements and bug fixes in SQL</li>
  <li>Yanbo Liang &#8211; Improvements in Core, MLlib, and PySpark; new features in MLlib and PySpark; bug fixes in MLlib and SQL; improvement in MLlib and PySpark</li>
  <li>Yash Datta &#8211; Improvements and bug fixes in SQL</li>
  <li>Ye Xianjin &#8211; Bug fixes in Core</li>
  <li>Yi Lu &#8211; New features in SparkR</li>
  <li>Yi Tian &#8211; New features in Web UI and SQL; bug fixes in SQL</li>
  <li>Yin Huai &#8211; Improvements in tests and SQL; new features in SQL; bug fixes in Core and SQL; improvement in Core and SQL</li>
  <li>Yong Tang &#8211; Bug fixes in Core</li>
  <li>Yu ISHIKAWA &#8211; Improvements in MLlib</li>
  <li>Yuhao Yang &#8211; Improvements in Core and MLlib; new features in MLlib; documentation in Core and MLlib</li>
  <li>Yuri Saito &#8211; Bug fixes in SQL</li>
  <li>Zhan Zhang &#8211; Improvements in Core; new features in Core and SQL</li>
  <li>Zhang, Liye &#8211; Documentation in Core; bug fixes in Core and Web UI</li>
  <li>Zhichao Li &#8211; Bug fixes in Streaming, Web UI, and Core</li>
  <li>Zhichao Zhang &#8211; Improvements in SQL; bug fixes in Streaming; documentation in Core</li>
  <li>Zhongshuai Pei &#8211; Improvements and bug fixes in SQL</li>
  <li>Zoltan Zvara &#8211; Bug fixes in Core and YARN</li>
  <li>Zongheng Yang &#8211; New features in SparkR</li>
</ul>

<p><em>Thanks to everyone who contributed!</em></p>


<p>
<br/>
<a href="/news/">Spark News Archive</a>
</p>

  </div>
</div>



<footer class="small">
  <hr>
  Apache Spark, Spark, Apache, and the Spark logo are <a href="https://www.apache.org/foundation/marks/">trademarks</a> of
  <a href="http://www.apache.org">The Apache Software Foundation</a>.
</footer>

</div>

</body>
</html>