summaryrefslogtreecommitdiff
path: root/site/releases/spark-release-1-3-0.html
blob: aaf84868fa39e2a02503e1a69e624c03e38b8a66 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="utf-8">
  <meta http-equiv="X-UA-Compatible" content="IE=edge">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">

  <title>
     Spark Release 1.3.0 | Apache Spark
    
  </title>

  

  

  <!-- Bootstrap core CSS -->
  <link href="/css/cerulean.min.css" rel="stylesheet">
  <link href="/css/custom.css" rel="stylesheet">

  <!-- Code highlighter CSS -->
  <link href="/css/pygments-default.css" rel="stylesheet">

  <script type="text/javascript">
  <!-- Google Analytics initialization -->
  // Queue of analytics commands. An already-defined `_gaq` is kept as is;
  // otherwise the queue starts out as an empty array.
  var _gaq = _gaq || [];
  // Queue the account id, then a page-view command.
  _gaq.push(['_setAccount', 'UA-32518208-2']);
  _gaq.push(['_trackPageview']);
  (function() {
    // Build the script element that loads ga.js without blocking the page.
    var tracker = document.createElement('script');
    tracker.type = 'text/javascript';
    tracker.async = true;

    // Choose the host prefix that matches the protocol of the current page.
    var hostPrefix;
    if ('https:' == document.location.protocol) {
      hostPrefix = 'https://ssl';
    } else {
      hostPrefix = 'http://www';
    }
    tracker.src = hostPrefix + '.google-analytics.com/ga.js';

    // Insert the loader immediately before the first script element.
    var firstScript = document.getElementsByTagName('script')[0];
    firstScript.parentNode.insertBefore(tracker, firstScript);
  })();

  <!-- Adds slight delay to links to allow async reporting -->
  /**
   * Queues a `_trackEvent` command for an outbound link, then navigates to
   * the link's target after a 100 ms delay.
   *
   * @param link     object whose `href` is the navigation target
   * @param category event category passed through to `_trackEvent`
   * @param action   event action passed through to `_trackEvent`
   */
  function trackOutboundLink(link, category, action) {
    // `link.href` is read when the timer fires, not when this function runs.
    var followLink = function() {
      document.location.href = link.href;
    };

    try {
      _gaq.push(['_trackEvent', category , action]);
    } catch (ignored) {
      // Best effort: a failure to queue the event must not block navigation.
    }

    setTimeout(followLink, 100);
  }
  </script>

  <!-- HTML5 shim and Respond.js IE8 support of HTML5 elements and media queries -->
  <!--[if lt IE 9]>
  <script src="https://oss.maxcdn.com/libs/html5shiv/3.7.0/html5shiv.js"></script>
  <script src="https://oss.maxcdn.com/libs/respond.js/1.3.0/respond.min.js"></script>
  <![endif]-->
</head>

<body>

<!-- Third-party libraries are requested over explicit HTTPS so they resolve
     regardless of the scheme the page itself is served from. -->
<script src="https://code.jquery.com/jquery.js"></script>
<script src="https://netdna.bootstrapcdn.com/bootstrap/3.0.3/js/bootstrap.min.js"></script>
<script src="/js/lang-tabs.js"></script>
<script src="/js/downloads.js"></script>

<div class="container" style="max-width: 1200px;">

<div class="masthead">
  
    <!-- The logo is the only content of the home link, so its alt text
         provides the link's accessible name. -->
    <p class="lead">
      <a href="/">
      <img src="/images/spark-logo-trademark.png" alt="Apache Spark"
        style="height:100px; width:auto; vertical-align: bottom; margin-top: 20px;"></a><span class="tagline">
          Lightning-fast cluster computing
      </span>
    </p>
  
</div>

<nav class="navbar navbar-default" role="navigation">
  <!-- Brand and toggle get grouped for better mobile display -->
  <div class="navbar-header">
    <button type="button" class="navbar-toggle" data-toggle="collapse"
            data-target="#navbar-collapse-1">
      <span class="sr-only">Toggle navigation</span>
      <span class="icon-bar"></span>
      <span class="icon-bar"></span>
      <span class="icon-bar"></span>
    </button>
  </div>

  <!-- Collect the nav links, forms, and other content for toggling -->
  <div class="collapse navbar-collapse" id="navbar-collapse-1">
    <ul class="nav navbar-nav">
      <li><a href="/downloads.html">Download</a></li>
      <li class="dropdown">
        <a href="#" class="dropdown-toggle" data-toggle="dropdown">
          Libraries <b class="caret"></b>
        </a>
        <ul class="dropdown-menu">
          <li><a href="/sql/">SQL and DataFrames</a></li>
          <li><a href="/streaming/">Spark Streaming</a></li>
          <li><a href="/mllib/">MLlib (machine learning)</a></li>
          <li><a href="/graphx/">GraphX (graph)</a></li>
          <li class="divider"></li>
          <li><a href="http://spark-packages.org">Third-Party Packages</a></li>
        </ul>
      </li>
      <li class="dropdown">
        <a href="#" class="dropdown-toggle" data-toggle="dropdown">
          Documentation <b class="caret"></b>
        </a>
        <ul class="dropdown-menu">
          <li><a href="/docs/latest/">Latest Release (Spark 2.0.0)</a></li>
          <li><a href="/documentation.html">Older Versions and Other Resources</a></li>
        </ul>
      </li>
      <li><a href="/examples.html">Examples</a></li>
      <li class="dropdown">
        <a href="/community.html" class="dropdown-toggle" data-toggle="dropdown">
          Community <b class="caret"></b>
        </a>
        <ul class="dropdown-menu">
          <li><a href="/community.html">Mailing Lists</a></li>
          <li><a href="/community.html#events">Events and Meetups</a></li>
          <li><a href="/community.html#history">Project History</a></li>
          <li><a href="https://cwiki.apache.org/confluence/display/SPARK/Powered+By+Spark">Powered By</a></li>
          <li><a href="https://cwiki.apache.org/confluence/display/SPARK/Committers">Project Committers</a></li>
          <li><a href="https://issues.apache.org/jira/browse/SPARK">Issue Tracker</a></li>
        </ul>
      </li>
      <li><a href="/faq.html">FAQ</a></li>
    </ul>
    <ul class="nav navbar-nav navbar-right">
      <li class="dropdown">
        <a href="http://www.apache.org/" class="dropdown-toggle" data-toggle="dropdown">
          Apache Software Foundation <b class="caret"></b></a>
        <ul class="dropdown-menu">
          <li><a href="http://www.apache.org/">Apache Homepage</a></li>
          <li><a href="http://www.apache.org/licenses/">License</a></li>
          <li><a href="http://www.apache.org/foundation/sponsorship.html">Sponsorship</a></li>
          <li><a href="http://www.apache.org/foundation/thanks.html">Thanks</a></li>
          <li><a href="http://www.apache.org/security/">Security</a></li>
        </ul>
      </li>
    </ul>
  </div>
  <!-- /.navbar-collapse -->
</nav>


<div class="row">
  <div class="col-md-3 col-md-push-9">
    <div class="news" style="margin-bottom: 20px;">
      <h5>Latest News</h5>
      <ul class="list-unstyled">
        
          <li><a href="/news/spark-2-0-0-released.html">Spark 2.0.0 released</a>
          <span class="small">(Jul 26, 2016)</span></li>
        
          <li><a href="/news/spark-1-6-2-released.html">Spark 1.6.2 released</a>
          <span class="small">(Jun 25, 2016)</span></li>
        
          <li><a href="/news/submit-talks-to-spark-summit-eu-2016.html">Call for Presentations for Spark Summit EU is Open</a>
          <span class="small">(Jun 16, 2016)</span></li>
        
          <li><a href="/news/spark-2.0.0-preview.html">Preview release of Spark 2.0</a>
          <span class="small">(May 26, 2016)</span></li>
        
      </ul>
      <p class="small" style="text-align: right;"><a href="/news/index.html">Archive</a></p>
    </div>
    <div class="hidden-xs hidden-sm">
      <a href="/downloads.html" class="btn btn-success btn-lg btn-block" style="margin-bottom: 30px;">
        Download Spark
      </a>
      <p style="font-size: 16px; font-weight: 500; color: #555;">
        Built-in Libraries:
      </p>
      <ul class="list-none">
        <li><a href="/sql/">SQL and DataFrames</a></li>
        <li><a href="/streaming/">Spark Streaming</a></li>
        <li><a href="/mllib/">MLlib (machine learning)</a></li>
        <li><a href="/graphx/">GraphX (graph)</a></li>
      </ul>
      <a href="http://spark-packages.org">Third-Party Packages</a>
    </div>
  </div>

  <div class="col-md-9 col-md-pull-3">
    <h2>Spark Release 1.3.0</h2>


<p>Spark 1.3.0 is the fourth release on the 1.X line. This release brings a new DataFrame API alongside the graduation of Spark SQL from an alpha project. It also brings usability improvements in Spark’s core engine and expansion of MLlib and Spark Streaming. Spark 1.3 represents the work of 174 contributors from more than 60 institutions in more than 1000 individual patches.</p>

<p>To download Spark 1.3 visit the <a href="/downloads.html">downloads</a> page.</p>

<h3 id="spark-core">Spark Core</h3>
<p>Spark 1.3 sees a handful of usability improvements in the core engine. The core API now supports <a href="https://issues.apache.org/jira/browse/SPARK-5430">multi-level aggregation trees</a> to help speed up expensive reduce operations. <a href="https://issues.apache.org/jira/browse/SPARK-5063">Improved error reporting</a> has been added for certain gotcha operations. Spark&#8217;s Jetty dependency is <a href="https://issues.apache.org/jira/browse/SPARK-3996">now shaded</a> to help avoid conflicts with user programs. Spark now supports <a href="https://issues.apache.org/jira/browse/SPARK-3883">SSL encryption</a> for some communication endpoints. Finally, real-time <a href="https://issues.apache.org/jira/browse/SPARK-3428">GC metrics</a> and <a href="https://issues.apache.org/jira/browse/SPARK-4874">record counts</a> have been added to the UI.</p>

<h3 id="dataframe-api">DataFrame API</h3>
<p>Spark 1.3 adds a new <a href="/docs/1.3.0/sql-programming-guide.html#dataframes">DataFrames API</a> that provides powerful and convenient operators when working with structured datasets. The DataFrame is an evolution of the base RDD API that includes named fields along with schema information. It’s easy to construct a DataFrame from sources such as Hive tables, JSON data, a JDBC database, or any implementation of Spark’s new data source API. Data frames will become a common interchange format between Spark components and when importing and exporting data to other systems. Data frames are supported in Python, Scala, and Java.</p>

<h3 id="spark-sql">Spark SQL</h3>
<p>In this release Spark SQL <a href="https://issues.apache.org/jira/browse/SPARK-5166">graduates from an alpha project</a>, providing backwards compatibility guarantees for the HiveQL dialect and stable programmatic APIs. Spark SQL adds support for <a href="https://issues.apache.org/jira/browse/SPARK-5658">writing tables in the data sources API</a>. A new <a href="https://issues.apache.org/jira/browse/SPARK-5472">JDBC data source</a> allows importing and exporting from MySQL, Postgres, and other RDBMS systems. A variety of small changes have expanded the coverage of HiveQL in Spark SQL. Spark SQL also adds support for schema evolution with the ability to <a href="https://issues.apache.org/jira/browse/SPARK-3851">merge compatible schemas in Parquet</a>.</p>

<h3 id="spark-mlmllib">Spark ML/MLlib</h3>
<p>In this release Spark MLlib introduces several new algorithms: latent Dirichlet allocation (LDA) for <a href="https://issues.apache.org/jira/browse/SPARK-1405">topic modeling</a>, <a href="https://issues.apache.org/jira/browse/SPARK-2309">multinomial logistic regression</a> for multiclass classification, <a href="https://issues.apache.org/jira/browse/SPARK-5012">Gaussian mixture model (GMM)</a> and <a href="https://issues.apache.org/jira/browse/SPARK-4259">power iteration clustering</a> for clustering, <a href="https://issues.apache.org/jira/browse/SPARK-4001">FP-growth</a> for frequent pattern mining, and <a href="https://issues.apache.org/jira/browse/SPARK-4409">block matrix abstraction</a> for distributed linear algebra. Initial support has been added for <a href="https://issues.apache.org/jira/browse/SPARK-4587">model import/export</a> in an exchangeable format, which will be expanded in future versions to cover more model types in Java/Python/Scala. The implementations of k-means and ALS receive updates (<a href="https://issues.apache.org/jira/browse/SPARK-3424">SPARK-3424</a>, <a href="https://issues.apache.org/jira/browse/SPARK-3541">SPARK-3541</a>) that lead to significant performance gains. PySpark now supports the <a href="https://issues.apache.org/jira/browse/SPARK-4586">ML pipeline API</a> added in Spark 1.2, and <a href="https://issues.apache.org/jira/browse/SPARK-5094">gradient boosted trees</a> and <a href="https://issues.apache.org/jira/browse/SPARK-5012">Gaussian mixture model</a>. Finally, the ML pipeline API has been ported to support the new DataFrames abstraction.</p>

<h3 id="spark-streaming">Spark Streaming</h3>
<p>Spark 1.3 introduces a new <a href="https://issues.apache.org/jira/browse/SPARK-4964"><em>direct</em> Kafka API</a> (<a href="http://spark.apache.org/docs/1.3.0/streaming-kafka-integration.html">docs</a>) which enables exactly-once delivery without the use of write-ahead logs. It also adds a <a href="https://issues.apache.org/jira/browse/SPARK-5047">Python Kafka API</a> along with infrastructure for additional Python APIs in future releases. An online version of <a href="https://issues.apache.org/jira/browse/SPARK-4979">logistic regression</a> and the ability to read <a href="https://issues.apache.org/jira/browse/SPARK-4969">binary records</a> have also been added. For stateful operations, support has been added for loading of an <a href="https://issues.apache.org/jira/browse/SPARK-3660">initial state RDD</a>. Finally, the streaming programming guide has been updated to include information about SQL and DataFrame operations within streaming applications, and important clarifications to the fault-tolerance semantics.</p>

<h3 id="graphx">GraphX</h3>
<p>GraphX adds a handful of utility functions in this release, including conversion into a <a href="https://issues.apache.org/jira/browse/SPARK-4917">canonical edge graph</a>.</p>

<h2 id="upgrading-to-spark-13">Upgrading to Spark 1.3</h2>
<p>Spark 1.3 is binary compatible with Spark 1.X releases, so no code changes are necessary. This excludes APIs marked explicitly as unstable.</p>

<p>As part of stabilizing the Spark SQL API, the <code>SchemaRDD</code> class has been renamed to <code>DataFrame</code>. Spark SQL&#8217;s <a href="http://spark.apache.org/docs/1.3.0/sql-programming-guide.html#migration-guide">migration guide</a> describes the upgrade process in detail. Spark SQL also now requires that column identifiers which use reserved words (such as &#8220;string&#8221; or &#8220;table&#8221;) be escaped using backticks.</p>

<h3 id="known-issues">Known Issues</h3>
<p>This release has a few known issues which will be addressed in Spark 1.3.1:</p>

<ul>
  <li><a href="https://issues.apache.org/jira/browse/SPARK-6194">SPARK-6194</a>: A memory leak in PySpark&#8217;s <code>collect()</code>.</li>
  <li><a href="https://issues.apache.org/jira/browse/SPARK-6222">SPARK-6222</a>: An issue with failure recovery in Spark Streaming.</li>
  <li><a href="https://issues.apache.org/jira/browse/SPARK-6315">SPARK-6315</a>: Spark SQL can&#8217;t read parquet data generated with Spark 1.1.</li>
  <li><a href="https://issues.apache.org/jira/browse/SPARK-6247">SPARK-6247</a>: Errors analyzing certain join types in Spark SQL.</li>
</ul>

<h3 id="credits">Credits</h3>
<ul>
  <li>Aaron Davidson &#8211; Bug fixes in Core</li>
  <li>Alex Baretta &#8211; Improvement in Core</li>
  <li>Alex Liu &#8211; Improvements in Core and SQL; bug fixes in SQL</li>
  <li>Alexander Bezzubov &#8211; Documentation in Core</li>
  <li>Alexander Ulanov &#8211; Umbrella in MLlib; documentation in Core and MLlib; new features in MLlib</li>
  <li>Andrew Ash &#8211; Documentation in Core</li>
  <li>Andrew Or &#8211; Improvements in Core and YARN; bug fixes in Core and YARN</li>
  <li>Andrew Rowson &#8211; Bug fixes in YARN</li>
  <li>Andrey Zagrebin &#8211; Improvements in Core and PySpark</li>
  <li>Antonio Navarro Perez &#8211; Documentation in Core</li>
  <li>Ben Cook &#8211; Test in MLlib and PySpark; improvements in PySpark and SQL; new features in Core</li>
  <li>Bilna P &#8211; Test in Streaming</li>
  <li>Brennon York &#8211; New features in Core; bug fixes in Core, GraphX, and scheduler; improvement in Core</li>
  <li>Burak Yavuz &#8211; Improvements in spark submit and MLlib; new features in Core and MLlib; bug fixes in Core and spark submit; documentation in Core and MLlib</li>
  <li>Cheng Hao &#8211; Improvements in SQL; new features in SQL; bug fixes in Core and SQL</li>
  <li>Cheng Lian &#8211; Documentation in Core; test in SQL; improvements in Core and SQL; bug fixes in Core, tests, and SQL; improvement in SQL</li>
  <li>Cheolsoo Park &#8211; Bug fixes in YARN</li>
  <li>Chip Senkbeil &#8211; Bug fixes in Core</li>
  <li>Christophe Preaud &#8211; Improvements in Core</li>
  <li>Cody Koeninger &#8211; Improvements in Streaming</li>
  <li>DB Tsai &#8211; Improvements in MLlib; documentation in Core and MLlib; new features in MLlib; bug fixes in MLlib; improvement in MLlib</li>
  <li>Dale Richardson &#8211; Improvement in Core</li>
  <li>Daniel Darabos &#8211; Bug fixes in Core</li>
  <li>Daoyuan Wang &#8211; Improvement in SQL; improvements in Core and SQL; new features in Core and SQL; bug fixes in SQL; documentation in Core</li>
  <li>David Y. Ross &#8211; Umbrella in Core</li>
  <li>Davies Liu &#8211; Improvements in PySpark; documentation in Core and PySpark; new features in Streaming and PySpark; bug fixes in Streaming, Core, PySpark, MLlib, and SQL; improvement in PySpark and SQL</li>
  <li>Derek Ma &#8211; Bug fixes in Shuffle</li>
  <li>Doing Done &#8211; Improvements in SQL</li>
  <li>Elmer Garduno &#8211; Bug fixes in Core</li>
  <li>Emre Sevinc &#8211; Documentation in Core and MLlib</li>
  <li>Eric Moyer &#8211; Documentation in Core</li>
  <li>Ernest &#8211; Improvements in Core and GraphX</li>
  <li>Evan Yu &#8211; Bug fixes in Core</li>
  <li>Fan Jiang &#8211; New features in MLlib</li>
  <li>Fernando Otero (ZeoS) &#8211; Improvements in MLlib</li>
  <li>Gabe Mulley &#8211; Bug fixes in PySpark and SQL</li>
  <li>Gang Li &#8211; Bug fixes in Core</li>
  <li>Gankun Luo &#8211; Improvements in Core; bug fixes in SQL</li>
  <li>Gaspar Munoz &#8211; Documentation in Core</li>
  <li>Gen TANG &#8211; Bug fixes in EC2</li>
  <li>Grzegorz Dubicki &#8211; Improvements in EC2</li>
  <li>Guo Wei &#8211; Bug fixes in SQL</li>
  <li>GuoQiang Li &#8211; Improvements in Core; bug fixes in Core and YARN</li>
  <li>Hari Shreedharan &#8211; Bug fixes in Streaming, tests, and YARN</li>
  <li>Holden Karau &#8211; Improvements in EC2</li>
  <li>Huang Zhaowei &#8211; Bug fixes in Core and YARN</li>
  <li>Hung Lin &#8211; Improvements in SQL</li>
  <li>Ilayaperumal Gopinathan &#8211; Bug fixes in Streaming</li>
  <li>Ilya Ganelin &#8211; Improvements in Core; bug fixes in Core and Shuffle</li>
  <li>Imran Rashid &#8211; Bug fixes in Core</li>
  <li>Iulian Dragos &#8211; Test in Streaming</li>
  <li>Ivan Vergiliev &#8211; Improvements in Core</li>
  <li>Jacek Lewandowski &#8211; Bug fixes in Core</li>
  <li>Jacky Li &#8211; Improvements in MLlib and SQL; new features in MLlib; bug fixes in MLlib and SQL</li>
  <li>Jakub Dubovsky &#8211; Improvements in MLlib</li>
  <li>Jeremy Freeman &#8211; Improvements in Streaming and PySpark; new features in Streaming and MLlib; bug fixes in MLlib and PySpark</li>
  <li>Jesper Lundgren &#8211; Bug fixes in Streaming</li>
  <li>Jongyoul Lee &#8211; Improvements in Core and Mesos; documentation in Streaming; bug fixes in Core, Mesos, and SQL</li>
  <li>Joseph J.C. Tang &#8211; Bug fixes in MLlib</li>
  <li>Joseph K. Bradley &#8211; New features in MLlib; umbrella in MLlib; documentation in Core and MLlib; improvement in MLlib; improvements in GraphX, MLlib, and SQL; bug fixes in Core, GraphX, PySpark, MLlib, and SQL</li>
  <li>Josh Rosen &#8211; Bug fixes in Core</li>
  <li>Josh Rosen &#8211; Improvements in Core, tests, EC2, and SQL; new features in Core; bug fixes in Core, tests, PySpark, Streaming, scheduler, SQL, spark submit, and Web UI</li>
  <li>Judy Nash &#8211; New features in SQL</li>
  <li>Kai Sasaki &#8211; Documentation in Core and PySpark; bug fixes in Core and MLlib</li>
  <li>Kanwaljit Singh &#8211; Bug fixes in Core</li>
  <li>Kashish Jain &#8211; Bug fixes in YARN</li>
  <li>Kay Ousterhout &#8211; Improvements in Web UI; new features in Core; bug fixes in Core and SQL</li>
  <li>Kazuki Taniguchi &#8211; New features in MLlib and PySpark</li>
  <li>Kenji Kikushima &#8211; Bug fixes in GraphX</li>
  <li>Kenneth Myers &#8211; Documentation in Streaming</li>
  <li>Kirill A. Korinskiy &#8211; Bug fixes in Web UI</li>
  <li>Kostas Sakellis &#8211; Improvements in Core, Web UI, and YARN; bug fixes in Core; improvement in Core</li>
  <li>Kousuke Saruta &#8211; Improvements in Core, Web UI, and YARN; new features in Streaming and PySpark; bug fixes in Core and Web UI; documentation in Core</li>
  <li>Kuldeep &#8211; Bug fixes in SQL</li>
  <li>Li Zhihui &#8211; Documentation in Core</li>
  <li>Liang-Chi Hsieh &#8211; Improvements in Core, MLlib, and SQL; test in Core; documentation in Core; bug fixes in Core and SQL</li>
  <li>Liangliang Gu &#8211; Bug fixes in Web UI</li>
  <li>Lianhui Wang &#8211; Improvements in YARN; bug fixes in Core and YARN</li>
  <li>Liu Hao &#8211; Bug fixes in GraphX</li>
  <li>Liu Jiongzhou &#8211; Bug fixes in MLlib</li>
  <li>Lu Yan &#8211; Improvements in SQL</li>
  <li>Lukasz Jastrzebski &#8211; Bug fixes in Core</li>
  <li>Madhu Siddalingaiah &#8211; Documentation in Core</li>
  <li>Makoto Fukuhara &#8211; Improvements in Core</li>
  <li>Manoj Kumar &#8211; Improvements in MLlib and PySpark; documentation in Core and MLlib</li>
  <li>Marcelo Vanzin &#8211; Improvements in Core and YARN; bug fixes in Core, PySpark, YARN, and SQL</li>
  <li>Markus Dale &#8211; Bug fixes in Core</li>
  <li>Martin Zapletal &#8211; Documentation in Core and MLlib; new features in MLlib</li>
  <li>Masayoshi TSUZUKI &#8211; Improvements in Web UI; bug fixes in Windows, Core, and YARN</li>
  <li>Matei Zaharia &#8211; Improvements in Core</li>
  <li>Matt Whelan &#8211; Bug fixes in Core</li>
  <li>Matthew Cheah &#8211; Bug fixes in Core</li>
  <li>Mayur Rustagi &#8211; Documentation in Streaming</li>
  <li>Meethu Mathew &#8211; New features in MLlib and PySpark</li>
  <li>Michael Armbrust &#8211; Improvements in Core; bug fixes in Core, MLlib, and SQL; improvement in SQL</li>
  <li>Michael Davies &#8211; Improvements in SQL</li>
  <li>Michael Nazario &#8211; Improvements and bug fixes in PySpark</li>
  <li>Mike Jennings &#8211; New features in EC2</li>
  <li>Mingyu Kim &#8211; Bug fixes in Core</li>
  <li>Nan Zhu &#8211; Improvements in Streaming; documentation in Core; bug fixes in Core and Streaming</li>
  <li>Nate Crosswhite &#8211; Improvements in MLlib and PySpark</li>
  <li>Nathan Kronenfeld &#8211; Bug fixes in Core</li>
  <li>Nathan McCarthy &#8211; Bug fixes in Core</li>
  <li>Nicholas Chammas &#8211; Improvements in EC2; umbrella in EC2; bug fixes in EC2; documentation in Core</li>
  <li>Nishkam Ravi &#8211; Bug fixes in Core</li>
  <li>Octavian Geagla &#8211; Improvements in MLlib</li>
  <li>Patrick Wendell &#8211; Improvements in Core; bug fixes in Core, tests, and Streaming; improvement in Core</li>
  <li>Paul Power &#8211; Documentation in Core</li>
  <li>Peishen Jia &#8211; New features in MLlib</li>
  <li>Peng Xu &#8211; Documentation in Core</li>
  <li>Peter Klipfel &#8211; Documentation in Core</li>
  <li>Peter Rudenko &#8211; Improvements in MLlib</li>
  <li>Peter Vandenabeele &#8211; Documentation in Core</li>
  <li>Prabeesh K &#8211; Improvements in Streaming</li>
  <li>Prashant Sharma &#8211; New features in Core; bug fixes in Core; improvement in Core and Web UI</li>
  <li>RJ Nowling &#8211; New features in MLlib and PySpark</li>
  <li>Ravindra Pesala &#8211; Improvements in SQL</li>
  <li>Reynold Xin &#8211; Improvements in Core, Shuffle, and SQL; documentation in Core; bug fixes in Core and SQL; improvement in Java API and SQL</li>
  <li>Reza Zadeh &#8211; Improvements in MLlib</li>
  <li>Ryan Williams &#8211; Improvements, bug fixes, and documentation in Core</li>
  <li>Sadhan Sood &#8211; Bug fixes in SQL</li>
  <li>Saisai Shao &#8211; Improvements in Streaming; bug fixes in Streaming, SQL, and Core; improvement in Streaming</li>
  <li>Sam Halliday &#8211; Improvements in Core</li>
  <li>Sandy Ryza &#8211; Improvements in Core and YARN; bug fixes in Core and YARN; improvement in YARN</li>
  <li>Sasaki Toru &#8211; Improvements in SQL</li>
  <li>Sean Owen &#8211; Documentation in Core; wish in Core; improvements in Java API, Core, MLlib, EC2, and Streaming; bug fixes in Core, tests, MLlib, YARN, Streaming, SQL, Java API, Web UI, and GraphX; improvement in Core</li>
  <li>Shekhar Bansal &#8211; Bug fixes in YARN</li>
  <li>Sheng Li &#8211; Improvements in Core and SQL; new features in SQL; bug fixes in SQL; documentation in Core</li>
  <li>Shixiong Zhu &#8211; Test in Core; improvement in Core; improvements in Streaming, SQL, Shuffle, YARN, and Core; bug fixes in Core, SQL, and Streaming; documentation in Core, YARN, and Streaming</li>
  <li>Shuo Xiang &#8211; New features in MLlib</li>
  <li>Soumitra Kumar &#8211; New features in Streaming</li>
  <li>Stephen Boesch &#8211; Documentation in Core and MLlib</li>
  <li>Stephen Haberman &#8211; Bug fixes in Core</li>
  <li>Su Yan &#8211; Improvements in Core; bug fixes in Core and Web UI</li>
  <li>Takayuki Hasegawa &#8211; Bug fixes in Project Infra</li>
  <li>Takeshi Yamamuro &#8211; Improvements in GraphX; documentation in Core and SQL; bug fixes in GraphX</li>
  <li>Takuya UESHIN &#8211; Improvements and bug fixes in SQL</li>
  <li>Tathagata Das &#8211; Improvements in Streaming; bug fixes in Core, Web UI, PySpark, tests, and Streaming</li>
  <li>Thomas Graves &#8211; Bug fixes in Core</li>
  <li>Thu Kyaw &#8211; Improvements in Core and SQL</li>
  <li>Timothy Chen &#8211; Documentation in Core</li>
  <li>Tingjun Xu &#8211; Improvements in Core; bug fixes in Core and YARN</li>
  <li>Tobias Schlatter &#8211; Improvements and bug fixes in Core</li>
  <li>Tom Panning &#8211; Bug fixes in SQL</li>
  <li>Tor Myklebust &#8211; Improvements in SQL</li>
  <li>Travis Galoppo &#8211; Improvements in MLlib; documentation in Core and MLlib; new features in MLlib</li>
  <li>Tsuyoshi Ozawa &#8211; Documentation in Core and YARN</li>
  <li>Uncle Gen &#8211; Improvements in spark submit and Web UI; bug fixes in Core</li>
  <li>Varun Saxena &#8211; Improvements in Core</li>
  <li>Venkata Ramana Gollamudi &#8211; Bug fixes in Core and SQL; improvement in Core</li>
  <li>Vladimir Grigor &#8211; Bug fixes in EC2</li>
  <li>Vladimir Vladimirov &#8211; Improvements in PySpark</li>
  <li>Wang Fei &#8211; Improvement in SQL; improvements in Web UI and SQL; bug fixes in SQL; documentation in Core</li>
  <li>Wang Tao &#8211; Improvements in Core and YARN; bug fixes in Core and YARN</li>
  <li>Wenchen Fan &#8211; Bug fixes in SQL</li>
  <li>Winston Chen &#8211; Bug fixes in PySpark</li>
  <li>Xiangrui Meng &#8211; Improvements in PySpark, Core, Streaming, EC2, and MLlib; documentation in Core and MLlib; new features in MLlib and PySpark; bug fixes in PySpark, MLlib, and SQL; improvement in MLlib and PySpark</li>
  <li>Xiaohua Yi &#8211; Bug fixes in SQL</li>
  <li>Xiaojing Wang &#8211; Test in SQL; improvements in SQL; documentation in Core</li>
  <li>Xu Kun &#8211; Bug fixes in Core</li>
  <li>Yadong Qi &#8211; Bug fixes in SQL; Improvements in Streaming</li>
  <li>Yanbo Liang &#8211; Bug fixes in SQL, MLlib, and PySpark</li>
  <li>Yandu Oppacher &#8211; Improvements in PySpark</li>
  <li>Yantang Zhai &#8211; Improvements in Core and SQL; bug fixes in SQL</li>
  <li>Yash Datta &#8211; Bug fixes in SQL</li>
  <li>Ye Xianjin &#8211; Bug fixes in Core</li>
  <li>Yi Tian &#8211; Improvements and bug fixes in SQL</li>
  <li>Yin Huai &#8211; Documentation in Core; improvements in SQL; bug fixes in SQL; improvement in SQL</li>
  <li>Yuhao Yang &#8211; Improvements and bug fixes in MLlib</li>
  <li>Yuri Saito &#8211; Improvements in MLlib</li>
  <li>Yuu ISHIKAWA &#8211; New features in MLlib</li>
  <li>Zhan Zhang &#8211; Bug fixes in Core and YARN</li>
  <li>Zhang, Liye &#8211; Improvements in Core and Web UI; bug fixes in Core</li>
</ul>

<p><em>Thanks to everyone who contributed!</em></p>


<p>
<br/>
<a href="/news/">Spark News Archive</a>
</p>

  </div>
</div>



<footer class="small">
  <hr>
  Apache Spark, Spark, Apache, and the Spark logo are <a href="https://www.apache.org/foundation/marks/">trademarks</a> of
  <a href="http://www.apache.org">The Apache Software Foundation</a>.
</footer>

</div>

</body>
</html>