summaryrefslogtreecommitdiff
path: root/site/releases/spark-release-2-0-0.html
blob: 66ca23bf117146392a2ba903aa61d8205a2e7cbd (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="utf-8">
  <meta http-equiv="X-UA-Compatible" content="IE=edge">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">

  <title>
     Spark Release 2.0.0 | Apache Spark
    
  </title>

  

  

  <!-- Bootstrap core CSS -->
  <link href="/css/cerulean.min.css" rel="stylesheet">
  <link href="/css/custom.css" rel="stylesheet">

  <!-- Code highlighter CSS -->
  <link href="/css/pygments-default.css" rel="stylesheet">

  <script type="text/javascript">
  <!-- Google Analytics initialization -->
  var _gaq = _gaq || [];
  _gaq.push(['_setAccount', 'UA-32518208-2']);
  _gaq.push(['_trackPageview']);
  (function() {
    var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
    ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
    var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
  })();

  <!-- Adds slight delay to links to allow async reporting -->
  function trackOutboundLink(link, category, action) {
    try {
      _gaq.push(['_trackEvent', category , action]);
    } catch(err){}

    setTimeout(function() {
      document.location.href = link.href;
    }, 100);
  }
  </script>

  <!-- HTML5 shim and Respond.js IE8 support of HTML5 elements and media queries -->
  <!--[if lt IE 9]>
  <script src="https://oss.maxcdn.com/libs/html5shiv/3.7.0/html5shiv.js"></script>
  <script src="https://oss.maxcdn.com/libs/respond.js/1.3.0/respond.min.js"></script>
  <![endif]-->
</head>

<body>

<script src="https://code.jquery.com/jquery.js"></script>
<script src="//netdna.bootstrapcdn.com/bootstrap/3.0.3/js/bootstrap.min.js"></script>
<script src="/js/lang-tabs.js"></script>
<script src="/js/downloads.js"></script>

<div class="container" style="max-width: 1200px;">

<div class="masthead">
  
    <p class="lead">
      <a href="/">
      <img src="/images/spark-logo-trademark.png"
        style="height:100px; width:auto; vertical-align: bottom; margin-top: 20px;"></a><span class="tagline">
          Lightning-fast cluster computing
      </span>
    </p>
  
</div>

<nav class="navbar navbar-default" role="navigation">
  <!-- Brand and toggle get grouped for better mobile display -->
  <div class="navbar-header">
    <button type="button" class="navbar-toggle" data-toggle="collapse"
            data-target="#navbar-collapse-1">
      <span class="sr-only">Toggle navigation</span>
      <span class="icon-bar"></span>
      <span class="icon-bar"></span>
      <span class="icon-bar"></span>
    </button>
  </div>

  <!-- Collect the nav links, forms, and other content for toggling -->
  <div class="collapse navbar-collapse" id="navbar-collapse-1">
    <ul class="nav navbar-nav">
      <li><a href="/downloads.html">Download</a></li>
      <li class="dropdown">
        <a href="#" class="dropdown-toggle" data-toggle="dropdown">
          Libraries <b class="caret"></b>
        </a>
        <ul class="dropdown-menu">
          <li><a href="/sql/">SQL and DataFrames</a></li>
          <li><a href="/streaming/">Spark Streaming</a></li>
          <li><a href="/mllib/">MLlib (machine learning)</a></li>
          <li><a href="/graphx/">GraphX (graph)</a></li>
          <li class="divider"></li>
          <li><a href="http://spark-packages.org">Third-Party Packages</a></li>
        </ul>
      </li>
      <li class="dropdown">
        <a href="#" class="dropdown-toggle" data-toggle="dropdown">
          Documentation <b class="caret"></b>
        </a>
        <ul class="dropdown-menu">
          <li><a href="/docs/latest/">Latest Release (Spark 2.0.0)</a></li>
          <li><a href="/documentation.html">Older Versions and Other Resources</a></li>
        </ul>
      </li>
      <li><a href="/examples.html">Examples</a></li>
      <li class="dropdown">
        <a href="/community.html" class="dropdown-toggle" data-toggle="dropdown">
          Community <b class="caret"></b>
        </a>
        <ul class="dropdown-menu">
          <li><a href="/community.html">Mailing Lists</a></li>
          <li><a href="/community.html#events">Events and Meetups</a></li>
          <li><a href="/community.html#history">Project History</a></li>
          <li><a href="https://cwiki.apache.org/confluence/display/SPARK/Powered+By+Spark">Powered By</a></li>
          <li><a href="https://cwiki.apache.org/confluence/display/SPARK/Committers">Project Committers</a></li>
          <li><a href="https://issues.apache.org/jira/browse/SPARK">Issue Tracker</a></li>
        </ul>
      </li>
      <li><a href="/faq.html">FAQ</a></li>
    </ul>
    <ul class="nav navbar-nav navbar-right">
      <li class="dropdown">
        <a href="http://www.apache.org/" class="dropdown-toggle" data-toggle="dropdown">
          Apache Software Foundation <b class="caret"></b></a>
        <ul class="dropdown-menu">
          <li><a href="http://www.apache.org/">Apache Homepage</a></li>
          <li><a href="http://www.apache.org/licenses/">License</a></li>
          <li><a href="http://www.apache.org/foundation/sponsorship.html">Sponsorship</a></li>
          <li><a href="http://www.apache.org/foundation/thanks.html">Thanks</a></li>
          <li><a href="http://www.apache.org/security/">Security</a></li>
        </ul>
      </li>
    </ul>
  </div>
  <!-- /.navbar-collapse -->
</nav>


<div class="row">
  <div class="col-md-3 col-md-push-9">
    <div class="news" style="margin-bottom: 20px;">
      <h5>Latest News</h5>
      <ul class="list-unstyled">
        
          <li><a href="/news/spark-2-0-0-released.html">Spark 2.0.0 released</a>
          <span class="small">(Jul 26, 2016)</span></li>
        
          <li><a href="/news/spark-1-6-2-released.html">Spark 1.6.2 released</a>
          <span class="small">(Jun 25, 2016)</span></li>
        
          <li><a href="/news/submit-talks-to-spark-summit-eu-2016.html">Call for Presentations for Spark Summit EU is Open</a>
          <span class="small">(Jun 16, 2016)</span></li>
        
          <li><a href="/news/spark-2.0.0-preview.html">Preview release of Spark 2.0</a>
          <span class="small">(May 26, 2016)</span></li>
        
      </ul>
      <p class="small" style="text-align: right;"><a href="/news/index.html">Archive</a></p>
    </div>
    <div class="hidden-xs hidden-sm">
      <a href="/downloads.html" class="btn btn-success btn-lg btn-block" style="margin-bottom: 30px;">
        Download Spark
      </a>
      <p style="font-size: 16px; font-weight: 500; color: #555;">
        Built-in Libraries:
      </p>
      <ul class="list-none">
        <li><a href="/sql/">SQL and DataFrames</a></li>
        <li><a href="/streaming/">Spark Streaming</a></li>
        <li><a href="/mllib/">MLlib (machine learning)</a></li>
        <li><a href="/graphx/">GraphX (graph)</a></li>
      </ul>
      <a href="http://spark-packages.org">Third-Party Packages</a>
    </div>
  </div>

  <div class="col-md-9 col-md-pull-3">
    <h2>Spark Release 2.0.0</h2>


<p>Apache Spark 2.0.0 is the first release on the 2.x line. The major updates are API usability, SQL 2003 support, performance improvements, structured streaming, R UDF support, as well as operational improvements. In addition, this release includes over 2500 patches from over 300 contributors.</p>

<p>To download Apache Spark 2.0.0, visit the <a href="http://spark.apache.org/downloads.html">downloads</a> page. You can consult JIRA for the <a href="https://issues.apache.org/jira/secure/ReleaseNote.jspa?projectId=12315420&amp;version=12329449">detailed changes</a>. We have curated a list of high level changes here, grouped by major modules.</p>

<ul id="markdown-toc">
  <li><a href="#api-stability">API Stability</a></li>
  <li><a href="#core-and-spark-sql">Core and Spark SQL</a>    <ul>
      <li><a href="#programming-apis">Programming APIs</a></li>
      <li><a href="#sql">SQL</a></li>
      <li><a href="#new-features">New Features</a></li>
      <li><a href="#performance-and-runtime">Performance and Runtime</a></li>
    </ul>
  </li>
  <li><a href="#mllib">MLlib</a>    <ul>
      <li><a href="#new-features-1">New features</a></li>
      <li><a href="#speedscaling">Speed/scaling</a></li>
    </ul>
  </li>
  <li><a href="#sparkr">SparkR</a></li>
  <li><a href="#streaming">Streaming</a></li>
  <li><a href="#dependency-and-packaging-improvements">Dependency and Packaging Improvements</a></li>
  <li><a href="#removals-behavior-changes-and-deprecations">Removals, Behavior Changes and Deprecations</a>    <ul>
      <li><a href="#removals">Removals</a></li>
      <li><a href="#behavior-changes">Behavior Changes</a></li>
      <li><a href="#deprecations">Deprecations</a></li>
    </ul>
  </li>
  <li><a href="#known-issues">Known Issues</a></li>
  <li><a href="#credits">Credits</a></li>
</ul>

<h3 id="api-stability">API Stability</h3>

<p>Apache Spark 2.0.0 is the first release in the 2.X major line. Spark is guaranteeing stability of its non-experimental APIs for all 2.X releases. Although the APIs have stayed largely similar to 1.X, Spark 2.0.0 does have API breaking changes. They are documented in the <a href="#removals-behavior-changes-and-deprecations">Removals, Behavior Changes and Deprecations</a> section.</p>

<h3 id="core-and-spark-sql">Core and Spark SQL</h3>

<h4 id="programming-apis">Programming APIs</h4>

<p>One of the largest changes in Spark 2.0 is the new updated APIs:</p>

<ul>
  <li>Unifying DataFrame and Dataset: In Scala and Java, DataFrame and Dataset have been unified, i.e. DataFrame is just a type alias for Dataset of Row. In Python and R, given the lack of type safety, DataFrame is the main programming interface.</li>
  <li>SparkSession: new entry point that replaces the old SQLContext and HiveContext for DataFrame and Dataset APIs. SQLContext and HiveContext are kept for backward compatibility.</li>
  <li>A new, streamlined configuration API for SparkSession</li>
  <li>Simpler, more performant accumulator API</li>
  <li>A new, improved Aggregator API for typed aggregation in Datasets</li>
</ul>

<h4 id="sql">SQL</h4>

<p>Spark 2.0 substantially improved SQL functionalities with SQL2003 support. Spark SQL can now run all 99 TPC-DS queries. More prominently, we have improved:</p>

<ul>
  <li>A native SQL parser that supports both ANSI-SQL as well as Hive QL</li>
  <li>Native DDL command implementations</li>
  <li>Subquery support, including
    <ul>
      <li>Uncorrelated Scalar Subqueries</li>
      <li>Correlated Scalar Subqueries</li>
      <li>NOT IN predicate Subqueries (in WHERE/HAVING clauses)</li>
      <li>IN predicate subqueries (in WHERE/HAVING clauses)</li>
      <li>(NOT) EXISTS predicate subqueries (in WHERE/HAVING clauses)</li>
    </ul>
  </li>
  <li>View canonicalization support</li>
</ul>

<p>In addition, when building without Hive support, Spark SQL should have almost all the functionality as when building with Hive support, with the exception of Hive connectivity, Hive UDFs, and script transforms.</p>

<h4 id="new-features">New Features</h4>

<ul>
  <li>Native CSV data source, based on Databricks&#8217; <a href="https://github.com/databricks/spark-csv">spark-csv module</a></li>
  <li>Off-heap memory management for both caching and runtime execution</li>
  <li>Hive style bucketing support</li>
  <li>Approximate summary statistics using sketches, including approximate quantile, Bloom filter, and count-min sketch.</li>
</ul>

<h4 id="performance-and-runtime">Performance and Runtime</h4>

<ul>
  <li>Substantial (2 - 10X) performance speedups for common operators in SQL and DataFrames via a new technique called whole stage code generation.</li>
  <li>Improved Parquet scan throughput through vectorization</li>
  <li>Improved ORC performance</li>
  <li>Many improvements in the Catalyst query optimizer for common workloads</li>
  <li>Improved window function performance via native implementations for all window functions</li>
  <li>Automatic file coalescing for native data sources</li>
</ul>

<h3 id="mllib">MLlib</h3>
<p>The DataFrame-based API is now the primary API. The RDD-based API is entering maintenance mode. See the MLlib guide for details</p>

<h4 id="new-features-1">New features</h4>

<ul>
  <li>ML persistence: The DataFrames-based API provides near-complete support for saving and loading ML models and Pipelines in Scala, Java, Python, and R.  See this blog post for details.  (SPARK-6725, SPARK-11939, SPARK-14311)</li>
  <li>MLlib in R: SparkR now offers MLlib APIs for generalized linear models, naive Bayes, k-means clustering, and survival regression.  See this talk to learn more.</li>
  <li>Python: PySpark now offers many more MLlib algorithms, including LDA, Gaussian Mixture Model, Generalized Linear Regression, and more.</li>
  <li>Algorithms added to DataFrames-based API: Bisecting K-Means clustering, Gaussian Mixture Model, MaxAbsScaler feature transformer.</li>
</ul>

<p>This talk lists many of these new features.</p>

<h4 id="speedscaling">Speed/scaling</h4>
<p>Vectors and Matrices stored in DataFrames now use much more efficient serialization, reducing overhead in calling MLlib algorithms. (SPARK-14850)</p>

<h3 id="sparkr">SparkR</h3>

<p>The largest improvement to SparkR in Spark 2.0 is user-defined functions. There are three user-defined functions: dapply, gapply, and lapply. The first two can be used to do partition-based UDFs using dapply and gapply, e.g. partitioned model learning. The latter can be used to do hyper-parameter tuning.</p>

<p>In addition, there are a number of new features:</p>

<ul>
  <li>Improved algorithm coverage for machine learning in R, including naive Bayes, k-means clustering, and survival regression.</li>
  <li>Generalized linear models support more families and link functions.</li>
  <li>Save and load for all ML models.</li>
  <li>More DataFrame functionality: Window functions API, reader, writer support for JDBC, CSV, SparkSession</li>
</ul>

<h3 id="streaming">Streaming</h3>

<p>Spark 2.0 ships the initial experimental release for Structured Streaming, a high level streaming API built on top of Spark SQL and the Catalyst optimizer. Structured Streaming enables users to program against streaming sources and sinks using the same DataFrame/Dataset API as in static data sources, leveraging the Catalyst optimizer to automatically incrementalize the query plans.</p>

<p>For the DStream API, the most prominent update is the new experimental support for Kafka 0.10.</p>

<h3 id="dependency-and-packaging-improvements">Dependency and Packaging Improvements</h3>

<p>There are a variety of changes to Spark&#8217;s operations and packaging process:</p>

<ul>
  <li>Spark 2.0 no longer requires a fat assembly jar for production deployment.</li>
  <li>Akka dependency has been removed, and as a result, user applications can program against any versions of Akka.</li>
  <li>Kryo version is bumped to 3.0.</li>
  <li>The default build is now using Scala 2.11 rather than Scala 2.10.</li>
</ul>

<h3 id="removals-behavior-changes-and-deprecations">Removals, Behavior Changes and Deprecations</h3>

<h4 id="removals">Removals</h4>
<p>The following features have been removed in Spark 2.0:</p>

<ul>
  <li>Bagel</li>
  <li>Support for Hadoop 2.1 and earlier</li>
  <li>The ability to configure closure serializer</li>
  <li>HTTPBroadcast</li>
  <li>TTL-based metadata cleaning</li>
  <li>Semi-private class org.apache.spark.Logging. We suggest you use slf4j directly.</li>
  <li>SparkContext.metricsSystem</li>
  <li>Block-oriented integration with Tachyon (subsumed by file system integration)</li>
  <li>Methods deprecated in Spark 1.x</li>
  <li>Methods on Python DataFrame that returned RDDs (map, flatMap, mapPartitions, etc). They are still available in dataframe.rdd field, e.g. dataframe.rdd.map.</li>
  <li>Less frequently used streaming connectors, including Twitter, Akka, MQTT, ZeroMQ</li>
  <li>Hash-based shuffle manager</li>
  <li>History serving functionality from standalone Master</li>
  <li>For Java and Scala, DataFrame no longer exists as a class. As a result, data sources would need to be updated.</li>
  <li>Spark EC2 script has been fully moved to an <a href="https://github.com/amplab/spark-ec2">external repository hosted by the UC Berkeley AMPLab</a></li>
</ul>

<h4 id="behavior-changes">Behavior Changes</h4>
<p>The following changes might require updating existing applications that depend on the old behavior or API.</p>

<ul>
  <li>The default build is now using Scala 2.11 rather than Scala 2.10.</li>
  <li>In SQL, floating literals are now parsed as decimal data type rather than double data type.</li>
  <li>Kryo version is bumped to 3.0.</li>
  <li>Java RDD’s flatMap and mapPartitions functions used to require functions returning Java Iterable. They have been updated to require functions returning Java iterator so the functions do not need to materialize all the data.</li>
  <li>Java RDD’s countByKey and countAprroxDistinctByKey now returns a map from K to java.lang.Long, rather than to java.lang.Object.</li>
  <li>When writing Parquet files, the summary files are not written by default. To re-enable it, users must set “parquet.enable.summary-metadata” to true.</li>
  <li>The DataFrame-based API (spark.ml) now depends upon local linear algebra in spark.ml.linalg, rather than in spark.mllib.linalg.  This removes the last dependencies of spark.ml.* on spark.mllib.*. (SPARK-13944) See the MLlib migration guide for a full list of API changes.</li>
</ul>

<p>For a more complete list, please see <a href="https://issues.apache.org/jira/browse/SPARK-11806">SPARK-11806</a> for deprecations and removals.</p>

<h4 id="deprecations">Deprecations</h4>
<p>The following features have been deprecated in Spark 2.0, and might be removed in future versions of Spark 2.x:</p>

<ul>
  <li>Fine-grained mode in Apache Mesos</li>
  <li>Support for Java 7</li>
  <li>Support for Python 2.6</li>
</ul>

<h3 id="known-issues">Known Issues</h3>

<ul>
  <li>Lead and Lag&#8217;s behaviors have been changed to ignoring nulls from respecting nulls (1.6&#8217;s behaviors). In 2.0.1, the behavioral changes will be fixed in 2.0.1 (SPARK-16721).</li>
  <li>Lead and Lag functions using constant input values does not return the default value when the offset row does not exist (SPARK-16633).</li>
</ul>

<h3 id="credits">Credits</h3>
<p>Last but not least, this release would not have been possible without the following contributors: Aaron Tokhy, Abhinav Gupta, Abou Haydar Elias, Adam Budde, Adam Roberts, Ahmed Kamal, Ahmed Mahran, Alex Bozarth, Alexander Ulanov, Allen, Anatoliy Plastinin, Andrew, Andrew Ash, Andrew Or, Andrew Ray, Anthony Truchet, Antonio Murgia, Arun Allamsetty, Azeem Jiva, Ben McCann, BenFradet, Bertrand Bossy, Bill Chambers, Bjorn Jonsson, Bo Meng, Brandon Bradley, Brian O&#8217;Neill, BrianLondon, Bryan Cutler, Burak Köse, Burak Yavuz, Carson Wang, Cazen, Charles Allen, Cheng Hao, Cheng Lian, Claes Redestad, CodingCat, DB Tsai, DLucky, Daniel Jalova, Daoyuan Wang, Darek Blasiak, David Tolpin, Davies Liu, Devaraj K, Dhruve Ashar, Dilip Biswal, Dmitry Erastov, Dominik Jastrzębski, Dongjoon Hyun, Earthson Lu, Egor Pakhomov, Ehsan M.Kermani, Ergin Seyfe, Eric Liang, Ernest, Felix Cheung, Feynman Liang, Fokko Driesprong, Franklyn D&#8217;souza, François Garillot, Gabriele Nizzoli, Gary King, GayathriMurali, Gio Borje, Grace, Grzegorz Chilkiewicz, Guillaume Poulin, Gábor Lipták, Hemant Bhanawat, Herman van Hovell, Herman van Hövell tot Westerflier, Hiroshi Inoue, Holden Karau, Hossein, Huaxin Gao, Imran Rashid, Imran Younus, Ioana Delaney, Iulian Dragos, Jacek Laskowski, Jacek Lewandowski, Jakob Odersky, James Lohse, James Thomas, Jason Lee, Jason Moore, Jason White, Jean-Baptiste Onofré, Jeff L, Jeff Zhang, Jeremy Derr, JeremyNixon, Jo Voordeckers, Joan, Jon Maurer, Joseph K. Bradley, Josh Howes, Josh Rosen, Joshi, Juarez Bochi, Julien Baley, Junyang, Junyang Qian, Jurriaan Pruis, Kai Jiang, KaiXinXiaoLei, Kay Ousterhout, Kazuaki Ishizaki, Kevin Yu, Koert Kuipers, Kousuke Saruta, Koyo Yoshida, Krishna Kalyan, Lewuathe, Liang-Chi Hsieh, Lianhui Wang, Lin Zhao, Lining Sun, Liu Xiang, Liwei Lin, Luc Bourlier, Luciano Resende, Lukasz, Maciej Brynski, Malte, Marcelo Vanzin, Marcin Tustin, Mark Grover, Martin Menestret, Masayoshi TSUZUKI, Matei Zaharia, Matthew Wise, Michael Allman, Michael Armbrust, Michael Gummelt, Michel Lemay, Mike Dusenberry, Mortada Mehyar, Nakul Jindal, Nam Pham, Narine Kokhlikyan, NarineK, Neelesh Srinivas Salian, Nezih Yigitbasi, Nicholas Chammas, Nicholas Tietz, Nick Pentreath, Nilanjan Raychaudhuri, Nirman Narang, Nishkam Ravi, Nong, Nong Li, Oleg Danilov, Oliver Pierson, Oscar D. Lara Yejas, Parth Brahmbhatt, Patrick Wendell, Pete Robbins, Peter Ableda, Prajwal Tuladhar, Prashant Sharma, Pravin Gadakh, QiangCai, Qifan Pu, Raafat Akkad, Rahul Tanwani, Rajesh Balamohan, Rekha Joshi, Reynold Xin, Richard W. Eggert II, Robert Dodier, Robert Kruszewski, Robin East, Ruifeng Zheng, Ryan Blue, Sameer Agarwal, Sandeep Singh, Sanket, Sasaki Toru, Sean Owen, Sean Zhong, Sebastien Rainville, Sebastián Ramírez, Sela, Sergiusz Urbaniak, Shally Sangal, Sheamus K. Parkes, Shivaram Venkataraman, Shixiong Zhu, Shuai Lin, Shubhanshu Mishra, Sital Kedia, Stavros Kontopoulos, Stephan Kessler, Steve Loughran, Subhobrata Dey, Subroto Sanyal, Sumedh Mungee, Sun Rui, Sunitha Kambhampati, Takahashi Hiroshi, Takeshi YAMAMURO, Takuya Kuwahara, Takuya UESHIN, Tathagata Das, Tejas Patil, Terence Yim, Thomas Graves, Timothy Chen, Timothy Hunter, Tom Graves, Tom Magrino, Tommy YU, Travis Crawford, Tristan Reid, Victor Chima, Villu Ruusmann, Wayne Song, WeichenXu, Weiqing Yang, Wenchen Fan, Wesley Tang, Wilson Wu, Wojciech Jurczyk, Xiangrui Meng, Xin Ren, Xin Wu, Xinh Huynh, Xiu Guo, Xusen Yin, Yadong Qi, Yanbo Liang, Yash Datta, Yin Huai, Yonathan Randolph, Yong Gang Cao, Yong Tang, Yu ISHIKAWA, Yucai Yu, Yuhao Yang, Yury Liavitski, Zhang, Liye, Zheng RuiFeng, Zheng Tan, aokolnychyi, bomeng, catapan, cody koeninger, dding3, depend, echo2mei, felixcheung, frreiss, fwang1, gatorsmile, guoxu1231, huangzhaowei, hushan, hyukjinkwon, jayadevanmurali, jeanlyn, jerryshao, jliwork, junhao, kaklakariada, krishnakalyan3, lfzCarlosC, lgieron, mark800, mathieu longtin, mcheah, meiyoula, movelikeriver, mwws, nfraison, oraviv, peng.zhang, petermaxlee, pierre-borckmans, poolis, prabs, proflin, pshearer, rotems, sachin aggarwal, sandy, scwf, seddonm1, sethah, sharkd, shijinkui, sureshthalamati, tedyu, thomastechs, tmnd1991, vijaykiran, wangfei, wangyang, wm624@hotmail.com, wujian, xin Wu, yzhou2001, zero323, zhonghaihua, zhuol, zlpmichelle, Örjan Lundberg, Yang Bo.</p>


<p>
<br/>
<a href="/news/">Spark News Archive</a>
</p>

  </div>
</div>



<footer class="small">
  <hr>
  Apache Spark, Spark, Apache, and the Spark logo are <a href="https://www.apache.org/foundation/marks/">trademarks</a> of
  <a href="http://www.apache.org">The Apache Software Foundation</a>.
</footer>

</div>

</body>
</html>