Diffstat (limited to 'site/contributing.html')
-rw-r--r--  site/contributing.html  771
1 file changed, 771 insertions, 0 deletions
diff --git a/site/contributing.html b/site/contributing.html
new file mode 100644
index 000000000..72b529269
--- /dev/null
+++ b/site/contributing.html
@@ -0,0 +1,771 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+ <meta charset="utf-8">
+ <meta http-equiv="X-UA-Compatible" content="IE=edge">
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
+
+ <title>
+ Contributing to Spark | Apache Spark
+
+ </title>
+
+
+
+
+
+ <!-- Bootstrap core CSS -->
+ <link href="/css/cerulean.min.css" rel="stylesheet">
+ <link href="/css/custom.css" rel="stylesheet">
+
+ <!-- Code highlighter CSS -->
+ <link href="/css/pygments-default.css" rel="stylesheet">
+
+ <script type="text/javascript">
+ <!-- Google Analytics initialization -->
+ var _gaq = _gaq || [];
+ _gaq.push(['_setAccount', 'UA-32518208-2']);
+ _gaq.push(['_trackPageview']);
+ (function() {
+ var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
+ ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
+ var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
+ })();
+
+ <!-- Adds slight delay to links to allow async reporting -->
+ function trackOutboundLink(link, category, action) {
+ try {
+ _gaq.push(['_trackEvent', category , action]);
+ } catch(err){}
+
+ setTimeout(function() {
+ document.location.href = link.href;
+ }, 100);
+ }
+ </script>
+
+ <!-- HTML5 shim and Respond.js IE8 support of HTML5 elements and media queries -->
+ <!--[if lt IE 9]>
+ <script src="https://oss.maxcdn.com/libs/html5shiv/3.7.0/html5shiv.js"></script>
+ <script src="https://oss.maxcdn.com/libs/respond.js/1.3.0/respond.min.js"></script>
+ <![endif]-->
+</head>
+
+<body>
+
+<script src="https://code.jquery.com/jquery.js"></script>
+<script src="https://netdna.bootstrapcdn.com/bootstrap/3.0.3/js/bootstrap.min.js"></script>
+<script src="/js/lang-tabs.js"></script>
+<script src="/js/downloads.js"></script>
+
+<div class="container" style="max-width: 1200px;">
+
+<div class="masthead">
+
+ <p class="lead">
+ <a href="/">
+ <img src="/images/spark-logo-trademark.png"
+ style="height:100px; width:auto; vertical-align: bottom; margin-top: 20px;"></a><span class="tagline">
+ Lightning-fast cluster computing
+ </span>
+ </p>
+
+</div>
+
+<nav class="navbar navbar-default" role="navigation">
+ <!-- Brand and toggle get grouped for better mobile display -->
+ <div class="navbar-header">
+ <button type="button" class="navbar-toggle" data-toggle="collapse"
+ data-target="#navbar-collapse-1">
+ <span class="sr-only">Toggle navigation</span>
+ <span class="icon-bar"></span>
+ <span class="icon-bar"></span>
+ <span class="icon-bar"></span>
+ </button>
+ </div>
+
+ <!-- Collect the nav links, forms, and other content for toggling -->
+ <div class="collapse navbar-collapse" id="navbar-collapse-1">
+ <ul class="nav navbar-nav">
+ <li><a href="/downloads.html">Download</a></li>
+ <li class="dropdown">
+ <a href="#" class="dropdown-toggle" data-toggle="dropdown">
+ Libraries <b class="caret"></b>
+ </a>
+ <ul class="dropdown-menu">
+ <li><a href="/sql/">SQL and DataFrames</a></li>
+ <li><a href="/streaming/">Spark Streaming</a></li>
+ <li><a href="/mllib/">MLlib (machine learning)</a></li>
+ <li><a href="/graphx/">GraphX (graph)</a></li>
+ <li class="divider"></li>
+ <li><a href="/third-party-projects.html">Third-Party Projects</a></li>
+ </ul>
+ </li>
+ <li class="dropdown">
+ <a href="#" class="dropdown-toggle" data-toggle="dropdown">
+ Documentation <b class="caret"></b>
+ </a>
+ <ul class="dropdown-menu">
+ <li><a href="/docs/latest/">Latest Release (Spark 2.0.2)</a></li>
+ <li><a href="/documentation.html">Older Versions and Other Resources</a></li>
+ </ul>
+ </li>
+ <li><a href="/examples.html">Examples</a></li>
+ <li class="dropdown">
+ <a href="/community.html" class="dropdown-toggle" data-toggle="dropdown">
+ Community <b class="caret"></b>
+ </a>
+ <ul class="dropdown-menu">
+ <li><a href="/community.html#mailing-lists">Mailing Lists</a></li>
+ <li><a href="/contributing.html">Contributing to Spark</a></li>
+ <li><a href="https://issues.apache.org/jira/browse/SPARK">Issue Tracker</a></li>
+ <li><a href="/community.html#events">Events and Meetups</a></li>
+ <li><a href="/community.html#history">Project History</a></li>
+ <li><a href="/powered-by.html">Powered By</a></li>
+ <li><a href="/committers.html">Project Committers</a></li>
+ </ul>
+ </li>
+ <li><a href="/faq.html">FAQ</a></li>
+ </ul>
+ <ul class="nav navbar-nav navbar-right">
+ <li class="dropdown">
+ <a href="http://www.apache.org/" class="dropdown-toggle" data-toggle="dropdown">
+ Apache Software Foundation <b class="caret"></b></a>
+ <ul class="dropdown-menu">
+ <li><a href="http://www.apache.org/">Apache Homepage</a></li>
+ <li><a href="http://www.apache.org/licenses/">License</a></li>
+ <li><a href="http://www.apache.org/foundation/sponsorship.html">Sponsorship</a></li>
+ <li><a href="http://www.apache.org/foundation/thanks.html">Thanks</a></li>
+ <li><a href="http://www.apache.org/security/">Security</a></li>
+ </ul>
+ </li>
+ </ul>
+ </div>
+ <!-- /.navbar-collapse -->
+</nav>
+
+
+<div class="row">
+ <div class="col-md-3 col-md-push-9">
+ <div class="news" style="margin-bottom: 20px;">
+ <h5>Latest News</h5>
+ <ul class="list-unstyled">
+
+ <li><a href="/news/spark-wins-cloudsort-100tb-benchmark.html">Spark wins CloudSort Benchmark as the most efficient engine</a>
+ <span class="small">(Nov 15, 2016)</span></li>
+
+ <li><a href="/news/spark-2-0-2-released.html">Spark 2.0.2 released</a>
+ <span class="small">(Nov 14, 2016)</span></li>
+
+ <li><a href="/news/spark-1-6-3-released.html">Spark 1.6.3 released</a>
+ <span class="small">(Nov 07, 2016)</span></li>
+
+ <li><a href="/news/spark-2-0-1-released.html">Spark 2.0.1 released</a>
+ <span class="small">(Oct 03, 2016)</span></li>
+
+ </ul>
+ <p class="small" style="text-align: right;"><a href="/news/index.html">Archive</a></p>
+ </div>
+ <div class="hidden-xs hidden-sm">
+ <a href="/downloads.html" class="btn btn-success btn-lg btn-block" style="margin-bottom: 30px;">
+ Download Spark
+ </a>
+ <p style="font-size: 16px; font-weight: 500; color: #555;">
+ Built-in Libraries:
+ </p>
+ <ul class="list-none">
+ <li><a href="/sql/">SQL and DataFrames</a></li>
+ <li><a href="/streaming/">Spark Streaming</a></li>
+ <li><a href="/mllib/">MLlib (machine learning)</a></li>
+ <li><a href="/graphx/">GraphX (graph)</a></li>
+ </ul>
+ <a href="/third-party-projects.html">Third-Party Projects</a>
+ </div>
+ </div>
+
+ <div class="col-md-9 col-md-pull-3">
+ <p>This guide documents the best way to make various types of contribution to Apache Spark,
+including what is required before submitting a code change.</p>
+
+<p>Contributing to Spark doesn&#8217;t just mean writing code. Helping new users on the mailing list,
+testing releases, and improving documentation are also welcome. In fact, proposing significant
+code changes usually requires first gaining experience and credibility within the community by
+helping in other ways. This is also a guide to becoming an effective contributor.</p>
+
+<p>So, this guide presents contributions roughly in the order in which they should be considered
+by new contributors who intend to get involved long-term. Build some track record of helping others,
+rather than just opening pull requests.</p>
+
+<h2>Contributing by Helping Other Users</h2>
+
+<p>A great way to contribute to Spark is to help answer user questions on the <code>user@spark.apache.org</code>
+mailing list or on StackOverflow. There are always many new Spark users; taking a few minutes to
+help answer a question is a very valuable community service.</p>
+
+<p>Contributors should subscribe to this list and follow it in order to keep up to date on what&#8217;s
+happening in Spark. Answering questions is an excellent and visible way to help the community,
+which also demonstrates your expertise.</p>
+
+<p>See the <a href="/community.html#mailing-lists">Mailing Lists guide</a> for guidelines
+about how to effectively participate in discussions on the mailing list, as well as forums
+like StackOverflow.</p>
+
+<h2>Contributing by Testing Releases</h2>
+
+<p>Spark&#8217;s release process is community-oriented, and members of the community can vote on new
+releases on the <code>dev@spark.apache.org</code> mailing list. Spark users are invited to subscribe to
+this list to receive announcements, test their workloads on newer releases, and provide
+feedback on any performance or correctness issues they find.</p>
+
+<h2>Contributing by Reviewing Changes</h2>
+
+<p>Changes to Spark source code are proposed, reviewed and committed via
+<a href="http://github.com/apache/spark/pulls">Github pull requests</a> (described later).
+Anyone can view and comment on active changes there.
+Reviewing others&#8217; changes is a good way to learn how the change process works and gain exposure
+to activity in various parts of the code. You can help by reviewing the changes and asking
+questions or pointing out issues &#8211; as simple as typos or small issues of style.
+See also <a href="https://spark-prs.appspot.com/">spark-prs.appspot.com</a> for a convenient way to view and filter open PRs.</p>
+
+<h2>Contributing Documentation Changes</h2>
+
+<p>To propose a change to <em>release</em> documentation (that is, docs that appear under
+<a href="https://spark.apache.org/docs/">https://spark.apache.org/docs/</a>),
+edit the Markdown source files in Spark&#8217;s
+<a href="https://github.com/apache/spark/tree/master/docs"><code>docs/</code></a> directory,
+whose <code>README</code> file shows how to build the documentation locally to test your changes.
+The process to propose a doc change is otherwise the same as the process for proposing code
+changes below.</p>
+
+<p>To propose a change to the rest of the documentation (that is, docs that do <em>not</em> appear under
+<a href="https://spark.apache.org/docs/">https://spark.apache.org/docs/</a>), similarly, edit the Markdown in the
+<a href="https://github.com/apache/spark-website">spark-website repository</a> and open a pull request.</p>
+
+<h2>Contributing User Libraries to Spark</h2>
+
+<p>Just as Java and Scala applications can access a huge selection of libraries and utilities,
+none of which are part of Java or Scala themselves, Spark aims to support a rich ecosystem of
+libraries. Many new useful utilities or features belong outside of Spark rather than in the core.
+For example: language support probably has to be a part of core Spark, but useful machine
+learning algorithms can happily exist outside of MLlib.</p>
+
+<p>To that end, large and independent new functionality is often rejected for inclusion in Spark
+itself, but can and should be hosted as a separate project and repository, and included in
+the <a href="http://spark-packages.org/">spark-packages.org</a> collection.</p>
+
+<h2>Contributing Bug Reports</h2>
+
+<p>Ideally, bug reports are accompanied by a proposed code change to fix the bug. This isn&#8217;t
+always possible, as those who discover a bug may not have the experience to fix it. A bug
+may be reported by creating a JIRA but without creating a pull request (see below).</p>
+
+<p>However, bug reports are only useful if they include enough information to understand, isolate
+and ideally reproduce the bug. Simply encountering an error does not mean a bug should be
+reported; as described below, first search JIRA, and search and inquire on the Spark user / dev
+mailing lists. Unreproducible bugs, or simple error reports, may be closed.</p>
+
+<p>It is possible to propose new features as well, but these are generally not helpful unless
+accompanied by detail, such as a design document and/or a code change. Large new contributions
+should first consider <a href="http://spark-packages.org/">spark-packages.org</a> (see above),
+or be discussed on the mailing list.
+Feature requests may be rejected, or closed after a long period of inactivity.</p>
+
+<h2>Contributing to JIRA Maintenance</h2>
+
+<p>Given the sheer volume of issues raised in the Apache Spark JIRA, inevitably some issues are
+duplicates, become obsolete, are eventually fixed some other way, can&#8217;t be reproduced, or could
+benefit from more detail, and so on. It&#8217;s useful to help identify these issues and resolve them,
+either by advancing the discussion or even resolving the JIRA. Most contributors are able to
+directly resolve JIRAs. Use your judgment in deciding whether you are quite confident that the issue
+should be resolved; that said, resolutions can easily be undone. If in doubt, just leave a comment
+on the JIRA.</p>
+
+<p>When resolving JIRAs, observe a few useful conventions:</p>
+
+<ul>
+ <li>Resolve as <strong>Fixed</strong> if there&#8217;s a change you can point to that resolved the issue
+ <ul>
+ <li>Set Fix Version(s), if and only if the resolution is Fixed</li>
+ <li>Set Assignee to the person who most contributed to the resolution, which is usually the person
+who opened the PR that resolved the issue.</li>
+ <li>In case several people contributed, prefer to assign to the more &#8216;junior&#8217;, non-committer contributor</li>
+ </ul>
+ </li>
+ <li>For issues that can&#8217;t be reproduced against master as reported, resolve as <strong>Cannot Reproduce</strong>
+ <ul>
+      <li>Fixed is reasonable too, if it&#8217;s clear which previous pull request resolved it. Link to it.</li>
+ </ul>
+ </li>
+  <li>If the issue is the same as or a subset of another issue, resolve it as <strong>Duplicate</strong>
+ <ul>
+ <li>Make sure to link to the JIRA it duplicates</li>
+ <li>Prefer to resolve the issue that has less activity or discussion as the duplicate</li>
+ </ul>
+ </li>
+ <li>If the issue seems clearly obsolete and applies to issues or components that have changed
+radically since it was opened, resolve as <strong>Not a Problem</strong></li>
+  <li>If the issue doesn&#8217;t make sense (for example, it is not actionable, or is a non-Spark issue), resolve
+as <strong>Invalid</strong></li>
+  <li>If it&#8217;s a coherent issue, but there is a clear indication that there is no support or interest
+in acting on it, then resolve as <strong>Won&#8217;t Fix</strong></li>
+ <li>Umbrellas are frequently marked <strong>Done</strong> if they are just container issues that don&#8217;t correspond
+to an actionable change of their own</li>
+</ul>
+
+<h2>Preparing to Contribute Code Changes</h2>
+
+<h3>Choosing What to Contribute</h3>
+
+<p>Spark is an exceptionally busy project, with a new JIRA or pull request every few hours on average.
+Review can take hours or days of committer time. Everyone benefits if contributors focus on
+changes that are useful, clear, easy to evaluate, and already pass basic checks.</p>
+
+<p>Sometimes, a contributor will already have a particular new change or bug in mind. If seeking
+ideas, consult the list of starter tasks in JIRA, or ask the <code>user@spark.apache.org</code> mailing list.</p>
+
+<p>Before proceeding, contributors should evaluate if the proposed change is likely to be relevant,
+new and actionable:</p>
+
+<ul>
+  <li>Is it clear that code must change? Proposing a JIRA and pull request is appropriate only when a
+clear problem or change has been identified. If you are simply having trouble using Spark, use the
+mailing lists first, rather than filing a JIRA or proposing a change. When in doubt, email
+<code>user@spark.apache.org</code> first about the possible change</li>
+ <li>Search the <code>user@spark.apache.org</code> and <code>dev@spark.apache.org</code> mailing list
+<a href="/community.html#mailing-lists">archives</a> for
+related discussions. Use <a href="http://search-hadoop.com/?q=&amp;fc_project=Spark">search-hadoop.com</a>
+or similar search tools.
+Often, the problem has been discussed before, either with a resolution that doesn&#8217;t require a code
+change, or with a record of what kinds of changes will not be accepted as a resolution.</li>
+ <li>Search JIRA for existing issues:
+<a href="https://issues.apache.org/jira/browse/SPARK">https://issues.apache.org/jira/browse/SPARK</a></li>
+  <li>Type <code>spark [search terms]</code> in the search box at the top right. If a logically similar issue already
+exists, then contribute to the discussion on the existing JIRA and pull request first, instead of
+creating a new one.</li>
+ <li>Is the scope of the change matched to the contributor&#8217;s level of experience? Anyone is qualified
+to suggest a typo fix, but refactoring core scheduling logic requires much more understanding of
+Spark. Some changes require building up experience first (see above).</li>
+</ul>
+
+<h3>MLlib-specific Contribution Guidelines</h3>
+
+<p>While a rich set of algorithms is an important goal for MLlib, scaling the project requires
+that maintainability, consistency, and code quality come first. New algorithms should:</p>
+
+<ul>
+ <li>Be widely known</li>
+ <li>Be used and accepted (academic citations and concrete use cases can help justify this)</li>
+ <li>Be highly scalable</li>
+ <li>Be well documented</li>
+  <li>Have APIs consistent with other algorithms in MLlib that accomplish the same thing</li>
+  <li>Come with a reasonable expectation of developer support</li>
+  <li>Have a <code>@Since</code> annotation on public classes, methods, and variables</li>
+</ul>
+
+<h3>Code Review Criteria</h3>
+
+<p>Before considering how to contribute code, it&#8217;s useful to understand how code is reviewed,
+and why changes may be rejected. Simply put, changes that have many or large positives, and
+few negative effects or risks, are much more likely to be merged, and merged quickly.
+Risky and less valuable changes are very unlikely to be merged, and may be rejected outright
+rather than receive iterations of review.</p>
+
+<h4>Positives</h4>
+
+<ul>
+ <li>Fixes the root cause of a bug in existing functionality</li>
+ <li>Adds functionality or fixes a problem needed by a large number of users</li>
+ <li>Simple, targeted</li>
+ <li>Maintains or improves consistency across Python, Java, Scala</li>
+ <li>Easily tested; has tests</li>
+ <li>Reduces complexity and lines of code</li>
+ <li>Change has already been discussed and is known to committers</li>
+</ul>
+
+<h4>Negatives, Risks</h4>
+
+<ul>
+ <li>Band-aids a symptom of a bug only</li>
+ <li>Introduces complex new functionality, especially an API that needs to be supported</li>
+ <li>Adds complexity that only helps a niche use case</li>
+ <li>Adds user-space functionality that does not need to be maintained in Spark, but could be hosted
+externally and indexed by <a href="http://spark-packages.org/">spark-packages.org</a></li>
+ <li>Changes a public API or semantics (rarely allowed)</li>
+ <li>Adds large dependencies</li>
+ <li>Changes versions of existing dependencies</li>
+ <li>Adds a large amount of code</li>
+ <li>Makes lots of modifications in one &#8220;big bang&#8221; change</li>
+</ul>
+
+<h2>Contributing Code Changes</h2>
+
+<p>Please review the preceding section before proposing a code change. This section documents how to do so.</p>
+
+<p><strong>When you contribute code, you affirm that the contribution is your original work and that you
+license the work to the project under the project&#8217;s open source license. Whether or not you state
+this explicitly, by submitting any copyrighted material via pull request, email, or other means
+you agree to license the material under the project&#8217;s open source license and warrant that you
+have the legal authority to do so.</strong></p>
+
+<h3>JIRA</h3>
+
+<p>Generally, Spark uses JIRA to track logical issues, including bugs and improvements, and uses
+Github pull requests to manage the review and merge of specific code changes. That is, JIRAs are
+used to describe <em>what</em> should be fixed or changed, and high-level approaches, and pull requests
+describe <em>how</em> to implement that change in the project&#8217;s source code. For example, major design
+decisions are discussed in JIRA.</p>
+
+<ol>
+ <li>Find the existing Spark JIRA that the change pertains to.
+ <ol>
+ <li>Do not create a new JIRA if creating a change to address an existing issue in JIRA; add to
+ the existing discussion and work instead</li>
+ <li>Look for existing pull requests that are linked from the JIRA, to understand if someone is
+ already working on the JIRA</li>
+ </ol>
+ </li>
+  <li>If the change is new, then it usually needs a new JIRA. However, trivial changes, where
+<em>what</em> should change is virtually the same as <em>how</em> it should change, do not require a JIRA.
+Example: <code>Fix typos in Foo scaladoc</code></li>
+ <li>If required, create a new JIRA:
+ <ol>
+ <li>Provide a descriptive Title. &#8220;Update web UI&#8221; or &#8220;Problem in scheduler&#8221; is not sufficient.
+ &#8220;Kafka Streaming support fails to handle empty queue in YARN cluster mode&#8221; is good.</li>
+ <li>Write a detailed Description. For bug reports, this should ideally include a short
+ reproduction of the problem. For new features, it may include a design document.</li>
+ <li>Set required fields:
+ <ol>
+ <li><strong>Issue Type</strong>. Generally, Bug, Improvement and New Feature are the only types used in Spark.</li>
+ <li><strong>Priority</strong>. Set to Major or below; higher priorities are generally reserved for
+ committers to set. JIRA tends to unfortunately conflate &#8220;size&#8221; and &#8220;importance&#8221; in its
+ Priority field values. Their meaning is roughly:
+ <ol>
+ <li>Blocker: pointless to release without this change as the release would be unusable
+ to a large minority of users</li>
+ <li>Critical: a large minority of users are missing important functionality without
+ this, and/or a workaround is difficult</li>
+ <li>Major: a small minority of users are missing important functionality without this,
+ and there is a workaround</li>
+ <li>Minor: a niche use case is missing some support, but it does not affect usage or
+ is easily worked around</li>
+ <li>Trivial: a nice-to-have change but unlikely to be any problem in practice otherwise</li>
+ </ol>
+ </li>
+ <li><strong>Component</strong></li>
+ <li><strong>Affects Version</strong>. For Bugs, assign at least one version that is known to exhibit the
+ problem or need the change</li>
+ </ol>
+ </li>
+ <li>Do not set the following fields:
+ <ol>
+ <li><strong>Fix Version</strong>. This is assigned by committers only when resolved.</li>
+ <li><strong>Target Version</strong>. This is assigned by committers to indicate a PR has been accepted for
+ possible fix by the target version.</li>
+ </ol>
+ </li>
+ <li>Do not include a patch file; pull requests are used to propose the actual change.</li>
+ </ol>
+ </li>
+  <li>If the change is large, consider inviting discussion on the issue at
+<code>dev@spark.apache.org</code> before proceeding to implement the change.</li>
+</ol>
+
+<h3>Pull Request</h3>
+
+<ol>
+ <li><a href="https://help.github.com/articles/fork-a-repo/">Fork</a> the Github repository at
+<a href="http://github.com/apache/spark">http://github.com/apache/spark</a> if you haven&#8217;t already</li>
+ <li>Clone your fork, create a new branch, push commits to the branch.</li>
+ <li>Consider whether documentation or tests need to be added or updated as part of the change,
+and add them as needed.</li>
+ <li>Run all tests with <code>./dev/run-tests</code> to verify that the code still compiles, passes tests, and
+passes style checks. If style checks fail, review the Code Style Guide below.</li>
+ <li><a href="https://help.github.com/articles/using-pull-requests/">Open a pull request</a> against
+the <code>master</code> branch of <code>apache/spark</code>. (Only in special cases would the PR be opened against other branches.)
+ <ol>
+      <li>The PR title should be of the form <code>[SPARK-xxxx][COMPONENT] Title</code>, where <code>SPARK-xxxx</code> is
+      the relevant JIRA number, <code>COMPONENT</code> is one of the PR categories shown at
+      <a href="https://spark-prs.appspot.com/">spark-prs.appspot.com</a> and
+      <code>Title</code> may be the JIRA&#8217;s title or a more specific title describing the PR itself.</li>
+ <li>If the pull request is still a work in progress, and so is not ready to be merged,
+ but needs to be pushed to Github to facilitate review, then add <code>[WIP]</code> after the component.</li>
+ <li>Consider identifying committers or other contributors who have worked on the code being
+ changed. Find the file(s) in Github and click &#8220;Blame&#8221; to see a line-by-line annotation of
+ who changed the code last. You can add <code>@username</code> in the PR description to ping them
+ immediately.</li>
+ <li>Please state that the contribution is your original work and that you license the work
+ to the project under the project&#8217;s open source license.</li>
+ </ol>
+ </li>
+ <li>The related JIRA, if any, will be marked as &#8220;In Progress&#8221; and your pull request will
+automatically be linked to it. There is no need to be the Assignee of the JIRA to work on it,
+though you are welcome to comment that you have begun work.</li>
+ <li>The Jenkins automatic pull request builder will test your changes
+ <ol>
+ <li>If it is your first contribution, Jenkins will wait for confirmation before building
+ your code and post &#8220;Can one of the admins verify this patch?&#8221;</li>
+ <li>A committer can authorize testing with a comment like &#8220;ok to test&#8221;</li>
+ <li>A committer can automatically allow future pull requests from a contributor to be
+ tested with a comment like &#8220;Jenkins, add to whitelist&#8221;</li>
+ </ol>
+ </li>
+ <li>After about 2 hours, Jenkins will post the results of the test to the pull request, along
+with a link to the full results on Jenkins.</li>
+ <li>Watch for the results, and investigate and fix failures promptly
+ <ol>
+ <li>Fixes can simply be pushed to the same branch from which you opened your pull request</li>
+ <li>Jenkins will automatically re-test when new commits are pushed</li>
+ <li>If the tests failed for reasons unrelated to the change (e.g. Jenkins outage), then a
+ committer can request a re-test with &#8220;Jenkins, retest this please&#8221;.
+ Ask if you need a test restarted.</li>
+ </ol>
+ </li>
+</ol>
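+<p>As an illustrative sketch, the local side of the workflow above might look like the following
+(the Github username and branch name here are made-up placeholders):</p>
+
+<pre><code class="language-bash"># Clone your fork and create a topic branch for the change
+git clone https://github.com/yourusername/spark.git
+cd spark
+git checkout -b SPARK-xxxx-fix-foo
+
+# ... edit code, add tests and docs as needed ...
+
+# Verify that the code compiles and passes tests and style checks
+./dev/run-tests
+
+# Push the branch to your fork, then open a pull request from it
+git push origin SPARK-xxxx-fix-foo
+</code></pre>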
+
+<h3>The Review Process</h3>
+
+<ul>
+ <li>Other reviewers, including committers, may comment on the changes and suggest modifications.
+Changes can be added by simply pushing more commits to the same branch.</li>
+ <li>Lively, polite, rapid technical debate is encouraged from everyone in the community. The outcome
+may be a rejection of the entire change.</li>
+ <li>Reviewers can indicate that a change looks suitable for merging with a comment such as: &#8220;I think
+this patch looks good&#8221;. Spark uses the LGTM convention for indicating the strongest level of
+technical sign-off on a patch: simply comment with the word &#8220;LGTM&#8221;. It specifically means: &#8220;I&#8217;ve
+looked at this thoroughly and take as much ownership as if I wrote the patch myself&#8221;. If you
+comment LGTM you will be expected to help with bugs or follow-up issues on the patch. Consistent,
+judicious use of LGTMs is a great way to gain credibility as a reviewer with the broader community.</li>
+ <li>Sometimes, other changes will be merged which conflict with your pull request&#8217;s changes. The
+PR can&#8217;t be merged until the conflict is resolved. This can be resolved with <code>git fetch origin</code>
+followed by <code>git merge origin/master</code> and resolving the conflicts by hand, then pushing the result
+to your branch.</li>
+  <li>Try to be responsive to the discussion rather than letting days pass between replies</li>
+</ul>
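+<p>Concretely, the conflict-resolution steps above might look like this (assuming <code>origin</code>
+points at a remote whose <code>master</code> branch is up to date; the file and branch names are
+placeholders):</p>
+
+<pre><code class="language-bash"># Bring in the latest upstream changes
+git fetch origin
+git merge origin/master
+
+# Resolve the reported conflicts by hand, then mark them resolved
+git add path/to/conflicted/file
+git commit
+
+# Push the result back to the branch backing your pull request
+git push origin your-branch
+</code></pre>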
+
+<h3>Closing Your Pull Request / JIRA</h3>
+
+<ul>
+ <li>If a change is accepted, it will be merged and the pull request will automatically be closed,
+along with the associated JIRA if any
+ <ul>
+    <li>Note that in the rare case you are asked to open a pull request against a branch besides
+<code>master</code>, you will have to close the pull request manually</li>
+ <li>The JIRA will be Assigned to the primary contributor to the change as a way of giving credit.
+If the JIRA isn&#8217;t closed and/or Assigned promptly, comment on the JIRA.</li>
+ </ul>
+ </li>
+ <li>If your pull request is ultimately rejected, please close it promptly
+ <ul>
+ <li>&#8230; because committers can&#8217;t close PRs directly</li>
+ <li>Pull requests will be automatically closed by an automated process at Apache after about a
+week if a committer has made a comment like &#8220;mind closing this PR?&#8221; This means that the
+committer is specifically requesting that it be closed.</li>
+ </ul>
+ </li>
+ <li>If a pull request has gotten little or no attention, consider improving the description or
+the change itself and ping likely reviewers again after a few days. Consider proposing a
+change that&#8217;s easier to include, like a smaller and/or less invasive change.</li>
+  <li>If it has been reviewed but not taken up after weeks, even after soliciting review from the
+most relevant reviewers, or has met with neutral reactions, the outcome may be considered a
+&#8220;soft no&#8221;. It is helpful to withdraw and close the PR in this case.</li>
+  <li>If a pull request is closed because it is deemed not the right approach to resolve a JIRA,
+then leave the JIRA open. However, if the review makes it clear that the issue identified in
+the JIRA is not going to be resolved by any pull request (not a problem, won&#8217;t fix), then also
+resolve the JIRA.</li>
+</ul>
+
+<p><a name="code-style-guide"></a></p>
+<h2>Code Style Guide</h2>
+
+<p>Please follow the style of the existing codebase.</p>
+
+<ul>
+ <li>For Python code, Apache Spark follows
+<a href="http://legacy.python.org/dev/peps/pep-0008/">PEP 8</a> with one exception:
+lines can be up to 100 characters in length, not 79.</li>
+ <li>For Java code, Apache Spark follows
+<a href="http://www.oracle.com/technetwork/java/codeconvtoc-136057.html">Oracle&#8217;s Java code conventions</a>.
+Many Scala guidelines below also apply to Java.</li>
+  <li>For Scala code, Apache Spark follows the official
+<a href="http://docs.scala-lang.org/style/">Scala style guide</a>, with the changes described below.</li>
+</ul>
+
+<h3>Line Length</h3>
+
+<p>Limit lines to 100 characters. The only exceptions are import statements (although even for
+those, try to keep them under 100 chars).</p>
+
+<h3>Indentation</h3>
+
+<p>Use 2-space indentation in general. For function declarations, use 4-space indentation for the
+parameters when they don&#8217;t fit on a single line. For example:</p>
+
+<pre><code class="language-scala">// Correct:
+if (true) {
+  println("Wow!")
+}
+
+// Wrong:
+if (true) {
+    println("Wow!")
+}
+
+// Correct:
+def newAPIHadoopFile[K, V, F &lt;: NewInputFormat[K, V]](
+    path: String,
+    fClass: Class[F],
+    kClass: Class[K],
+    vClass: Class[V],
+    conf: Configuration = hadoopConfiguration): RDD[(K, V)] = {
+  // function body
+}
+
+// Wrong:
+def newAPIHadoopFile[K, V, F &lt;: NewInputFormat[K, V]](
+  path: String,
+  fClass: Class[F],
+  kClass: Class[K],
+  vClass: Class[V],
+  conf: Configuration = hadoopConfiguration): RDD[(K, V)] = {
+  // function body
+}
+</code></pre>
+
+<h3>Code documentation style</h3>
+
+<p>For Scala doc / Java doc comments before classes, objects and methods, use the Java doc style
+instead of the Scala doc style.</p>
+
+<pre><code class="language-scala">/** This is a correct one-liner, short description. */
+
+/**
+ * This is a correct multi-line JavaDoc comment. And
+ * this is my second line, and if I keep typing, this would be
+ * my third line.
+ */
+
+/** In Spark, we don't use the ScalaDoc style so this
+ * is not correct.
+ */
+</code></pre>
+
+<p>For inline comments within the code, use <code>//</code> and not <code>/* .. */</code>.</p>
+
+<pre><code class="language-scala">// This is a short, single line comment
+
+// This is a multi line comment.
+// Bla bla bla
+
+/*
+ * Do not use this style for multi line comments. This
+ * style of comment interferes with commenting out
+ * blocks of code, and also makes code comments harder
+ * to distinguish from Scala doc / Java doc comments.
+ */
+
+/**
+ * Do not use scala doc style for inline comments.
+ */
+</code></pre>
+
+<h3>Imports</h3>
+
+<p>Always import packages using absolute paths (e.g. <code>scala.util.Random</code>) instead of relative ones
+(e.g. <code>util.Random</code>). In addition, sort imports in the following order
+(use alphabetical order within each group):</p>
+<ul>
+ <li><code>java.*</code> and <code>javax.*</code></li>
+ <li><code>scala.*</code></li>
+ <li>Third-party libraries (<code>org.*</code>, <code>com.*</code>, etc)</li>
+ <li>Project classes (<code>org.apache.spark.*</code>)</li>
+</ul>
+
+<p>The <a href="https://plugins.jetbrains.com/plugin/7350">IntelliJ import organizer plugin</a>
+can organize imports for you. Use this configuration for the plugin (configured under
+Preferences / Editor / Code Style / Scala Imports Organizer):</p>
+
+<pre><code class="language-scala">import java.*
+import javax.*
+
+import scala.*
+
+import *
+
+import org.apache.spark.*
+</code></pre>
+
+<h3>Infix Methods</h3>
+
+<p>Don&#8217;t use infix notation for methods that aren&#8217;t operators. For example, instead of
+<code>list map func</code>, use <code>list.map(func)</code>, or instead of <code>string contains "foo"</code>, use
+<code>string.contains("foo")</code>. This improves familiarity for developers coming from other languages.</p>
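+<p>For example (a small, self-contained illustration of this rule):</p>
+
+<pre><code class="language-scala">// Correct: dot notation with parentheses for ordinary methods
+val doubled = List(1, 2, 3).map(_ * 2)
+val hasFoo = "foobar".contains("foo")
+
+// Wrong: infix notation for methods that aren't operators
+// val doubled = List(1, 2, 3) map (_ * 2)
+// val hasFoo = "foobar" contains "foo"
+</code></pre>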
+
+<h3>Curly Braces</h3>
+
+<p>Put curly braces even around one-line <code>if</code>, <code>else</code> or loop statements. The only exception is if
+you are using <code>if/else</code> as a one-line ternary operator.</p>
+
+<pre><code class="language-scala">// Correct:
+if (true) {
+ println("Wow!")
+}
+
+// Correct:
+if (true) statement1 else statement2
+
+// Wrong:
+if (true)
+ println("Wow!")
+</code></pre>
+
+<h3>Return Types</h3>
+
+<p>Always specify the return types of methods where possible. If a method returns no value, specify
+<code>Unit</code> as the return type, in accordance with the Scala style guide. Type annotations for variables are not
+required unless the definition involves huge code blocks with potentially ambiguous return values.</p>
+
+<pre><code class="language-scala">// Correct:
+def getSize(partitionId: String): Long = { ... }
+def compute(partitionId: String): Unit = { ... }
+
+// Wrong:
+def getSize(partitionId: String) = { ... }
+def compute(partitionId: String) = { ... }
+def compute(partitionId: String) { ... }
+
+// Correct:
+val name = "black-sheep"
+val path: Option[String] =
+ try {
+ Option(names)
+ .map { ns =&gt; ns.split(",") }
+ .flatMap { ns =&gt; ns.filter(_.nonEmpty).headOption }
+ .map { n =&gt; "prefix" + n + "suffix" }
+ .flatMap { n =&gt; if (n.hashCode % 3 == 0) Some(n + n) else None }
+ } catch {
+ case e: SomeSpecialException =&gt;
+ computePath(names)
+ }
+</code></pre>
+
+<h3>If in Doubt</h3>
+
+<p>If you&#8217;re not sure about the right style for something, try to follow the style of the existing
+codebase. Look at whether there are other examples in the code that use your feature. Feel free
+to ask on the <code>dev@spark.apache.org</code> list as well.</p>
+
+ </div>
+</div>
+
+
+
+<footer class="small">
+ <hr>
+ Apache Spark, Spark, Apache, and the Spark logo are <a href="/trademarks.html">trademarks</a> of
+ <a href="http://www.apache.org">The Apache Software Foundation</a>.
+</footer>
+
+</div>
+
+</body>
+</html>