summaryrefslogblamecommitdiff
path: root/site/index.html
blob: 8f245e678110b5f4bd54b43330d03ef93f78ebab (plain) (tree)
1
2
3
4
5
6
7
8
9
10
11
12
13
14













                                            
                                                  

                                                       



                                                    



                                                                             









                                                                                                                     
 









                                                              
           

























                                                                                        

                                                                                                                                                                                           
      


                                                                                                 




                                             

          












































                                                                                               
                                                                    





                                                                                                                                                                                                                         
                                                                                                                                              

                                                                              
                                                                                                        

                                            





                                                                                                                                                                                                                                                                                                                                                                                                                        

                                      




                                                                                                                                                                                                                                                                            













                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          

                                                                                                                                                    

                                                                                                                                                                                 

                                                                                                                                       
                                                                                                                                                                           
    
























                                                                                                                                                                      





                                                                                                                                              









                           
<!DOCTYPE html>
<!-- IE 6, 7 and 8 each receive an html tag with an identifying id through the
     conditional comments below; every other browser receives the plain tag. -->
<!--[if IE 6]>
<html id="ie6" dir="ltr" lang="en-US">
<![endif]-->
<!--[if IE 7]>
<html id="ie7" dir="ltr" lang="en-US">
<![endif]-->
<!--[if IE 8]>
<html id="ie8" dir="ltr" lang="en-US">
<![endif]-->
<!--[if !(IE 6) & !(IE 7) & !(IE 8)]><!-->
<html dir="ltr" lang="en-US">
<!--<![endif]-->
<head>
  <link rel="shortcut icon" href="/favicon.ico" />
  <meta charset="UTF-8" />
  <meta name="viewport" content="width=device-width" />
  <title>
     Apache Spark - Lightning-Fast Cluster Computing
    
  </title>

  <link rel="stylesheet" type="text/css" media="all" href="/css/style.css" />
  <link rel="stylesheet" href="/css/pygments-default.css">

  <script type="text/javascript">
  // Google Analytics initialization.
  // Reuse _gaq if it already exists, otherwise start with an empty array, then
  // push the account id and a page-view command onto it.
  var _gaq = _gaq || [];
  _gaq.push(['_setAccount', 'UA-32518208-2']);
  _gaq.push(['_trackPageview']);
  (function() {
    // Build a script element for ga.js and flag it as async.
    var ga = document.createElement('script');
    ga.type = 'text/javascript';
    ga.async = true;
    // Pick the host prefix that matches the protocol of the current page.
    ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
    // Insert the new element ahead of the first script element in the document.
    var s = document.getElementsByTagName('script')[0];
    s.parentNode.insertBefore(ga, s);
  })();

  /**
   * Pushes a '_trackEvent' command for an outbound link, then follows the link
   * after a slight delay to allow the asynchronous report to be sent.
   *
   * @param link     object whose href is the destination to navigate to
   * @param category first value after '_trackEvent' in the pushed command
   * @param action   second value after '_trackEvent' in the pushed command
   */
  function trackOutboundLink(link, category, action) {
    try {
      _gaq.push(['_trackEvent', category, action]);
    } catch (err) {
      // Errors from the push are discarded; the navigation below still runs.
    }

    // Navigate 100 ms later; link.href is read when the timer fires.
    setTimeout(function() {
      document.location.href = link.href;
    }, 100);
  }
  </script>

  <link rel='canonical' href='/index.html' />

  <style type="text/css">
    /* Keep the site title and description in the markup but clip them to an
       empty rectangle (all four edges at 1px). The two clip declarations give
       the same rectangle in two syntaxes; the later one wins where it parses. */
    #site-title,
    #site-description {
      position: absolute !important;
      clip: rect(1px 1px 1px 1px); /* IE6, IE7 */
      clip: rect(1px, 1px, 1px, 1px);
    }
  </style>
  <style type="text/css" id="custom-background-css">
    /* NOTE(review): the body element in this file has class "page" only, so this
       selector does not match here; confirm whether the class is added elsewhere. */
    body.custom-background { background-color: #f1f1f1; }
  </style>
</head>

<!--body class="page two-column right-sidebar"-->
<body class="page">
<div id="page" class="hfeed">

  <header id="branding" role="banner">
  <hgroup>
    <h1 id="site-title"><span><a href="/" title="Spark" rel="home">Spark</a></span></h1>
    <h2 id="site-description">Lightning-Fast Cluster Computing</h2>
  </hgroup>

  <a id="main-logo" href="/">
    <img style="height:175px; width:auto;" src="/images/spark-project-header1-cropped.png" alt="Spark: Lightning-Fast Cluster Computing" title="Spark: Lightning-Fast Cluster Computing" />
  </a>
  <!-- Summit promo: a logo linking to the summit site plus a text link to the 2013 page.
       The logo is the only content of its link, so its alt text names the destination. -->
  <div class="widget-summit">
    <a href="http://spark-summit.org"><img src="/images/Summit-Logo-FINALtr-150x150px.png" alt="Spark Summit" /></a>
    <div class="text">
      <a href="http://spark-summit.org/2013">
        <strong>Videos and Slides<br/>
        Available Now!</strong>
      </a>
    </div>
  </div>

  <nav id="access" role="navigation">
    <h3 class="assistive-text">Main menu</h3>
    <div class="menu-main-menu-container">
      <ul id="menu-main-menu" class="menu">
        
        <li class="menu-item menu-item-type-post_type menu-item-object-page current-menu-item">
          <a href="/index.html">Home</a>
        </li>
        
        <li class="menu-item menu-item-type-post_type menu-item-object-page ">
          <a href="/downloads.html">Downloads</a>
        </li>
        
        <li class="menu-item menu-item-type-post_type menu-item-object-page ">
          <a href="/documentation.html">Documentation</a>
        </li>
        
        <li class="menu-item menu-item-type-post_type menu-item-object-page ">
          <a href="/examples.html">Examples</a>
        </li>
        
        <li class="menu-item menu-item-type-post_type menu-item-object-page ">
          <a href="/mailing-lists.html">Mailing Lists</a>
        </li>
        
        <li class="menu-item menu-item-type-post_type menu-item-object-page ">
          <a href="/research.html">Research</a>
        </li>
        
        <li class="menu-item menu-item-type-post_type menu-item-object-page ">
          <a href="/faq.html">FAQ</a>
        </li>
        
      </ul></div>
  </nav><!-- #access -->
</header><!-- #branding -->



  <div id="main">
    <div id="primary">
      <div id="content" role="main">
        
          <article class="page type-page status-publish hentry">
            <h2 id="what-is-apache-spark">What is Apache Spark?</h2>

<p>Apache Spark is an open source cluster computing system that aims to make data analytics <em>fast</em> — both fast to run and fast to write.</p>

<p>To run programs faster, Spark offers a general execution model that can optimize arbitrary operator graphs, and supports in-memory computing, which lets it query data faster than disk-based engines like Hadoop.</p>

<p>To make programming faster, Spark provides clean, concise APIs in
<a href="http://www.scala-lang.org" onclick="javascript:_gaq.push(['_trackEvent','outbound-article','http://www.scala-lang.org']);">Scala</a>,
<a href="/docs/latest/quick-start.html#a-standalone-app-in-java">Java</a> and
<a href="/docs/latest/quick-start.html#a-standalone-app-in-python">Python</a>.
You can also use Spark interactively from the Scala and Python shells to rapidly query big datasets.</p>

<h2 id="what-can-it-do">What can it do?</h2>

<p>Spark was initially developed for two  applications where placing data in memory helps: <em>iterative</em> algorithms, which are common in machine learning, and <em>interactive</em> data mining. In both cases, Spark can run up to <b>100x</b> faster than Hadoop MapReduce. However, you can use Spark for general data processing too. Check out our <a href="/examples.html">example jobs</a>.</p>

<p>Spark is also the engine behind <a href="http://shark.cs.berkeley.edu" onclick="javascript:_gaq.push(['_trackEvent','outbound-article','http://shark.cs.berkeley.edu']);">Shark</a>, a fully <a href="http://hive.apache.org" onclick="javascript:_gaq.push(['_trackEvent','outbound-article','http://hive.apache.org']);">Apache Hive</a>-compatible data warehousing system that can run 100x faster than Hive.</p>

<p>While Spark is a new engine, it can access any data source supported by Hadoop, making it easy to run over existing data.</p>

<h2 id="who-uses-it">Who uses it?</h2>
<p>Spark was initially created in the <a href="https://amplab.cs.berkeley.edu" onclick="javascript:_gaq.push(['_trackEvent','outbound-article','http://amplab.cs.berkeley.edu']);">UC Berkeley AMPLab</a>, but is now being used and developed at a wide array of companies.
See our <a href="https://cwiki.apache.org/confluence/display/SPARK/Powered+By+Spark">powered by page</a> for a list of users,
and our <a href="https://cwiki.apache.org/confluence/display/SPARK/Committers">list of committers</a>.
In total, over 25 companies have contributed code to Spark.
Spark is <a href="https://github.com/apache/incubator-spark" onclick="javascript:_gaq.push(['_trackEvent','outbound-article','http://github.com']);">open source</a> under an Apache license, so <a href="/downloads.html">download</a> it to try it out.</p>

<h2 id="apache-incubator-notice">Apache Incubator notice</h2>
<p>Apache Spark is an effort undergoing incubation at The Apache Software Foundation (ASF), sponsored by the Apache Incubator. Incubation is required of all newly accepted projects until a further review indicates that the infrastructure, communications, and decision making process have stabilized in a manner consistent with other successful ASF projects. While incubation status is not necessarily a reflection of the completeness or stability of the code, it does indicate that the project has yet to be fully endorsed by the ASF.</p>


          </article><!-- #post -->
        
      </div><!-- #content -->
      
      <div id="secondary" class="widget-area" role="complementary">
        
<h3 class="widget-title">Latest News</h3>
<div class="latestnewswidget">
    
      <div><a href="/news/spark-summit-2013-is-a-wrap.html">Spark Summit 2013 is a Wrap</a> <span class="post-info">(December 15, 2013)</span></div>
    
      <div><a href="/news/announcing-the-first-spark-summit.html">Announcing the first Spark Summit: December 2, 2013</a> <span class="post-info">(October 08, 2013)</span></div>
    
      <div><a href="/news/spark-0-8-0-released.html">Spark 0.8.0 released</a> <span class="post-info">(September 25, 2013)</span></div>
    
      <div><a href="/news/spark-user-survey-and-powered-by-page.html">Spark user survey and "Powered By" page</a> <span class="post-info">(September 05, 2013)</span></div>
    
  </div>

<div style="text-align:right"><a href="/news/index.html">News Archive</a></div>

<p><!-- Not porting the following to Pygments since it becomes a lot less colorful --></p>

<div class="code" style="margin-top: 20px;">
    file = spark.textFile(<span class="string">"hdfs://..."</span>)<br />
    &nbsp;<br />
    file.<span class="sparkop">flatMap</span>(<span class="closure">line =&gt; line.split(" ")</span>)<br />
    &nbsp;&nbsp;&nbsp;&nbsp;.<span class="sparkop">map</span>(<span class="closure">word =&gt; (word, 1)</span>)<br />
    &nbsp;&nbsp;&nbsp;&nbsp;.<span class="sparkop">reduceByKey</span>(<span class="closure">_ + _</span>)
  </div>
<div class="caption">Word Count implemented in Spark</div>

<!-- Performance chart, centered with CSS instead of the obsolete align attribute. -->
<div style="margin-top: 20px; text-align: center;">
    <img src="/images/spark-lr.png" alt="Logistic regression performance in Spark vs Hadoop" />
  </div>
<div class="caption">Logistic regression in Spark vs Hadoop</div>
<!-- Download call-to-action; points at the same /downloads.html page as the main menu. -->
<h2 style="text-align:center"><a href="/downloads.html"><img src="/images/download.png" alt="Download" style="vertical-align: middle" />&nbsp;&nbsp;Download Spark</a></h2>

      </div>
      
      <footer id="colophon" role="contentinfo">
  <div id="site-generator">
    <p style="padding-top: 0; padding-bottom: 15px;">
      Apache Spark is an effort undergoing incubation at The Apache Software Foundation.
      <a href="http://incubator.apache.org/" style="border: none;">
        <img style="vertical-align: middle; border: none;" src="/images/incubator-logo.png" alt="Apache Incubator" title="Apache Incubator" />
      </a>  
    </p>
  </div>
</footer><!-- #colophon -->

    </div><!-- #primary -->
  </div><!-- #main -->
</div><!-- #page -->


</body>
</html>