diff options
Diffstat (limited to 'site/index.html')
-rw-r--r-- | site/index.html | 459 |
1 files changed, 308 insertions, 151 deletions
diff --git a/site/index.html b/site/index.html index d88e47d12..ad2305872 100644 --- a/site/index.html +++ b/site/index.html @@ -1,27 +1,20 @@ <!DOCTYPE html> -<!--[if IE 6]> -<html id="ie6" dir="ltr" lang="en-US"> -<![endif]--> -<!--[if IE 7]> -<html id="ie7" dir="ltr" lang="en-US"> -<![endif]--> -<!--[if IE 8]> -<html id="ie8" dir="ltr" lang="en-US"> -<![endif]--> -<!--[if !(IE 6) | !(IE 7) | !(IE 8) ]><!--> -<html dir="ltr" lang="en-US"> -<!--<![endif]--> +<html lang="en"> <head> - <link rel="shortcut icon" href="/favicon.ico" /> - <meta charset="UTF-8" /> - <meta name="viewport" content="width=device-width" /> + <meta charset="utf-8"> + <meta http-equiv="X-UA-Compatible" content="IE=edge"> + <meta name="viewport" content="width=device-width, initial-scale=1.0"> + <title> Apache Spark - Lightning-Fast Cluster Computing </title> - <link rel="stylesheet" type="text/css" media="all" href="/css/style.css" /> - <link rel="stylesheet" href="/css/pygments-default.css"> + + + <!-- Bootstrap core CSS --> + <link href="/css/cerulean.min.css" rel="stylesheet"> + <link href="/css/custom.css" rel="stylesheet"> <script type="text/javascript"> <!-- Google Analytics initialization --> @@ -46,175 +39,339 @@ } </script> - <link rel='canonical' href='/index.html' /> - - <style type="text/css"> - #site-title, - #site-description { - position: absolute !important; - clip: rect(1px 1px 1px 1px); /* IE6, IE7 */ - clip: rect(1px, 1px, 1px, 1px); - } - </style> - <style type="text/css" id="custom-background-css"> - body.custom-background { background-color: #f1f1f1; } - </style> + <!-- HTML5 shim and Respond.js IE8 support of HTML5 elements and media queries --> + <!--[if lt IE 9]> + <script src="https://oss.maxcdn.com/libs/html5shiv/3.7.0/html5shiv.js"></script> + <script src="https://oss.maxcdn.com/libs/respond.js/1.3.0/respond.min.js"></script> + <![endif]--> </head> -<!--body class="page two-column right-sidebar"--> -<body class="page"> -<div id="page" class="hfeed"> - - <header id="branding" role="banner"> - <hgroup> - <h1 id="site-title"><span><a href="/" title="Spark" rel="home">Spark</a></span></h1> - <h2 id="site-description">Lightning-Fast Cluster Computing</h2> - </hgroup> - - <a id="main-logo" href="/"> - <img style="height:175px; width:auto;" src="/images/spark-project-header1-cropped.png" alt="Spark: Lightning-Fast Cluster Computing" title="Spark: Lightning-Fast Cluster Computing" /> - </a> - <div class="widget-summit"> - <a href="http://spark-summit.org"><img src="/images/Summit-Logo-FINALtr-150x150px.png" /></a> - <div class="text"> - <a href="http://spark-summit.org/2013"> - - <strong>Videos and Slides<br/> - Available Now!</strong> - </a> - </div> +<body> + +<div class="container" style="max-width: 1200px;"> + +<div class="masthead"> + + <p class="lead"> + <a href="/"> + <img src="/images/spark-logo.png" + style="height:100px; width:auto; vertical-align: bottom; margin-top: 20px;"></a><span class="tagline"> + Lightning-fast cluster computing + </span> + </p> + +</div> + +<nav class="navbar navbar-default" role="navigation"> + <!-- Brand and toggle get grouped for better mobile display --> + <div class="navbar-header"> + <button type="button" class="navbar-toggle" data-toggle="collapse" + data-target="#navbar-collapse-1"> + <span class="sr-only">Toggle navigation</span> + <span class="icon-bar"></span> + <span class="icon-bar"></span> + <span class="icon-bar"></span> + </button> </div> - <nav id="access" role="navigation"> - <h3 class="assistive-text">Main menu</h3> - <div class="menu-main-menu-container"> - <ul id="menu-main-menu" class="menu"> - - <li class="menu-item menu-item-type-post_type menu-item-object-page current-menu-item"> - <a href="/index.html">Home</a> - </li> - - <li class="menu-item menu-item-type-post_type menu-item-object-page "> - <a href="/downloads.html">Downloads</a> - </li> - - <li class="menu-item menu-item-type-post_type menu-item-object-page "> - <a href="/documentation.html">Documentation</a> - </li> + <!-- Collect the nav links, forms, and other content for toggling --> + <div class="collapse navbar-collapse" id="navbar-collapse-1"> + <ul class="nav navbar-nav"> + <li><a href="/downloads.html">Download</a></li> + <li class="dropdown"> + <a href="#" class="dropdown-toggle" data-toggle="dropdown"> + Related Projects <b class="caret"></b> + </a> + <ul class="dropdown-menu"> + <li><a href="http://shark.cs.berkeley.edu">Shark (SQL)</a></li> + <li><a href="/streaming/">Spark Streaming</a></li> + <li><a href="/mllib/">MLlib (machine learning)</a></li> + <li><a href="http://amplab.github.io/graphx/">GraphX (graph)</a></li> + </ul> + </li> + <li class="dropdown"> + <a href="#" class="dropdown-toggle" data-toggle="dropdown"> + Documentation <b class="caret"></b> + </a> + <ul class="dropdown-menu"> + <li><a href="/documentation.html">Overview</a></li> + <li><a href="/docs/latest/">Latest Release</a></li> + <li><a href="/examples.html">Examples</a></li> + </ul> + </li> + <li class="dropdown"> + <a href="#" class="dropdown-toggle" data-toggle="dropdown"> + Community <b class="caret"></b> + </a> + <ul class="dropdown-menu"> + <li><a href="/community.html">Mailing Lists</a></li> + <li><a href="/community.html#events">Events and Meetups</a></li> + <li><a href="/community.html#history">Project History</a></li> + <li><a href="https://cwiki.apache.org/confluence/display/SPARK/Powered+By+Spark">Powered By</a></li> + </ul> + </li> + <li><a href="/faq.html">FAQ</a></li> + </ul> + </div> + <!-- /.navbar-collapse --> +</nav> + + +<div class="row"> + <div class="col-md-3 col-md-push-9"> + <div class="news" style="margin-bottom: 20px;"> + <h5>Latest News</h5> + <ul class="list-unstyled"> - <li class="menu-item menu-item-type-post_type menu-item-object-page "> - <a href="/examples.html">Examples</a> - </li> + <li><a href="/news/spark-0-8-1-released.html">Spark 0.8.1 released</a> + <span class="small">(Dec 19, 2013)</span></li> - <li class="menu-item menu-item-type-post_type menu-item-object-page "> - <a href="/mailing-lists.html">Mailing Lists</a> - </li> + <li><a href="/news/spark-summit-2013-is-a-wrap.html">Spark Summit 2013 is a Wrap</a> + <span class="small">(Dec 15, 2013)</span></li> - <li class="menu-item menu-item-type-post_type menu-item-object-page "> - <a href="/research.html">Research</a> - </li> + <li><a href="/news/announcing-the-first-spark-summit.html">Announcing the first Spark Summit: December 2, 2013</a> + <span class="small">(Oct 08, 2013)</span></li> - <li class="menu-item menu-item-type-post_type menu-item-object-page "> - <a href="/faq.html">FAQ</a> - </li> + <li><a href="/news/spark-0-8-0-released.html">Spark 0.8.0 released</a> + <span class="small">(Sep 25, 2013)</span></li> - </ul></div> - </nav><!-- #access --> -</header><!-- #branding --> + </ul> + <p class="small" style="text-align: right;"><a href="/news/index.html">Archive</a></p> + </div> + <div class="hidden-xs hidden-sm"> + <a href="/downloads.html" class="btn btn-success btn-lg btn-block" style="margin-bottom: 30px;"> + Download Spark + </a> + <p style="font-size: 16px; font-weight: 500; color: #555;"> + Related Projects: + </p> + <ul class="list-narrow"> + <li><a href="http://shark.cs.berkeley.edu">Shark (SQL)</a></li> + <li><a href="/streaming/">Spark Streaming</a></li> + <li><a href="/mllib/">MLlib (machine learning)</a></li> + <li><a href="http://amplab.github.io/graphx/">GraphX (graph)</a></li> + </ul> + </div> + </div> + <div class="col-md-9 col-md-pull-3"> + <div class="jumbotron"> + <b>Apache Spark</b> is a fast and general engine for large-scale data processing. +</div> +<div class="row row-padded"> + <div class="col-md-7 col-sm-7"> + <h2>Speed</h2> - <div id="main"> - <div id="primary"> - <div id="content" role="main"> - - <article class="page type-page status-publish hentry"> - <h2 id="what-is-apache-spark">What is Apache Spark?</h2> + <p class="lead"> + Run programs up to 100x faster than + Hadoop MapReduce in memory, or 10x faster on disk. + </p> + + <p> + Spark has an advanced DAG execution engine that supports cyclic data flow and + in-memory computing. + </p> + </div> + <div class="col-md-5 col-sm-5 col-padded-top col-center"> + <div style="width: 100%; max-width: 272px; display: inline-block; text-align: center;"> + <img src="/images/logistic-regression.png" style="width: 100%; max-width: 250px;" /> + <div class="caption" style="min-width: 272px;">Logistic regression in Hadoop and Spark</div> + </div> + </div> +</div> + +<div class="row row-padded"> + <div class="col-md-7 col-sm-7"> + <h2>Ease of Use</h2> + + <p class="lead"> + Write applications quickly in Java, Scala or Python. + </p> + + <p> + Spark offers over 80 high-level operators that make it easy to build parallel apps. + And you can use it <em>interactively</em> + from the Scala and Python shells. + </p> + </div> + <div class="col-md-5 col-sm-5 col-padded-top col-center"> + <div style="text-align: left; display: inline-block;"> + <div class="code"> + file = spark.textFile(<span class="string">"hdfs://..."</span>)<br /> + <br /> + file.<span class="sparkop">flatMap</span>(<span class="closure">line => line.split(" ")</span>)<br /> + .<span class="sparkop">map</span>(<span class="closure">word => (word, 1)</span>)<br /> + .<span class="sparkop">reduceByKey</span>(<span class="closure">_ + _</span>) + </div> + <div class="caption">Word count in Spark</div> + </div> + <!-- + <div class="code" style="margin-top: 20px; text-align: left; display: inline-block;"> + file = spark.textFile(<span class="string">"hdfs://..."</span>)<br/> + <br/> + file.<span class="sparkop">filter</span>(<span class="closure">lambda line: "ERROR" in line</span>)<br/> + .<span class="sparkop">count</span>() + </div> + --> + <!--<div class="caption">Word count in Spark</div>--> + </div> +</div> -<p>Apache Spark is an open source cluster computing system that aims to make data analytics <em>fast</em> — both fast to run and fast to write.</p> +<div class="row row-padded"> + <div class="col-md-7 col-sm-7"> + <h2>Generality</h2> -<p>To run programs faster, Spark offers a general execution model that can optimize arbitrary operator graphs, and supports in-memory computing, which lets it query data faster than disk-based engines like Hadoop.</p> + <p class="lead"> + Combine SQL, streaming, and complex analytics. + </p> -<p>To make programming faster, Spark provides clean, concise APIs in -<a href="http://www.scala-lang.org" onclick="javascript:_gaq.push(['_trackEvent','outbound-article','http://www.scala-lang.org']);">Scala</a>, -<a href="/docs/latest/quick-start.html#a-standalone-app-in-java">Java</a> and -<a href="/docs/latest/quick-start.html#a-standalone-app-in-python">Python</a>. -You can also use Spark interactively from the Scala and Python shells to rapidly query big datasets.</p> + <p> + Spark powers a stack of high-level tools including + <a href="http://shark.cs.berkeley.edu">Shark</a> for SQL, <a href="/mllib/">MLlib</a> for machine learning, + <a href="http://amplab.github.io/graphx/">GraphX</a>, and <a href="/streaming/">Spark Streaming</a>. + You can combine these frameworks seamlessly in the same application. + </p> + </div> + <div class="col-md-5 col-sm-5 col-padded-top col-center"> + <img src="/images/spark-stack.png" style="margin-top: 15px; width: 100%; max-width: 296px;" usemap="#stack-map" /> + <map name="stack-map"> + <area shape="rect" coords="0,0,74,95" href="http://shark.cs.berkeley.edu" alt="Shark (SQL)" title="Shark" /> + <area shape="rect" coords="74,0,150,95" href="/streaming/" alt="Spark Streaming" title="Spark Streaming" /> + <area shape="rect" coords="150,0,224,95" href="/mllib/" alt="MLlib (machine learning)" title="MLlib" /> + <area shape="rect" coords="225,0,300,95" href="http://amplab.github.io/graphx/" alt="GraphX" title="GraphX" /> + </map> + </div> +</div> -<h2 id="what-can-it-do">What can it do?</h2> +<div class="row row-padded" style="margin-bottom: 15px;"> + <div class="col-md-7 col-sm-7"> + <h2>Integrated with Hadoop</h2> -<p>Spark was initially developed for two applications where placing data in memory helps: <em>iterative</em> algorithms, which are common in machine learning, and <em>interactive</em> data mining. In both cases, Spark can run up to <b>100x</b> faster than Hadoop MapReduce. However, you can use Spark for general data processing too. Check out our <a href="/examples.html">example jobs</a>.</p> + <p class="lead"> + Spark can run on Hadoop 2's YARN cluster manager, and can read + any existing Hadoop data. + </p> -<p>Spark is also the engine behind <a href="http://shark.cs.berkeley.edu" onclick="javascript:_gaq.push(['_trackEvent','outbound-article','http://shark.cs.berkeley.edu']);">Shark</a>, a fully <a href="http://hive.apache.org" onclick="javascript:_gaq.push(['_trackEvent','outbound-article','http://hive.apache.org']);">Apache Hive</a>-compatible data warehousing system that can run 100x faster than Hive.</p> + <p> + If you have a Hadoop 2 cluster, you can run Spark without any installation needed. + Otherwise, Spark is easy to run <a href="/docs/latest/spark-standalone.html">standalone</a> + or on <a href="/docs/latest/ec2-scripts.html">EC2</a> or <a href="http://mesos.apache.org">Mesos</a>. + It can read from <a href="http://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-hdfs/HdfsUserGuide.html">HDFS</a>, <a href="http://hbase.apache.org">HBase</a>, <a href="http://cassandra.apache.org">Cassandra</a>, + and any Hadoop data source. + </p> + </div> + <div class="col-md-5 col-sm-5 col-padded-top col-center"> + <img src="/images/hadoop.jpg" style="width: 100%; max-width: 280px;" /> + </div> +</div> -<p>While Spark is a new engine, it can access any data source supported by Hadoop, making it easy to run over existing data.</p> -<h2 id="who-uses-it">Who uses it?</h2> -<p>Spark was initially created in the <a href="https://amplab.cs.berkeley.edu" onclick="javascript:_gaq.push(['_trackEvent','outbound-article','http://amplab.cs.berkeley.edu']);">UC Berkeley AMPLab</a>, but is now being used and developed at a wide array of companies. -See our <a href="https://cwiki.apache.org/confluence/display/SPARK/Powered+By+Spark">powered by page</a> for a list of users, -and our <a href="https://cwiki.apache.org/confluence/display/SPARK/Committers">list of committers</a>. -In total, over 25 companies have contributed code to Spark. -Spark is <a href="https://github.com/apache/incubator-spark" onclick="javascript:_gaq.push(['_trackEvent','outbound-article','http://github.com']);">open source</a> under an Apache license, so <a href="/downloads.html">download</a> it to try it out.</p> + </div> +</div> -<h2 id="apache-incubator-notice">Apache Incubator notice</h2> -<p>Apache Spark is an effort undergoing incubation at The Apache Software Foundation (ASF), sponsored by the Apache Incubator. Incubation is required of all newly accepted projects until a further review indicates that the infrastructure, communications, and decision making process have stabilized in a manner consistent with other successful ASF projects. While incubation status is not necessarily a reflection of the completeness or stability of the code, it does indicate that the project has yet to be fully endorsed by the ASF.</p> + +<div class="row"> + <div class="col-md-4 col-padded"> + <h3>Community</h3> - </article><!-- #post --> - - </div><!-- #content --> - - <div id="secondary" class="widget-area" role="complementary"> - -<h3 class="widget-title">Latest News</h3> -<div class="latestnewswidget"> - - <div><a href="/news/spark-0-8-1-released.html">Spark 0.8.1 released</a> <span class="post-info">(December 19, 2013)</span></div> - - <div><a href="/news/spark-summit-2013-is-a-wrap.html">Spark Summit 2013 is a Wrap</a> <span class="post-info">(December 15, 2013)</span></div> - - <div><a href="/news/announcing-the-first-spark-summit.html">Announcing the first Spark Summit: December 2, 2013</a> <span class="post-info">(October 08, 2013)</span></div> - - <div><a href="/news/spark-0-8-0-released.html">Spark 0.8.0 released</a> <span class="post-info">(September 25, 2013)</span></div> - + <p> + Spark is used at a wide range of organizations to process large datasets. + You can find example use cases at the <a href="http://spark-summit.org/summit-2013/">Spark Summit</a> + conference, or on the + <a href="https://cwiki.apache.org/confluence/display/SPARK/Powered+By+Spark">Powered By</a> + page. + </p> + + <p> + There are many ways to reach the community: + </p> + <ul class="list-narrow"> + <li>Use the <a href="/community.html#mailing-lists">mailing lists</a> to ask questions.</li> + <li>In-person events include the <a href="http://www.meetup.com/spark-users/">Bay Area Spark meetup</a> and + <a href="http://spark-summit.org/">Spark Summit</a>.</li> + <li>We use <a href="https://spark-project.atlassian.net">JIRA</a> for issue tracking.</li> + </ul> </div> -<div style="text-align:right"><a href="/news/index.html">News Archive</a></div> + <div class="col-md-4 col-padded"> + <h3>Contributors</h3> -<p><!-- Not porting the following to Pygments since it becomes a lot less colorful --></p> + <p> + Apache Spark is built by a wide set of developers from over 25 companies. + Since the project started in 2010, more than 120 developers have contributed to Spark! + </p> -<div class="code" style="margin-top: 20px;"> - file = spark.textFile(<span class="string">"hdfs://..."</span>)<br /> - <br /> - file.<span class="sparkop">flatMap</span>(<span class="closure">line => line.split(" ")</span>)<br /> - .<span class="sparkop">map</span>(<span class="closure">word => (word, 1)</span>)<br /> - .<span class="sparkop">reduceByKey</span>(<span class="closure">_ + _</span>) + <p> + The project's + <a href="https://cwiki.apache.org/confluence/display/SPARK/Committers">committers</a> + come from 12 organizations. + </p> + + <p> + If you'd like to participate in Spark, or contribute to the libraries on top of it, learn + <a href="https://cwiki.apache.org/confluence/display/SPARK/Contributing+to+Spark">how to + contribute</a>. + </p> </div> -<div class="caption">Word Count implemented in Spark</div> -<div align="center" style="margin-top: 20px;"> - <img src="/images/spark-lr.png" alt="Logistic regression performance in Spark vs Hadoop" /> + <div class="col-md-4 col-padded"> + <h3>Getting Started</h3> + + <p>Learning Spark is easy whether you come from a Java or Python background:</p> + <ul class="list-narrow"> + <li><a href="/downloads.html">Download</a> the latest release — you can run Spark locally on your laptop.</li> + <li>Read the <a href="/docs/latest/quick-start.html">quick start guide</a>.</li> + <li> + Spark Summit 2013 contained free <a href="http://spark-summit.org/summit-2013/#day2">training videos</a> and <a href="http://spark-summit.org/2013/exercises/">exercises</a> + that you can run on Amazon EC2. + </li> + <li>Learn how to <a href="/docs/latest/#launching-on-a-cluster">deploy</a> Spark on a cluster.</li> + </ul> </div> -<div class="caption">Logistic regression in Spark vs Hadoop</div> -<h2 style="text-align:center"><a href="/downloads"><img src="/images/download.png" alt="Download" style="vertical-align: middle" /> Download Spark</a></h2> +</div> - </div> - - <footer id="colophon" role="contentinfo"> - <div id="site-generator"> - <p style="padding-top: 0; padding-bottom: 15px;"> - Apache Spark is an effort undergoing incubation at The Apache Software Foundation. - <a href="http://incubator.apache.org/" style="border: none;"> - <img style="vertical-align: middle; border: none;" src="/images/incubator-logo.png" alt="Apache Incubator" title="Apache Incubator" /> - </a> +<div class="row"> + <div class="col-sm-12 col-center"> + <a href="/downloads.html" class="btn btn-success btn-lg" style="width: 262px;">Download Spark</a> + </div> +</div> + +<div class="row"> + <div class="col-md-12 col-padded"> + <h3>Apache Incubator Notice</h3> + + <p> + <small> + Apache Spark is an effort undergoing incubation at The Apache Software Foundation (ASF), + sponsored by the Apache Incubator. Incubation is required of all newly accepted projects + until a further review indicates that the infrastructure, communications, and decision + making process have stabilized in a manner consistent with other successful ASF projects. + While incubation status is not necessarily a reflection of the completeness or stability + of the code, it does indicate that the project has yet to be fully endorsed by the ASF. + </small> </p> </div> -</footer><!-- #colophon --> +</div> + + + + +<footer class="small"> + <hr> + Apache Spark is an effort undergoing incubation at The Apache Software Foundation. + <a href="http://incubator.apache.org/" style="border: none;"> + <img style="vertical-align: middle; float: right; margin-bottom: 15px;" + src="/images/incubator-logo.png" alt="Apache Incubator" title="Apache Incubator" /> + </a> +</footer> - </div><!-- #primary --> - </div><!-- #main --> -</div><!-- #page --> +</div> +<script src="https://code.jquery.com/jquery.js"></script> +<script src="//netdna.bootstrapcdn.com/bootstrap/3.0.3/js/bootstrap.min.js"></script> +<script src="/js/lang-tabs.js"></script> </body> </html> |