summary refs log tree commit diff
path: root/site/index.html
diff options
context:
space:
mode:
Diffstat (limited to 'site/index.html')
-rw-r--r-- site/index.html 459
1 files changed, 308 insertions, 151 deletions
diff --git a/site/index.html b/site/index.html
index d88e47d12..ad2305872 100644
--- a/site/index.html
+++ b/site/index.html
@@ -1,27 +1,20 @@
<!DOCTYPE html>
-<!--[if IE 6]>
-<html id="ie6" dir="ltr" lang="en-US">
-<![endif]-->
-<!--[if IE 7]>
-<html id="ie7" dir="ltr" lang="en-US">
-<![endif]-->
-<!--[if IE 8]>
-<html id="ie8" dir="ltr" lang="en-US">
-<![endif]-->
-<!--[if !(IE 6) | !(IE 7) | !(IE 8) ]><!-->
-<html dir="ltr" lang="en-US">
-<!--<![endif]-->
+<html lang="en">
<head>
- <link rel="shortcut icon" href="/favicon.ico" />
- <meta charset="UTF-8" />
- <meta name="viewport" content="width=device-width" />
+ <meta charset="utf-8">
+ <meta http-equiv="X-UA-Compatible" content="IE=edge">
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
+
<title>
Apache Spark - Lightning-Fast Cluster Computing
</title>
- <link rel="stylesheet" type="text/css" media="all" href="/css/style.css" />
- <link rel="stylesheet" href="/css/pygments-default.css">
+
+
+ <!-- Bootstrap core CSS -->
+ <link href="/css/cerulean.min.css" rel="stylesheet">
+ <link href="/css/custom.css" rel="stylesheet">
<script type="text/javascript">
<!-- Google Analytics initialization -->
@@ -46,175 +39,339 @@
}
</script>
- <link rel='canonical' href='/index.html' />
-
- <style type="text/css">
- #site-title,
- #site-description {
- position: absolute !important;
- clip: rect(1px 1px 1px 1px); /* IE6, IE7 */
- clip: rect(1px, 1px, 1px, 1px);
- }
- </style>
- <style type="text/css" id="custom-background-css">
- body.custom-background { background-color: #f1f1f1; }
- </style>
+ <!-- HTML5 shim and Respond.js IE8 support of HTML5 elements and media queries -->
+ <!--[if lt IE 9]>
+ <script src="https://oss.maxcdn.com/libs/html5shiv/3.7.0/html5shiv.js"></script>
+ <script src="https://oss.maxcdn.com/libs/respond.js/1.3.0/respond.min.js"></script>
+ <![endif]-->
</head>
-<!--body class="page two-column right-sidebar"-->
-<body class="page">
-<div id="page" class="hfeed">
-
- <header id="branding" role="banner">
- <hgroup>
- <h1 id="site-title"><span><a href="/" title="Spark" rel="home">Spark</a></span></h1>
- <h2 id="site-description">Lightning-Fast Cluster Computing</h2>
- </hgroup>
-
- <a id="main-logo" href="/">
- <img style="height:175px; width:auto;" src="/images/spark-project-header1-cropped.png" alt="Spark: Lightning-Fast Cluster Computing" title="Spark: Lightning-Fast Cluster Computing" />
- </a>
- <div class="widget-summit">
- <a href="http://spark-summit.org"><img src="/images/Summit-Logo-FINALtr-150x150px.png" /></a>
- <div class="text">
- <a href="http://spark-summit.org/2013">
-
- <strong>Videos and Slides<br/>
- Available Now!</strong>
- </a>
- </div>
+<body>
+
+<div class="container" style="max-width: 1200px;">
+
+<div class="masthead">
+ <!-- Logo links to the site root; its alt text is the link's only accessible name. -->
+ <p class="lead">
+ <a href="/">
+ <img src="/images/spark-logo.png" alt="Apache Spark"
+ style="height:100px; width:auto; vertical-align: bottom; margin-top: 20px;"></a><span class="tagline">
+ Lightning-fast cluster computing
+ </span>
+ </p>
+
+</div>
+
+<nav class="navbar navbar-default" role="navigation">
+ <!-- Brand and toggle get grouped for better mobile display -->
+ <div class="navbar-header">
+ <button type="button" class="navbar-toggle" data-toggle="collapse"
+ data-target="#navbar-collapse-1">
+ <span class="sr-only">Toggle navigation</span>
+ <span class="icon-bar"></span>
+ <span class="icon-bar"></span>
+ <span class="icon-bar"></span>
+ </button>
</div>
- <nav id="access" role="navigation">
- <h3 class="assistive-text">Main menu</h3>
- <div class="menu-main-menu-container">
- <ul id="menu-main-menu" class="menu">
-
- <li class="menu-item menu-item-type-post_type menu-item-object-page current-menu-item">
- <a href="/index.html">Home</a>
- </li>
-
- <li class="menu-item menu-item-type-post_type menu-item-object-page ">
- <a href="/downloads.html">Downloads</a>
- </li>
-
- <li class="menu-item menu-item-type-post_type menu-item-object-page ">
- <a href="/documentation.html">Documentation</a>
- </li>
+ <!-- Collect the nav links, forms, and other content for toggling -->
+ <div class="collapse navbar-collapse" id="navbar-collapse-1">
+ <ul class="nav navbar-nav">
+ <li><a href="/downloads.html">Download</a></li>
+ <li class="dropdown">
+ <a href="#" class="dropdown-toggle" data-toggle="dropdown">
+ Related Projects <b class="caret"></b>
+ </a>
+ <ul class="dropdown-menu">
+ <li><a href="http://shark.cs.berkeley.edu">Shark (SQL)</a></li>
+ <li><a href="/streaming/">Spark Streaming</a></li>
+ <li><a href="/mllib/">MLlib (machine learning)</a></li>
+ <li><a href="http://amplab.github.io/graphx/">GraphX (graph)</a></li>
+ </ul>
+ </li>
+ <li class="dropdown">
+ <a href="#" class="dropdown-toggle" data-toggle="dropdown">
+ Documentation <b class="caret"></b>
+ </a>
+ <ul class="dropdown-menu">
+ <li><a href="/documentation.html">Overview</a></li>
+ <li><a href="/docs/latest/">Latest Release</a></li>
+ <li><a href="/examples.html">Examples</a></li>
+ </ul>
+ </li>
+ <li class="dropdown">
+ <a href="#" class="dropdown-toggle" data-toggle="dropdown">
+ Community <b class="caret"></b>
+ </a>
+ <ul class="dropdown-menu">
+ <li><a href="/community.html">Mailing Lists</a></li>
+ <li><a href="/community.html#events">Events and Meetups</a></li>
+ <li><a href="/community.html#history">Project History</a></li>
+ <li><a href="https://cwiki.apache.org/confluence/display/SPARK/Powered+By+Spark">Powered By</a></li>
+ </ul>
+ </li>
+ <li><a href="/faq.html">FAQ</a></li>
+ </ul>
+ </div>
+ <!-- /.navbar-collapse -->
+</nav>
+
+
+<div class="row">
+ <div class="col-md-3 col-md-push-9">
+ <div class="news" style="margin-bottom: 20px;">
+ <h5>Latest News</h5>
+ <ul class="list-unstyled">
- <li class="menu-item menu-item-type-post_type menu-item-object-page ">
- <a href="/examples.html">Examples</a>
- </li>
+ <li><a href="/news/spark-0-8-1-released.html">Spark 0.8.1 released</a>
+ <span class="small">(Dec 19, 2013)</span></li>
- <li class="menu-item menu-item-type-post_type menu-item-object-page ">
- <a href="/mailing-lists.html">Mailing Lists</a>
- </li>
+ <li><a href="/news/spark-summit-2013-is-a-wrap.html">Spark Summit 2013 is a Wrap</a>
+ <span class="small">(Dec 15, 2013)</span></li>
- <li class="menu-item menu-item-type-post_type menu-item-object-page ">
- <a href="/research.html">Research</a>
- </li>
+ <li><a href="/news/announcing-the-first-spark-summit.html">Announcing the first Spark Summit: December 2, 2013</a>
+ <span class="small">(Oct 08, 2013)</span></li>
- <li class="menu-item menu-item-type-post_type menu-item-object-page ">
- <a href="/faq.html">FAQ</a>
- </li>
+ <li><a href="/news/spark-0-8-0-released.html">Spark 0.8.0 released</a>
+ <span class="small">(Sep 25, 2013)</span></li>
- </ul></div>
- </nav><!-- #access -->
-</header><!-- #branding -->
+ </ul>
+ <p class="small" style="text-align: right;"><a href="/news/index.html">Archive</a></p>
+ </div>
+ <div class="hidden-xs hidden-sm">
+ <a href="/downloads.html" class="btn btn-success btn-lg btn-block" style="margin-bottom: 30px;">
+ Download Spark
+ </a>
+ <p style="font-size: 16px; font-weight: 500; color: #555;">
+ Related Projects:
+ </p>
+ <ul class="list-narrow">
+ <li><a href="http://shark.cs.berkeley.edu">Shark (SQL)</a></li>
+ <li><a href="/streaming/">Spark Streaming</a></li>
+ <li><a href="/mllib/">MLlib (machine learning)</a></li>
+ <li><a href="http://amplab.github.io/graphx/">GraphX (graph)</a></li>
+ </ul>
+ </div>
+ </div>
+ <div class="col-md-9 col-md-pull-3">
+ <div class="jumbotron">
+ <b>Apache Spark</b> is a fast and general engine for large-scale data processing.
+</div>
+<div class="row row-padded">
+ <div class="col-md-7 col-sm-7">
+ <h2>Speed</h2>
- <div id="main">
- <div id="primary">
- <div id="content" role="main">
-
- <article class="page type-page status-publish hentry">
- <h2 id="what-is-apache-spark">What is Apache Spark?</h2>
+ <p class="lead">
+ Run programs up to 100x faster than
+ Hadoop MapReduce in memory, or 10x faster on disk.
+ </p>
+
+ <p>
+ Spark has an advanced DAG execution engine that supports cyclic data flow and
+ in-memory computing.
+ </p>
+ </div>
+ <div class="col-md-5 col-sm-5 col-padded-top col-center"><!-- chart column for the Speed row -->
+ <div style="width: 100%; max-width: 272px; display: inline-block; text-align: center;">
+ <img src="/images/logistic-regression.png" alt="Chart comparing logistic regression performance in Hadoop and Spark" style="width: 100%; max-width: 250px;" />
+ <div class="caption" style="min-width: 272px;">Logistic regression in Hadoop and Spark</div>
+ </div>
+ </div>
+</div>
+
+<div class="row row-padded">
+ <div class="col-md-7 col-sm-7">
+ <h2>Ease of Use</h2>
+
+ <p class="lead">
+ Write applications quickly in Java, Scala or Python.
+ </p>
+
+ <p>
+ Spark offers over 80 high-level operators that make it easy to build parallel apps.
+ And you can use it <em>interactively</em>
+ from the Scala and Python shells.
+ </p>
+ </div>
+ <div class="col-md-5 col-sm-5 col-padded-top col-center">
+ <div style="text-align: left; display: inline-block;">
+ <div class="code">
+ file = spark.textFile(<span class="string">"hdfs://..."</span>)<br />
+ &nbsp;<br />
+ file.<span class="sparkop">flatMap</span>(<span class="closure">line =&gt; line.split(" ")</span>)<br />
+ &nbsp;&nbsp;&nbsp;&nbsp;.<span class="sparkop">map</span>(<span class="closure">word =&gt; (word, 1)</span>)<br />
+ &nbsp;&nbsp;&nbsp;&nbsp;.<span class="sparkop">reduceByKey</span>(<span class="closure">_ + _</span>)
+ </div>
+ <div class="caption">Word count in Spark</div>
+ </div>
+ <!--
+ <div class="code" style="margin-top: 20px; text-align: left; display: inline-block;">
+ file = spark.textFile(<span class="string">"hdfs://..."</span>)<br/>
+ &nbsp;<br/>
+ file.<span class="sparkop">filter</span>(<span class="closure">lambda line: "ERROR" in line</span>)<br/>
+ &nbsp;&nbsp;&nbsp;&nbsp;.<span class="sparkop">count</span>()
+ </div>
+ -->
+ <!--<div class="caption">Word count in Spark</div>-->
+ </div>
+</div>
-<p>Apache Spark is an open source cluster computing system that aims to make data analytics <em>fast</em> — both fast to run and fast to write.</p>
+<div class="row row-padded">
+ <div class="col-md-7 col-sm-7">
+ <h2>Generality</h2>
-<p>To run programs faster, Spark offers a general execution model that can optimize arbitrary operator graphs, and supports in-memory computing, which lets it query data faster than disk-based engines like Hadoop.</p>
+ <p class="lead">
+ Combine SQL, streaming, and complex analytics.
+ </p>
-<p>To make programming faster, Spark provides clean, concise APIs in
-<a href="http://www.scala-lang.org" onclick="javascript:_gaq.push(['_trackEvent','outbound-article','http://www.scala-lang.org']);">Scala</a>,
-<a href="/docs/latest/quick-start.html#a-standalone-app-in-java">Java</a> and
-<a href="/docs/latest/quick-start.html#a-standalone-app-in-python">Python</a>.
-You can also use Spark interactively from the Scala and Python shells to rapidly query big datasets.</p>
+ <p>
+ Spark powers a stack of high-level tools including
+ <a href="http://shark.cs.berkeley.edu">Shark</a> for SQL, <a href="/mllib/">MLlib</a> for machine learning,
+ <a href="http://amplab.github.io/graphx/">GraphX</a>, and <a href="/streaming/">Spark Streaming</a>.
+ You can combine these frameworks seamlessly in the same application.
+ </p>
+ </div>
+ <div class="col-md-5 col-sm-5 col-padded-top col-center"><!-- clickable stack diagram; each area below links to one project -->
+ <img src="/images/spark-stack.png" alt="Spark stack: Shark, Spark Streaming, MLlib and GraphX" style="margin-top: 15px; width: 100%; max-width: 296px;" usemap="#stack-map" />
+ <map name="stack-map">
+ <area shape="rect" coords="0,0,74,95" href="http://shark.cs.berkeley.edu" alt="Shark (SQL)" title="Shark" />
+ <area shape="rect" coords="74,0,150,95" href="/streaming/" alt="Spark Streaming" title="Spark Streaming" />
+ <area shape="rect" coords="150,0,224,95" href="/mllib/" alt="MLlib (machine learning)" title="MLlib" />
+ <area shape="rect" coords="225,0,300,95" href="http://amplab.github.io/graphx/" alt="GraphX" title="GraphX" />
+ </map>
+ </div>
+</div>
-<h2 id="what-can-it-do">What can it do?</h2>
+<div class="row row-padded" style="margin-bottom: 15px;">
+ <div class="col-md-7 col-sm-7">
+ <h2>Integrated with Hadoop</h2>
-<p>Spark was initially developed for two applications where placing data in memory helps: <em>iterative</em> algorithms, which are common in machine learning, and <em>interactive</em> data mining. In both cases, Spark can run up to <b>100x</b> faster than Hadoop MapReduce. However, you can use Spark for general data processing too. Check out our <a href="/examples.html">example jobs</a>.</p>
+ <p class="lead">
+ Spark can run on Hadoop 2's YARN cluster manager, and can read
+ any existing Hadoop data.
+ </p>
-<p>Spark is also the engine behind <a href="http://shark.cs.berkeley.edu" onclick="javascript:_gaq.push(['_trackEvent','outbound-article','http://shark.cs.berkeley.edu']);">Shark</a>, a fully <a href="http://hive.apache.org" onclick="javascript:_gaq.push(['_trackEvent','outbound-article','http://hive.apache.org']);">Apache Hive</a>-compatible data warehousing system that can run 100x faster than Hive.</p>
+ <p>
+ If you have a Hadoop 2 cluster, you can run Spark without any installation needed.
+ Otherwise, Spark is easy to run <a href="/docs/latest/spark-standalone.html">standalone</a>
+ or on <a href="/docs/latest/ec2-scripts.html">EC2</a> or <a href="http://mesos.apache.org">Mesos</a>.
+ It can read from <a href="http://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-hdfs/HdfsUserGuide.html">HDFS</a>, <a href="http://hbase.apache.org">HBase</a>, <a href="http://cassandra.apache.org">Cassandra</a>,
+ and any Hadoop data source.
+ </p>
+ </div>
+ <div class="col-md-5 col-sm-5 col-padded-top col-center"><!-- NOTE(review): alt assumes hadoop.jpg is the Hadoop logo; confirm -->
+ <img src="/images/hadoop.jpg" alt="Hadoop" style="width: 100%; max-width: 280px;" />
+ </div>
+</div>
-<p>While Spark is a new engine, it can access any data source supported by Hadoop, making it easy to run over existing data.</p>
-<h2 id="who-uses-it">Who uses it?</h2>
-<p>Spark was initially created in the <a href="https://amplab.cs.berkeley.edu" onclick="javascript:_gaq.push(['_trackEvent','outbound-article','http://amplab.cs.berkeley.edu']);">UC Berkeley AMPLab</a>, but is now being used and developed at a wide array of companies.
-See our <a href="https://cwiki.apache.org/confluence/display/SPARK/Powered+By+Spark">powered by page</a> for a list of users,
-and our <a href="https://cwiki.apache.org/confluence/display/SPARK/Committers">list of committers</a>.
-In total, over 25 companies have contributed code to Spark.
-Spark is <a href="https://github.com/apache/incubator-spark" onclick="javascript:_gaq.push(['_trackEvent','outbound-article','http://github.com']);">open source</a> under an Apache license, so <a href="/downloads.html">download</a> it to try it out.</p>
+ </div>
+</div>
-<h2 id="apache-incubator-notice">Apache Incubator notice</h2>
-<p>Apache Spark is an effort undergoing incubation at The Apache Software Foundation (ASF), sponsored by the Apache Incubator. Incubation is required of all newly accepted projects until a further review indicates that the infrastructure, communications, and decision making process have stabilized in a manner consistent with other successful ASF projects. While incubation status is not necessarily a reflection of the completeness or stability of the code, it does indicate that the project has yet to be fully endorsed by the ASF.</p>
+
+<div class="row">
+ <div class="col-md-4 col-padded">
+ <h3>Community</h3>
- </article><!-- #post -->
-
- </div><!-- #content -->
-
- <div id="secondary" class="widget-area" role="complementary">
-
-<h3 class="widget-title">Latest News</h3>
-<div class="latestnewswidget">
-
- <div><a href="/news/spark-0-8-1-released.html">Spark 0.8.1 released</a> <span class="post-info">(December 19, 2013)</span></div>
-
- <div><a href="/news/spark-summit-2013-is-a-wrap.html">Spark Summit 2013 is a Wrap</a> <span class="post-info">(December 15, 2013)</span></div>
-
- <div><a href="/news/announcing-the-first-spark-summit.html">Announcing the first Spark Summit: December 2, 2013</a> <span class="post-info">(October 08, 2013)</span></div>
-
- <div><a href="/news/spark-0-8-0-released.html">Spark 0.8.0 released</a> <span class="post-info">(September 25, 2013)</span></div>
-
+ <p>
+ Spark is used at a wide range of organizations to process large datasets.
+ You can find example use cases at the <a href="http://spark-summit.org/summit-2013/">Spark Summit</a>
+ conference, or on the
+ <a href="https://cwiki.apache.org/confluence/display/SPARK/Powered+By+Spark">Powered By</a>
+ page.
+ </p>
+
+ <p>
+ There are many ways to reach the community:
+ </p>
+ <ul class="list-narrow">
+ <li>Use the <a href="/community.html#mailing-lists">mailing lists</a> to ask questions.</li>
+ <li>In-person events include the <a href="http://www.meetup.com/spark-users/">Bay Area Spark meetup</a> and
+ <a href="http://spark-summit.org/">Spark Summit</a>.</li>
+ <li>We use <a href="https://spark-project.atlassian.net">JIRA</a> for issue tracking.</li>
+ </ul>
</div>
-<div style="text-align:right"><a href="/news/index.html">News Archive</a></div>
+ <div class="col-md-4 col-padded">
+ <h3>Contributors</h3>
-<p><!-- Not porting the following to Pygments since it becomes a lot less colorful --></p>
+ <p>
+ Apache Spark is built by a wide set of developers from over 25 companies.
+ Since the project started in 2010, more than 120 developers have contributed to Spark!
+ </p>
-<div class="code" style="margin-top: 20px;">
- file = spark.textFile(<span class="string">"hdfs://..."</span>)<br />
- &nbsp;<br />
- file.<span class="sparkop">flatMap</span>(<span class="closure">line =&gt; line.split(" ")</span>)<br />
- &nbsp;&nbsp;&nbsp;&nbsp;.<span class="sparkop">map</span>(<span class="closure">word =&gt; (word, 1)</span>)<br />
- &nbsp;&nbsp;&nbsp;&nbsp;.<span class="sparkop">reduceByKey</span>(<span class="closure">_ + _</span>)
+ <p>
+ The project's
+ <a href="https://cwiki.apache.org/confluence/display/SPARK/Committers">committers</a>
+ come from 12 organizations.
+ </p>
+
+ <p>
+ If you'd like to participate in Spark, or contribute to the libraries on top of it, learn
+ <a href="https://cwiki.apache.org/confluence/display/SPARK/Contributing+to+Spark">how to
+ contribute</a>.
+ </p>
</div>
-<div class="caption">Word Count implemented in Spark</div>
-<div align="center" style="margin-top: 20px;">
- <img src="/images/spark-lr.png" alt="Logistic regression performance in Spark vs Hadoop" />
+ <div class="col-md-4 col-padded">
+ <h3>Getting Started</h3>
+
+ <p>Learning Spark is easy whether you come from a Java or Python background:</p>
+ <ul class="list-narrow">
+ <li><a href="/downloads.html">Download</a> the latest release &mdash; you can run Spark locally on your laptop.</li>
+ <li>Read the <a href="/docs/latest/quick-start.html">quick start guide</a>.</li>
+ <li>
+ Spark Summit 2013 contained free <a href="http://spark-summit.org/summit-2013/#day2">training videos</a> and <a href="http://spark-summit.org/2013/exercises/">exercises</a>
+ that you can run on Amazon EC2.
+ </li>
+ <li>Learn how to <a href="/docs/latest/#launching-on-a-cluster">deploy</a> Spark on a cluster.</li>
+ </ul>
</div>
-<div class="caption">Logistic regression in Spark vs Hadoop</div>
-<h2 style="text-align:center"><a href="/downloads"><img src="/images/download.png" alt="Download" style="vertical-align: middle" />&nbsp;&nbsp;Download Spark</a></h2>
+</div>
- </div>
-
- <footer id="colophon" role="contentinfo">
- <div id="site-generator">
- <p style="padding-top: 0; padding-bottom: 15px;">
- Apache Spark is an effort undergoing incubation at The Apache Software Foundation.
- <a href="http://incubator.apache.org/" style="border: none;">
- <img style="vertical-align: middle; border: none;" src="/images/incubator-logo.png" alt="Apache Incubator" title="Apache Incubator" />
- </a>
+<div class="row"><!-- row holding the Download Spark button -->
+ <div class="col-center col-sm-12">
+ <a class="btn btn-lg btn-success" style="width: 262px;" href="/downloads.html">Download Spark</a>
+ </div>
+</div>
+
+<div class="row">
+ <div class="col-md-12 col-padded">
+ <h3>Apache Incubator Notice</h3>
+
+ <p>
+ <small>
+ Apache Spark is an effort undergoing incubation at The Apache Software Foundation (ASF),
+ sponsored by the Apache Incubator. Incubation is required of all newly accepted projects
+ until a further review indicates that the infrastructure, communications, and decision
+ making process have stabilized in a manner consistent with other successful ASF projects.
+ While incubation status is not necessarily a reflection of the completeness or stability
+ of the code, it does indicate that the project has yet to be fully endorsed by the ASF.
+ </small>
</p>
</div>
-</footer><!-- #colophon -->
+</div>
+
+
+
+
+<footer class="small"><!-- incubation statement followed by the linked Apache Incubator logo -->
+ <hr>
+ Apache Spark is an effort undergoing incubation at The Apache Software Foundation.
+ <a style="border: none;" href="http://incubator.apache.org/">
+ <img src="/images/incubator-logo.png" alt="Apache Incubator" title="Apache Incubator"
+ style="vertical-align: middle; float: right; margin-bottom: 15px;" />
+ </a>
+</footer>
- </div><!-- #primary -->
- </div><!-- #main -->
-</div><!-- #page -->
+</div>
+<script src="https://code.jquery.com/jquery.js"></script><!-- jQuery must load before Bootstrap's JS -->
+<script src="https://netdna.bootstrapcdn.com/bootstrap/3.0.3/js/bootstrap.min.js"></script>
+<script src="/js/lang-tabs.js"></script>
</body>
</html>