summaryrefslogtreecommitdiff
path: root/site/faq.html
diff options
context:
space:
mode:
Diffstat (limited to 'site/faq.html')
-rw-r--r--site/faq.html278
1 files changed, 160 insertions, 118 deletions
diff --git a/site/faq.html b/site/faq.html
index b9de8b936..0b2352d30 100644
--- a/site/faq.html
+++ b/site/faq.html
@@ -1,27 +1,20 @@
<!DOCTYPE html>
-<!--[if IE 6]>
-<html id="ie6" dir="ltr" lang="en-US">
-<![endif]-->
-<!--[if IE 7]>
-<html id="ie7" dir="ltr" lang="en-US">
-<![endif]-->
-<!--[if IE 8]>
-<html id="ie8" dir="ltr" lang="en-US">
-<![endif]-->
-<!--[if !(IE 6) | !(IE 7) | !(IE 8) ]><!-->
-<html dir="ltr" lang="en-US">
-<!--<![endif]-->
+<html lang="en">
<head>
- <link rel="shortcut icon" href="/favicon.ico" />
- <meta charset="UTF-8" />
- <meta name="viewport" content="width=device-width" />
+ <meta charset="utf-8">
+ <meta http-equiv="X-UA-Compatible" content="IE=edge">
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
+
<title>
FAQ | Apache Spark
</title>
- <link rel="stylesheet" type="text/css" media="all" href="/css/style.css" />
- <link rel="stylesheet" href="/css/pygments-default.css">
+
+
+ <!-- Bootstrap core CSS -->
+ <link href="/css/cerulean.min.css" rel="stylesheet">
+ <link href="/css/custom.css" rel="stylesheet">
<script type="text/javascript">
<!-- Google Analytics initialization -->
@@ -46,141 +39,190 @@
}
</script>
- <link rel='canonical' href='/index.html' />
-
- <style type="text/css">
- #site-title,
- #site-description {
- position: absolute !important;
- clip: rect(1px 1px 1px 1px); /* IE6, IE7 */
- clip: rect(1px, 1px, 1px, 1px);
- }
- </style>
- <style type="text/css" id="custom-background-css">
- body.custom-background { background-color: #f1f1f1; }
- </style>
+ <!-- HTML5 shim and Respond.js IE8 support of HTML5 elements and media queries -->
+ <!--[if lt IE 9]>
+ <script src="https://oss.maxcdn.com/libs/html5shiv/3.7.0/html5shiv.js"></script>
+ <script src="https://oss.maxcdn.com/libs/respond.js/1.3.0/respond.min.js"></script>
+ <![endif]-->
</head>
-<!--body class="page singular"-->
-<body class="page singular">
-<div id="page" class="hfeed">
-
- <header id="branding" role="banner">
- <hgroup>
- <h1 id="site-title"><span><a href="/" title="Spark" rel="home">Spark</a></span></h1>
- <h2 id="site-description">Lightning-Fast Cluster Computing</h2>
- </hgroup>
-
- <a id="main-logo" href="/">
- <img style="height:175px; width:auto;" src="/images/spark-project-header1-cropped.png" alt="Spark: Lightning-Fast Cluster Computing" title="Spark: Lightning-Fast Cluster Computing" />
- </a>
- <div class="widget-summit">
- <a href="http://spark-summit.org"><img src="/images/Summit-Logo-FINALtr-150x150px.png" /></a>
- <div class="text">
- <a href="http://spark-summit.org/2013">
-
- <strong>Videos and Slides<br/>
- Available Now!</strong>
- </a>
- </div>
+<body>
+
+<div class="container" style="max-width: 1200px;">
+
+<div class="masthead">
+
+ <p class="lead">
+ <a href="/">
+ <img src="/images/spark-logo.png" alt="Apache Spark"
+ style="height:100px; width:auto; vertical-align: bottom; margin-top: 20px;"></a><span class="tagline">
+ Lightning-fast cluster computing
+ </span>
+ </p>
+
+</div>
+
+<nav class="navbar navbar-default" role="navigation">
+ <!-- Brand and toggle get grouped for better mobile display -->
+ <div class="navbar-header">
+ <button type="button" class="navbar-toggle" data-toggle="collapse"
+ data-target="#navbar-collapse-1">
+ <span class="sr-only">Toggle navigation</span>
+ <span class="icon-bar"></span>
+ <span class="icon-bar"></span>
+ <span class="icon-bar"></span>
+ </button>
</div>
- <nav id="access" role="navigation">
- <h3 class="assistive-text">Main menu</h3>
- <div class="menu-main-menu-container">
- <ul id="menu-main-menu" class="menu">
-
- <li class="menu-item menu-item-type-post_type menu-item-object-page ">
- <a href="/index.html">Home</a>
- </li>
-
- <li class="menu-item menu-item-type-post_type menu-item-object-page ">
- <a href="/downloads.html">Downloads</a>
- </li>
-
- <li class="menu-item menu-item-type-post_type menu-item-object-page ">
- <a href="/documentation.html">Documentation</a>
- </li>
+ <!-- Collect the nav links, forms, and other content for toggling -->
+ <div class="collapse navbar-collapse" id="navbar-collapse-1">
+ <ul class="nav navbar-nav">
+ <li><a href="/downloads.html">Download</a></li>
+ <li class="dropdown">
+ <a href="#" class="dropdown-toggle" data-toggle="dropdown">
+ Related Projects <b class="caret"></b>
+ </a>
+ <ul class="dropdown-menu">
+ <li><a href="http://shark.cs.berkeley.edu">Shark (SQL)</a></li>
+ <li><a href="/streaming/">Spark Streaming</a></li>
+ <li><a href="/mllib/">MLlib (machine learning)</a></li>
+ <li><a href="http://amplab.github.io/graphx/">GraphX (graph)</a></li>
+ </ul>
+ </li>
+ <li class="dropdown">
+ <a href="#" class="dropdown-toggle" data-toggle="dropdown">
+ Documentation <b class="caret"></b>
+ </a>
+ <ul class="dropdown-menu">
+ <li><a href="/documentation.html">Overview</a></li>
+ <li><a href="/docs/latest/">Latest Release</a></li>
+ <li><a href="/examples.html">Examples</a></li>
+ </ul>
+ </li>
+ <li class="dropdown">
+ <a href="#" class="dropdown-toggle" data-toggle="dropdown">
+ Community <b class="caret"></b>
+ </a>
+ <ul class="dropdown-menu">
+ <li><a href="/community.html">Mailing Lists</a></li>
+ <li><a href="/community.html#events">Events and Meetups</a></li>
+ <li><a href="/community.html#history">Project History</a></li>
+ <li><a href="https://cwiki.apache.org/confluence/display/SPARK/Powered+By+Spark">Powered By</a></li>
+ </ul>
+ </li>
+ <li><a href="/faq.html">FAQ</a></li>
+ </ul>
+ </div>
+ <!-- /.navbar-collapse -->
+</nav>
+
+
+<div class="row">
+ <div class="col-md-3 col-md-push-9">
+ <div class="news" style="margin-bottom: 20px;">
+ <h5>Latest News</h5>
+ <ul class="list-unstyled">
- <li class="menu-item menu-item-type-post_type menu-item-object-page ">
- <a href="/examples.html">Examples</a>
- </li>
+ <li><a href="/news/spark-0-8-1-released.html">Spark 0.8.1 released</a>
+ <span class="small">(Dec 19, 2013)</span></li>
- <li class="menu-item menu-item-type-post_type menu-item-object-page ">
- <a href="/mailing-lists.html">Mailing Lists</a>
- </li>
+ <li><a href="/news/spark-summit-2013-is-a-wrap.html">Spark Summit 2013 is a Wrap</a>
+ <span class="small">(Dec 15, 2013)</span></li>
- <li class="menu-item menu-item-type-post_type menu-item-object-page ">
- <a href="/research.html">Research</a>
- </li>
+ <li><a href="/news/announcing-the-first-spark-summit.html">Announcing the first Spark Summit: December 2, 2013</a>
+ <span class="small">(Oct 08, 2013)</span></li>
- <li class="menu-item menu-item-type-post_type menu-item-object-page current-menu-item">
- <a href="/faq.html">FAQ</a>
- </li>
+ <li><a href="/news/spark-0-8-0-released.html">Spark 0.8.0 released</a>
+ <span class="small">(Sep 25, 2013)</span></li>
- </ul></div>
- </nav><!-- #access -->
-</header><!-- #branding -->
-
-
+ </ul>
+ <p class="small" style="text-align: right;"><a href="/news/index.html">Archive</a></p>
+ </div>
+ <div class="hidden-xs hidden-sm">
+ <a href="/downloads.html" class="btn btn-success btn-lg btn-block" style="margin-bottom: 30px;">
+ Download Spark
+ </a>
+ <p style="font-size: 16px; font-weight: 500; color: #555;">
+ Related Projects:
+ </p>
+ <ul class="list-narrow">
+ <li><a href="http://shark.cs.berkeley.edu">Shark (SQL)</a></li>
+ <li><a href="/streaming/">Spark Streaming</a></li>
+ <li><a href="/mllib/">MLlib (machine learning)</a></li>
+ <li><a href="http://amplab.github.io/graphx/">GraphX (graph)</a></li>
+ </ul>
+ </div>
+ </div>
- <div id="main">
- <div id="primary">
- <div id="content" role="main">
-
- <article class="page type-page status-publish hentry">
- <h2>Spark FAQ</h2>
+ <div class="col-md-9 col-md-pull-3">
+ <h2>Spark FAQ</h2>
-<p class="question">Is Spark a modified version of Hadoop?</p>
-<p class="answer">No. Spark is a completely separate codebase optimized for low latency, although it can load data from any Hadoop input source (InputFormat).</p>
+<p class="question">How does Spark relate to Hadoop?</p>
+<p class="answer">
+Spark is a fast and powerful engine for processing Hadoop data.
+It runs in Hadoop clusters through
+<a href="http://hadoop.apache.org/docs/current2/hadoop-yarn/hadoop-yarn-site/YARN.html">Hadoop YARN</a>
+or Spark's <a href="/docs/latest/spark-standalone.html">standalone mode</a>, and it can process
+data in HDFS, HBase, Cassandra, Hive, and any Hadoop InputFormat.
+It is designed to perform both general data processing (similar to MapReduce) and new workloads like
+streaming, interactive queries, and machine learning.
+</p>
<p class="question">Which languages does Spark support?</p>
-<p class="answer">Starting in version 0.7, Spark supports Scala, Java and Python.</p>
+<p class="answer">Spark supports Scala, Java and Python.</p>
<p class="question">Does Spark require modified versions of Scala or Python?</p>
<p class="answer">No. Spark requires no changes to Scala or compiler plugins. The Python API uses the standard CPython implementation, and can call into existing C libraries for Python such as NumPy.</p>
<p class="question">What happens when a cached dataset does not fit in memory?</p>
-<p class="answer">Spark can either spill it to disk or recompute the partitions that don't fit in RAM each time they are requested. By default, it uses recomputation, but you can set a dataset's <a href="/docs/latest/scala-programming-guide.html#rdd-persistence">storage level</a> to <tt>MEMORY_AND_DISK</tt> to avoid this. </p>
+<p class="answer">Spark can either spill it to disk or recompute the partitions that don't fit in RAM each time they are requested. By default, it uses recomputation, but you can set a dataset's <a href="/docs/latest/scala-programming-guide.html#rdd-persistence">storage level</a> to <code>MEMORY_AND_DISK</code> to avoid this. </p>
<p class="question">How can I run Spark on a cluster?</p>
-<p class="answer">You can use either the <a href="/docs/latest/spark-standalone.html">standalone deploy mode</a>, which depends only on Java, or the <a href="/docs/latest/running-on-mesos.html">Apache Mesos</a> cluster manager.</p>
-<p>Note that you can also run Spark locally (possibly on multiple cores) without any special setup by just passing <tt>local[N]</tt> as the master URL, where <tt>N</tt> is the number of parallel threads you want.</p>
+<p class="answer">You can use either the <a href="/docs/latest/spark-standalone.html">standalone deploy mode</a>, which only needs Java to be installed on each node, or the <a href="/docs/latest/running-on-mesos.html">Mesos</a> and <a href="/docs/latest/running-on-yarn.html">YARN</a> cluster managers. If you'd like to run on Amazon EC2, Spark provides <a href="/docs/latest/ec2-scripts.html">EC2 scripts</a> to automatically launch a cluster.</p>
+
+<p>Note that you can also run Spark locally (possibly on multiple cores) without any special setup by just passing <code>local[N]</code> as the master URL, where <code>N</code> is the number of parallel threads you want.</p>
+
+<p class="question">Do I need Hadoop to run Spark?</p>
+<p class="answer">No, but if you run on a cluster, you will need some form of shared file system (for example, NFS mounted at the same path on each node). If you have this type of file system, you can just deploy Spark in standalone mode.</p>
-<p class="question">I don't know Scala; how hard is it to pick it up to use Spark?</p>
-<p class="answer">Scala itself is pretty easy to pick up if you have Java experience. Check out <a href="http://www.artima.com/scalazine/articles/steps.html">First Steps to Scala</a> for a quick introduction, the <a href="http://www.scala-lang.org/docu/files/ScalaTutorial.pdf">Scala tutorial for Java programmers</a>, or the free online book <a href="http://www.artima.com/pins1ed/">Programming in Scala</a>.</p>
-<p>Spark 0.6 also added a <a href="/docs/latest/java-programming-guide.html">Java API</a>, letting you use Spark from Java, and Spark 0.7 added a <a href="/docs/latest/python-programming-guide.html">Python API</a>.</p>
+<p class="question">How can I access data in S3?</p>
+<p class="answer">Use the <code>s3n://</code> URI scheme (<code>s3n://bucket/path</code>). You will also need to set your Amazon security credentials, either by setting the environment variables <code>AWS_ACCESS_KEY_ID</code> and <code>AWS_SECRET_ACCESS_KEY</code> before your program runs, or by setting <code>fs.s3.awsAccessKeyId</code> and <code>fs.s3.awsSecretAccessKey</code> in <code>SparkContext.hadoopConfiguration</code>.</p>
+
+<p class="question">What are good resources for learning Scala?</p>
+<p class="answer">Check out <a href="http://www.artima.com/scalazine/articles/steps.html">First Steps to Scala</a> for a quick introduction, the <a href="http://www.scala-lang.org/docu/files/ScalaTutorial.pdf">Scala tutorial for Java programmers</a>, or the free online book <a href="http://www.artima.com/pins1ed/">Programming in Scala</a>. Scala is easy to transition to if you have Java experience or experience in a similarly high-level language (e.g. Ruby).</p>
+
+<p>In addition to Scala, Spark has <a href="/docs/latest/java-programming-guide.html">Java</a> and <a href="/docs/latest/python-programming-guide.html">Python</a> APIs.</p>
<p class="question">What license is Spark under?</p>
<p class="answer">Starting in version 0.8, Spark is under the <a href="http://www.apache.org/licenses/LICENSE-2.0.html">Apache 2.0 license</a>. Previous versions used the <a href="https://github.com/mesos/spark/blob/branch-0.7/LICENSE">BSD license</a>.</p>
<p class="question">How can I contribute to Spark?</p>
-<p class="answer">Contact the <a href="http://groups.google.com/group/spark-users">mailing list</a> or send us a pull request on GitHub. We're glad to hear about your experience using Spark and to accept patches </p>
+<p class="answer">Contact the <a href="/community.html">mailing list</a> or send us a pull request on <a href="https://github.com/apache/incubator-spark">GitHub</a> (see the <a href="https://cwiki.apache.org/confluence/display/SPARK/Contributing+to+Spark">contribution instructions</a>). We're glad to hear about your experience using Spark and to accept patches.</p>
<p>If you would like to report an issue, post it to the <a href="https://spark-project.atlassian.net/browse/SPARK">Spark issue tracker</a>.</p>
<p class="question">Where can I get more help?</p>
-<p class="answer">Please post on the <a href="http://groups.google.com/group/spark-users">spark-users</a> mailing list. We'll be glad to help!</p>
+<p class="answer">Please post on the <a href="http://apache-spark-user-list.1001560.n3.nabble.com">Spark Users</a> mailing list. We'll be glad to help!</p>
- </article><!-- #post -->
-
- </div><!-- #content -->
-
- <footer id="colophon" role="contentinfo">
- <div id="site-generator">
- <p style="padding-top: 0; padding-bottom: 15px;">
- Apache Spark is an effort undergoing incubation at The Apache Software Foundation.
- <a href="http://incubator.apache.org/" style="border: none;">
- <img style="vertical-align: middle; border: none;" src="/images/incubator-logo.png" alt="Apache Incubator" title="Apache Incubator" />
- </a>
- </p>
</div>
-</footer><!-- #colophon -->
+</div>
+
+
+
+<footer class="small">
+ <hr>
+ Apache Spark is an effort undergoing incubation at The Apache Software Foundation.
+ <a href="http://incubator.apache.org/" style="border: none;">
+ <img style="vertical-align: middle; float: right; margin-bottom: 15px;"
+ src="/images/incubator-logo.png" alt="Apache Incubator" title="Apache Incubator" />
+ </a>
+</footer>
- </div><!-- #primary -->
- </div><!-- #main -->
-</div><!-- #page -->
+</div>
+<script src="https://code.jquery.com/jquery.js"></script>
+<script src="https://netdna.bootstrapcdn.com/bootstrap/3.0.3/js/bootstrap.min.js"></script>
+<script src="/js/lang-tabs.js"></script>
</body>
</html>