summaryrefslogblamecommitdiff
path: root/site/news/run-spark-and-shark-on-amazon-emr.html
blob: ec904cc231967aeb6f520f4782cb4f882ad2c3d1 (plain) (tree)
1
2
3
4
5
6
7
8
9
10
11
               
                
      



                                                                        



                                                       
 




                                                      
 









                                                                                                                     
 









                                                              
           
 




                                                                                     

       

























                                                                                                              
        
 








                                                                       
          






































                                                                                                              
        


                                                                                


                                                                                            


                                                                                


                                                                                                                                










                                                                                                      
        






                                                                             
 

                                                

 
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          

 



                                       
 
        





                      

                                                                     
         
 
      
 


                                                                                     


       
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="utf-8">
  <meta http-equiv="X-UA-Compatible" content="IE=edge">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">

  <title>
     Spark/Shark Tutorial for Amazon EMR | Apache Spark
    
  </title>

  

  <!-- Bootstrap core CSS -->
  <link href="/css/cerulean.min.css" rel="stylesheet">
  <link href="/css/custom.css" rel="stylesheet">

  <script type="text/javascript">
  // Google Analytics initialization
  // Command queue for the ga.js tracker; an already-existing queue is reused.
  var _gaq = _gaq || [];
  _gaq.push(['_setAccount', 'UA-32518208-2']);
  _gaq.push(['_trackPageview']);

  // Inject the ga.js loader asynchronously, ahead of the first script element.
  (function() {
    var isSecure = 'https:' == document.location.protocol;

    var loader = document.createElement('script');
    loader.type = 'text/javascript';
    loader.async = true;
    loader.src = (isSecure ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';

    var firstScript = document.getElementsByTagName('script')[0];
    firstScript.parentNode.insertBefore(loader, firstScript);
  })();

  // Adds slight delay to links to allow async reporting
  /**
   * Queues a Google Analytics event for a link click, then navigates to the
   * link's target after a short delay.
   *
   * @param {{href: string}} link - Object whose `href` is the destination URL.
   * @param {string} category - Event category passed to `_trackEvent`.
   * @param {string} action - Event action passed to `_trackEvent`.
   */
  function trackOutboundLink(link, category, action) {
    var followLink = function() {
      document.location.href = link.href;
    };

    try {
      _gaq.push(['_trackEvent', category, action]);
    } catch (ignored) {
      // Best effort: a failure while queueing the event must not stop navigation.
    }

    // Navigate 100 ms later rather than immediately.
    setTimeout(followLink, 100);
  }
  </script>

  <!-- HTML5 shim and Respond.js IE8 support of HTML5 elements and media queries -->
  <!--[if lt IE 9]>
  <script src="https://oss.maxcdn.com/libs/html5shiv/3.7.0/html5shiv.js"></script>
  <script src="https://oss.maxcdn.com/libs/respond.js/1.3.0/respond.min.js"></script>
  <![endif]-->
</head>

<body>

<div class="container" style="max-width: 1200px;">

<div class="masthead">
  
    <p class="lead">
      <!-- The logo is the only content of the home link, so its alt text
           provides the link's accessible name. -->
      <a href="/">
      <img src="/images/spark-logo.png" alt="Apache Spark"
        style="height:100px; width:auto; vertical-align: bottom; margin-top: 20px;"></a><span class="tagline">
          Lightning-fast cluster computing
      </span>
    </p>
  
</div>

<nav class="navbar navbar-default" role="navigation">
  <!-- Brand and toggle get grouped for better mobile display -->
  <div class="navbar-header">
    <button type="button" class="navbar-toggle" data-toggle="collapse"
            data-target="#navbar-collapse-1">
      <span class="sr-only">Toggle navigation</span>
      <span class="icon-bar"></span>
      <span class="icon-bar"></span>
      <span class="icon-bar"></span>
    </button>
  </div>

  <!-- Collect the nav links, forms, and other content for toggling -->
  <div class="collapse navbar-collapse" id="navbar-collapse-1">
    <ul class="nav navbar-nav">
      <li><a href="/downloads.html">Download</a></li>
      <li class="dropdown">
        <a href="#" class="dropdown-toggle" data-toggle="dropdown">
          Related Projects <b class="caret"></b>
        </a>
        <ul class="dropdown-menu">
          
          <li><a href="http://shark.cs.berkeley.edu">Shark (SQL)</a></li>
          <li><a href="/streaming/">Spark Streaming</a></li>
          <li><a href="/mllib/">MLlib (machine learning)</a></li>
          <li><a href="http://amplab.github.io/graphx/">GraphX (graph)</a></li>
        </ul>
      </li>
      <li class="dropdown">
        <a href="#" class="dropdown-toggle" data-toggle="dropdown">
          Documentation <b class="caret"></b>
        </a>
        <ul class="dropdown-menu">
          <li><a href="/documentation.html">Overview</a></li>
          <li><a href="/docs/latest/">Latest Release</a></li>
          <li><a href="/examples.html">Examples</a></li>
        </ul>
      </li>
      <li class="dropdown">
        <a href="#" class="dropdown-toggle" data-toggle="dropdown">
          Community <b class="caret"></b>
        </a>
        <ul class="dropdown-menu">
          <li><a href="/community.html">Mailing Lists</a></li>
          <li><a href="/community.html#events">Events and Meetups</a></li>
          <li><a href="/community.html#history">Project History</a></li>
          <li><a href="https://cwiki.apache.org/confluence/display/SPARK/Powered+By+Spark">Powered By</a></li>
        </ul>
      </li>
      <li><a href="/faq.html">FAQ</a></li>
    </ul>
  </div>
  <!-- /.navbar-collapse -->
</nav>


<div class="row">
  <div class="col-md-3 col-md-push-9">
    <div class="news" style="margin-bottom: 20px;">
      <h5>Latest News</h5>
      <ul class="list-unstyled">
        
          <li><a href="/news/spark-1-0-0-released.html">Spark 1.0.0 released</a>
          <span class="small">(May 30, 2014)</span></li>
        
          <li><a href="/news/spark-summit-agenda-posted.html">Spark Summit agenda posted</a>
          <span class="small">(May 11, 2014)</span></li>
        
          <li><a href="/news/spark-0-9-1-released.html">Spark 0.9.1 released</a>
          <span class="small">(Apr 09, 2014)</span></li>
        
          <li><a href="/news/submit-talks-to-spark-summit-2014.html">Submissions and registration open for Spark Summit 2014</a>
          <span class="small">(Mar 20, 2014)</span></li>
        
      </ul>
      <p class="small" style="text-align: right;"><a href="/news/index.html">Archive</a></p>
    </div>
    <div class="hidden-xs hidden-sm">
      <a href="/downloads.html" class="btn btn-success btn-lg btn-block" style="margin-bottom: 30px;">
        Download Spark
      </a>
      <p style="font-size: 16px; font-weight: 500; color: #555;">
        Related Projects:
      </p>
      <ul class="list-narrow">
        
        <li><a href="http://shark.cs.berkeley.edu">Shark (SQL)</a></li>
        <li><a href="/streaming/">Spark Streaming</a></li>
        <li><a href="/mllib/">MLlib (machine learning)</a></li>
        <li><a href="http://amplab.github.io/graphx/">GraphX (graph)</a></li>
      </ul>
    </div>
  </div>

  <div class="col-md-9 col-md-pull-3">
    <h2>Spark/Shark Tutorial for Amazon EMR</h2>


<p>This weekend, Amazon posted an <a href="http://aws.amazon.com/articles/Elastic-MapReduce/4926593393724923">article</a> and code that make it easy to launch Spark and Shark on Elastic MapReduce. The article includes examples of how to run both interactive Scala commands and SQL queries from Shark on data in S3. Head over to the <a href="http://aws.amazon.com/articles/Elastic-MapReduce/4926593393724923">Amazon article</a> for details. We&#8217;re very excited because, to our knowledge, this makes Spark the first non-Hadoop engine that you can launch with EMR.</p>


<p>
<br/>
<a href="/news/">Spark News Archive</a>
</p>

  </div>
</div>



<footer class="small">
  <hr>
  Apache Spark, Spark, Apache, and the Spark logo are trademarks of
  <a href="http://www.apache.org">The Apache Software Foundation</a>.
</footer>

</div>

<!-- Third-party libraries use explicit https URLs so they also resolve when
     the page is opened from the file system or served over plain HTTP. -->
<script src="https://code.jquery.com/jquery.js"></script>
<script src="https://netdna.bootstrapcdn.com/bootstrap/3.0.3/js/bootstrap.min.js"></script>
<script src="/js/lang-tabs.js"></script>

</body>
</html>