summaryrefslogblamecommitdiff
path: root/site/streaming/index.html
blob: 726b8556405fc372b313632a52e31e20fbafa8c3 (plain) (tree)
1
2
3
4
5
6
7
8
9
10
11
12
13
14













                                                                        



                                                                                                                                  



                                                      


                                                          











                                                                                                                     


                                                      
                  
 














                                                                                     
                                                         
                                                                                           


                                        





                                                  
                                                 

























                                                                                      
                                         

                                  
                                                         

                                                                 
                                                        
                                   
                                                                                                                            






                                                                   
                                                                           
                                                                                       

             
                                                    
                           
                                                                                 






                                                                                                              

                                                                                                                



                                          












                                                                                              










                                                   


                                                                                                                                       


                                                                                


                                                                                


                                                                                







                                                                                                      
                           
          
                            
                                                       

                                                               
                                                      
           
                                                                                                               















                                                                                 
                                           

                                                                                               
                                         


























































                                                                                                                                    













































                                                                                                                                                                                                                        
                                                                                                                                                                                                                                                                                                       






                                                                           
                                                                                    



        

        




                      
                                                                                                  
                                                                     



         

       
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="utf-8">
  <meta http-equiv="X-UA-Compatible" content="IE=edge">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">

  <title>
     Spark Streaming | Apache Spark
    
  </title>

  

  
    <meta name="description" content="Spark Streaming makes it easy to build scalable and fault-tolerant streaming applications.">
  

  <!-- Bootstrap core CSS -->
  <link href="/css/cerulean.min.css" rel="stylesheet">
  <link href="/css/custom.css" rel="stylesheet">

  <!-- Code highlighter CSS -->
  <link href="/css/pygments-default.css" rel="stylesheet">

  <script type="text/javascript">
  <!-- Google Analytics initialization -->
  // Google Analytics command queue; keep any queue that already exists.
  var _gaq = _gaq || [];
  _gaq.push(['_setAccount', 'UA-32518208-2']);
  _gaq.push(['_trackPageview']);

  // Load ga.js asynchronously by inserting a script element ahead of the
  // first script element already in the document.
  (function () {
    var onHttps = document.location.protocol === 'https:';
    var origin = onHttps ? 'https://ssl' : 'http://www';

    var loader = document.createElement('script');
    loader.type = 'text/javascript';
    loader.async = true;
    loader.src = origin + '.google-analytics.com/ga.js';

    var anchor = document.getElementsByTagName('script')[0];
    anchor.parentNode.insertBefore(loader, anchor);
  }());

  <!-- Adds slight delay to links to allow async reporting -->
  // Queues a '_trackEvent' command for the given category and action, then
  // navigates to link.href 100 ms later.
  function trackOutboundLink(link, category, action) {
    try {
      _gaq.push(['_trackEvent', category, action]);
    } catch (ignored) {
      // Best effort: a tracking failure must not prevent navigation.
    }

    // link.href is read when the timer fires, not when the click is recorded.
    var followLink = function () {
      document.location.href = link.href;
    };
    setTimeout(followLink, 100);
  }
  </script>

  <!-- HTML5 shim and Respond.js IE8 support of HTML5 elements and media queries -->
  <!--[if lt IE 9]>
  <script src="https://oss.maxcdn.com/libs/html5shiv/3.7.0/html5shiv.js"></script>
  <script src="https://oss.maxcdn.com/libs/respond.js/1.3.0/respond.min.js"></script>
  <![endif]-->
</head>

<body>

<!-- Loaded synchronously, in this order. jQuery comes first because Bootstrap 3's
     JavaScript plugins require it to be present already.
     NOTE(review): the two site scripts presumably rely on jQuery as well; confirm. -->
<script src="https://code.jquery.com/jquery.js"></script>
<script src="https://netdna.bootstrapcdn.com/bootstrap/3.0.3/js/bootstrap.min.js"></script>
<script src="/js/lang-tabs.js"></script>
<script src="/js/downloads.js"></script>

<div class="container" style="max-width: 1200px;">

<!-- Page masthead: the project logo (linking to the site root) followed by the
     sub-project name. The logo is the only content of its link, so its alt text
     is what gives that link an accessible name. -->
<div class="masthead">
  
    <p class="lead">
      <a href="/">
      <img src="/images/spark-logo-trademark.png" alt="Apache Spark"
      style="height:100px; width:auto; vertical-align: bottom; margin-top: 20px;"></a>
      <a href="#"><span class="subproject">
        Streaming
      </span></a>
    </p>
  
</div>

<nav class="navbar navbar-default" role="navigation">
  <!-- Brand and toggle get grouped for better mobile display -->
  <div class="navbar-header">
    <button type="button" class="navbar-toggle" data-toggle="collapse"
            data-target="#navbar-collapse-1">
      <span class="sr-only">Toggle navigation</span>
      <span class="icon-bar"></span>
      <span class="icon-bar"></span>
      <span class="icon-bar"></span>
    </button>
  </div>

  <!-- Collect the nav links, forms, and other content for toggling -->
  <div class="collapse navbar-collapse" id="navbar-collapse-1">
    <ul class="nav navbar-nav">
      <li><a href="/downloads.html">Download</a></li>
      <li class="dropdown">
        <a href="#" class="dropdown-toggle" data-toggle="dropdown">
          Libraries <b class="caret"></b>
        </a>
        <ul class="dropdown-menu">
          <li><a href="/sql/">SQL and DataFrames</a></li>
          <li><a href="/streaming/">Spark Streaming</a></li>
          <li><a href="/mllib/">MLlib (machine learning)</a></li>
          <li><a href="/graphx/">GraphX (graph)</a></li>
          <li class="divider"></li>
          <li><a href="https://cwiki.apache.org/confluence/display/SPARK/Third+Party+Projects">Third-Party Packages</a></li>
        </ul>
      </li>
      <li class="dropdown">
        <a href="#" class="dropdown-toggle" data-toggle="dropdown">
          Documentation <b class="caret"></b>
        </a>
        <ul class="dropdown-menu">
          <li><a href="/docs/latest/">Latest Release (Spark 2.0.2)</a></li>
          <li><a href="/documentation.html">Older Versions and Other Resources</a></li>
        </ul>
      </li>
      <li><a href="/examples.html">Examples</a></li>
      <li class="dropdown">
        <a href="/community.html" class="dropdown-toggle" data-toggle="dropdown">
          Community <b class="caret"></b>
        </a>
        <ul class="dropdown-menu">
          <li><a href="/community.html">Mailing Lists</a></li>
          <li><a href="/community.html#events">Events and Meetups</a></li>
          <li><a href="/community.html#history">Project History</a></li>
          <li><a href="https://cwiki.apache.org/confluence/display/SPARK/Powered+By+Spark">Powered By</a></li>
          <li><a href="https://cwiki.apache.org/confluence/display/SPARK/Committers">Project Committers</a></li>
          <li><a href="https://issues.apache.org/jira/browse/SPARK">Issue Tracker</a></li>
        </ul>
      </li>
      <li><a href="/faq.html">FAQ</a></li>
    </ul>
    <ul class="nav navbar-nav navbar-right">
      <li class="dropdown">
        <a href="http://www.apache.org/" class="dropdown-toggle" data-toggle="dropdown">
          Apache Software Foundation <b class="caret"></b></a>
        <ul class="dropdown-menu">
          <li><a href="http://www.apache.org/">Apache Homepage</a></li>
          <li><a href="http://www.apache.org/licenses/">License</a></li>
          <li><a href="http://www.apache.org/foundation/sponsorship.html">Sponsorship</a></li>
          <li><a href="http://www.apache.org/foundation/thanks.html">Thanks</a></li>
          <li><a href="http://www.apache.org/security/">Security</a></li>
        </ul>
      </li>
    </ul>
  </div>
  <!-- /.navbar-collapse -->
</nav>


<div class="row">
  <div class="col-md-3 col-md-push-9">
    <div class="news" style="margin-bottom: 20px;">
      <h5>Latest News</h5>
      <ul class="list-unstyled">
        
          <li><a href="/news/spark-wins-cloudsort-100tb-benchmark.html">Spark wins CloudSort Benchmark as the most efficient engine</a>
          <span class="small">(Nov 15, 2016)</span></li>
        
          <li><a href="/news/spark-2-0-2-released.html">Spark 2.0.2 released</a>
          <span class="small">(Nov 14, 2016)</span></li>
        
          <li><a href="/news/spark-1-6-3-released.html">Spark 1.6.3 released</a>
          <span class="small">(Nov 07, 2016)</span></li>
        
          <li><a href="/news/spark-2-0-1-released.html">Spark 2.0.1 released</a>
          <span class="small">(Oct 03, 2016)</span></li>
        
      </ul>
      <p class="small" style="text-align: right;"><a href="/news/index.html">Archive</a></p>
    </div>
    <div class="hidden-xs hidden-sm">
      <a href="/downloads.html" class="btn btn-success btn-lg btn-block" style="margin-bottom: 30px;">
        Download Spark
      </a>
      <p style="font-size: 16px; font-weight: 500; color: #555;">
        Built-in Libraries:
      </p>
      <ul class="list-none">
        <li><a href="/sql/">SQL and DataFrames</a></li>
        <li><a href="/streaming/">Spark Streaming</a></li>
        <li><a href="/mllib/">MLlib (machine learning)</a></li>
        <li><a href="/graphx/">GraphX (graph)</a></li>
      </ul>
      <a href="https://cwiki.apache.org/confluence/display/SPARK/Third+Party+Projects">Third-Party Packages</a>
    </div>
  </div>

  <div class="col-md-9 col-md-pull-3">
    <div class="jumbotron">
  <b>Spark Streaming</b> makes it easy to build scalable and fault-tolerant streaming
  applications.
</div>

<div class="row row-padded">
  <div class="col-md-7 col-sm-7">
    <h2>Ease of Use</h2>
    <p class="lead">
      Build applications through high-level operators.
    </p>
    <p>
      Spark Streaming brings Apache Spark's
      <a href="/docs/latest/streaming-programming-guide.html">language-integrated API</a>
      to stream processing, letting you write streaming jobs the same way you write batch jobs.
      It supports Java, Scala and Python.
    </p>
  </div>
  <div class="col-md-5 col-sm-5 col-padded-top col-center">

    <div style="margin-top: 15px; text-align: left; display: inline-block;">
      <div class="code">
        TwitterUtils.createStream(...)<br />
        &nbsp;&nbsp;&nbsp;&nbsp;.<span class="sparkop">filter</span>(<span class="closure">_.getText.contains("Spark")</span>)<br />
        &nbsp;&nbsp;&nbsp;&nbsp;.<span class="sparkop">countByWindow</span>(Seconds(5))
      </div>
      <div class="caption">Counting tweets on a sliding window</div>
    </div>
  </div>
</div>

<div class="row row-padded">
  <div class="col-md-7 col-sm-7">
    <h2>Fault Tolerance</h2>
    <p class="lead">
      Stateful exactly-once semantics out of the box.
    </p>
    <p>
      Spark Streaming recovers both lost work
      and operator state (e.g. sliding windows) out of the box, without any extra code on your part.
    </p>
  </div>
  <div class="col-md-5 col-sm-5 col-padded-top col-center">
    <div style="width: 100%; max-width: 300px; display: inline-block;">
      <!-- NOTE(review): the alt text is inferred from the image file name and the
           section heading; confirm it against the actual image. -->
      <img src="/images/spark-streaming-recovery.png" alt="Illustration of Spark Streaming fault recovery" style="width: 100%; max-width: 300px;" />
    </div>
  </div>
</div>

<div class="row row-padded">
  <div class="col-md-7 col-sm-7">
    <h2>Spark Integration</h2>
    <p class="lead">
      Combine streaming with batch and interactive queries.
    </p>
    <p>
      By running on Spark, Spark Streaming lets you reuse the same code for batch
      processing, join streams against historical data, or run ad-hoc
      queries on stream state.
      Build powerful interactive applications, not just analytics.
    </p>
  </div>
  <div class="col-md-5 col-sm-5 col-padded-top col-center">
    <div style="margin-top: 20px; text-align: left; display: inline-block;">
      <div class="code">
        stream.<span class="sparkop">join</span>(historicCounts).<span class="sparkop">filter</span> {<span class="closure"><br />
        &nbsp;&nbsp;case (word, (curCount, oldCount)) =&gt;<br />
        &nbsp;&nbsp;&nbsp;&nbsp;curCount &gt; oldCount<br />
        </span>}
      </div>
      <div class="caption">Find words with higher frequency than historic data</div>
    </div>
  </div>
</div>

<div class="row">
  <div class="col-md-4 col-padded">
    <h3>Deployment Options</h3>
    <p>
      Spark Streaming can read data from
      <a href="http://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-hdfs/HdfsUserGuide.html">HDFS</a>,
      <a href="http://flume.apache.org">Flume</a>,
      <a href="http://kafka.apache.org">Kafka</a>,
      <a href="https://dev.twitter.com">Twitter</a> and
      <a href="http://zeromq.org">ZeroMQ</a>.
      You can also define your own custom data sources.
    </p>
    <p>
      You can run Spark Streaming on Spark's <a href="/docs/latest/spark-standalone.html">standalone cluster mode</a>
      or <a href="/docs/latest/ec2-scripts.html">EC2</a>.
      It also includes a local run mode for development.
      In production,
      Spark Streaming uses <a href="http://zookeeper.apache.org">ZooKeeper</a> and <a href="http://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-hdfs/HdfsUserGuide.html">HDFS</a> for high availability.
    </p>
  </div>

  <div class="col-md-4 col-padded">
    <h3>Community</h3>
    <p>
      Spark Streaming is developed as part of Apache Spark. It thus gets
      tested and updated with each Spark release.
    </p>
    <p>
      If you have questions about the system, ask on the
      <a href="/community.html#mailing-lists">Spark mailing lists</a>.
    </p>
    <p>
      The Spark Streaming developers welcome contributions. If you'd like to help out,
      read <a href="https://cwiki.apache.org/confluence/display/SPARK/Contributing+to+Spark">how to
      contribute to Spark</a>, and send us a patch!
    </p>
  </div>

  <div class="col-md-4 col-padded">
    <h3>Getting Started</h3>
    <p>
      To get started with Spark Streaming:
    </p>
    <ul class="list-narrow">
      <li><a href="/downloads.html">Download Spark</a>. It includes Streaming as a module.</li>
      <li>Read the <a href="/docs/latest/streaming-programming-guide.html">Spark Streaming programming guide</a>, which includes a tutorial and describes system architecture, configuration and high availability.</li>
      <li>Check out example programs in <a href="https://github.com/apache/spark/tree/master/examples/src/main/scala/org/apache/spark/examples/streaming">Scala</a> and <a href="https://github.com/apache/spark/tree/master/examples/src/main/java/org/apache/spark/examples/streaming">Java</a>.</li>
    </ul>
  </div>
</div>

<div class="row">
  <div class="col-sm-12 col-center">
    <a href="/downloads.html" class="btn btn-success btn-lg btn-multiline">
      Download Apache Spark<br /><span class="small">Includes Spark Streaming</span>
    </a>
  </div>
</div>

  </div>
</div>



<footer class="small">
  <hr>
  Apache Spark, Spark, Apache, and the Spark logo are <a href="/trademarks.html">trademarks</a> of
  <a href="http://www.apache.org">The Apache Software Foundation</a>.
</footer>

</div>

</body>
</html>