diff options
author | Patrick Wendell <pwendell@gmail.com> | 2013-01-17 18:35:24 -0800 |
---|---|---|
committer | Patrick Wendell <pwendell@gmail.com> | 2013-01-17 18:41:59 -0800 |
commit | 2261e62ee52495599b7a8717884e878497d343ea (patch) | |
tree | b73cf6ea2bed63da5289f37b183a4fbe5474f63d /streaming | |
parent | 82b8707c6bbb3926e59c241b6e6d5ead5467aae7 (diff) | |
download | spark-2261e62ee52495599b7a8717884e878497d343ea.tar.gz spark-2261e62ee52495599b7a8717884e878497d343ea.tar.bz2 spark-2261e62ee52495599b7a8717884e878497d343ea.zip |
Style cleanup
Diffstat (limited to 'streaming')
3 files changed, 26 insertions, 3 deletions
diff --git a/streaming/src/main/scala/spark/streaming/api/java/JavaDStream.scala b/streaming/src/main/scala/spark/streaming/api/java/JavaDStream.scala index 32faef5670..e21e54d3e5 100644 --- a/streaming/src/main/scala/spark/streaming/api/java/JavaDStream.scala +++ b/streaming/src/main/scala/spark/streaming/api/java/JavaDStream.scala @@ -6,6 +6,26 @@ import spark.api.java.JavaRDD import java.util.{List => JList} import spark.storage.StorageLevel +/** + * A Discretized Stream (DStream), the basic abstraction in Spark Streaming, is a continuous + * sequence of RDDs (of the same type) representing a continuous stream of data (see [[spark.RDD]] + * for more details on RDDs). DStreams can either be created from live data (such as data from + * HDFS, Kafka or Flume) or generated by transforming existing DStreams using operations + * such as `map`, `window` and `reduceByKeyAndWindow`. While a Spark Streaming program is running, each + * DStream periodically generates an RDD, either from live data or by transforming the RDD generated + * by a parent DStream. + * + * This class contains the basic operations available on all DStreams, such as `map`, `filter` and + * `window`. In addition, [[spark.streaming.api.java.JavaPairDStream]] contains operations available + * only on DStreams of key-value pairs, such as `groupByKeyAndWindow` and `join`. These operations + * are automatically available on any DStream of the right type (e.g., DStream[(Int, Int)]) through + * implicit conversions when `spark.streaming.StreamingContext._` is imported.
+ * + * A DStream is internally characterized by a few basic properties: + * - A list of other DStreams that the DStream depends on + * - A time interval at which the DStream generates an RDD + * - A function that is used to generate an RDD after each time interval + */ class JavaDStream[T](val dstream: DStream[T])(implicit val classManifest: ClassManifest[T]) extends JavaDStreamLike[T, JavaDStream[T]] { @@ -69,4 +89,4 @@ class JavaDStream[T](val dstream: DStream[T])(implicit val classManifest: ClassM object JavaDStream { implicit def fromDStream[T: ClassManifest](dstream: DStream[T]): JavaDStream[T] = new JavaDStream[T](dstream) -} +}
\ No newline at end of file diff --git a/streaming/src/main/scala/spark/streaming/api/java/JavaPairDStream.scala b/streaming/src/main/scala/spark/streaming/api/java/JavaPairDStream.scala index 8c76d8c1d8..ef10c091ca 100644 --- a/streaming/src/main/scala/spark/streaming/api/java/JavaPairDStream.scala +++ b/streaming/src/main/scala/spark/streaming/api/java/JavaPairDStream.scala @@ -14,7 +14,6 @@ import org.apache.hadoop.mapreduce.{OutputFormat => NewOutputFormat} import org.apache.hadoop.conf.Configuration import spark.api.java.JavaPairRDD import spark.storage.StorageLevel -import java.lang import com.google.common.base.Optional class JavaPairDStream[K, V](val dstream: DStream[(K, V)])( diff --git a/streaming/src/main/scala/spark/streaming/api/java/JavaStreamingContext.scala b/streaming/src/main/scala/spark/streaming/api/java/JavaStreamingContext.scala index ebbc516b38..7e1c2a999f 100644 --- a/streaming/src/main/scala/spark/streaming/api/java/JavaStreamingContext.scala +++ b/streaming/src/main/scala/spark/streaming/api/java/JavaStreamingContext.scala @@ -12,10 +12,14 @@ import org.apache.hadoop.mapreduce.{InputFormat => NewInputFormat} import java.io.InputStream import java.util.{Map => JMap} +/** + * A StreamingContext is the main entry point for Spark Streaming functionality. Besides the basic + * information (such as, cluster URL and job name) to internally create a SparkContext, it provides + * methods used to create DStream from various input sources. + */ class JavaStreamingContext(val ssc: StreamingContext) { // TODOs: - // - Test StreamingContext functions // - Test to/from Hadoop functions // - Support creating and registering InputStreams |