author     Patrick Wendell <pwendell@gmail.com>  2013-01-17 18:35:24 -0800
committer  Patrick Wendell <pwendell@gmail.com>  2013-01-17 18:41:59 -0800
commit     2261e62ee52495599b7a8717884e878497d343ea (patch)
tree       b73cf6ea2bed63da5289f37b183a4fbe5474f63d /streaming
parent     82b8707c6bbb3926e59c241b6e6d5ead5467aae7 (diff)
Style cleanup
Diffstat (limited to 'streaming')
-rw-r--r--  streaming/src/main/scala/spark/streaming/api/java/JavaDStream.scala           | 22
-rw-r--r--  streaming/src/main/scala/spark/streaming/api/java/JavaPairDStream.scala       |  1
-rw-r--r--  streaming/src/main/scala/spark/streaming/api/java/JavaStreamingContext.scala  |  6
3 files changed, 26 insertions, 3 deletions
diff --git a/streaming/src/main/scala/spark/streaming/api/java/JavaDStream.scala b/streaming/src/main/scala/spark/streaming/api/java/JavaDStream.scala
index 32faef5670..e21e54d3e5 100644
--- a/streaming/src/main/scala/spark/streaming/api/java/JavaDStream.scala
+++ b/streaming/src/main/scala/spark/streaming/api/java/JavaDStream.scala
@@ -6,6 +6,26 @@ import spark.api.java.JavaRDD
import java.util.{List => JList}
import spark.storage.StorageLevel
+/**
+ * A Discretized Stream (DStream), the basic abstraction in Spark Streaming, is a continuous
+ * sequence of RDDs (of the same type) representing a continuous stream of data (see [[spark.RDD]]
+ * for more details on RDDs). DStreams can either be created from live data (such as data from
+ * HDFS, Kafka or Flume) or generated by transforming existing DStreams using operations such as
+ * `map`, `window` and `reduceByKeyAndWindow`. While a Spark Streaming program is running, each
+ * DStream periodically generates an RDD, either from live data or by transforming the RDD
+ * generated by a parent DStream.
+ *
+ * This class contains the basic operations available on all DStreams, such as `map`, `filter` and
+ * `window`. In addition, [[spark.streaming.api.java.JavaPairDStream]] contains operations available
+ * only on DStreams of key-value pairs, such as `groupByKeyAndWindow` and `join`. These operations
+ * are automatically available on any DStream of the right type (e.g., DStream[(Int, Int)]) through
+ * implicit conversions when `spark.streaming.StreamingContext._` is imported.
+ *
+ * A DStream is internally characterized by a few basic properties:
+ * - A list of other DStreams that the DStream depends on
+ * - A time interval at which the DStream generates an RDD
+ * - A function that is used to generate an RDD after each time interval
+ */
class JavaDStream[T](val dstream: DStream[T])(implicit val classManifest: ClassManifest[T])
extends JavaDStreamLike[T, JavaDStream[T]] {
@@ -69,4 +89,4 @@ class JavaDStream[T](val dstream: DStream[T])(implicit val classManifest: ClassM
object JavaDStream {
implicit def fromDStream[T: ClassManifest](dstream: DStream[T]): JavaDStream[T] =
new JavaDStream[T](dstream)
-}
+}
\ No newline at end of file
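
The Scaladoc added above describes DStreams as RDD sequences built either from live input or by transforming other DStreams, with pair operations unlocked by importing `spark.streaming.StreamingContext._`. A minimal Scala sketch of that usage follows; the cluster URL, host, and port are placeholders, and the `socketTextStream` input method is assumed from the 0.x-era API rather than taken from this diff:

import spark.streaming.{Seconds, StreamingContext}
// Implicit conversions that add pair operations (reduceByKey,
// groupByKeyAndWindow, join, ...) to any DStream[(K, V)].
import spark.streaming.StreamingContext._

object WordCountSketch {
  def main(args: Array[String]) {
    // Placeholder cluster URL and job name; one RDD is generated per 1-second batch.
    val ssc = new StreamingContext("local[2]", "WordCountSketch", Seconds(1))

    // A DStream created from live data (assumed input method; placeholder host/port).
    val lines = ssc.socketTextStream("localhost", 9999)

    // DStreams generated by transforming an existing DStream.
    val words = lines.flatMap(_.split(" "))
    val counts = words.map(w => (w, 1)).reduceByKey(_ + _)

    counts.print()
    ssc.start()
  }
}

Each transformation above yields a new DStream whose per-interval RDD is computed from its parent's RDD, matching the three properties listed at the end of the Scaladoc.
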
diff --git a/streaming/src/main/scala/spark/streaming/api/java/JavaPairDStream.scala b/streaming/src/main/scala/spark/streaming/api/java/JavaPairDStream.scala
index 8c76d8c1d8..ef10c091ca 100644
--- a/streaming/src/main/scala/spark/streaming/api/java/JavaPairDStream.scala
+++ b/streaming/src/main/scala/spark/streaming/api/java/JavaPairDStream.scala
@@ -14,7 +14,6 @@ import org.apache.hadoop.mapreduce.{OutputFormat => NewOutputFormat}
import org.apache.hadoop.conf.Configuration
import spark.api.java.JavaPairRDD
import spark.storage.StorageLevel
-import java.lang
import com.google.common.base.Optional
class JavaPairDStream[K, V](val dstream: DStream[(K, V)])(
diff --git a/streaming/src/main/scala/spark/streaming/api/java/JavaStreamingContext.scala b/streaming/src/main/scala/spark/streaming/api/java/JavaStreamingContext.scala
index ebbc516b38..7e1c2a999f 100644
--- a/streaming/src/main/scala/spark/streaming/api/java/JavaStreamingContext.scala
+++ b/streaming/src/main/scala/spark/streaming/api/java/JavaStreamingContext.scala
@@ -12,10 +12,14 @@ import org.apache.hadoop.mapreduce.{InputFormat => NewInputFormat}
import java.io.InputStream
import java.util.{Map => JMap}
+/**
+ * A StreamingContext is the main entry point for Spark Streaming functionality. Besides the basic
+ * information (such as cluster URL and job name) needed to internally create a SparkContext, it
+ * provides methods used to create DStreams from various input sources.
+ */
class JavaStreamingContext(val ssc: StreamingContext) {
// TODOs:
- // - Test StreamingContext functions
// - Test to/from Hadoop functions
// - Support creating and registering InputStreams
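
The JavaStreamingContext Scaladoc above, together with the primary constructor `JavaStreamingContext(val ssc: StreamingContext)` visible in the diff, suggests the following minimal Scala sketch; the `textFileStream` input method and the directory path are assumptions, not taken from this commit:

import spark.streaming.{Seconds, StreamingContext}
import spark.streaming.api.java.JavaStreamingContext

object JavaApiSketch {
  def main(args: Array[String]) {
    // Placeholder cluster URL and job name for the underlying SparkContext.
    val ssc = new StreamingContext("local[2]", "JavaApiSketch", Seconds(1))

    // The primary constructor shown in the diff wraps an existing StreamingContext.
    val jssc = new JavaStreamingContext(ssc)

    // Assumed input method: a JavaDStream from text files appearing in a directory.
    val lines = jssc.textFileStream("/tmp/streaming-input")

    // JavaDStream exposes the wrapped DStream as a public val (see JavaDStream.scala above).
    lines.dstream.print()

    jssc.ssc.start()
  }
}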