diff options
author | Tathagata Das <tathagata.das1565@gmail.com> | 2014-01-10 12:17:09 -0800 |
---|---|---|
committer | Tathagata Das <tathagata.das1565@gmail.com> | 2014-01-10 12:17:09 -0800 |
commit | e4bb845238d0df48f8258e925caf9af5a107af46 (patch) | |
tree | 3a11ad9abe691584cc64be3ca31403305f831686 /examples/src | |
parent | 2213a5a47fb2d73e3fdebec24356c69ea2968b81 (diff) | |
download | spark-e4bb845238d0df48f8258e925caf9af5a107af46.tar.gz spark-e4bb845238d0df48f8258e925caf9af5a107af46.tar.bz2 spark-e4bb845238d0df48f8258e925caf9af5a107af46.zip |
Updated docs based on Patrick's comments in PR 383.
Diffstat (limited to 'examples/src')
-rw-r--r-- | examples/src/main/scala/org/apache/spark/streaming/examples/NetworkWordCount.scala | 3 | ||||
-rw-r--r-- | examples/src/main/scala/org/apache/spark/streaming/examples/RecoverableNetworkWordCount.scala | 49 |
2 files changed, 40 insertions, 12 deletions
diff --git a/examples/src/main/scala/org/apache/spark/streaming/examples/NetworkWordCount.scala b/examples/src/main/scala/org/apache/spark/streaming/examples/NetworkWordCount.scala index aba1704825..4b896eaccb 100644 --- a/examples/src/main/scala/org/apache/spark/streaming/examples/NetworkWordCount.scala +++ b/examples/src/main/scala/org/apache/spark/streaming/examples/NetworkWordCount.scala @@ -21,7 +21,8 @@ import org.apache.spark.streaming.{Seconds, StreamingContext} import org.apache.spark.streaming.StreamingContext._ /** - * Counts words in UTF8 encoded, '\n' delimited text received from the network every second. + * Counts words in text encoded with UTF8 received from the network every second. + * * Usage: NetworkWordCount <master> <hostname> <port> * <master> is the Spark master URL. In local mode, <master> should be 'local[n]' with n > 1. * <hostname> and <port> describe the TCP server that Spark Streaming would connect to receive data. diff --git a/examples/src/main/scala/org/apache/spark/streaming/examples/RecoverableNetworkWordCount.scala b/examples/src/main/scala/org/apache/spark/streaming/examples/RecoverableNetworkWordCount.scala index 739f805e87..d51e6e9418 100644 --- a/examples/src/main/scala/org/apache/spark/streaming/examples/RecoverableNetworkWordCount.scala +++ b/examples/src/main/scala/org/apache/spark/streaming/examples/RecoverableNetworkWordCount.scala @@ -26,18 +26,41 @@ import com.google.common.io.Files import java.nio.charset.Charset /** - * Counts words in UTF8 encoded, '\n' delimited text received from the network every second. - * Usage: NetworkWordCount <master> <hostname> <port> <checkpoint-directory> <output-directory> + * Counts words in text encoded with UTF8 received from the network every second. + * + * Usage: NetworkWordCount <master> <hostname> <port> <checkpoint-directory> <output-file> * <master> is the Spark master URL. In local mode, <master> should be 'local[n]' with n > 1. * <hostname> and <port> describe the TCP server that Spark Streaming would connect to receive data. - * <checkpoint-directory> directory in a Hadoop compatible file system to which checkpoint - * data will be saved to; this must be a fault-tolerant file system - * like HDFS for the system to recover from driver failures - * <checkpoint- + * <checkpoint-directory> directory to HDFS-compatible file system which checkpoint data + * <output-file> file to which the word counts will be appended + * + * In local mode, <master> should be 'local[n]' with n > 1 + * <checkpoint-directory> and <output-file> must be absolute paths + * + * * To run this on your local machine, you need to first run a Netcat server - * `$ nc -lk 9999` - * and then run the example - * `$ ./run-example org.apache.spark.streaming.examples.NetworkWordCount local[2] localhost 9999` + * + * `$ nc -lk 9999` + * + * and run the example as + * + * `$ ./run-example org.apache.spark.streaming.examples.RecoverableNetworkWordCount \ + * local[2] localhost 9999 ~/checkpoint/ ~/out` + * + * If the directory ~/checkpoint/ does not exist (e.g. running for the first time), it will create + * a new StreamingContext (will print "Creating new context" to the console). Otherwise, if + * checkpoint data exists in ~/checkpoint/, then it will create StreamingContext from + * the checkpoint data. + * + * To run this example in a local standalone cluster with automatic driver recovery, + * + * `$ ./spark-class org.apache.spark.deploy.Client -s launch <cluster-url> <path-to-examples-jar> \ + * org.apache.spark.streaming.examples.RecoverableNetworkWordCount <cluster-url> \ + * localhost 9999 ~/checkpoint ~/out` + * + * <path-to-examples-jar> would typically be <spark-dir>/examples/target/scala-XX/spark-examples....jar + * + * Refer to the online documentation for more details. */ object RecoverableNetworkWordCount { @@ -52,7 +75,7 @@ object RecoverableNetworkWordCount { // Create the context with a 1 second batch size val ssc = new StreamingContext(master, "RecoverableNetworkWordCount", Seconds(1), - System.getenv("SPARK_HOME"), Seq(System.getenv("SPARK_EXAMPLES_JAR"))) + System.getenv("SPARK_HOME"), StreamingContext.jarOfClass(this.getClass)) // Create a NetworkInputDStream on target ip:port and count the // words in input stream of \n delimited text (eg. generated by 'nc') @@ -74,9 +97,13 @@ object RecoverableNetworkWordCount { System.err.println( """ |Usage: RecoverableNetworkWordCount <master> <hostname> <port> <checkpoint-directory> <output-file> + | <master> is the Spark master URL. In local mode, <master> should be 'local[n]' with n > 1. + | <hostname> and <port> describe the TCP server that Spark Streaming would connect to receive data. + | <checkpoint-directory> directory to HDFS-compatible file system which checkpoint data + | <output-file> file to which the word counts will be appended | |In local mode, <master> should be 'local[n]' with n > 1 - |Both <checkpoint-directory> and <output-file> should be full paths + |Both <checkpoint-directory> and <output-file> must be absolute paths """.stripMargin ) System.exit(1) |