Updated docs based on Patrick's comments in PR 383.

author: Tathagata Das <tathagata.das1565@gmail.com> 2014-01-10 12:17:09 -0800
committer: Tathagata Das <tathagata.das1565@gmail.com> 2014-01-10 12:17:09 -0800
commit: e4bb845238d0df48f8258e925caf9af5a107af46 (patch)
tree: 3a11ad9abe691584cc64be3ca31403305f831686 /examples
parent: 2213a5a47fb2d73e3fdebec24356c69ea2968b81 (diff)
download: spark-e4bb845238d0df48f8258e925caf9af5a107af46.tar.gz
spark-e4bb845238d0df48f8258e925caf9af5a107af46.tar.bz2
spark-e4bb845238d0df48f8258e925caf9af5a107af46.zip
2 files changed, 40 insertions, 12 deletions
diff --git a/examples/src/main/scala/org/apache/spark/streaming/examples/NetworkWordCount.scala b/examples/src/main/scala/org/apache/spark/streaming/examples/NetworkWordCount.scala
index aba1704825..4b896eaccb 100644
--- a/examples/src/main/scala/org/apache/spark/streaming/examples/NetworkWordCount.scala
+++ b/examples/src/main/scala/org/apache/spark/streaming/examples/NetworkWordCount.scala
@@ -21,7 +21,8 @@ import org.apache.spark.streaming.{Seconds, StreamingContext}
 import org.apache.spark.streaming.StreamingContext._
 
 /**
- * Counts words in UTF8 encoded, '\n' delimited text received from the network every second.
+ * Counts words in text encoded with UTF8 received from the network every second.
+ *
  * Usage: NetworkWordCount <master> <hostname> <port>
  *   <master> is the Spark master URL. In local mode, <master> should be 'local[n]' with n > 1.
  *   <hostname> and <port> describe the TCP server that Spark Streaming would connect to receive data.
diff --git a/examples/src/main/scala/org/apache/spark/streaming/examples/RecoverableNetworkWordCount.scala b/examples/src/main/scala/org/apache/spark/streaming/examples/RecoverableNetworkWordCount.scala
index 739f805e87..d51e6e9418 100644
--- a/examples/src/main/scala/org/apache/spark/streaming/examples/RecoverableNetworkWordCount.scala
+++ b/examples/src/main/scala/org/apache/spark/streaming/examples/RecoverableNetworkWordCount.scala
@@ -26,18 +26,41 @@ import com.google.common.io.Files
 import java.nio.charset.Charset
 
 /**
- * Counts words in UTF8 encoded, '\n' delimited text received from the network every second.
- * Usage: NetworkWordCount <master> <hostname> <port> <checkpoint-directory> <output-directory>
+ * Counts words in text encoded with UTF8 received from the network every second.
+ *
+ * Usage: NetworkWordCount <master> <hostname> <port> <checkpoint-directory> <output-file>
  *   <master> is the Spark master URL. In local mode, <master> should be 'local[n]' with n > 1.
  *   <hostname> and <port> describe the TCP server that Spark Streaming would connect to receive data.
- *   <checkpoint-directory> directory in a Hadoop compatible file system to which checkpoint
- *                          data will be saved to; this must be a fault-tolerant file system
- *                          like HDFS for the system to recover from driver failures
- *   <checkpoint-
+ *   <checkpoint-directory> directory to HDFS-compatible file system which checkpoint data
+ *   <output-file> file to which the word counts will be appended
+ *
+ * In local mode, <master> should be 'local[n]' with n > 1
+ * <checkpoint-directory> and <output-file> must be absolute paths
+ *
+ *
  * To run this on your local machine, you need to first run a Netcat server
- *    `$ nc -lk 9999`
- * and then run the example
- *    `$ ./run-example org.apache.spark.streaming.examples.NetworkWordCount local[2] localhost 9999`
+ *
+ *      `$ nc -lk 9999`
+ *
+ * and run the example as
+ *
+ *      `$ ./run-example org.apache.spark.streaming.examples.RecoverableNetworkWordCount \
+ *              local[2] localhost 9999 ~/checkpoint/ ~/out`
+ *
+ * If the directory ~/checkpoint/ does not exist (e.g. running for the first time), it will create
+ * a new StreamingContext (will print "Creating new context" to the console). Otherwise, if
+ * checkpoint data exists in ~/checkpoint/, then it will create StreamingContext from
+ * the checkpoint data.
+ *
+ * To run this example in a local standalone cluster with automatic driver recovery,
+ *
+ *      `$ ./spark-class org.apache.spark.deploy.Client -s launch <cluster-url> <path-to-examples-jar> \
+ *              org.apache.spark.streaming.examples.RecoverableNetworkWordCount <cluster-url> \
+ *              localhost 9999 ~/checkpoint ~/out`
+ *
+ * <path-to-examples-jar> would typically be <spark-dir>/examples/target/scala-XX/spark-examples....jar
+ *
+ * Refer to the online documentation for more details.
  */
 
 object RecoverableNetworkWordCount {
@@ -52,7 +75,7 @@ object RecoverableNetworkWordCount {
 
     // Create the context with a 1 second batch size
     val ssc = new StreamingContext(master, "RecoverableNetworkWordCount", Seconds(1),
-      System.getenv("SPARK_HOME"), Seq(System.getenv("SPARK_EXAMPLES_JAR")))
+      System.getenv("SPARK_HOME"), StreamingContext.jarOfClass(this.getClass))
 
     // Create a NetworkInputDStream on target ip:port and count the
     // words in input stream of \n delimited text (eg. generated by 'nc')
@@ -74,9 +97,13 @@ object RecoverableNetworkWordCount {
       System.err.println(
         """
           |Usage: RecoverableNetworkWordCount <master> <hostname> <port> <checkpoint-directory> <output-file>
+          |     <master> is the Spark master URL. In local mode, <master> should be 'local[n]' with n > 1.
+          |     <hostname> and <port> describe the TCP server that Spark Streaming would connect to receive data.
+          |     <checkpoint-directory> directory to HDFS-compatible file system which checkpoint data
+          |     <output-file> file to which the word counts will be appended
           |
           |In local mode, <master> should be 'local[n]' with n > 1
-          |Both <checkpoint-directory> and <output-file> should be full paths
+          |Both <checkpoint-directory> and <output-file> must be absolute paths
         """.stripMargin
       )
       System.exit(1)
author	Tathagata Das <tathagata.das1565@gmail.com>	2014-01-10 12:17:09 -0800
committer	Tathagata Das <tathagata.das1565@gmail.com>	2014-01-10 12:17:09 -0800
commit	e4bb845238d0df48f8258e925caf9af5a107af46 (patch)
tree	3a11ad9abe691584cc64be3ca31403305f831686 /examples
parent	2213a5a47fb2d73e3fdebec24356c69ea2968b81 (diff)
download	spark-e4bb845238d0df48f8258e925caf9af5a107af46.tar.gz spark-e4bb845238d0df48f8258e925caf9af5a107af46.tar.bz2 spark-e4bb845238d0df48f8258e925caf9af5a107af46.zip