path: root/docs/streaming-programming-guide.md
author    hyukjinkwon <gurwls223@gmail.com>    2016-08-12 10:00:58 +0100
committer Sean Owen <sowen@cloudera.com>       2016-08-12 10:00:58 +0100
commit    f4482225c405b9cfe078deac74e4c28e2dcc97c3 (patch)
tree      eacdde1d4a2cab3177e63029a7f532c498a34454 /docs/streaming-programming-guide.md
parent    993923c8f5ca719daf905285738b7fdcaf944d8c (diff)
[MINOR][DOC] Fix style in examples across documentation
## What changes were proposed in this pull request?

This PR fixes the documentation as below:

- Python examples use 4-space indentation, and Java and Scala examples use 2-space indentation (see https://cwiki.apache.org/confluence/display/SPARK/Spark+Code+Style+Guide).
- Avoid excessive parentheses and curly braces around anonymous functions (see https://github.com/databricks/scala-style-guide#anonymous).

## How was this patch tested?

N/A

Author: hyukjinkwon <gurwls223@gmail.com>

Closes #14593 from HyukjinKwon/minor-documentation.
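As an illustrative sketch of the two conventions being enforced (this snippet is made up for illustration, not taken from the patch):

{% highlight scala %}
// Preferred: 2-space indentation, and bare curly braces when a
// multi-line anonymous function is the sole argument.
val doubled = Seq(1, 2, 3).map { n =>
  n * 2
}

// Avoided: redundant parentheses wrapped around the braces.
// val doubled = Seq(1, 2, 3).map(n => { n * 2 })
{% endhighlight %}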
Diffstat (limited to 'docs/streaming-programming-guide.md')
-rw-r--r--  docs/streaming-programming-guide.md  28

1 file changed, 14 insertions(+), 14 deletions(-)
diff --git a/docs/streaming-programming-guide.md b/docs/streaming-programming-guide.md
index 3d40b2c313..aef62ea900 100644
--- a/docs/streaming-programming-guide.md
+++ b/docs/streaming-programming-guide.md
@@ -863,7 +863,7 @@ Java code, take a look at the example
 {% highlight python %}
 def updateFunction(newValues, runningCount):
     if runningCount is None:
-       runningCount = 0
+        runningCount = 0
     return sum(newValues, runningCount)  # add the new values with the previous running count to get the new count
 {% endhighlight %}
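For reference, a minimal Scala sketch of the stateful counting pattern this Python hunk re-indents; the DStream `pairs` is a stand-in and is not defined in the patch:

{% highlight scala %}
// Fold this batch's counts for a key into its running count.
def updateFunction(newValues: Seq[Int], runningCount: Option[Int]): Option[Int] = {
  Some(newValues.sum + runningCount.getOrElse(0))
}

// `pairs` is assumed to be a DStream[(String, Int)]; updateStateByKey
// also requires a checkpoint directory to be set on the context.
val runningCounts = pairs.updateStateByKey[Int](updateFunction _)
{% endhighlight %}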
@@ -903,10 +903,10 @@ spam information (maybe generated with Spark as well) and then filtering based o
 {% highlight scala %}
 val spamInfoRDD = ssc.sparkContext.newAPIHadoopRDD(...) // RDD containing spam information

-val cleanedDStream = wordCounts.transform(rdd => {
+val cleanedDStream = wordCounts.transform { rdd =>
   rdd.join(spamInfoRDD).filter(...) // join data stream with spam information to do data cleaning
   ...
-})
+}
 {% endhighlight %}
</div>
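As a rough, self-contained sketch of the `transform` pattern this hunk restyles (the socket source, port, and spam-word table below are stand-ins, not from the patch):

{% highlight scala %}
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}

val conf = new SparkConf().setMaster("local[2]").setAppName("TransformStyle")
val ssc = new StreamingContext(conf, Seconds(1))

val wordCounts = ssc.socketTextStream("localhost", 9999)
  .flatMap(_.split(" ")).map((_, 1)).reduceByKey(_ + _)

// Precomputed RDD joined against every batch, mirroring the guide's
// join-then-filter sketch; the spam flag here is made up.
val spamInfoRDD = ssc.sparkContext.parallelize(Seq(("spamword", true)))
val cleanedDStream = wordCounts.transform { rdd =>
  rdd.join(spamInfoRDD).filter { case (_, (_, isSpam)) => !isSpam }
}
cleanedDStream.print()
{% endhighlight %}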
@@ -1142,12 +1142,12 @@ val joinedStream = windowedStream.transform { rdd => rdd.join(dataset) }
 JavaPairRDD<String, String> dataset = ...
 JavaPairDStream<String, String> windowedStream = stream.window(Durations.seconds(20));
 JavaPairDStream<String, String> joinedStream = windowedStream.transform(
-    new Function<JavaRDD<Tuple2<String, String>>, JavaRDD<Tuple2<String, String>>>() {
-        @Override
-        public JavaRDD<Tuple2<String, String>> call(JavaRDD<Tuple2<String, String>> rdd) {
-            return rdd.join(dataset);
-        }
+  new Function<JavaRDD<Tuple2<String, String>>, JavaRDD<Tuple2<String, String>>>() {
+    @Override
+    public JavaRDD<Tuple2<String, String>> call(JavaRDD<Tuple2<String, String>> rdd) {
+      return rdd.join(dataset);
     }
+  }
 );
{% endhighlight %}
</div>
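The hunk header above already shows the Scala one-liner for this join; for completeness, a minimal sketch of the windowed stream-to-dataset join in Scala, with `stream` and the dataset contents as stand-ins:

{% highlight scala %}
import org.apache.spark.streaming.Seconds

// `stream` is assumed to be an existing DStream[(String, String)].
// Each 20-second window is joined against a precomputed pair RDD.
val dataset = ssc.sparkContext.parallelize(Seq(("key", "value")))
val windowedStream = stream.window(Seconds(20))
val joinedStream = windowedStream.transform { rdd => rdd.join(dataset) }
{% endhighlight %}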
@@ -1611,7 +1611,7 @@ words.foreachRDD(
       // Do word count on table using SQL and print it
       DataFrame wordCountsDataFrame =
-          spark.sql("select word, count(*) as total from words group by word");
+        spark.sql("select word, count(*) as total from words group by word");
       wordCountsDataFrame.show();
       return null;
     }
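A minimal Scala counterpart to the pattern this Java hunk touches, registering each micro-batch as a table and querying it with SQL; this follows the guide's Scala example in spirit, and the DStream `words` is assumed:

{% highlight scala %}
import org.apache.spark.sql.SparkSession

words.foreachRDD { rdd =>
  // One SparkSession, built lazily from the RDD's configuration.
  val spark = SparkSession.builder.config(rdd.sparkContext.getConf).getOrCreate()
  import spark.implicits._

  // Expose this batch's words to SQL and run the same aggregate query.
  val wordsDataFrame = rdd.toDF("word")
  wordsDataFrame.createOrReplaceTempView("words")
  val wordCountsDataFrame =
    spark.sql("select word, count(*) as total from words group by word")
  wordCountsDataFrame.show()
}
{% endhighlight %}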
@@ -1759,11 +1759,11 @@ This behavior is made simple by using `StreamingContext.getOrCreate`. This is us
 {% highlight scala %}
 // Function to create and setup a new StreamingContext
 def functionToCreateContext(): StreamingContext = {
-    val ssc = new StreamingContext(...)   // new context
-    val lines = ssc.socketTextStream(...) // create DStreams
-    ...
-    ssc.checkpoint(checkpointDirectory)   // set checkpoint directory
-    ssc
+  val ssc = new StreamingContext(...)   // new context
+  val lines = ssc.socketTextStream(...) // create DStreams
+  ...
+  ssc.checkpoint(checkpointDirectory)   // set checkpoint directory
+  ssc
 }

 // Get StreamingContext from checkpoint data or create a new one
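The diff cuts off at the hunk boundary; in the guide, the factory function above is then handed to `StreamingContext.getOrCreate`, as the surrounding prose says. A minimal sketch of that usage, with the checkpoint path left as a placeholder:

{% highlight scala %}
import org.apache.spark.streaming.StreamingContext

// Recover the context from checkpoint data if present, otherwise
// build a fresh one with functionToCreateContext.
val checkpointDirectory = "..." // placeholder, as in the guide
val context = StreamingContext.getOrCreate(checkpointDirectory, functionToCreateContext _)

context.start()
context.awaitTermination()
{% endhighlight %}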