path: root/docs/streaming-programming-guide.md
author:    Dmitriy Sokolov <silentsokolov@gmail.com>  2016-08-30 11:23:37 +0100
committer: Sean Owen <sowen@cloudera.com>             2016-08-30 11:23:37 +0100
commit: d4eee9932edf1a489d7fe9120a0f003150834df6
tree:   2b05ac9cfaf1e76ca0b44e579d54ec8b3b7494f2 /docs/streaming-programming-guide.md
parent: befab9c1c6b59ad90f63a7d10e12b186be897f15
[MINOR][DOCS] Fix minor typos in python example code
## What changes were proposed in this pull request?

Fix minor typos in the python example code in the streaming programming guide.

## How was this patch tested?

N/A

Author: Dmitriy Sokolov <silentsokolov@gmail.com>

Closes #14805 from silentsokolov/fix-typos.
Diffstat (limited to 'docs/streaming-programming-guide.md')
-rw-r--r--  docs/streaming-programming-guide.md | 33
1 file changed, 16 insertions(+), 17 deletions(-)
diff --git a/docs/streaming-programming-guide.md b/docs/streaming-programming-guide.md
index 82d36474ff..c0e4f3b35a 100644
--- a/docs/streaming-programming-guide.md
+++ b/docs/streaming-programming-guide.md
@@ -930,7 +930,7 @@ JavaPairDStream<String, Integer> cleanedDStream = wordCounts.transform(
<div data-lang="python" markdown="1">
{% highlight python %}
-spamInfoRDD = sc.pickleFile(...) # RDD containing spam information
+spamInfoRDD = sc.pickleFile(...) # RDD containing spam information
# join data stream with spam information to do data cleaning
cleanedDStream = wordCounts.transform(lambda rdd: rdd.join(spamInfoRDD).filter(...))
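A hypothetical, minimal expansion of the cleaning step in this hunk, assuming spamInfoRDD holds (word, isSpam) pairs and that words flagged as spam should be dropped; the path and the filter predicate below are illustrative, not taken from the guide:

spamInfoRDD = sc.pickleFile("/path/to/spam-info")  # hypothetical path to pickled (word, isSpam) pairs
# after the join each element is (word, (count, isSpam)); keep only non-spam words
cleanedDStream = wordCounts.transform(
    lambda rdd: rdd.join(spamInfoRDD).filter(lambda kv: not kv[1][1]))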
@@ -1495,16 +1495,15 @@ See the full [source code]({{site.SPARK_GITHUB_URL}}/blob/v{{site.SPARK_VERSION_
</div>
<div data-lang="python" markdown="1">
{% highlight python %}
-
def getWordBlacklist(sparkContext):
- if ('wordBlacklist' not in globals()):
- globals()['wordBlacklist'] = sparkContext.broadcast(["a", "b", "c"])
- return globals()['wordBlacklist']
+ if ("wordBlacklist" not in globals()):
+ globals()["wordBlacklist"] = sparkContext.broadcast(["a", "b", "c"])
+ return globals()["wordBlacklist"]
def getDroppedWordsCounter(sparkContext):
- if ('droppedWordsCounter' not in globals()):
- globals()['droppedWordsCounter'] = sparkContext.accumulator(0)
- return globals()['droppedWordsCounter']
+ if ("droppedWordsCounter" not in globals()):
+ globals()["droppedWordsCounter"] = sparkContext.accumulator(0)
+ return globals()["droppedWordsCounter"]
def echo(time, rdd):
# Get or register the blacklist Broadcast
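A minimal sketch of how the two singleton helpers in this hunk are typically consumed inside the output function; the filter predicate and print format are illustrative assumptions, not verbatim from the guide:

def echo(time, rdd):
    # Get or register the blacklist Broadcast and the dropped-words Accumulator
    blacklist = getWordBlacklist(rdd.context)
    droppedWordsCounter = getDroppedWordsCounter(rdd.context)

    # Drop blacklisted words and count how many were dropped
    def filterFunc(wordCount):
        if wordCount[0] in blacklist.value:
            droppedWordsCounter.add(wordCount[1])
            return False
        return True

    print("Counts at time %s: %s" % (time, rdd.filter(filterFunc).collect()))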
@@ -1626,12 +1625,12 @@ See the full [source code]({{site.SPARK_GITHUB_URL}}/blob/v{{site.SPARK_VERSION_
# Lazily instantiated global instance of SparkSession
def getSparkSessionInstance(sparkConf):
- if ('sparkSessionSingletonInstance' not in globals()):
- globals()['sparkSessionSingletonInstance'] = SparkSession\
- .builder\
- .config(conf=sparkConf)\
+ if ("sparkSessionSingletonInstance" not in globals()):
+ globals()["sparkSessionSingletonInstance"] = SparkSession \
+ .builder \
+ .config(conf=sparkConf) \
.getOrCreate()
- return globals()['sparkSessionSingletonInstance']
+ return globals()["sparkSessionSingletonInstance"]
...
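A minimal sketch of how the lazily instantiated SparkSession above is typically used from a foreachRDD output operation; the DStream name words and the SQL query are illustrative assumptions:

from pyspark.sql import Row

def process(time, rdd):
    # Reuse the singleton SparkSession to turn the RDD of words into a DataFrame
    spark = getSparkSessionInstance(rdd.context.getConf())
    wordsDataFrame = spark.createDataFrame(rdd.map(lambda w: Row(word=w)))
    wordsDataFrame.createOrReplaceTempView("words")
    spark.sql("select word, count(*) as total from words group by word").show()

words.foreachRDD(process)  # words is an assumed DStream of strings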
@@ -1829,11 +1828,11 @@ This behavior is made simple by using `StreamingContext.getOrCreate`. This is us
{% highlight python %}
# Function to create and setup a new StreamingContext
def functionToCreateContext():
- sc = SparkContext(...) # new context
- ssc = new StreamingContext(...)
- lines = ssc.socketTextStream(...) # create DStreams
+ sc = SparkContext(...) # new context
+ ssc = StreamingContext(...)
+ lines = ssc.socketTextStream(...) # create DStreams
...
- ssc.checkpoint(checkpointDirectory) # set checkpoint directory
+ ssc.checkpoint(checkpointDirectory) # set checkpoint directory
return ssc
# Get StreamingContext from checkpoint data or create a new one
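A minimal sketch of the recovery call that this comment introduces, assuming checkpointDirectory is the same path passed to ssc.checkpoint above:

context = StreamingContext.getOrCreate(checkpointDirectory, functionToCreateContext)

# Do any additional setup that must run whether the context is new or recovered,
# then start the streaming computation
context.start()
context.awaitTermination()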