diff options
author | zsxwing <zsxwing@gmail.com> | 2015-08-19 18:36:01 -0700 |
---|---|---|
committer | Tathagata Das <tathagata.das1565@gmail.com> | 2015-08-19 18:36:01 -0700 |
commit | 1f29d502e7ecd6faa185d70dc714f9ea3922fb6d (patch) | |
tree | 3eabe5f24204341f8d13be9bd3ae3d637b40b87b /examples/src/main/python | |
parent | 2f2686a73f5a2a53ca5b1023e0d7e0e6c9be5896 (diff) | |
download | spark-1f29d502e7ecd6faa185d70dc714f9ea3922fb6d.tar.gz spark-1f29d502e7ecd6faa185d70dc714f9ea3922fb6d.tar.bz2 spark-1f29d502e7ecd6faa185d70dc714f9ea3922fb6d.zip |
[SPARK-9812] [STREAMING] Fix Python 3 compatibility issue in PySpark Streaming and some docs
This PR includes the following fixes:
1. Use `range` instead of `xrange` in `queue_stream.py` to support Python 3.
2. Fix the issue that `utf8_decoder` will return `bytes` rather than `str` when receiving an empty `bytes` in Python 3.
3. Fix the commands in docs so that the user can copy them directly to the command line. The previous commands was broken in the middle of a path, so when copying to the command line, the path would be split to two parts by the extra spaces, which forces the user to fix it manually.
Author: zsxwing <zsxwing@gmail.com>
Closes #8315 from zsxwing/SPARK-9812.
Diffstat (limited to 'examples/src/main/python')
5 files changed, 14 insertions, 11 deletions
diff --git a/examples/src/main/python/streaming/direct_kafka_wordcount.py b/examples/src/main/python/streaming/direct_kafka_wordcount.py index 6ef188a220..ea20678b9a 100644 --- a/examples/src/main/python/streaming/direct_kafka_wordcount.py +++ b/examples/src/main/python/streaming/direct_kafka_wordcount.py @@ -23,8 +23,8 @@ http://kafka.apache.org/documentation.html#quickstart and then run the example - `$ bin/spark-submit --jars external/kafka-assembly/target/scala-*/\ - spark-streaming-kafka-assembly-*.jar \ + `$ bin/spark-submit --jars \ + external/kafka-assembly/target/scala-*/spark-streaming-kafka-assembly-*.jar \ examples/src/main/python/streaming/direct_kafka_wordcount.py \ localhost:9092 test` """ @@ -37,7 +37,7 @@ from pyspark.streaming.kafka import KafkaUtils if __name__ == "__main__": if len(sys.argv) != 3: - print >> sys.stderr, "Usage: direct_kafka_wordcount.py <broker_list> <topic>" + print("Usage: direct_kafka_wordcount.py <broker_list> <topic>", file=sys.stderr) exit(-1) sc = SparkContext(appName="PythonStreamingDirectKafkaWordCount") diff --git a/examples/src/main/python/streaming/flume_wordcount.py b/examples/src/main/python/streaming/flume_wordcount.py index 091b64d8c4..d75bc6daac 100644 --- a/examples/src/main/python/streaming/flume_wordcount.py +++ b/examples/src/main/python/streaming/flume_wordcount.py @@ -23,8 +23,9 @@ https://flume.apache.org/documentation.html and then run the example - `$ bin/spark-submit --jars external/flume-assembly/target/scala-*/\ - spark-streaming-flume-assembly-*.jar examples/src/main/python/streaming/flume_wordcount.py \ + `$ bin/spark-submit --jars \ + external/flume-assembly/target/scala-*/spark-streaming-flume-assembly-*.jar \ + examples/src/main/python/streaming/flume_wordcount.py \ localhost 12345 """ from __future__ import print_function diff --git a/examples/src/main/python/streaming/kafka_wordcount.py b/examples/src/main/python/streaming/kafka_wordcount.py index b178e7899b..8d697f620f 100644 --- a/examples/src/main/python/streaming/kafka_wordcount.py +++ b/examples/src/main/python/streaming/kafka_wordcount.py @@ -23,8 +23,9 @@ http://kafka.apache.org/documentation.html#quickstart and then run the example - `$ bin/spark-submit --jars external/kafka-assembly/target/scala-*/\ - spark-streaming-kafka-assembly-*.jar examples/src/main/python/streaming/kafka_wordcount.py \ + `$ bin/spark-submit --jars \ + external/kafka-assembly/target/scala-*/spark-streaming-kafka-assembly-*.jar \ + examples/src/main/python/streaming/kafka_wordcount.py \ localhost:2181 test` """ from __future__ import print_function diff --git a/examples/src/main/python/streaming/mqtt_wordcount.py b/examples/src/main/python/streaming/mqtt_wordcount.py index 617ce5ea67..abf9c0e21d 100644 --- a/examples/src/main/python/streaming/mqtt_wordcount.py +++ b/examples/src/main/python/streaming/mqtt_wordcount.py @@ -26,8 +26,9 @@ http://www.eclipse.org/paho/#getting-started and then run the example - `$ bin/spark-submit --jars external/mqtt-assembly/target/scala-*/\ - spark-streaming-mqtt-assembly-*.jar examples/src/main/python/streaming/mqtt_wordcount.py \ + `$ bin/spark-submit --jars \ + external/mqtt-assembly/target/scala-*/spark-streaming-mqtt-assembly-*.jar \ + examples/src/main/python/streaming/mqtt_wordcount.py \ tcp://localhost:1883 foo` """ diff --git a/examples/src/main/python/streaming/queue_stream.py b/examples/src/main/python/streaming/queue_stream.py index dcd6a0fc6f..b3808907f7 100644 --- a/examples/src/main/python/streaming/queue_stream.py +++ b/examples/src/main/python/streaming/queue_stream.py @@ -36,8 +36,8 @@ if __name__ == "__main__": # Create the queue through which RDDs can be pushed to # a QueueInputDStream rddQueue = [] - for i in xrange(5): - rddQueue += [ssc.sparkContext.parallelize([j for j in xrange(1, 1001)], 10)] + for i in range(5): + rddQueue += [ssc.sparkContext.parallelize([j for j in range(1, 1001)], 10)] # Create the QueueInputDStream and use it do some processing inputStream = ssc.queueStream(rddQueue) |