diff options
author | zsxwing <zsxwing@gmail.com> | 2015-08-19 18:36:01 -0700 |
---|---|---|
committer | Tathagata Das <tathagata.das1565@gmail.com> | 2015-08-19 18:36:01 -0700 |
commit | 1f29d502e7ecd6faa185d70dc714f9ea3922fb6d (patch) | |
tree | 3eabe5f24204341f8d13be9bd3ae3d637b40b87b /python/pyspark | |
parent | 2f2686a73f5a2a53ca5b1023e0d7e0e6c9be5896 (diff) | |
download | spark-1f29d502e7ecd6faa185d70dc714f9ea3922fb6d.tar.gz spark-1f29d502e7ecd6faa185d70dc714f9ea3922fb6d.tar.bz2 spark-1f29d502e7ecd6faa185d70dc714f9ea3922fb6d.zip |
[SPARK-9812] [STREAMING] Fix Python 3 compatibility issue in PySpark Streaming and some docs
This PR includes the following fixes:
1. Use `range` instead of `xrange` in `queue_stream.py` to support Python 3.
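2. Fix `utf8_decoder` returning `bytes` rather than `str` when it receives an empty `bytes` object in Python 3. The old `s and s.decode('utf-8')` short-circuited on the falsy empty value and returned it undecoded; the decoder now returns `None` only for `None` and decodes everything else.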
3. Fix the commands in the docs so that users can copy them directly to the command line. The previous commands were broken in the middle of a path, so when they were copied to the command line the path was split into two parts by the extra spaces, forcing the user to fix it manually.
Author: zsxwing <zsxwing@gmail.com>
Closes #8315 from zsxwing/SPARK-9812.
Diffstat (limited to 'python/pyspark')
-rw-r--r-- | python/pyspark/streaming/flume.py | 4 | ||||
-rw-r--r-- | python/pyspark/streaming/kafka.py | 4 | ||||
-rw-r--r-- | python/pyspark/streaming/kinesis.py | 4 |
3 files changed, 9 insertions, 3 deletions
diff --git a/python/pyspark/streaming/flume.py b/python/pyspark/streaming/flume.py
index cbb573f226..c0cdc50d8d 100644
--- a/python/pyspark/streaming/flume.py
+++ b/python/pyspark/streaming/flume.py
@@ -31,7 +31,9 @@ __all__ = ['FlumeUtils', 'utf8_decoder']
 
 def utf8_decoder(s):
     """ Decode the unicode as UTF-8 """
-    return s and s.decode('utf-8')
+    if s is None:
+        return None
+    return s.decode('utf-8')
 
 
 class FlumeUtils(object):
diff --git a/python/pyspark/streaming/kafka.py b/python/pyspark/streaming/kafka.py
index dc5b7fd878..8a814c64c0 100644
--- a/python/pyspark/streaming/kafka.py
+++ b/python/pyspark/streaming/kafka.py
@@ -29,7 +29,9 @@ __all__ = ['Broker', 'KafkaUtils', 'OffsetRange', 'TopicAndPartition', 'utf8_dec
 
 def utf8_decoder(s):
     """ Decode the unicode as UTF-8 """
-    return s and s.decode('utf-8')
+    if s is None:
+        return None
+    return s.decode('utf-8')
 
 
 class KafkaUtils(object):
diff --git a/python/pyspark/streaming/kinesis.py b/python/pyspark/streaming/kinesis.py
index bcfe2703fe..34be5880e1 100644
--- a/python/pyspark/streaming/kinesis.py
+++ b/python/pyspark/streaming/kinesis.py
@@ -26,7 +26,9 @@ __all__ = ['KinesisUtils', 'InitialPositionInStream', 'utf8_decoder']
 
 def utf8_decoder(s):
     """ Decode the unicode as UTF-8 """
-    return s and s.decode('utf-8')
+    if s is None:
+        return None
+    return s.decode('utf-8')
 
 
 class KinesisUtils(object):