aboutsummaryrefslogtreecommitdiff
path: root/python/pyspark/streaming/tests.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/pyspark/streaming/tests.py')
-rw-r--r--python/pyspark/streaming/tests.py64
1 files changed, 64 insertions, 0 deletions
diff --git a/python/pyspark/streaming/tests.py b/python/pyspark/streaming/tests.py
index 188c8ff120..4ecae1e4bf 100644
--- a/python/pyspark/streaming/tests.py
+++ b/python/pyspark/streaming/tests.py
@@ -678,6 +678,70 @@ class KafkaStreamTests(PySparkStreamingTestCase):
rdd = KafkaUtils.createRDD(self.sc, kafkaParams, offsetRanges, leaders)
self._validateRddResult(sendData, rdd)
+ @unittest.skipIf(sys.version >= "3", "long type not support")
+ def test_kafka_rdd_get_offsetRanges(self):
+ """Test Python direct Kafka RDD get OffsetRanges."""
+ topic = self._randomTopic()
+ sendData = {"a": 3, "b": 4, "c": 5}
+ offsetRanges = [OffsetRange(topic, 0, long(0), long(sum(sendData.values())))]
+ kafkaParams = {"metadata.broker.list": self._kafkaTestUtils.brokerAddress()}
+
+ self._kafkaTestUtils.createTopic(topic)
+ self._kafkaTestUtils.sendMessages(topic, sendData)
+ rdd = KafkaUtils.createRDD(self.sc, kafkaParams, offsetRanges)
+ self.assertEqual(offsetRanges, rdd.offsetRanges())
+
+ @unittest.skipIf(sys.version >= "3", "long type not support")
+ def test_kafka_direct_stream_foreach_get_offsetRanges(self):
+ """Test the Python direct Kafka stream foreachRDD get offsetRanges."""
+ topic = self._randomTopic()
+ sendData = {"a": 1, "b": 2, "c": 3}
+ kafkaParams = {"metadata.broker.list": self._kafkaTestUtils.brokerAddress(),
+ "auto.offset.reset": "smallest"}
+
+ self._kafkaTestUtils.createTopic(topic)
+ self._kafkaTestUtils.sendMessages(topic, sendData)
+
+ stream = KafkaUtils.createDirectStream(self.ssc, [topic], kafkaParams)
+
+ offsetRanges = []
+
+ def getOffsetRanges(_, rdd):
+ for o in rdd.offsetRanges():
+ offsetRanges.append(o)
+
+ stream.foreachRDD(getOffsetRanges)
+ self.ssc.start()
+ self.wait_for(offsetRanges, 1)
+
+ self.assertEqual(offsetRanges, [OffsetRange(topic, 0, long(0), long(6))])
+
+ @unittest.skipIf(sys.version >= "3", "long type not support")
+ def test_kafka_direct_stream_transform_get_offsetRanges(self):
+ """Test the Python direct Kafka stream transform get offsetRanges."""
+ topic = self._randomTopic()
+ sendData = {"a": 1, "b": 2, "c": 3}
+ kafkaParams = {"metadata.broker.list": self._kafkaTestUtils.brokerAddress(),
+ "auto.offset.reset": "smallest"}
+
+ self._kafkaTestUtils.createTopic(topic)
+ self._kafkaTestUtils.sendMessages(topic, sendData)
+
+ stream = KafkaUtils.createDirectStream(self.ssc, [topic], kafkaParams)
+
+ offsetRanges = []
+
+ def transformWithOffsetRanges(rdd):
+ for o in rdd.offsetRanges():
+ offsetRanges.append(o)
+ return rdd
+
+ stream.transform(transformWithOffsetRanges).foreachRDD(lambda rdd: rdd.count())
+ self.ssc.start()
+ self.wait_for(offsetRanges, 1)
+
+ self.assertEqual(offsetRanges, [OffsetRange(topic, 0, long(0), long(6))])
+
class FlumeStreamTests(PySparkStreamingTestCase):
timeout = 20 # seconds