From 499ac3e69a102f9b10a1d7e14382fa191516f7b5 Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Fri, 18 Dec 2015 20:06:05 -0800 Subject: [SPARK-12091] [PYSPARK] Deprecate the JAVA-specific deserialized storage levels The current default storage level of Python persist API is MEMORY_ONLY_SER. This is different from the default level MEMORY_ONLY in the official document and RDD APIs. davies Is this inconsistency intentional? Thanks! Updates: Since the data is always serialized on the Python side, the storage levels of JAVA-specific deserialization are not removed, such as MEMORY_ONLY. Updates: Based on the reviewers' feedback. In Python, stored objects will always be serialized with the [Pickle](https://docs.python.org/2/library/pickle.html) library, so it does not matter whether you choose a serialized level. The available storage levels in Python include `MEMORY_ONLY`, `MEMORY_ONLY_2`, `MEMORY_AND_DISK`, `MEMORY_AND_DISK_2`, `DISK_ONLY`, `DISK_ONLY_2` and `OFF_HEAP`. Author: gatorsmile Closes #10092 from gatorsmile/persistStorageLevel. --- python/pyspark/streaming/flume.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'python/pyspark/streaming/flume.py') diff --git a/python/pyspark/streaming/flume.py b/python/pyspark/streaming/flume.py index b3d1905365..b1fff0a5c7 100644 --- a/python/pyspark/streaming/flume.py +++ b/python/pyspark/streaming/flume.py @@ -40,7 +40,7 @@ class FlumeUtils(object): @staticmethod def createStream(ssc, hostname, port, - storageLevel=StorageLevel.MEMORY_AND_DISK_SER_2, + storageLevel=StorageLevel.MEMORY_AND_DISK_2, enableDecompression=False, bodyDecoder=utf8_decoder): """ @@ -70,7 +70,7 @@ class FlumeUtils(object): @staticmethod def createPollingStream(ssc, addresses, - storageLevel=StorageLevel.MEMORY_AND_DISK_SER_2, + storageLevel=StorageLevel.MEMORY_AND_DISK_2, maxBatchSize=1000, parallelism=5, bodyDecoder=utf8_decoder): -- cgit v1.2.3