about summary refs log tree commit diff
path: root/python
diff options
context:
space:
mode:
author Aaron Davidson <aaron@databricks.com> 2013-09-06 15:36:04 -0700
committer Aaron Davidson <aaron@databricks.com> 2013-09-06 15:36:04 -0700
commit b8a0b6ea5ee409dc51e121915794bccce92d457c (patch)
tree 0eea95b8bbf3d615b2f73fdbc958ee466d5825bb /python
parent a63d4c7dc2970900b116f7287e3d6b302d9d5698 (diff)
download spark-b8a0b6ea5ee409dc51e121915794bccce92d457c.tar.gz
spark-b8a0b6ea5ee409dc51e121915794bccce92d457c.tar.bz2
spark-b8a0b6ea5ee409dc51e121915794bccce92d457c.zip
Memoize StorageLevels read from JVM
Diffstat (limited to 'python')
-rw-r--r-- python/pyspark/context.py 11
1 files changed, 9 insertions, 2 deletions
diff --git a/python/pyspark/context.py b/python/pyspark/context.py
index 49f9b4610d..514d56e200 100644
--- a/python/pyspark/context.py
+++ b/python/pyspark/context.py
@@ -281,16 +281,23 @@ class SparkContext(object):
class StorageLevelReader:
"""
- Mimics the Scala StorageLevel by directing all attribute requests
+ Mimics the Scala StorageLevel by delegating all attribute requests
(e.g., StorageLevel.DISK_ONLY) to the JVM for reflection.
+ Memoizes results to reduce JVM call/memory overheads.
"""
def __init__(self, sc):
self.sc = sc
+ self.memoized = {}
def __getattr__(self, name):
+ if name in self.memoized:
+ return self.memoized[name]
+
try:
- return self.sc._jvm.PythonRDD.getStorageLevel(name)
+ storageLevel = self.sc._jvm.PythonRDD.getStorageLevelByName(name)
+ self.memoized[name] = storageLevel
+ return storageLevel
except:
print "Failed to find StorageLevel:", name