diff options
author | Aaron Davidson <aaron@databricks.com> | 2013-09-06 15:36:04 -0700 |
---|---|---|
committer | Aaron Davidson <aaron@databricks.com> | 2013-09-06 15:36:04 -0700 |
commit | b8a0b6ea5ee409dc51e121915794bccce92d457c (patch) | |
tree | 0eea95b8bbf3d615b2f73fdbc958ee466d5825bb /python | |
parent | a63d4c7dc2970900b116f7287e3d6b302d9d5698 (diff) | |
download | spark-b8a0b6ea5ee409dc51e121915794bccce92d457c.tar.gz spark-b8a0b6ea5ee409dc51e121915794bccce92d457c.tar.bz2 spark-b8a0b6ea5ee409dc51e121915794bccce92d457c.zip |
Memoize StorageLevels read from JVM
Diffstat (limited to 'python')
-rw-r--r-- | python/pyspark/context.py | 11 |
1 file changed, 9 insertions, 2 deletions
```diff
diff --git a/python/pyspark/context.py b/python/pyspark/context.py
index 49f9b4610d..514d56e200 100644
--- a/python/pyspark/context.py
+++ b/python/pyspark/context.py
@@ -281,16 +281,23 @@ class SparkContext(object):
 
 class StorageLevelReader:
     """
-    Mimics the Scala StorageLevel by directing all attribute requests
+    Mimics the Scala StorageLevel by delegating all attribute requests
     (e.g., StorageLevel.DISK_ONLY) to the JVM for reflection.
+    Memoizes results to reduce JVM call/memory overheads.
     """
 
     def __init__(self, sc):
         self.sc = sc
+        self.memoized = {}
 
     def __getattr__(self, name):
+        if name in self.memoized:
+            return self.memoized[name]
+
         try:
-            return self.sc._jvm.PythonRDD.getStorageLevel(name)
+            storageLevel = self.sc._jvm.PythonRDD.getStorageLevelByName(name)
+            self.memoized[name] = storageLevel
+            return storageLevel
         except:
             print "Failed to find StorageLevel:", name
 
```