From b8a0b6ea5ee409dc51e121915794bccce92d457c Mon Sep 17 00:00:00 2001 From: Aaron Davidson <aaron@databricks.com> Date: Fri, 6 Sep 2013 15:36:04 -0700 Subject: [PATCH] Memoize StorageLevels read from JVM --- .../scala/org/apache/spark/api/python/PythonRDD.scala | 2 +- python/pyspark/context.py | 11 +++++++++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala b/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala index 6ca56b3af6..c3b770a42c 100644 --- a/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala +++ b/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala @@ -275,7 +275,7 @@ private[spark] object PythonRDD { * Returns the StorageLevel with the given string name. * Throws an exception if the name is not a valid StorageLevel. */ - def getStorageLevel(name: String) : StorageLevel = { + def getStorageLevelByName(name: String) : StorageLevel = { // In Scala, "val MEMORY_ONLY" produces a public getter by the same name. val storageLevelGetter = StorageLevel.getClass().getDeclaredMethod(name) return storageLevelGetter.invoke(StorageLevel).asInstanceOf[StorageLevel] diff --git a/python/pyspark/context.py b/python/pyspark/context.py index 49f9b4610d..514d56e200 100644 --- a/python/pyspark/context.py +++ b/python/pyspark/context.py @@ -281,16 +281,23 @@ class SparkContext(object): class StorageLevelReader: """ - Mimics the Scala StorageLevel by directing all attribute requests + Mimics the Scala StorageLevel by delegating all attribute requests (e.g., StorageLevel.DISK_ONLY) to the JVM for reflection. + Memoizes results to reduce JVM call/memory overheads. """ def __init__(self, sc): self.sc = sc + self.memoized = {} def __getattr__(self, name): + if name in self.memoized: + return self.memoized[name] + try: - return self.sc._jvm.PythonRDD.getStorageLevel(name) + storageLevel = self.sc._jvm.PythonRDD.getStorageLevelByName(name) + self.memoized[name] = storageLevel + return storageLevel except: print "Failed to find StorageLevel:", name -- GitLab