From b8a0b6ea5ee409dc51e121915794bccce92d457c Mon Sep 17 00:00:00 2001
From: Aaron Davidson <aaron@databricks.com>
Date: Fri, 6 Sep 2013 15:36:04 -0700
Subject: [PATCH] Memoize StorageLevels read from JVM

---
 .../scala/org/apache/spark/api/python/PythonRDD.scala |  2 +-
 python/pyspark/context.py                             | 11 +++++++++--
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala b/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala
index 6ca56b3af6..c3b770a42c 100644
--- a/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala
+++ b/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala
@@ -275,7 +275,7 @@ private[spark] object PythonRDD {
    * Returns the StorageLevel with the given string name.
    * Throws an exception if the name is not a valid StorageLevel.
    */
-  def getStorageLevel(name: String) : StorageLevel = {
+  def getStorageLevelByName(name: String) : StorageLevel = {
     // In Scala, "val MEMORY_ONLY" produces a public getter by the same name.
     val storageLevelGetter = StorageLevel.getClass().getDeclaredMethod(name)
     return storageLevelGetter.invoke(StorageLevel).asInstanceOf[StorageLevel]
diff --git a/python/pyspark/context.py b/python/pyspark/context.py
index 49f9b4610d..514d56e200 100644
--- a/python/pyspark/context.py
+++ b/python/pyspark/context.py
@@ -281,16 +281,23 @@ class SparkContext(object):
 
 class StorageLevelReader:
     """
-    Mimics the Scala StorageLevel by directing all attribute requests
+    Mimics the Scala StorageLevel by delegating all attribute requests
     (e.g., StorageLevel.DISK_ONLY) to the JVM for reflection.
+    Memoizes results to reduce JVM call/memory overheads.
     """
 
     def __init__(self, sc):
         self.sc = sc
+        self.memoized = {}
 
     def __getattr__(self, name):
+        if name in self.memoized:
+            return self.memoized[name]
+
         try:
-            return self.sc._jvm.PythonRDD.getStorageLevel(name)
+            storageLevel = self.sc._jvm.PythonRDD.getStorageLevelByName(name)
+            self.memoized[name] = storageLevel
+            return storageLevel
         except:
             print "Failed to find StorageLevel:", name
 
-- 
GitLab