From 58c6fa2041b99160f254b17c2b71de9d82c53f8c Mon Sep 17 00:00:00 2001
From: Matei Zaharia <matei@databricks.com>
Date: Sun, 29 Dec 2013 14:46:59 -0500
Subject: [PATCH] Add Python docs about SparkConf

---
 python/pyspark/conf.py    | 42 +++++++++++++++++++++++++++++++++++++++
 python/pyspark/context.py |  3 ++-
 2 files changed, 44 insertions(+), 1 deletion(-)

diff --git a/python/pyspark/conf.py b/python/pyspark/conf.py
index eb7a6c13fe..a79f348b52 100644
--- a/python/pyspark/conf.py
+++ b/python/pyspark/conf.py
@@ -50,29 +50,62 @@ u'value1'
 
 
 class SparkConf(object):
+    """
+    Configuration for a Spark application. Used to set various Spark
+    parameters as key-value pairs.
+
+    Most of the time, you would create a SparkConf object with
+    C{SparkConf()}, which will load values from C{spark.*} Java system
+    properties and any C{spark.conf} on your application's classpath.
+    In this case, system properties take priority over C{spark.conf},
+    and any parameters you set directly on the C{SparkConf} object take
+    priority over both of those.
+
+    For unit tests, you can also call C{SparkConf(False)} to skip
+    loading external settings and get the same configuration no matter
+    what is on the classpath.
+
+    All setter methods in this class support chaining. For example,
+    you can write C{conf.setMaster("local").setAppName("My app")}.
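+
+    As a small sketch of typical use (the master URL and application
+    name are only examples, and both classes are assumed to be imported
+    from C{pyspark})::
+
+        conf = SparkConf().setMaster("local").setAppName("My app")
+        sc = SparkContext(conf=conf)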
+    """
+
     def __init__(self, loadDefaults=True, _jvm=None):
+        """
+        Create a new Spark configuration.
+
+        @param loadDefaults: whether to load values from Java system
+               properties and the classpath (True by default)
+        """
         from pyspark.context import SparkContext
         SparkContext._ensure_initialized()
         _jvm = _jvm or SparkContext._jvm
         self._jconf = _jvm.SparkConf(loadDefaults)
 
     def set(self, key, value):
+        """Set a configuration property."""
         self._jconf.set(key, value)
         return self
 
     def setMaster(self, value):
+        """Set master URL to connect to."""
         self._jconf.setMaster(value)
         return self
 
     def setAppName(self, value):
+        """Set application name."""
         self._jconf.setAppName(value)
         return self
 
     def setSparkHome(self, value):
+        """
+        Set path where Spark is installed on worker nodes (needed for some
+        deployment modes).
+        """
         self._jconf.setSparkHome(value)
         return self
 
     def setExecutorEnv(self, key=None, value=None, pairs=None):
+        """Set an environment variable to be passed to executors."""
         if (key != None and pairs != None) or (key == None and pairs == None):
             raise Exception("Either pass one key-value pair or a list of pairs")
         elif key != None:
@@ -83,23 +116,32 @@ class SparkConf(object):
         return self
 
     def setAll(self, pairs):
+        """
+        Set multiple parameters, passed as a list of key-value pairs.
+
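+        A small sketch of use (the property names shown are only
+        illustrative)::
+
+            conf.setAll([("spark.executor.memory", "1g"),
+                         ("spark.cores.max", "4")])
+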
+        @param pairs: list of key-value pairs to set
+        """
         for (k, v) in pairs:
             self._jconf.set(k, v)
         return self
 
     def get(self, key):
+        """Get the configured value for some key, if set."""
         return self._jconf.get(key)
 
     def getOrElse(self, key, defaultValue):
+        """Get the value for some key, or return a default otherwise."""
         return self._jconf.getOrElse(key, defaultValue)
 
     def getAll(self):
+        """Get all values as a list of key-value pairs."""
         pairs = []
         for elem in self._jconf.getAll():
             pairs.append((elem._1(), elem._2()))
         return pairs
 
     def contains(self, key):
+        """Does this configuration contain a given key?"""
         return self._jconf.contains(key)
 
 
diff --git a/python/pyspark/context.py b/python/pyspark/context.py
index 9d75c2b6f1..1244a1495f 100644
--- a/python/pyspark/context.py
+++ b/python/pyspark/context.py
@@ -53,7 +53,8 @@ class SparkContext(object):
     def __init__(self, master=None, appName=None, sparkHome=None, pyFiles=None,
         environment=None, batchSize=1024, serializer=PickleSerializer(), conf=None):
         """
-        Create a new SparkContext.
+        Create a new SparkContext. At least the master and app name should be set,
+        either through the named parameters here or through C{conf}.
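+
+        As a sketch (master URL and app name are only examples), the
+        C{conf} route might look like::
+
+            conf = SparkConf().setMaster("local").setAppName("My app")
+            sc = SparkContext(conf=conf)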
 
         @param master: Cluster URL to connect to
                (e.g. mesos://host:port, spark://host:port, local[4]).
-- 
GitLab