author     Matei Zaharia <matei@databricks.com>  2013-12-29 14:46:59 -0500
committer  Matei Zaharia <matei@databricks.com>  2013-12-29 14:46:59 -0500
commit     58c6fa2041b99160f254b17c2b71de9d82c53f8c (patch)
tree       f42a22c90232793074a5e2c93db5884a3e222e3d /python
parent     615fb649d66b13371927a051d249433d746c5f19 (diff)
Add Python docs about SparkConf
Diffstat (limited to 'python')
-rw-r--r--  python/pyspark/conf.py     42
-rw-r--r--  python/pyspark/context.py   3
2 files changed, 44 insertions, 1 deletion
diff --git a/python/pyspark/conf.py b/python/pyspark/conf.py
index eb7a6c13fe..a79f348b52 100644
--- a/python/pyspark/conf.py
+++ b/python/pyspark/conf.py
@@ -50,29 +50,62 @@ u'value1'
class SparkConf(object):
+ """
+ Configuration for a Spark application. Used to set various Spark
+ parameters as key-value pairs.
+
+ Most of the time, you would create a SparkConf object with
+ C{SparkConf()}, which will load values from `spark.*` Java system
+ properties and any `spark.conf` on your application's classpath.
+ In this case, system properties take priority over `spark.conf`,
+ and any parameters you set directly on the `SparkConf` object take
+ priority over both of those.
+
+ For unit tests, you can also call C{SparkConf(False)} to skip
+ loading external settings and get the same configuration no matter
+ what is on the classpath.
+
+ All setter methods in this class support chaining. For example,
+ you can write C{conf.setMaster("local").setAppName("My app")}.
+ """
+
def __init__(self, loadDefaults=True, _jvm=None):
+ """
+ Create a new Spark configuration.
+
+ @param loadDefaults: whether to load values from Java system
+ properties and classpath (true by default)
+ """
from pyspark.context import SparkContext
SparkContext._ensure_initialized()
_jvm = _jvm or SparkContext._jvm
self._jconf = _jvm.SparkConf(loadDefaults)
def set(self, key, value):
+ """Set a configuration property."""
self._jconf.set(key, value)
return self
def setMaster(self, value):
+ """Set master URL to connect to."""
self._jconf.setMaster(value)
return self
def setAppName(self, value):
+ """Set application name."""
self._jconf.setAppName(value)
return self
def setSparkHome(self, value):
+ """
+ Set path where Spark is installed on worker nodes (needed for some
+ deployment modes).
+ """
self._jconf.setSparkHome(value)
return self
def setExecutorEnv(self, key=None, value=None, pairs=None):
+ """Set an environment variable to be passed to executors."""
if (key != None and pairs != None) or (key == None and pairs == None):
raise Exception("Either pass one key-value pair or a list of pairs")
elif key != None:
@@ -83,23 +116,32 @@ class SparkConf(object):
return self
def setAll(self, pairs):
+ """
+ Set multiple parameters, passed as a list of key-value pairs.
+
+ @param pairs: list of key-value pairs to set
+ """
for (k, v) in pairs:
self._jconf.set(k, v)
return self
def get(self, key):
+ """Get the configured value for some key, if set."""
return self._jconf.get(key)
def getOrElse(self, key, defaultValue):
+ """Get the value for some key, or return a default otherwise."""
return self._jconf.getOrElse(key, defaultValue)
def getAll(self):
+ """Get all values as a list of key-value pairs."""
pairs = []
for elem in self._jconf.getAll():
pairs.append((elem._1(), elem._2()))
return pairs
def contains(self, key):
+ """Does this configuration contain a given key?"""
return self._jconf.contains(key)
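
Taken together, the docstrings added above describe a fluent configuration API: every setter returns self, so calls can be chained, and values can be read back with get, getOrElse, getAll, and contains. A minimal usage sketch under those assumptions (the Spark home path and the spark.executor.memory key are illustrative values, not part of this change):

    from pyspark.conf import SparkConf

    # SparkConf() loads spark.* system properties (and spark.conf) by default;
    # each setter returns self, so the calls chain.
    conf = (SparkConf()
            .setMaster("local")
            .setAppName("My app")
            .setSparkHome("/path/to/spark")          # hypothetical install path on workers
            .set("spark.executor.memory", "1g"))

    print(conf.get("spark.master"))                      # u'local'
    print(conf.getOrElse("spark.unknown", "fallback"))   # u'fallback'
    print(conf.contains("spark.app.name"))               # True
    for key, value in conf.getAll():
        print("%s=%s" % (key, value))                    # all settings as key-value pairs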
diff --git a/python/pyspark/context.py b/python/pyspark/context.py
index 9d75c2b6f1..1244a1495f 100644
--- a/python/pyspark/context.py
+++ b/python/pyspark/context.py
@@ -53,7 +53,8 @@ class SparkContext(object):
def __init__(self, master=None, appName=None, sparkHome=None, pyFiles=None,
environment=None, batchSize=1024, serializer=PickleSerializer(), conf=None):
"""
- Create a new SparkContext.
+ Create a new SparkContext. At least the master and app name should be set,
+ either through the named parameters here or through C{conf}.
@param master: Cluster URL to connect to
(e.g. mesos://host:port, spark://host:port, local[4]).
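
The revised SparkContext docstring notes that the master and app name can be supplied either through the named parameters or through C{conf}. A brief sketch of both forms (the local-mode values are illustrative):

    from pyspark.conf import SparkConf
    from pyspark.context import SparkContext

    # Option 1: pass master and app name directly as named parameters.
    sc = SparkContext(master="local[4]", appName="My app")
    sc.stop()

    # Option 2: bundle the same settings in a SparkConf and pass it as conf=.
    conf = SparkConf().setMaster("local[4]").setAppName("My app")
    sc = SparkContext(conf=conf)
    sc.stop()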