aboutsummaryrefslogtreecommitdiff
path: root/python/pyspark/context.py
diff options
context:
space:
mode:
authorJosh Rosen <joshrosen@eecs.berkeley.edu>2013-01-20 15:31:41 -0800
committerJosh Rosen <joshrosen@eecs.berkeley.edu>2013-01-20 15:31:41 -0800
commit5b6ea9e9a04994553d0319c541ca356e2e3064a7 (patch)
treea2af005f7ec7524707bdaf649290c035febed0dd /python/pyspark/context.py
parentd0ba80dc727d00b2b7627dcefd2c77009af55f7d (diff)
downloadspark-5b6ea9e9a04994553d0319c541ca356e2e3064a7.tar.gz
spark-5b6ea9e9a04994553d0319c541ca356e2e3064a7.tar.bz2
spark-5b6ea9e9a04994553d0319c541ca356e2e3064a7.zip
Update checkpointing API docs in Python/Java.
Diffstat (limited to 'python/pyspark/context.py')
-rw-r--r--python/pyspark/context.py11
1 files changed, 7 insertions, 4 deletions
diff --git a/python/pyspark/context.py b/python/pyspark/context.py
index 8beb8e2ae9..dcbed37270 100644
--- a/python/pyspark/context.py
+++ b/python/pyspark/context.py
@@ -202,9 +202,12 @@ class SparkContext(object):
def setCheckpointDir(self, dirName, useExisting=False):
"""
- Set the directory under which RDDs are going to be checkpointed. This
- method will create this directory and will throw an exception of the
- path already exists (to avoid overwriting existing files may be
- overwritten). The directory will be deleted on exit if indicated.
+ Set the directory under which RDDs are going to be checkpointed. The
+ directory must be an HDFS path if running on a cluster.
+
+ If the directory does not exist, it will be created. If the directory
+ exists and C{useExisting} is set to true, then the existing directory
+ will be used. Otherwise an exception will be thrown to prevent
+ accidental overwriting of checkpoint files in the existing directory.
"""
self._jsc.sc().setCheckpointDir(dirName, useExisting)