author		Jyotiska NK <jyotiska123@gmail.com>	2014-05-28 23:08:39 -0700
committer	Matei Zaharia <matei@databricks.com>	2014-05-28 23:08:39 -0700
commit		9cff1dd25abc5e848720d853172ed42e35376fd0 (patch)
tree		fd70c8030abe4b832106cd51e1785215730f6fb4 /python
parent		4dbb27b0cf4eb67c92aad2c1158616312f5a54e6 (diff)
Added doctest and method description in context.py
Added doctest for method textFile and description for methods _initialize_context and _ensure_initialized in context.py

Author: Jyotiska NK <jyotiska123@gmail.com>

Closes #187 from jyotiska/pyspark_context and squashes the following commits:

356f945 [Jyotiska NK] Added doctest for textFile method in context.py
5b23686 [Jyotiska NK] Updated context.py with method descriptions
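For readers unfamiliar with the mechanism this patch uses: a doctest embeds an interactive session in a docstring, and Python's doctest module re-runs the ">>>" lines and compares the printed output against the recorded text. A minimal, self-contained illustration (not part of this patch; the file and function names are made up):

    # doctest_demo.py -- check with: python -m doctest doctest_demo.py -v
    def greet(name):
        """
        >>> greet("world")
        'Hello world!'
        """
        return "Hello %s!" % name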
Diffstat (limited to 'python')
-rw-r--r--	python/pyspark/context.py	15
1 file changed, 14 insertions(+), 1 deletion(-)
diff --git a/python/pyspark/context.py b/python/pyspark/context.py
index 27b440d73b..56746cb7aa 100644
--- a/python/pyspark/context.py
+++ b/python/pyspark/context.py
@@ -173,12 +173,18 @@ class SparkContext(object):
          self._temp_dir = \
              self._jvm.org.apache.spark.util.Utils.createTempDir(local_dir).getAbsolutePath()
 
-    # Initialize SparkContext in function to allow subclass specific initialization
     def _initialize_context(self, jconf):
+        """
+        Initialize SparkContext in function to allow subclass specific initialization
+        """
         return self._jvm.JavaSparkContext(jconf)
 
     @classmethod
     def _ensure_initialized(cls, instance=None, gateway=None):
+        """
+        Checks whether a SparkContext is initialized or not.
+        Throws error if a SparkContext is already running.
+        """
         with SparkContext._lock:
             if not SparkContext._gateway:
                 SparkContext._gateway = gateway or launch_gateway()
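The new docstring on _ensure_initialized describes a common singleton-guard pattern: a class-level lock plus a class-level slot, so that creating a second live context raises an error. A toy sketch of that pattern, for illustration only (the Context class and _active attribute are invented here; Spark's real implementation additionally manages the Py4J gateway):

    import threading

    class Context(object):
        """Toy one-active-context guard; not Spark's actual code."""
        _lock = threading.Lock()
        _active = None

        @classmethod
        def _ensure_initialized(cls, instance=None):
            # Take the class-level lock, then reject any second live instance.
            with cls._lock:
                if instance is not None:
                    if cls._active is not None and cls._active is not instance:
                        raise ValueError("Cannot run multiple Contexts at once")
                    cls._active = instance

        def __init__(self):
            Context._ensure_initialized(instance=self)

    first = Context()
    try:
        Context()          # the guard fires on the second context
    except ValueError as exc:
        print(exc)         # Cannot run multiple Contexts at once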
@@ -270,6 +276,13 @@ class SparkContext(object):
         Read a text file from HDFS, a local file system (available on all
         nodes), or any Hadoop-supported file system URI, and return it as an
         RDD of Strings.
+
+        >>> path = os.path.join(tempdir, "sample-text.txt")
+        >>> with open(path, "w") as testFile:
+        ...    testFile.write("Hello world!")
+        >>> textFile = sc.textFile(path)
+        >>> textFile.collect()
+        [u'Hello world!']
         """
         minPartitions = minPartitions or min(self.defaultParallelism, 2)
         return RDD(self._jsc.textFile(name, minPartitions), self,
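Note that in the doctest above, sc and tempdir are not defined in the snippet itself; they are supplied as globals by the module's doctest harness before the docstrings are executed. Outside that harness, a roughly equivalent standalone script would look like this (a sketch assuming a local Spark installation; the app name and file contents are illustrative):

    import os
    import tempfile
    from pyspark import SparkContext

    sc = SparkContext("local", "textFileExample")
    tempdir = tempfile.mkdtemp()
    path = os.path.join(tempdir, "sample-text.txt")
    with open(path, "w") as testFile:
        testFile.write("Hello world!")

    # Each line of the file becomes one element of the RDD.
    print(sc.textFile(path).collect())   # [u'Hello world!'] on Python 2
    sc.stop()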