diff options
author | Jyotiska NK <jyotiska123@gmail.com> | 2014-05-28 23:08:39 -0700 |
---|---|---|
committer | Matei Zaharia <matei@databricks.com> | 2014-05-28 23:08:39 -0700 |
commit | 9cff1dd25abc5e848720d853172ed42e35376fd0 (patch) | |
tree | fd70c8030abe4b832106cd51e1785215730f6fb4 /python | |
parent | 4dbb27b0cf4eb67c92aad2c1158616312f5a54e6 (diff) | |
download | spark-9cff1dd25abc5e848720d853172ed42e35376fd0.tar.gz spark-9cff1dd25abc5e848720d853172ed42e35376fd0.tar.bz2 spark-9cff1dd25abc5e848720d853172ed42e35376fd0.zip |
Added doctest and method description in context.py
Added doctest for method textFile and description for methods _initialize_context and _ensure_initialized in context.py
Author: Jyotiska NK <jyotiska123@gmail.com>
Closes #187 from jyotiska/pyspark_context and squashes the following commits:
356f945 [Jyotiska NK] Added doctest for textFile method in context.py
5b23686 [Jyotiska NK] Updated context.py with method descriptions
Diffstat (limited to 'python')
-rw-r--r-- | python/pyspark/context.py | 15 |
1 file changed, 14 insertions, 1 deletion
```diff
diff --git a/python/pyspark/context.py b/python/pyspark/context.py
index 27b440d73b..56746cb7aa 100644
--- a/python/pyspark/context.py
+++ b/python/pyspark/context.py
@@ -173,12 +173,18 @@ class SparkContext(object):
         self._temp_dir = \
             self._jvm.org.apache.spark.util.Utils.createTempDir(local_dir).getAbsolutePath()
 
-    # Initialize SparkContext in function to allow subclass specific initialization
     def _initialize_context(self, jconf):
+        """
+        Initialize SparkContext in function to allow subclass specific initialization
+        """
         return self._jvm.JavaSparkContext(jconf)
 
     @classmethod
     def _ensure_initialized(cls, instance=None, gateway=None):
+        """
+        Checks whether a SparkContext is initialized or not.
+        Throws error if a SparkContext is already running.
+        """
         with SparkContext._lock:
             if not SparkContext._gateway:
                 SparkContext._gateway = gateway or launch_gateway()
@@ -270,6 +276,13 @@ class SparkContext(object):
         Read a text file from HDFS, a local file system (available on all
         nodes), or any Hadoop-supported file system URI, and return it as an
         RDD of Strings.
+
+        >>> path = os.path.join(tempdir, "sample-text.txt")
+        >>> with open(path, "w") as testFile:
+        ...     testFile.write("Hello world!")
+        >>> textFile = sc.textFile(path)
+        >>> textFile.collect()
+        [u'Hello world!']
         """
         minPartitions = minPartitions or min(self.defaultParallelism, 2)
         return RDD(self._jsc.textFile(name, minPartitions), self,
```