diff options
Diffstat (limited to 'python/pyspark/__init__.py')
-rw-r--r-- | python/pyspark/__init__.py | 27 |
1 files changed, 27 insertions, 0 deletions
diff --git a/python/pyspark/__init__.py b/python/pyspark/__init__.py new file mode 100644 index 0000000000..3e8bca62f0 --- /dev/null +++ b/python/pyspark/__init__.py @@ -0,0 +1,27 @@ +""" +PySpark is a Python API for Spark. + +Public classes: + + - L{SparkContext<pyspark.context.SparkContext>} + Main entry point for Spark functionality. + - L{RDD<pyspark.rdd.RDD>} + A Resilient Distributed Dataset (RDD), the basic abstraction in Spark. + - L{Broadcast<pyspark.broadcast.Broadcast>} + A broadcast variable that gets reused across tasks. + - L{Accumulator<pyspark.accumulators.Accumulator>} + An "add-only" shared variable that tasks can only add values to. + - L{SparkFiles<pyspark.files.SparkFiles>} + Access files shipped with jobs. +""" +import sys +import os +sys.path.insert(0, os.path.join(os.environ["SPARK_HOME"], "python/lib/py4j0.7.egg")) + + +from pyspark.context import SparkContext +from pyspark.rdd import RDD +from pyspark.files import SparkFiles + + +__all__ = ["SparkContext", "RDD", "SparkFiles"] |