diff options
author | Josh Rosen <joshrosen@eecs.berkeley.edu> | 2012-12-28 22:51:28 -0800 |
---|---|---|
committer | Josh Rosen <joshrosen@eecs.berkeley.edu> | 2012-12-28 22:51:28 -0800 |
commit | c2b105af34f7241ac0597d9c35fbf66633a3eaf6 (patch) | |
tree | e96946d2b714365937019f60741bf3ae62d565c6 /pyspark | |
parent | 7ec3595de28d53839cb3a45e940ec16f81ffdf45 (diff) | |
download | spark-c2b105af34f7241ac0597d9c35fbf66633a3eaf6.tar.gz spark-c2b105af34f7241ac0597d9c35fbf66633a3eaf6.tar.bz2 spark-c2b105af34f7241ac0597d9c35fbf66633a3eaf6.zip |
Add documentation for Python API.
Diffstat (limited to 'pyspark')
-rw-r--r-- | pyspark/README | 42 | ||||
-rw-r--r-- | pyspark/examples/kmeans.py (renamed from pyspark/pyspark/examples/kmeans.py) | 0 | ||||
-rw-r--r-- | pyspark/examples/pi.py (renamed from pyspark/pyspark/examples/pi.py) | 0 | ||||
-rw-r--r-- | pyspark/examples/tc.py (renamed from pyspark/pyspark/examples/tc.py) | 0 | ||||
-rw-r--r-- | pyspark/examples/wordcount.py (renamed from pyspark/pyspark/examples/wordcount.py) | 0 | ||||
-rw-r--r-- | pyspark/pyspark/__init__.py | 6 | ||||
-rw-r--r-- | pyspark/pyspark/examples/__init__.py | 0 |
7 files changed, 6 insertions, 42 deletions
diff --git a/pyspark/README b/pyspark/README deleted file mode 100644 index d8d521c72c..0000000000 --- a/pyspark/README +++ /dev/null @@ -1,42 +0,0 @@ -# PySpark - -PySpark is a Python API for Spark. - -PySpark jobs are written in Python and executed using a standard Python -interpreter; this supports modules that use Python C extensions. The -API is based on the Spark Scala API and uses regular Python functions -and lambdas to support user-defined functions. PySpark supports -interactive use through a standard Python interpreter; it can -automatically serialize closures and ship them to worker processes. - -PySpark is built on top of the Spark Java API. Data is uniformly -represented as serialized Python objects and stored in Spark Java -processes, which communicate with PySpark worker processes over pipes. - -## Features - -PySpark supports most of the Spark API, including broadcast variables. -RDDs are dynamically typed and can hold any Python object. - -PySpark does not support: - -- Special functions on RDDs of doubles -- Accumulators - -## Examples and Documentation - -The PySpark source contains docstrings and doctests that document its -API. The public classes are in `context.py` and `rdd.py`. - -The `pyspark/pyspark/examples` directory contains a few complete -examples. - -## Installing PySpark -# -To use PySpark, `SPARK_HOME` should be set to the location of the Spark -package. - -## Running PySpark - -The easiest way to run PySpark is to use the `run-pyspark` and -`pyspark-shell` scripts, which are included in the `pyspark` directory. 
diff --git a/pyspark/pyspark/examples/kmeans.py b/pyspark/examples/kmeans.py index 9cc366f03c..9cc366f03c 100644 --- a/pyspark/pyspark/examples/kmeans.py +++ b/pyspark/examples/kmeans.py diff --git a/pyspark/pyspark/examples/pi.py b/pyspark/examples/pi.py index 348bbc5dce..348bbc5dce 100644 --- a/pyspark/pyspark/examples/pi.py +++ b/pyspark/examples/pi.py diff --git a/pyspark/pyspark/examples/tc.py b/pyspark/examples/tc.py index 9630e72b47..9630e72b47 100644 --- a/pyspark/pyspark/examples/tc.py +++ b/pyspark/examples/tc.py diff --git a/pyspark/pyspark/examples/wordcount.py b/pyspark/examples/wordcount.py index 8365c070e8..8365c070e8 100644 --- a/pyspark/pyspark/examples/wordcount.py +++ b/pyspark/examples/wordcount.py diff --git a/pyspark/pyspark/__init__.py b/pyspark/pyspark/__init__.py index 549c2d2711..8f8402b62b 100644 --- a/pyspark/pyspark/__init__.py +++ b/pyspark/pyspark/__init__.py @@ -1,3 +1,9 @@ import sys import os sys.path.insert(0, os.path.join(os.environ["SPARK_HOME"], "pyspark/lib/py4j0.7.egg")) + + +from pyspark.context import SparkContext + + +__all__ = ["SparkContext"] diff --git a/pyspark/pyspark/examples/__init__.py b/pyspark/pyspark/examples/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 --- a/pyspark/pyspark/examples/__init__.py +++ /dev/null |