| author | Davies Liu <davies.liu@gmail.com> | 2014-09-16 12:51:58 -0700 |
|---|---|---|
| committer | Josh Rosen <joshrosen@apache.org> | 2014-09-16 12:51:58 -0700 |
| commit | ec1adecbb72d291d7ef122fb0505bae53116e0e6 (patch) | |
| tree | a61931ca6e78016fbaae5c4b75c97a35c47fde22 /python/pyspark | |
| parent | a9e910430fb6bb4ef1f6ae20761c43b96bb018df (diff) | |
[SPARK-3430] [PySpark] [Doc] generate PySpark API docs using Sphinx
Using Sphinx to generate API docs for PySpark.
Requirement: Sphinx
```
$ cd python/docs/
$ make html
```
The generated API docs will be located at python/docs/_build/html/index.html.
They can co-exist with the docs generated by Epydoc.
This is a first working version; after it is merged in, we can continue to improve it and eventually replace Epydoc.
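For context, here is a minimal sketch of the kind of Sphinx configuration such a build relies on. The actual python/docs/conf.py added by this commit lies outside the 'python/pyspark' diffstat below, so every name and value here is an illustrative assumption, not the committed file.

```python
# conf.py -- hypothetical minimal Sphinx configuration for PySpark API docs.
# All values are assumptions for illustration; see the real python/docs/conf.py
# in the commit for the actual settings.
import os
import sys

# Make the pyspark package importable so sphinx.ext.autodoc can introspect it
# (python/docs/ sits one level below python/, where the package lives).
sys.path.insert(0, os.path.abspath('..'))

extensions = ['sphinx.ext.autodoc', 'sphinx.ext.viewcode']

project = 'PySpark'
master_doc = 'index'    # root document, rendered as index.html
html_theme = 'default'
```

With a matching Makefile (the stock one emitted by sphinx-quickstart suffices), `make html` invokes sphinx-build and writes the output under _build/html/, which matches the index.html path given above.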
Author: Davies Liu <davies.liu@gmail.com>
Closes #2292 from davies/sphinx and squashes the following commits:
425a3b1 [Davies Liu] cleanup
1573298 [Davies Liu] move docs to python/docs/
5fe3903 [Davies Liu] Merge branch 'master' into sphinx
9468ab0 [Davies Liu] fix makefile
b408f38 [Davies Liu] address all comments
e2ccb1b [Davies Liu] update name and version
9081ead [Davies Liu] generate PySpark API docs using Sphinx
Diffstat (limited to 'python/pyspark')
| -rw-r--r-- | python/pyspark/broadcast.py | 3 |
| -rw-r--r-- | python/pyspark/context.py | 2 |
| -rw-r--r-- | python/pyspark/serializers.py | 3 |
| -rw-r--r-- | python/pyspark/sql.py | 12 |
4 files changed, 15 insertions, 5 deletions
```diff
diff --git a/python/pyspark/broadcast.py b/python/pyspark/broadcast.py
index 5c7c9cc161..f124dc6c07 100644
--- a/python/pyspark/broadcast.py
+++ b/python/pyspark/broadcast.py
@@ -78,6 +78,9 @@ class Broadcast(object):
         return self._value
 
     def unpersist(self, blocking=False):
+        """
+        Delete cached copies of this broadcast on the executors.
+        """
         self._jbroadcast.unpersist(blocking)
         os.unlink(self.path)
diff --git a/python/pyspark/context.py b/python/pyspark/context.py
index a33aae87f6..a17f2c1203 100644
--- a/python/pyspark/context.py
+++ b/python/pyspark/context.py
@@ -53,7 +53,7 @@ class SparkContext(object):
     """
     Main entry point for Spark functionality. A SparkContext represents the
-    connection to a Spark cluster, and can be used to create L{RDD}s and
+    connection to a Spark cluster, and can be used to create L{RDD} and
     broadcast variables on that cluster.
     """
diff --git a/python/pyspark/serializers.py b/python/pyspark/serializers.py
index ec3c6f0554..44ac564283 100644
--- a/python/pyspark/serializers.py
+++ b/python/pyspark/serializers.py
@@ -110,6 +110,9 @@ class Serializer(object):
     def __ne__(self, other):
         return not self.__eq__(other)
 
+    def __repr__(self):
+        return "<%s object>" % self.__class__.__name__
+
 
 class FramedSerializer(Serializer):
diff --git a/python/pyspark/sql.py b/python/pyspark/sql.py
index 621a556ec6..8f6dbab240 100644
--- a/python/pyspark/sql.py
+++ b/python/pyspark/sql.py
@@ -289,7 +289,7 @@ class StructType(DataType):
     """Spark SQL StructType
 
     The data type representing rows.
-    A StructType object comprises a list of L{StructField}s.
+    A StructType object comprises a list of L{StructField}.
 
     """
@@ -904,7 +904,7 @@ class SQLContext(object):
 
     """Main entry point for Spark SQL functionality.
 
-    A SQLContext can be used create L{SchemaRDD}s, register L{SchemaRDD}s as
+    A SQLContext can be used create L{SchemaRDD}, register L{SchemaRDD} as
     tables, execute SQL over tables, cache tables, and read parquet files.
     """
@@ -994,7 +994,7 @@ class SQLContext(object):
             str(returnType))
 
     def inferSchema(self, rdd):
-        """Infer and apply a schema to an RDD of L{Row}s.
+        """Infer and apply a schema to an RDD of L{Row}.
 
         We peek at the first row of the RDD to determine the fields' names
         and types. Nested collections are supported, which include array,
@@ -1047,7 +1047,7 @@ class SQLContext(object):
 
     def applySchema(self, rdd, schema):
         """
-        Applies the given schema to the given RDD of L{tuple} or L{list}s.
+        Applies the given schema to the given RDD of L{tuple} or L{list}.
 
         These tuples or lists can contain complex nested structures like
         lists, maps or nested rows.
@@ -1183,6 +1183,7 @@ class SQLContext(object):
         Row(f1=1, f2=u'row1', f3=Row(field4=11, field5=None), f4=None)
         Row(f1=2, f2=None, f3=Row(field4=22,..., f4=[Row(field7=u'row2')])
         Row(f1=None, f2=u'row3', f3=Row(field4=33, field5=[]), f4=None)
+
         >>> srdd3 = sqlCtx.jsonFile(jsonFile, srdd1.schema())
         >>> sqlCtx.registerRDDAsTable(srdd3, "table2")
         >>> srdd4 = sqlCtx.sql(
@@ -1193,6 +1194,7 @@ class SQLContext(object):
         Row(f1=1, f2=u'row1', f3=Row(field4=11, field5=None), f4=None)
         Row(f1=2, f2=None, f3=Row(field4=22,..., f4=[Row(field7=u'row2')])
         Row(f1=None, f2=u'row3', f3=Row(field4=33, field5=[]), f4=None)
+
         >>> schema = StructType([
         ...     StructField("field2", StringType(), True),
         ...     StructField("field3",
@@ -1233,6 +1235,7 @@ class SQLContext(object):
         Row(f1=1, f2=u'row1', f3=Row(field4=11, field5=None), f4=None)
         Row(f1=2, f2=None, f3=Row(field4=22..., f4=[Row(field7=u'row2')])
         Row(f1=None, f2=u'row3', f3=Row(field4=33, field5=[]), f4=None)
+
         >>> srdd3 = sqlCtx.jsonRDD(json, srdd1.schema())
         >>> sqlCtx.registerRDDAsTable(srdd3, "table2")
         >>> srdd4 = sqlCtx.sql(
@@ -1243,6 +1246,7 @@ class SQLContext(object):
         Row(f1=1, f2=u'row1', f3=Row(field4=11, field5=None), f4=None)
         Row(f1=2, f2=None, f3=Row(field4=22..., f4=[Row(field7=u'row2')])
         Row(f1=None, f2=u'row3', f3=Row(field4=33, field5=[]), f4=None)
+
         >>> schema = StructType([
         ...     StructField("field2", StringType(), True),
         ...     StructField("field3",
```