| author | Davies Liu <davies.liu@gmail.com> | 2014-09-16 12:51:58 -0700 |
|---|---|---|
| committer | Josh Rosen <joshrosen@apache.org> | 2014-09-16 12:51:58 -0700 |
| commit | ec1adecbb72d291d7ef122fb0505bae53116e0e6 (patch) | |
| tree | a61931ca6e78016fbaae5c4b75c97a35c47fde22 /python/pyspark | |
| parent | a9e910430fb6bb4ef1f6ae20761c43b96bb018df (diff) | |
[SPARK-3430] [PySpark] [Doc] generate PySpark API docs using Sphinx
Using Sphinx to generate API docs for PySpark.
Requirement: Sphinx
```
$ cd python/docs/
$ make html
```
The generated API docs will be located at python/docs/_build/html/index.html.
They can co-exist with the docs generated by Epydoc.
This is a first working version; after it is merged in, we can continue to improve it and eventually replace Epydoc.
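For context, here is a minimal sketch of the kind of Sphinx configuration such a build relies on. The actual python/docs/conf.py added by this commit lies outside the 'python/pyspark' diffstat below, so every name and value here is an illustrative assumption, not the committed file.

```python
# conf.py -- hypothetical minimal Sphinx configuration for PySpark API docs.
# All values are assumptions for illustration; see the real python/docs/conf.py
# in the commit for the actual settings.
import os
import sys

# Make the pyspark package importable so sphinx.ext.autodoc can introspect it
# (python/docs/ sits one level below python/, where the package lives).
sys.path.insert(0, os.path.abspath('..'))

extensions = ['sphinx.ext.autodoc', 'sphinx.ext.viewcode']

project = 'PySpark'
master_doc = 'index'    # root document, rendered as index.html
html_theme = 'default'
```

With a matching Makefile (the stock one emitted by sphinx-quickstart suffices), `make html` invokes sphinx-build and writes the output under _build/html/, which matches the index.html path given above.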
Author: Davies Liu <davies.liu@gmail.com>
Closes #2292 from davies/sphinx and squashes the following commits:
425a3b1 [Davies Liu] cleanup
1573298 [Davies Liu] move docs to python/docs/
5fe3903 [Davies Liu] Merge branch 'master' into sphinx
9468ab0 [Davies Liu] fix makefile
b408f38 [Davies Liu] address all comments
e2ccb1b [Davies Liu] update name and version
9081ead [Davies Liu] generate PySpark API docs using Sphinx
Diffstat (limited to 'python/pyspark')
| -rw-r--r-- | python/pyspark/broadcast.py | 3 |
| -rw-r--r-- | python/pyspark/context.py | 2 |
| -rw-r--r-- | python/pyspark/serializers.py | 3 |
| -rw-r--r-- | python/pyspark/sql.py | 12 |
4 files changed, 15 insertions, 5 deletions
```diff
diff --git a/python/pyspark/broadcast.py b/python/pyspark/broadcast.py
index 5c7c9cc161..f124dc6c07 100644
--- a/python/pyspark/broadcast.py
+++ b/python/pyspark/broadcast.py
@@ -78,6 +78,9 @@ class Broadcast(object):
         return self._value
 
     def unpersist(self, blocking=False):
+        """
+        Delete cached copies of this broadcast on the executors.
+        """
         self._jbroadcast.unpersist(blocking)
         os.unlink(self.path)
diff --git a/python/pyspark/context.py b/python/pyspark/context.py
index a33aae87f6..a17f2c1203 100644
--- a/python/pyspark/context.py
+++ b/python/pyspark/context.py
@@ -53,7 +53,7 @@ class SparkContext(object):
     """
     Main entry point for Spark functionality. A SparkContext represents the
-    connection to a Spark cluster, and can be used to create L{RDD}s and
+    connection to a Spark cluster, and can be used to create L{RDD} and
     broadcast variables on that cluster.
     """
diff --git a/python/pyspark/serializers.py b/python/pyspark/serializers.py
index ec3c6f0554..44ac564283 100644
--- a/python/pyspark/serializers.py
+++ b/python/pyspark/serializers.py
@@ -110,6 +110,9 @@ class Serializer(object):
     def __ne__(self, other):
         return not self.__eq__(other)
 
+    def __repr__(self):
+        return "<%s object>" % self.__class__.__name__
+
 
 class FramedSerializer(Serializer):
diff --git a/python/pyspark/sql.py b/python/pyspark/sql.py
index 621a556ec6..8f6dbab240 100644
--- a/python/pyspark/sql.py
+++ b/python/pyspark/sql.py
@@ -289,7 +289,7 @@ class StructType(DataType):
     """Spark SQL StructType
 
     The data type representing rows.
-    A StructType object comprises a list of L{StructField}s.
+    A StructType object comprises a list of L{StructField}.
 
     """
@@ -904,7 +904,7 @@ class SQLContext(object):
 
     """Main entry point for Spark SQL functionality.
 
-    A SQLContext can be used create L{SchemaRDD}s, register L{SchemaRDD}s as
+    A SQLContext can be used create L{SchemaRDD}, register L{SchemaRDD} as
     tables, execute SQL over tables, cache tables, and read parquet files.
     """
@@ -994,7 +994,7 @@ class SQLContext(object):
             str(returnType))
 
     def inferSchema(self, rdd):
-        """Infer and apply a schema to an RDD of L{Row}s.
+        """Infer and apply a schema to an RDD of L{Row}.
 
         We peek at the first row of the RDD to determine the fields' names
         and types. Nested collections are supported, which include array,
@@ -1047,7 +1047,7 @@ class SQLContext(object):
 
     def applySchema(self, rdd, schema):
         """
-        Applies the given schema to the given RDD of L{tuple} or L{list}s.
+        Applies the given schema to the given RDD of L{tuple} or L{list}.
 
         These tuples or lists can contain complex nested structures like
         lists, maps or nested rows.
@@ -1183,6 +1183,7 @@ class SQLContext(object):
         Row(f1=1, f2=u'row1', f3=Row(field4=11, field5=None), f4=None)
         Row(f1=2, f2=None, f3=Row(field4=22,..., f4=[Row(field7=u'row2')])
         Row(f1=None, f2=u'row3', f3=Row(field4=33, field5=[]), f4=None)
+
         >>> srdd3 = sqlCtx.jsonFile(jsonFile, srdd1.schema())
         >>> sqlCtx.registerRDDAsTable(srdd3, "table2")
         >>> srdd4 = sqlCtx.sql(
@@ -1193,6 +1194,7 @@ class SQLContext(object):
         Row(f1=1, f2=u'row1', f3=Row(field4=11, field5=None), f4=None)
         Row(f1=2, f2=None, f3=Row(field4=22,..., f4=[Row(field7=u'row2')])
         Row(f1=None, f2=u'row3', f3=Row(field4=33, field5=[]), f4=None)
+
         >>> schema = StructType([
         ...     StructField("field2", StringType(), True),
         ...     StructField("field3",
@@ -1233,6 +1235,7 @@ class SQLContext(object):
         Row(f1=1, f2=u'row1', f3=Row(field4=11, field5=None), f4=None)
         Row(f1=2, f2=None, f3=Row(field4=22..., f4=[Row(field7=u'row2')])
         Row(f1=None, f2=u'row3', f3=Row(field4=33, field5=[]), f4=None)
+
         >>> srdd3 = sqlCtx.jsonRDD(json, srdd1.schema())
         >>> sqlCtx.registerRDDAsTable(srdd3, "table2")
         >>> srdd4 = sqlCtx.sql(
@@ -1243,6 +1246,7 @@ class SQLContext(object):
         Row(f1=1, f2=u'row1', f3=Row(field4=11, field5=None), f4=None)
         Row(f1=2, f2=None, f3=Row(field4=22..., f4=[Row(field7=u'row2')])
         Row(f1=None, f2=u'row3', f3=Row(field4=33, field5=[]), f4=None)
+
         >>> schema = StructType([
         ...     StructField("field2", StringType(), True),
         ...     StructField("field3",
```