diff options
author | Davies Liu <davies.liu@gmail.com> | 2014-09-03 11:49:45 -0700 |
---|---|---|
committer | Josh Rosen <joshrosen@apache.org> | 2014-09-03 11:49:45 -0700 |
commit | 6481d27425f6d42ead36663c9a4ef7ee13b3a8c9 (patch) | |
tree | 051c394c0735be33d4bb7f9fd90f403e9b5f2dcd /python/pyspark/sql.py | |
parent | 6a72a36940311fcb3429bd34c8818bc7d513115c (diff) | |
download | spark-6481d27425f6d42ead36663c9a4ef7ee13b3a8c9.tar.gz spark-6481d27425f6d42ead36663c9a4ef7ee13b3a8c9.tar.bz2 spark-6481d27425f6d42ead36663c9a4ef7ee13b3a8c9.zip |
[SPARK-3309] [PySpark] Put all public API in __all__
Put all public API in __all__, and also put them all in pyspark.__init__.py, so that we can get all the documentation for the public API via `pydoc pyspark`. It also can be used by other programs (such as Sphinx or Epydoc) to generate documentation only for public APIs.
Author: Davies Liu <davies.liu@gmail.com>
Closes #2205 from davies/public and squashes the following commits:
c6c5567 [Davies Liu] fix message
f7b35be [Davies Liu] put SchemeRDD, Row in pyspark.sql module
7e3016a [Davies Liu] add __all__ in mllib
6281b48 [Davies Liu] fix doc for SchemaRDD
6caab21 [Davies Liu] add public interfaces into pyspark.__init__.py
Diffstat (limited to 'python/pyspark/sql.py')
-rw-r--r-- | python/pyspark/sql.py | 21 |
1 file changed, 18 insertions, 3 deletions
diff --git a/python/pyspark/sql.py b/python/pyspark/sql.py index 0ff6a548a8..44316926ba 100644 --- a/python/pyspark/sql.py +++ b/python/pyspark/sql.py @@ -40,8 +40,7 @@ __all__ = [ "StringType", "BinaryType", "BooleanType", "TimestampType", "DecimalType", "DoubleType", "FloatType", "ByteType", "IntegerType", "LongType", "ShortType", "ArrayType", "MapType", "StructField", "StructType", - "SQLContext", "HiveContext", "LocalHiveContext", "TestHiveContext", - "SchemaRDD", "Row"] + "SQLContext", "HiveContext", "SchemaRDD", "Row"] class DataType(object): @@ -1037,7 +1036,7 @@ class SQLContext: "can not infer schema") if type(first) is dict: warnings.warn("Using RDD of dict to inferSchema is deprecated," - "please use pyspark.Row instead") + "please use pyspark.sql.Row instead") schema = _infer_schema(first) rdd = rdd.mapPartitions(lambda rows: _drop_schema(rows, schema)) @@ -1487,6 +1486,21 @@ class Row(tuple): return "<Row(%s)>" % ", ".join(self) +def inherit_doc(cls): + for name, func in vars(cls).items(): + # only inherit docstring for public functions + if name.startswith("_"): + continue + if not func.__doc__: + for parent in cls.__bases__: + parent_func = getattr(parent, name, None) + if parent_func and getattr(parent_func, "__doc__", None): + func.__doc__ = parent_func.__doc__ + break + return cls + + +@inherit_doc class SchemaRDD(RDD): """An RDD of L{Row} objects that has an associated schema. @@ -1563,6 +1577,7 @@ class SchemaRDD(RDD): self._jschema_rdd.registerTempTable(name) def registerAsTable(self, name): + """DEPRECATED: use registerTempTable() instead""" warnings.warn("Use registerTempTable instead of registerAsTable.", DeprecationWarning) self.registerTempTable(name) |