diff options
author | Davies Liu <davies.liu@gmail.com> | 2014-09-03 11:49:45 -0700 |
---|---|---|
committer | Josh Rosen <joshrosen@apache.org> | 2014-09-03 11:49:45 -0700 |
commit | 6481d27425f6d42ead36663c9a4ef7ee13b3a8c9 (patch) | |
tree | 051c394c0735be33d4bb7f9fd90f403e9b5f2dcd /python/pyspark/sql.py | |
parent | 6a72a36940311fcb3429bd34c8818bc7d513115c (diff) | |
download | spark-6481d27425f6d42ead36663c9a4ef7ee13b3a8c9.tar.gz spark-6481d27425f6d42ead36663c9a4ef7ee13b3a8c9.tar.bz2 spark-6481d27425f6d42ead36663c9a4ef7ee13b3a8c9.zip |
[SPARK-3309] [PySpark] Put all public API in __all__
Put all public API in __all__, and also put them all in pyspark.__init__.py, so that we can get all the documentation for the public API via `pydoc pyspark`. It also can be used by other programs (such as Sphinx or Epydoc) to generate documentation only for public APIs.
Author: Davies Liu <davies.liu@gmail.com>
Closes #2205 from davies/public and squashes the following commits:
c6c5567 [Davies Liu] fix message
f7b35be [Davies Liu] put SchemeRDD, Row in pyspark.sql module
7e3016a [Davies Liu] add __all__ in mllib
6281b48 [Davies Liu] fix doc for SchemaRDD
6caab21 [Davies Liu] add public interfaces into pyspark.__init__.py
Diffstat (limited to 'python/pyspark/sql.py')
-rw-r--r-- | python/pyspark/sql.py | 21 |
1 file changed, 18 insertions, 3 deletions
diff --git a/python/pyspark/sql.py b/python/pyspark/sql.py index 0ff6a548a8..44316926ba 100644 --- a/python/pyspark/sql.py +++ b/python/pyspark/sql.py @@ -40,8 +40,7 @@ __all__ = [ "StringType", "BinaryType", "BooleanType", "TimestampType", "DecimalType", "DoubleType", "FloatType", "ByteType", "IntegerType", "LongType", "ShortType", "ArrayType", "MapType", "StructField", "StructType", - "SQLContext", "HiveContext", "LocalHiveContext", "TestHiveContext", - "SchemaRDD", "Row"] + "SQLContext", "HiveContext", "SchemaRDD", "Row"] class DataType(object): @@ -1037,7 +1036,7 @@ class SQLContext: "can not infer schema") if type(first) is dict: warnings.warn("Using RDD of dict to inferSchema is deprecated," - "please use pyspark.Row instead") + "please use pyspark.sql.Row instead") schema = _infer_schema(first) rdd = rdd.mapPartitions(lambda rows: _drop_schema(rows, schema)) @@ -1487,6 +1486,21 @@ class Row(tuple): return "<Row(%s)>" % ", ".join(self) +def inherit_doc(cls): + for name, func in vars(cls).items(): + # only inherit docstring for public functions + if name.startswith("_"): + continue + if not func.__doc__: + for parent in cls.__bases__: + parent_func = getattr(parent, name, None) + if parent_func and getattr(parent_func, "__doc__", None): + func.__doc__ = parent_func.__doc__ + break + return cls + + +@inherit_doc class SchemaRDD(RDD): """An RDD of L{Row} objects that has an associated schema. @@ -1563,6 +1577,7 @@ class SchemaRDD(RDD): self._jschema_rdd.registerTempTable(name) def registerAsTable(self, name): + """DEPRECATED: use registerTempTable() instead""" warnings.warn("Use registerTempTable instead of registerAsTable.", DeprecationWarning) self.registerTempTable(name) |