diff options
author | Davies Liu <davies.liu@gmail.com> | 2014-09-03 11:49:45 -0700 |
---|---|---|
committer | Josh Rosen <joshrosen@apache.org> | 2014-09-03 11:49:45 -0700 |
commit | 6481d27425f6d42ead36663c9a4ef7ee13b3a8c9 (patch) | |
tree | 051c394c0735be33d4bb7f9fd90f403e9b5f2dcd /python/pyspark/mllib | |
parent | 6a72a36940311fcb3429bd34c8818bc7d513115c (diff) | |
download | spark-6481d27425f6d42ead36663c9a4ef7ee13b3a8c9.tar.gz spark-6481d27425f6d42ead36663c9a4ef7ee13b3a8c9.tar.bz2 spark-6481d27425f6d42ead36663c9a4ef7ee13b3a8c9.zip |
[SPARK-3309] [PySpark] Put all public API in __all__
Put all public API in __all__, and also put them all in pyspark.__init__.py, so that we can get all the documentation for the public API with `pydoc pyspark`. It can also be used by other programs (such as Sphinx or Epydoc) to generate documentation for only the public APIs.
Author: Davies Liu <davies.liu@gmail.com>
Closes #2205 from davies/public and squashes the following commits:
c6c5567 [Davies Liu] fix message
f7b35be [Davies Liu] put SchemaRDD, Row in pyspark.sql module
7e3016a [Davies Liu] add __all__ in mllib
6281b48 [Davies Liu] fix doc for SchemaRDD
6caab21 [Davies Liu] add public interfaces into pyspark.__init__.py
Diffstat (limited to 'python/pyspark/mllib')
-rw-r--r-- | python/pyspark/mllib/classification.py | 4 | ||||
-rw-r--r-- | python/pyspark/mllib/clustering.py | 2 | ||||
-rw-r--r-- | python/pyspark/mllib/linalg.py | 3 | ||||
-rw-r--r-- | python/pyspark/mllib/random.py | 3 | ||||
-rw-r--r-- | python/pyspark/mllib/recommendation.py | 2 | ||||
-rw-r--r-- | python/pyspark/mllib/regression.py | 10 | ||||
-rw-r--r-- | python/pyspark/mllib/stat.py | 6 | ||||
-rw-r--r-- | python/pyspark/mllib/tree.py | 4 |
8 files changed, 27 insertions, 7 deletions
diff --git a/python/pyspark/mllib/classification.py b/python/pyspark/mllib/classification.py index ffdda7ee19..71ab46b61d 100644 --- a/python/pyspark/mllib/classification.py +++ b/python/pyspark/mllib/classification.py @@ -30,6 +30,10 @@ from pyspark.mllib.regression import LabeledPoint, LinearModel from math import exp, log +__all__ = ['LogisticRegressionModel', 'LogisticRegressionWithSGD', 'SVMModel', + 'SVMWithSGD', 'NaiveBayesModel', 'NaiveBayes'] + + class LogisticRegressionModel(LinearModel): """A linear binary classification model derived from logistic regression. diff --git a/python/pyspark/mllib/clustering.py b/python/pyspark/mllib/clustering.py index a0630d1d5c..f3e952a1d8 100644 --- a/python/pyspark/mllib/clustering.py +++ b/python/pyspark/mllib/clustering.py @@ -25,6 +25,8 @@ from pyspark.mllib._common import \ _get_initial_weights, _serialize_rating, _regression_train_wrapper from pyspark.mllib.linalg import SparseVector +__all__ = ['KMeansModel', 'KMeans'] + class KMeansModel(object): diff --git a/python/pyspark/mllib/linalg.py b/python/pyspark/mllib/linalg.py index f485a69db1..e69051c104 100644 --- a/python/pyspark/mllib/linalg.py +++ b/python/pyspark/mllib/linalg.py @@ -27,6 +27,9 @@ import numpy from numpy import array, array_equal, ndarray, float64, int32 +__all__ = ['SparseVector', 'Vectors'] + + class SparseVector(object): """ diff --git a/python/pyspark/mllib/random.py b/python/pyspark/mllib/random.py index 4dc1a4a912..3e59c73db8 100644 --- a/python/pyspark/mllib/random.py +++ b/python/pyspark/mllib/random.py @@ -25,6 +25,9 @@ from pyspark.mllib._common import _deserialize_double, _deserialize_double_vecto from pyspark.serializers import NoOpSerializer +__all__ = ['RandomRDDs', ] + + class RandomRDDs: """ Generator methods for creating RDDs comprised of i.i.d samples from diff --git a/python/pyspark/mllib/recommendation.py b/python/pyspark/mllib/recommendation.py index e863fc249e..2df23394da 100644 --- a/python/pyspark/mllib/recommendation.py 
+++ b/python/pyspark/mllib/recommendation.py @@ -24,6 +24,8 @@ from pyspark.mllib._common import \ _serialize_tuple, RatingDeserializer from pyspark.rdd import RDD +__all__ = ['MatrixFactorizationModel', 'ALS'] + class MatrixFactorizationModel(object): diff --git a/python/pyspark/mllib/regression.py b/python/pyspark/mllib/regression.py index d8792cf448..f572dcfb84 100644 --- a/python/pyspark/mllib/regression.py +++ b/python/pyspark/mllib/regression.py @@ -17,15 +17,15 @@ from numpy import array, ndarray from pyspark import SparkContext -from pyspark.mllib._common import \ - _dot, _get_unmangled_rdd, _get_unmangled_double_vector_rdd, \ - _serialize_double_matrix, _deserialize_double_matrix, \ - _serialize_double_vector, _deserialize_double_vector, \ - _get_initial_weights, _serialize_rating, _regression_train_wrapper, \ +from pyspark.mllib._common import _dot, _regression_train_wrapper, \ _linear_predictor_typecheck, _have_scipy, _scipy_issparse from pyspark.mllib.linalg import SparseVector, Vectors +__all__ = ['LabeledPoint', 'LinearModel', 'LinearRegressionModel', 'RidgeRegressionModel' + 'LinearRegressionWithSGD', 'LassoWithSGD', 'RidgeRegressionWithSGD'] + + class LabeledPoint(object): """ diff --git a/python/pyspark/mllib/stat.py b/python/pyspark/mllib/stat.py index feef0d16cd..8c726f171c 100644 --- a/python/pyspark/mllib/stat.py +++ b/python/pyspark/mllib/stat.py @@ -21,8 +21,10 @@ Python package for statistical functions in MLlib. 
from pyspark.mllib._common import \ _get_unmangled_double_vector_rdd, _get_unmangled_rdd, \ - _serialize_double, _serialize_double_vector, \ - _deserialize_double, _deserialize_double_matrix, _deserialize_double_vector + _serialize_double, _deserialize_double_matrix, _deserialize_double_vector + + +__all__ = ['MultivariateStatisticalSummary', 'Statistics'] class MultivariateStatisticalSummary(object): diff --git a/python/pyspark/mllib/tree.py b/python/pyspark/mllib/tree.py index e9d778df5a..a2fade61e9 100644 --- a/python/pyspark/mllib/tree.py +++ b/python/pyspark/mllib/tree.py @@ -26,6 +26,9 @@ from pyspark.mllib.regression import LabeledPoint from pyspark.serializers import NoOpSerializer +__all__ = ['DecisionTreeModel', 'DecisionTree'] + + class DecisionTreeModel(object): """ @@ -88,6 +91,7 @@ class DecisionTree(object): It will probably be modified for Spark v1.2. Example usage: + >>> from numpy import array >>> import sys >>> from pyspark.mllib.regression import LabeledPoint |