diff options
author | Davies Liu <davies.liu@gmail.com> | 2014-10-07 18:09:27 -0700 |
---|---|---|
committer | Josh Rosen <joshrosen@apache.org> | 2014-10-07 18:09:27 -0700 |
commit | 798ed22c289cf65f2249bf2f4250285685ca69e7 (patch) | |
tree | 137d93c32454aaf39e6416823a8604f816f73926 /python/pyspark/mllib | |
parent | b69c9fb6fb048509bbd8430fb697dc3a5ca4fe59 (diff) | |
download | spark-798ed22c289cf65f2249bf2f4250285685ca69e7.tar.gz spark-798ed22c289cf65f2249bf2f4250285685ca69e7.tar.bz2 spark-798ed22c289cf65f2249bf2f4250285685ca69e7.zip |
[SPARK-3412] [PySpark] Replace Epydoc with Sphinx to generate Python API docs
Retire Epydoc, use Sphinx to generate API docs.
Refine Sphinx docs, also convert some docstrings into Sphinx style.
It looks like:
![api doc](https://cloud.githubusercontent.com/assets/40902/4538272/9e2d4f10-4dec-11e4-8d96-6e45a8fe51f9.png)
Author: Davies Liu <davies.liu@gmail.com>
Closes #2689 from davies/docs and squashes the following commits:
bf4a0a5 [Davies Liu] fix links
3fb1572 [Davies Liu] fix _static in jekyll
65a287e [Davies Liu] fix scripts and logo
8524042 [Davies Liu] Merge branch 'master' of github.com:apache/spark into docs
d5b874a [Davies Liu] Merge branch 'master' of github.com:apache/spark into docs
4bc1c3c [Davies Liu] refactor
746d0b6 [Davies Liu] @param -> :param
240b393 [Davies Liu] replace epydoc with sphinx doc
Diffstat (limited to 'python/pyspark/mllib')
-rw-r--r-- | python/pyspark/mllib/classification.py | 32 | ||||
-rw-r--r-- | python/pyspark/mllib/linalg.py | 8 | ||||
-rw-r--r-- | python/pyspark/mllib/regression.py | 18 | ||||
-rw-r--r-- | python/pyspark/mllib/util.py | 18 |
4 files changed, 38 insertions, 38 deletions
diff --git a/python/pyspark/mllib/classification.py b/python/pyspark/mllib/classification.py index a765b1c4f7..cd43982191 100644 --- a/python/pyspark/mllib/classification.py +++ b/python/pyspark/mllib/classification.py @@ -79,15 +79,15 @@ class LogisticRegressionWithSGD(object): """ Train a logistic regression model on the given data. - @param data: The training data. - @param iterations: The number of iterations (default: 100). - @param step: The step parameter used in SGD + :param data: The training data. + :param iterations: The number of iterations (default: 100). + :param step: The step parameter used in SGD (default: 1.0). - @param miniBatchFraction: Fraction of data to be used for each SGD + :param miniBatchFraction: Fraction of data to be used for each SGD iteration. - @param initialWeights: The initial weights (default: None). - @param regParam: The regularizer parameter (default: 1.0). - @param regType: The type of regularizer used for training + :param initialWeights: The initial weights (default: None). + :param regParam: The regularizer parameter (default: 1.0). + :param regType: The type of regularizer used for training our model. :Allowed values: @@ -151,15 +151,15 @@ class SVMWithSGD(object): """ Train a support vector machine on the given data. - @param data: The training data. - @param iterations: The number of iterations (default: 100). - @param step: The step parameter used in SGD + :param data: The training data. + :param iterations: The number of iterations (default: 100). + :param step: The step parameter used in SGD (default: 1.0). - @param regParam: The regularizer parameter (default: 1.0). - @param miniBatchFraction: Fraction of data to be used for each SGD + :param regParam: The regularizer parameter (default: 1.0). + :param miniBatchFraction: Fraction of data to be used for each SGD iteration. - @param initialWeights: The initial weights (default: None). - @param regType: The type of regularizer used for training + :param initialWeights: The initial weights (default: None). + :param regType: The type of regularizer used for training our model. :Allowed values: @@ -238,10 +238,10 @@ class NaiveBayes(object): classification. By making every vector a 0-1 vector, it can also be used as Bernoulli NB (U{http://tinyurl.com/p7c96j6}). - @param data: RDD of NumPy vectors, one per element, where the first + :param data: RDD of NumPy vectors, one per element, where the first coordinate is the label and the rest is the feature vector (e.g. a count vector). - @param lambda_: The smoothing parameter + :param lambda_: The smoothing parameter """ sc = data.context jlist = sc._jvm.PythonMLLibAPI().trainNaiveBayes(data._to_java_object_rdd(), lambda_) diff --git a/python/pyspark/mllib/linalg.py b/python/pyspark/mllib/linalg.py index 51014a8ceb..24c5480b2f 100644 --- a/python/pyspark/mllib/linalg.py +++ b/python/pyspark/mllib/linalg.py @@ -238,8 +238,8 @@ class SparseVector(Vector): (index, value) pairs, or two separate arrays of indices and values (sorted by index). - @param size: Size of the vector. - @param args: Non-zero entries, as a dictionary, list of tupes, + :param size: Size of the vector. + :param args: Non-zero entries, as a dictionary, list of tupes, or two sorted lists containing indices and values. >>> print SparseVector(4, {1: 1.0, 3: 5.5}) @@ -458,8 +458,8 @@ class Vectors(object): (index, value) pairs, or two separate arrays of indices and values (sorted by index). - @param size: Size of the vector. - @param args: Non-zero entries, as a dictionary, list of tupes, + :param size: Size of the vector. + :param args: Non-zero entries, as a dictionary, list of tupes, or two sorted lists containing indices and values. >>> print Vectors.sparse(4, {1: 1.0, 3: 5.5}) diff --git a/python/pyspark/mllib/regression.py b/python/pyspark/mllib/regression.py index 54f34a9833..12b322aaae 100644 --- a/python/pyspark/mllib/regression.py +++ b/python/pyspark/mllib/regression.py @@ -31,8 +31,8 @@ class LabeledPoint(object): """ The features and labels of a data point. - @param label: Label for this data point. - @param features: Vector of features for this point (NumPy array, list, + :param label: Label for this data point. + :param features: Vector of features for this point (NumPy array, list, pyspark.mllib.linalg.SparseVector, or scipy.sparse column matrix) """ @@ -145,15 +145,15 @@ class LinearRegressionWithSGD(object): """ Train a linear regression model on the given data. - @param data: The training data. - @param iterations: The number of iterations (default: 100). - @param step: The step parameter used in SGD + :param data: The training data. + :param iterations: The number of iterations (default: 100). + :param step: The step parameter used in SGD (default: 1.0). - @param miniBatchFraction: Fraction of data to be used for each SGD + :param miniBatchFraction: Fraction of data to be used for each SGD iteration. - @param initialWeights: The initial weights (default: None). - @param regParam: The regularizer parameter (default: 1.0). - @param regType: The type of regularizer used for training + :param initialWeights: The initial weights (default: None). + :param regParam: The regularizer parameter (default: 1.0). + :param regType: The type of regularizer used for training our model. :Allowed values: diff --git a/python/pyspark/mllib/util.py b/python/pyspark/mllib/util.py index 8233d4e81f..1357fd4fbc 100644 --- a/python/pyspark/mllib/util.py +++ b/python/pyspark/mllib/util.py @@ -77,10 +77,10 @@ class MLUtils(object): method parses each line into a LabeledPoint, where the feature indices are converted to zero-based. - @param sc: Spark context - @param path: file or directory path in any Hadoop-supported file + :param sc: Spark context + :param path: file or directory path in any Hadoop-supported file system URI - @param numFeatures: number of features, which will be determined + :param numFeatures: number of features, which will be determined from the input data if a nonpositive value is given. This is useful when the dataset is already split into multiple files and you @@ -88,7 +88,7 @@ class MLUtils(object): features may not present in certain files, which leads to inconsistent feature dimensions. - @param minPartitions: min number of partitions + :param minPartitions: min number of partitions @return: labeled data stored as an RDD of LabeledPoint >>> from tempfile import NamedTemporaryFile @@ -126,8 +126,8 @@ class MLUtils(object): """ Save labeled data in LIBSVM format. - @param data: an RDD of LabeledPoint to be saved - @param dir: directory to save the data + :param data: an RDD of LabeledPoint to be saved + :param dir: directory to save the data >>> from tempfile import NamedTemporaryFile >>> from fileinput import input @@ -149,10 +149,10 @@ class MLUtils(object): """ Load labeled points saved using RDD.saveAsTextFile. - @param sc: Spark context - @param path: file or directory path in any Hadoop-supported file + :param sc: Spark context + :param path: file or directory path in any Hadoop-supported file system URI - @param minPartitions: min number of partitions + :param minPartitions: min number of partitions @return: labeled data stored as an RDD of LabeledPoint >>> from tempfile import NamedTemporaryFile |