author     Davies Liu <davies.liu@gmail.com>   2014-10-07 18:09:27 -0700
committer  Josh Rosen <joshrosen@apache.org>   2014-10-07 18:09:27 -0700
commit     798ed22c289cf65f2249bf2f4250285685ca69e7 (patch)
tree       137d93c32454aaf39e6416823a8604f816f73926 /python/pyspark/mllib
parent     b69c9fb6fb048509bbd8430fb697dc3a5ca4fe59 (diff)
[SPARK-3412] [PySpark] Replace Epydoc with Sphinx to generate Python API docs
Retire Epydoc, use Sphinx to generate API docs. Refine Sphinx docs, also convert some docstrings into Sphinx style.

It looks like: ![api doc](https://cloud.githubusercontent.com/assets/40902/4538272/9e2d4f10-4dec-11e4-8d96-6e45a8fe51f9.png)

Author: Davies Liu <davies.liu@gmail.com>

Closes #2689 from davies/docs and squashes the following commits:

bf4a0a5 [Davies Liu] fix links
3fb1572 [Davies Liu] fix _static in jekyll
65a287e [Davies Liu] fix scripts and logo
8524042 [Davies Liu] Merge branch 'master' of github.com:apache/spark into docs
d5b874a [Davies Liu] Merge branch 'master' of github.com:apache/spark into docs
4bc1c3c [Davies Liu] refactor
746d0b6 [Davies Liu] @param -> :param
240b393 [Davies Liu] replace epydoc with sphinx doc
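For readers who have not used either markup, here is a minimal before/after sketch of the docstring conversion this patch performs; the `train` function below is illustrative only and is not taken from the patch itself.

```python
# Epydoc style (retired by this patch):
#
#     @param data: The training data.
#     @param iterations: The number of iterations (default: 100).
#     @return: a trained model
#
# Sphinx/reST field style (what the patch converts to):
def train(data, iterations=100):
    """
    Train a model on the given data.

    :param data: The training data.
    :param iterations: The number of iterations (default: 100).
    :return: a trained model
    """
```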
Diffstat (limited to 'python/pyspark/mllib')
-rw-r--r--  python/pyspark/mllib/classification.py  32
-rw-r--r--  python/pyspark/mllib/linalg.py            8
-rw-r--r--  python/pyspark/mllib/regression.py       18
-rw-r--r--  python/pyspark/mllib/util.py             18
4 files changed, 38 insertions, 38 deletions
diff --git a/python/pyspark/mllib/classification.py b/python/pyspark/mllib/classification.py
index a765b1c4f7..cd43982191 100644
--- a/python/pyspark/mllib/classification.py
+++ b/python/pyspark/mllib/classification.py
@@ -79,15 +79,15 @@ class LogisticRegressionWithSGD(object):
"""
Train a logistic regression model on the given data.
- @param data: The training data.
- @param iterations: The number of iterations (default: 100).
- @param step: The step parameter used in SGD
+ :param data: The training data.
+ :param iterations: The number of iterations (default: 100).
+ :param step: The step parameter used in SGD
(default: 1.0).
- @param miniBatchFraction: Fraction of data to be used for each SGD
+ :param miniBatchFraction: Fraction of data to be used for each SGD
iteration.
- @param initialWeights: The initial weights (default: None).
- @param regParam: The regularizer parameter (default: 1.0).
- @param regType: The type of regularizer used for training
+ :param initialWeights: The initial weights (default: None).
+ :param regParam: The regularizer parameter (default: 1.0).
+ :param regType: The type of regularizer used for training
our model.
:Allowed values:
@@ -151,15 +151,15 @@ class SVMWithSGD(object):
"""
Train a support vector machine on the given data.
- @param data: The training data.
- @param iterations: The number of iterations (default: 100).
- @param step: The step parameter used in SGD
+ :param data: The training data.
+ :param iterations: The number of iterations (default: 100).
+ :param step: The step parameter used in SGD
(default: 1.0).
- @param regParam: The regularizer parameter (default: 1.0).
- @param miniBatchFraction: Fraction of data to be used for each SGD
+ :param regParam: The regularizer parameter (default: 1.0).
+ :param miniBatchFraction: Fraction of data to be used for each SGD
iteration.
- @param initialWeights: The initial weights (default: None).
- @param regType: The type of regularizer used for training
+ :param initialWeights: The initial weights (default: None).
+ :param regType: The type of regularizer used for training
our model.
:Allowed values:
@@ -238,10 +238,10 @@ class NaiveBayes(object):
classification. By making every vector a 0-1 vector, it can also be
used as Bernoulli NB (U{http://tinyurl.com/p7c96j6}).
- @param data: RDD of NumPy vectors, one per element, where the first
+ :param data: RDD of NumPy vectors, one per element, where the first
coordinate is the label and the rest is the feature vector
(e.g. a count vector).
- @param lambda_: The smoothing parameter
+ :param lambda_: The smoothing parameter
"""
sc = data.context
jlist = sc._jvm.PythonMLLibAPI().trainNaiveBayes(data._to_java_object_rdd(), lambda_)
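As a usage sketch of the parameters documented in the hunk above: this assumes an already-running SparkContext named `sc`, and the toy dataset is invented for illustration.

```python
from pyspark.mllib.classification import LogisticRegressionWithSGD, NaiveBayes
from pyspark.mllib.regression import LabeledPoint

# Toy two-class dataset: each LabeledPoint holds a label and a feature vector.
points = sc.parallelize([
    LabeledPoint(0.0, [0.0, 1.0]),
    LabeledPoint(1.0, [1.0, 0.0]),
])

# Keyword arguments mirror the :param entries above.
lr_model = LogisticRegressionWithSGD.train(points, iterations=100, step=1.0,
                                           miniBatchFraction=1.0, regParam=1.0)
nb_model = NaiveBayes.train(points, lambda_=1.0)
print lr_model.predict([1.0, 0.0]), nb_model.predict([1.0, 0.0])
```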
diff --git a/python/pyspark/mllib/linalg.py b/python/pyspark/mllib/linalg.py
index 51014a8ceb..24c5480b2f 100644
--- a/python/pyspark/mllib/linalg.py
+++ b/python/pyspark/mllib/linalg.py
@@ -238,8 +238,8 @@ class SparseVector(Vector):
(index, value) pairs, or two separate arrays of indices and
values (sorted by index).
- @param size: Size of the vector.
- @param args: Non-zero entries, as a dictionary, list of tupes,
+ :param size: Size of the vector.
+ :param args: Non-zero entries, as a dictionary, list of tuples,
or two sorted lists containing indices and values.
>>> print SparseVector(4, {1: 1.0, 3: 5.5})
@@ -458,8 +458,8 @@ class Vectors(object):
(index, value) pairs, or two separate arrays of indices and
values (sorted by index).
- @param size: Size of the vector.
- @param args: Non-zero entries, as a dictionary, list of tupes,
+ :param size: Size of the vector.
+ :param args: Non-zero entries, as a dictionary, list of tuples,
or two sorted lists containing indices and values.
>>> print Vectors.sparse(4, {1: 1.0, 3: 5.5})
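A short sketch of the constructor forms that docstring describes; all three produce the same sparse vector.

```python
from pyspark.mllib.linalg import SparseVector, Vectors

sv1 = SparseVector(4, {1: 1.0, 3: 5.5})        # dictionary of index: value
sv2 = SparseVector(4, [(1, 1.0), (3, 5.5)])    # list of (index, value) tuples
sv3 = Vectors.sparse(4, [1, 3], [1.0, 5.5])    # two sorted lists: indices, values
print sv1, sv2, sv3
```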
diff --git a/python/pyspark/mllib/regression.py b/python/pyspark/mllib/regression.py
index 54f34a9833..12b322aaae 100644
--- a/python/pyspark/mllib/regression.py
+++ b/python/pyspark/mllib/regression.py
@@ -31,8 +31,8 @@ class LabeledPoint(object):
"""
The features and labels of a data point.
- @param label: Label for this data point.
- @param features: Vector of features for this point (NumPy array, list,
+ :param label: Label for this data point.
+ :param features: Vector of features for this point (NumPy array, list,
pyspark.mllib.linalg.SparseVector, or scipy.sparse column matrix)
"""
@@ -145,15 +145,15 @@ class LinearRegressionWithSGD(object):
"""
Train a linear regression model on the given data.
- @param data: The training data.
- @param iterations: The number of iterations (default: 100).
- @param step: The step parameter used in SGD
+ :param data: The training data.
+ :param iterations: The number of iterations (default: 100).
+ :param step: The step parameter used in SGD
(default: 1.0).
- @param miniBatchFraction: Fraction of data to be used for each SGD
+ :param miniBatchFraction: Fraction of data to be used for each SGD
iteration.
- @param initialWeights: The initial weights (default: None).
- @param regParam: The regularizer parameter (default: 1.0).
- @param regType: The type of regularizer used for training
+ :param initialWeights: The initial weights (default: None).
+ :param regParam: The regularizer parameter (default: 1.0).
+ :param regType: The type of regularizer used for training
our model.
:Allowed values:
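A minimal sketch of the API documented above, again assuming a live SparkContext `sc`; the data is a toy example, not taken from the patch.

```python
from pyspark.mllib.linalg import SparseVector
from pyspark.mllib.regression import LabeledPoint, LinearRegressionWithSGD

# LabeledPoint accepts dense features (list or NumPy array) or a SparseVector.
data = sc.parallelize([
    LabeledPoint(0.0, [0.0, 1.0]),
    LabeledPoint(3.0, SparseVector(2, {0: 1.0})),
])
model = LinearRegressionWithSGD.train(data, iterations=100, step=1.0)
print model.predict([1.0, 0.0])
```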
diff --git a/python/pyspark/mllib/util.py b/python/pyspark/mllib/util.py
index 8233d4e81f..1357fd4fbc 100644
--- a/python/pyspark/mllib/util.py
+++ b/python/pyspark/mllib/util.py
@@ -77,10 +77,10 @@ class MLUtils(object):
method parses each line into a LabeledPoint, where the feature
indices are converted to zero-based.
- @param sc: Spark context
- @param path: file or directory path in any Hadoop-supported file
+ :param sc: Spark context
+ :param path: file or directory path in any Hadoop-supported file
system URI
- @param numFeatures: number of features, which will be determined
+ :param numFeatures: number of features, which will be determined
from the input data if a nonpositive value
is given. This is useful when the dataset is
already split into multiple files and you
@@ -88,7 +88,7 @@ class MLUtils(object):
features may not present in certain files,
which leads to inconsistent feature
dimensions.
- @param minPartitions: min number of partitions
+ :param minPartitions: min number of partitions
@return: labeled data stored as an RDD of LabeledPoint
>>> from tempfile import NamedTemporaryFile
@@ -126,8 +126,8 @@ class MLUtils(object):
"""
Save labeled data in LIBSVM format.
- @param data: an RDD of LabeledPoint to be saved
- @param dir: directory to save the data
+ :param data: an RDD of LabeledPoint to be saved
+ :param dir: directory to save the data
>>> from tempfile import NamedTemporaryFile
>>> from fileinput import input
@@ -149,10 +149,10 @@ class MLUtils(object):
"""
Load labeled points saved using RDD.saveAsTextFile.
- @param sc: Spark context
- @param path: file or directory path in any Hadoop-supported file
+ :param sc: Spark context
+ :param path: file or directory path in any Hadoop-supported file
system URI
- @param minPartitions: min number of partitions
+ :param minPartitions: min number of partitions
@return: labeled data stored as an RDD of LabeledPoint
>>> from tempfile import NamedTemporaryFile
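To close, a round-trip sketch of the two MLUtils helpers documented above. It assumes a live SparkContext `sc`; the scratch-directory handling is illustrative.

```python
from tempfile import mkdtemp
from pyspark.mllib.regression import LabeledPoint
from pyspark.mllib.util import MLUtils

points = sc.parallelize([LabeledPoint(1.0, [1.0, 0.0, 3.0])])

# saveAsLibSVMFile expects a directory that does not exist yet.
out_dir = mkdtemp() + "/libsvm"
MLUtils.saveAsLibSVMFile(points, out_dir)

# Load the data back; numFeatures and minPartitions match the :param entries above.
loaded = MLUtils.loadLibSVMFile(sc, out_dir, numFeatures=3, minPartitions=2)
print loaded.collect()
```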