 python/docs/modules.rst                |  7 -------
 python/pyspark/context.py              |  1 +
 python/pyspark/mllib/classification.py | 26 ++++++++++++++++----------
 python/pyspark/mllib/regression.py     | 15 +++++++++------
 python/pyspark/mllib/tree.py           |  1 +
 python/pyspark/rdd.py                  |  1 +
 6 files changed, 28 insertions(+), 23 deletions(-)
diff --git a/python/docs/modules.rst b/python/docs/modules.rst
deleted file mode 100644
index 183564659f..0000000000
--- a/python/docs/modules.rst
+++ /dev/null
@@ -1,7 +0,0 @@
-.
-=
-
-.. toctree::
-   :maxdepth: 4
-
-   pyspark
diff --git a/python/pyspark/context.py b/python/pyspark/context.py
index e9418320ff..a45d79d642 100644
--- a/python/pyspark/context.py
+++ b/python/pyspark/context.py
@@ -410,6 +410,7 @@ class SparkContext(object):
Read a Hadoop SequenceFile with arbitrary key and value Writable class from HDFS,
a local file system (available on all nodes), or any Hadoop-supported file system URI.
The mechanism is as follows:
+
1. A Java RDD is created from the SequenceFile or other InputFormat, and the key
and value Writable classes
2. Serialization is attempted via Pyrolite pickling
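
The mechanism described above is easiest to see from the call site. A minimal
sketch, assuming a local SparkContext; the SequenceFile path is a hypothetical
placeholder, and the key and value Writable classes are inferred from the file
header:

    from pyspark import SparkContext

    sc = SparkContext("local", "sequencefile-read")

    # hypothetical path; a Java RDD is created from the SequenceFile,
    # then the Writables are pickled back to Python via Pyrolite
    pairs = sc.sequenceFile("hdfs:///tmp/demo.seq")
    print(pairs.first())
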
diff --git a/python/pyspark/mllib/classification.py b/python/pyspark/mllib/classification.py
index ac142fb49a..a765b1c4f7 100644
--- a/python/pyspark/mllib/classification.py
+++ b/python/pyspark/mllib/classification.py
@@ -89,11 +89,14 @@ class LogisticRegressionWithSGD(object):
@param regParam: The regularizer parameter (default: 1.0).
@param regType: The type of regularizer used for training
our model.
- Allowed values: "l1" for using L1Updater,
- "l2" for using
- SquaredL2Updater,
- "none" for no regularizer.
- (default: "none")
+
+ :Allowed values:
+ - "l1" for using L1Updater
+ - "l2" for using SquaredL2Updater
+ - "none" for no regularizer
+
+ (default: "none")
+
@param intercept: Boolean parameter which indicates the use
or not of the augmented representation for
training data (i.e. whether bias features
@@ -158,11 +161,14 @@ class SVMWithSGD(object):
@param initialWeights: The initial weights (default: None).
@param regType: The type of regularizer used for training
our model.
- Allowed values: "l1" for using L1Updater,
- "l2" for using
- SquaredL2Updater,
- "none" for no regularizer.
- (default: "none")
+
+ :Allowed values:
+ - "l1" for using L1Updater
+ - "l2" for using SquaredL2Updater,
+ - "none" for no regularizer.
+
+ (default: "none")
+
@param intercept: Boolean parameter which indicates the use
or not of the augmented representation for
training data (i.e. whether bias features
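
To make the regType parameter concrete, here is a minimal sketch against the
train() signature documented above; the toy dataset, app name, and parameter
values are illustrative, not part of the patch:

    from pyspark import SparkContext
    from pyspark.mllib.classification import LogisticRegressionWithSGD
    from pyspark.mllib.regression import LabeledPoint

    sc = SparkContext("local", "regtype-demo")
    points = sc.parallelize([
        LabeledPoint(0.0, [0.0, 1.0]),
        LabeledPoint(1.0, [1.0, 0.0]),
    ])
    # "l2" selects SquaredL2Updater; "l1" would select L1Updater,
    # and "none" (the default) trains without regularization
    model = LogisticRegressionWithSGD.train(points, iterations=10,
                                            regParam=0.01, regType="l2")
    print(model.predict([1.0, 0.0]))

SVMWithSGD.train accepts the same regType values.
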
diff --git a/python/pyspark/mllib/regression.py b/python/pyspark/mllib/regression.py
index 8fe8c6db2a..54f34a9833 100644
--- a/python/pyspark/mllib/regression.py
+++ b/python/pyspark/mllib/regression.py
@@ -22,7 +22,7 @@ from pyspark import SparkContext
from pyspark.mllib.linalg import SparseVector, _convert_to_vector
from pyspark.serializers import PickleSerializer, AutoBatchedSerializer
-__all__ = ['LabeledPoint', 'LinearModel', 'LinearRegressionModel', 'RidgeRegressionModel'
+__all__ = ['LabeledPoint', 'LinearModel', 'LinearRegressionModel', 'RidgeRegressionModel',
'LinearRegressionWithSGD', 'LassoWithSGD', 'RidgeRegressionWithSGD']
@@ -155,11 +155,14 @@ class LinearRegressionWithSGD(object):
@param regParam: The regularizer parameter (default: 1.0).
@param regType: The type of regularizer used for training
our model.
- Allowed values: "l1" for using L1Updater,
- "l2" for using
- SquaredL2Updater,
- "none" for no regularizer.
- (default: "none")
+
+ :Allowed values:
+ - "l1" for using L1Updater,
+ - "l2" for using SquaredL2Updater,
+ - "none" for no regularizer.
+
+ (default: "none")
+
@param intercept: Boolean parameter which indicates the use
or not of the augmented representation for
training data (i.e. whether bias features
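
The same regType values apply to linear regression. As a sketch (data and
step size are illustrative), regType="l1" below trains the lasso-style
objective that LassoWithSGD optimizes, while "l2" would correspond to ridge
regression:

    from pyspark import SparkContext
    from pyspark.mllib.regression import LabeledPoint, LinearRegressionWithSGD

    sc = SparkContext("local", "linreg-regtype")
    points = sc.parallelize([
        LabeledPoint(0.0, [0.0]),
        LabeledPoint(2.0, [1.0]),
        LabeledPoint(4.0, [2.0]),
    ])
    # L1 regularization via L1Updater, i.e. lasso-style training
    model = LinearRegressionWithSGD.train(points, iterations=100, step=0.1,
                                          regParam=0.01, regType="l1")
    print(model.predict([3.0]))
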
diff --git a/python/pyspark/mllib/tree.py b/python/pyspark/mllib/tree.py
index afdcdbdf3a..5d7abfb96b 100644
--- a/python/pyspark/mllib/tree.py
+++ b/python/pyspark/mllib/tree.py
@@ -48,6 +48,7 @@ class DecisionTreeModel(object):
def predict(self, x):
"""
Predict the label of one or more examples.
+
:param x: Data point (feature vector),
or an RDD of data points (feature vectors).
"""
diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py
index dc6497772e..e77669aad7 100644
--- a/python/pyspark/rdd.py
+++ b/python/pyspark/rdd.py
@@ -1208,6 +1208,7 @@ class RDD(object):
Output a Python RDD of key-value pairs (of form C{RDD[(K, V)]}) to any Hadoop file
system, using the L{org.apache.hadoop.io.Writable} types that we convert from the
RDD's key and value types. The mechanism is as follows:
+
1. Pyrolite is used to convert pickled Python RDD into RDD of Java objects.
2. Keys and values of this Java RDD are converted to Writables and written out.
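
For reference, a round-trip sketch of the two-step mechanism described above;
the output path is a placeholder and must not already exist:

    from pyspark import SparkContext

    sc = SparkContext("local", "sequencefile-write")

    # step 1: the pickled pairs are converted to Java objects via Pyrolite;
    # step 2: keys and values become Writables (here IntWritable/Text) on disk
    pairs = sc.parallelize([(1, "a"), (2, "b")])
    pairs.saveAsSequenceFile("/tmp/pairs.seq")

    # reading the file back reverses the conversion
    print(sc.sequenceFile("/tmp/pairs.seq").collect())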