about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- python/pyspark/ml/param/_shared_params_code_gen.py | 6
-rw-r--r-- python/pyspark/ml/param/shared.py | 30
-rw-r--r-- python/pyspark/ml/tests.py | 4
-rw-r--r-- tox.ini | 2
4 files changed, 20 insertions, 22 deletions
diff --git a/python/pyspark/ml/param/_shared_params_code_gen.py b/python/pyspark/ml/param/_shared_params_code_gen.py
index ed3171b697..3be0979b92 100644
--- a/python/pyspark/ml/param/_shared_params_code_gen.py
+++ b/python/pyspark/ml/param/_shared_params_code_gen.py
@@ -88,12 +88,12 @@ if __name__ == "__main__":
print("\n# DO NOT MODIFY THIS FILE! It was generated by _shared_params_code_gen.py.\n")
print("from pyspark.ml.param import Param, Params\n\n")
shared = [
- ("maxIter", "max number of iterations", None),
- ("regParam", "regularization constant", None),
+ ("maxIter", "max number of iterations (>= 0)", None),
+ ("regParam", "regularization parameter (>= 0)", None),
("featuresCol", "features column name", "'features'"),
("labelCol", "label column name", "'label'"),
("predictionCol", "prediction column name", "'prediction'"),
- ("rawPredictionCol", "raw prediction column name", "'rawPrediction'"),
+ ("rawPredictionCol", "raw prediction (a.k.a. confidence) column name", "'rawPrediction'"),
("inputCol", "input column name", None),
("inputCols", "input column names", None),
("outputCol", "output column name", None),
diff --git a/python/pyspark/ml/param/shared.py b/python/pyspark/ml/param/shared.py
index d0bcadee22..4b22322b89 100644
--- a/python/pyspark/ml/param/shared.py
+++ b/python/pyspark/ml/param/shared.py
@@ -22,16 +22,16 @@ from pyspark.ml.param import Param, Params
class HasMaxIter(Params):
"""
- Mixin for param maxIter: max number of iterations.
+ Mixin for param maxIter: max number of iterations (>= 0).
"""
# a placeholder to make it appear in the generated doc
- maxIter = Param(Params._dummy(), "maxIter", "max number of iterations")
+ maxIter = Param(Params._dummy(), "maxIter", "max number of iterations (>= 0)")
def __init__(self):
super(HasMaxIter, self).__init__()
- #: param for max number of iterations
- self.maxIter = Param(self, "maxIter", "max number of iterations")
+ #: param for max number of iterations (>= 0)
+ self.maxIter = Param(self, "maxIter", "max number of iterations (>= 0)")
if None is not None:
self._setDefault(maxIter=None)
@@ -51,16 +51,16 @@ class HasMaxIter(Params):
class HasRegParam(Params):
"""
- Mixin for param regParam: regularization constant.
+ Mixin for param regParam: regularization parameter (>= 0).
"""
# a placeholder to make it appear in the generated doc
- regParam = Param(Params._dummy(), "regParam", "regularization constant")
+ regParam = Param(Params._dummy(), "regParam", "regularization parameter (>= 0)")
def __init__(self):
super(HasRegParam, self).__init__()
- #: param for regularization constant
- self.regParam = Param(self, "regParam", "regularization constant")
+ #: param for regularization parameter (>= 0)
+ self.regParam = Param(self, "regParam", "regularization parameter (>= 0)")
if None is not None:
self._setDefault(regParam=None)
@@ -167,16 +167,16 @@ class HasPredictionCol(Params):
class HasRawPredictionCol(Params):
"""
- Mixin for param rawPredictionCol: raw prediction column name.
+ Mixin for param rawPredictionCol: raw prediction (a.k.a. confidence) column name.
"""
# a placeholder to make it appear in the generated doc
- rawPredictionCol = Param(Params._dummy(), "rawPredictionCol", "raw prediction column name")
+ rawPredictionCol = Param(Params._dummy(), "rawPredictionCol", "raw prediction (a.k.a. confidence) column name")
def __init__(self):
super(HasRawPredictionCol, self).__init__()
- #: param for raw prediction column name
- self.rawPredictionCol = Param(self, "rawPredictionCol", "raw prediction column name")
+ #: param for raw prediction (a.k.a. confidence) column name
+ self.rawPredictionCol = Param(self, "rawPredictionCol", "raw prediction (a.k.a. confidence) column name")
if 'rawPrediction' is not None:
self._setDefault(rawPredictionCol='rawPrediction')
@@ -403,14 +403,12 @@ class HasStepSize(Params):
"""
# a placeholder to make it appear in the generated doc
- stepSize = Param(Params._dummy(), "stepSize",
- "Step size to be used for each iteration of optimization.")
+ stepSize = Param(Params._dummy(), "stepSize", "Step size to be used for each iteration of optimization.")
def __init__(self):
super(HasStepSize, self).__init__()
#: param for Step size to be used for each iteration of optimization.
- self.stepSize = Param(self, "stepSize",
- "Step size to be used for each iteration of optimization.")
+ self.stepSize = Param(self, "stepSize", "Step size to be used for each iteration of optimization.")
if None is not None:
self._setDefault(stepSize=None)
diff --git a/python/pyspark/ml/tests.py b/python/pyspark/ml/tests.py
index 75bb5d749c..ba6478dcd5 100644
--- a/python/pyspark/ml/tests.py
+++ b/python/pyspark/ml/tests.py
@@ -128,7 +128,7 @@ class ParamTests(PySparkTestCase):
testParams = TestParams()
maxIter = testParams.maxIter
self.assertEqual(maxIter.name, "maxIter")
- self.assertEqual(maxIter.doc, "max number of iterations")
+ self.assertEqual(maxIter.doc, "max number of iterations (>= 0)")
self.assertTrue(maxIter.parent is testParams)
def test_params(self):
@@ -156,7 +156,7 @@ class ParamTests(PySparkTestCase):
self.assertEquals(
testParams.explainParams(),
"\n".join(["inputCol: input column name (undefined)",
- "maxIter: max number of iterations (default: 10, current: 100)"]))
+ "maxIter: max number of iterations (>= 0) (default: 10, current: 100)"]))
if __name__ == "__main__":
diff --git a/tox.ini b/tox.ini
index b568029a20..76e3f42cde 100644
--- a/tox.ini
+++ b/tox.ini
@@ -15,4 +15,4 @@
[pep8]
max-line-length=100
-exclude=cloudpickle.py,heapq3.py
+exclude=cloudpickle.py,heapq3.py,shared.py