aboutsummaryrefslogtreecommitdiff
path: root/python/pyspark/ml/param
diff options
context:
space:
mode:
author: wm624@hotmail.com <wm624@hotmail.com> 2016-04-08 10:47:05 -0700
committer: Joseph K. Bradley <joseph@databricks.com> 2016-04-08 10:47:05 -0700
commit: e0ad75f2b55772efc82a6f8ebb1b2d80fe27d9b5 (patch)
tree: 600d2505875b418ec19f461416c42585693a92d6 /python/pyspark/ml/param
parent: e5d8d6e09cad304e353c96f9408fb9f799348827 (diff)
download: spark-e0ad75f2b55772efc82a6f8ebb1b2d80fe27d9b5.tar.gz
spark-e0ad75f2b55772efc82a6f8ebb1b2d80fe27d9b5.tar.bz2
spark-e0ad75f2b55772efc82a6f8ebb1b2d80fe27d9b5.zip
[SPARK-12569][PYSPARK][ML] DecisionTreeRegressor: provide variance of prediction: Python API
## What changes were proposed in this pull request?

A new column VarianceCol has been added to DecisionTreeRegressor in the ML Scala code. This patch adds the corresponding Python API, HasVarianceCol, to class DecisionTreeRegressor.

## How was this patch tested?

./dev/lint-python
PEP8 checks passed.
rm -rf _build/*
pydoc checks passed.

./python/run-tests --python-executables=python2.7 --modules=pyspark-ml
Running PySpark tests. Output is in /Users/mwang/spark_ws_0904/python/unit-tests.log
Will test against the following Python executables: ['python2.7']
Will test the following Python modules: ['pyspark-ml']
Finished test(python2.7): pyspark.ml.evaluation (12s)
Finished test(python2.7): pyspark.ml.clustering (18s)
Finished test(python2.7): pyspark.ml.classification (30s)
Finished test(python2.7): pyspark.ml.recommendation (28s)
Finished test(python2.7): pyspark.ml.feature (43s)
Finished test(python2.7): pyspark.ml.regression (31s)
Finished test(python2.7): pyspark.ml.tuning (19s)
Finished test(python2.7): pyspark.ml.tests (34s)

(If this patch involves UI changes, please attach a screenshot; otherwise, remove this)

Author: wm624@hotmail.com <wm624@hotmail.com>

Closes #12116 from wangmiao1981/fix_api.
Diffstat (limited to 'python/pyspark/ml/param')
-rw-r--r-- python/pyspark/ml/param/_shared_params_code_gen.py | 4
-rw-r--r-- python/pyspark/ml/param/shared.py | 24
2 files changed, 27 insertions, 1 deletions
diff --git a/python/pyspark/ml/param/_shared_params_code_gen.py b/python/pyspark/ml/param/_shared_params_code_gen.py
index 715fa9e9f8..a7615c43be 100644
--- a/python/pyspark/ml/param/_shared_params_code_gen.py
+++ b/python/pyspark/ml/param/_shared_params_code_gen.py
@@ -146,7 +146,9 @@ if __name__ == "__main__":
("weightCol", "weight column name. If this is not set or empty, we treat " +
"all instance weights as 1.0.", None, "TypeConverters.toString"),
("solver", "the solver algorithm for optimization. If this is not set or empty, " +
- "default value is 'auto'.", "'auto'", "TypeConverters.toString")]
+ "default value is 'auto'.", "'auto'", "TypeConverters.toString"),
+ ("varianceCol", "column name for the biased sample variance of prediction.",
+ None, "TypeConverters.toString")]
code = []
for name, doc, defaultValueStr, typeConverter in shared:
diff --git a/python/pyspark/ml/param/shared.py b/python/pyspark/ml/param/shared.py
index d79d55e463..c9e975525c 100644
--- a/python/pyspark/ml/param/shared.py
+++ b/python/pyspark/ml/param/shared.py
@@ -559,6 +559,30 @@ class HasSolver(Params):
return self.getOrDefault(self.solver)
+class HasVarianceCol(Params):
+ """
+ Mixin for param varianceCol: column name for the biased sample variance of prediction.
+ """
+
+ varianceCol = Param(Params._dummy(), "varianceCol", "column name for the biased sample variance of prediction.", typeConverter=TypeConverters.toString)
+
+ def __init__(self):
+ super(HasVarianceCol, self).__init__()
+
+ def setVarianceCol(self, value):
+ """
+ Sets the value of :py:attr:`varianceCol`.
+ """
+ self._set(varianceCol=value)
+ return self
+
+ def getVarianceCol(self):
+ """
+ Gets the value of varianceCol or its default value.
+ """
+ return self.getOrDefault(self.varianceCol)
+
+
class DecisionTreeParams(Params):
"""
Mixin for Decision Tree parameters.