aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--python/pyspark/ml/param/_shared_params_code_gen.py4
-rw-r--r--python/pyspark/ml/param/shared.py24
-rw-r--r--python/pyspark/ml/regression.py14
3 files changed, 35 insertions, 7 deletions
diff --git a/python/pyspark/ml/param/_shared_params_code_gen.py b/python/pyspark/ml/param/_shared_params_code_gen.py
index 715fa9e9f8..a7615c43be 100644
--- a/python/pyspark/ml/param/_shared_params_code_gen.py
+++ b/python/pyspark/ml/param/_shared_params_code_gen.py
@@ -146,7 +146,9 @@ if __name__ == "__main__":
("weightCol", "weight column name. If this is not set or empty, we treat " +
"all instance weights as 1.0.", None, "TypeConverters.toString"),
("solver", "the solver algorithm for optimization. If this is not set or empty, " +
- "default value is 'auto'.", "'auto'", "TypeConverters.toString")]
+ "default value is 'auto'.", "'auto'", "TypeConverters.toString"),
+ ("varianceCol", "column name for the biased sample variance of prediction.",
+ None, "TypeConverters.toString")]
code = []
for name, doc, defaultValueStr, typeConverter in shared:
diff --git a/python/pyspark/ml/param/shared.py b/python/pyspark/ml/param/shared.py
index d79d55e463..c9e975525c 100644
--- a/python/pyspark/ml/param/shared.py
+++ b/python/pyspark/ml/param/shared.py
@@ -559,6 +559,30 @@ class HasSolver(Params):
return self.getOrDefault(self.solver)
+class HasVarianceCol(Params):
+ """
+ Mixin for param varianceCol: column name for the biased sample variance of prediction.
+ """
+
+ varianceCol = Param(Params._dummy(), "varianceCol", "column name for the biased sample variance of prediction.", typeConverter=TypeConverters.toString)
+
+ def __init__(self):
+ super(HasVarianceCol, self).__init__()
+
+ def setVarianceCol(self, value):
+ """
+ Sets the value of :py:attr:`varianceCol`.
+ """
+ self._set(varianceCol=value)
+ return self
+
+ def getVarianceCol(self):
+ """
+ Gets the value of varianceCol or its default value.
+ """
+ return self.getOrDefault(self.varianceCol)
+
+
class DecisionTreeParams(Params):
"""
Mixin for Decision Tree parameters.
diff --git a/python/pyspark/ml/regression.py b/python/pyspark/ml/regression.py
index 00a6a0de90..f6c5d130dd 100644
--- a/python/pyspark/ml/regression.py
+++ b/python/pyspark/ml/regression.py
@@ -630,7 +630,7 @@ class GBTParams(TreeEnsembleParams):
@inherit_doc
class DecisionTreeRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol,
DecisionTreeParams, TreeRegressorParams, HasCheckpointInterval,
- HasSeed, JavaMLWritable, JavaMLReadable):
+ HasSeed, JavaMLWritable, JavaMLReadable, HasVarianceCol):
"""
`http://en.wikipedia.org/wiki/Decision_tree_learning Decision tree`
learning algorithm for regression.
@@ -640,7 +640,7 @@ class DecisionTreeRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredi
>>> df = sqlContext.createDataFrame([
... (1.0, Vectors.dense(1.0)),
... (0.0, Vectors.sparse(1, [], []))], ["label", "features"])
- >>> dt = DecisionTreeRegressor(maxDepth=2)
+ >>> dt = DecisionTreeRegressor(maxDepth=2, varianceCol="variance")
>>> model = dt.fit(df)
>>> model.depth
1
@@ -666,6 +666,8 @@ class DecisionTreeRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredi
True
>>> model.depth == model2.depth
True
+ >>> model.transform(test1).head().variance
+ 0.0
.. versionadded:: 1.4.0
"""
@@ -674,12 +676,12 @@ class DecisionTreeRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredi
def __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction",
maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0,
maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, impurity="variance",
- seed=None):
+ seed=None, varianceCol=None):
"""
__init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \
maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, \
- impurity="variance", seed=None)
+ impurity="variance", seed=None, varianceCol=None)
"""
super(DecisionTreeRegressor, self).__init__()
self._java_obj = self._new_java_obj(
@@ -695,12 +697,12 @@ class DecisionTreeRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredi
def setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction",
maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0,
maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10,
- impurity="variance", seed=None):
+ impurity="variance", seed=None, varianceCol=None):
"""
setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \
maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, \
- impurity="variance", seed=None)
+ impurity="variance", seed=None, varianceCol=None)
Sets params for the DecisionTreeRegressor.
"""
kwargs = self.setParams._input_kwargs