aboutsummaryrefslogtreecommitdiff
path: root/python/pyspark/ml/regression.py
diff options
context:
space:
mode:
author: Holden Karau <holden@pigscanfly.ca>, 2015-10-07 17:50:35 -0700
committer: Joseph K. Bradley <joseph@databricks.com>, 2015-10-07 17:50:35 -0700
commit: 3aff0866a8601b4daf760d6bf175f68d5a0c8912 (patch)
tree: 6f9085d991fb04be7d00fb8159692dfe75fbd30f /python/pyspark/ml/regression.py
parent: 1bc435ae3afb7a007b8a8ff00dcad4738a9ff055 (diff)
download: spark-3aff0866a8601b4daf760d6bf175f68d5a0c8912.tar.gz
spark-3aff0866a8601b4daf760d6bf175f68d5a0c8912.tar.bz2
spark-3aff0866a8601b4daf760d6bf175f68d5a0c8912.zip
[SPARK-9774] [ML] [PYSPARK] Add python api for ml regression isotonicregression
Add the Python API for isotonicregression. Author: Holden Karau <holden@pigscanfly.ca> Closes #8214 from holdenk/SPARK-9774-add-python-api-for-ml-regression-isotonicregression.
Diffstat (limited to 'python/pyspark/ml/regression.py')
-rw-r--r-- python/pyspark/ml/regression.py | 118
1 files changed, 118 insertions, 0 deletions
diff --git a/python/pyspark/ml/regression.py b/python/pyspark/ml/regression.py
index e12abeba01..eb5f4bd6d7 100644
--- a/python/pyspark/ml/regression.py
+++ b/python/pyspark/ml/regression.py
@@ -25,6 +25,7 @@ from pyspark.mllib.common import inherit_doc
# Names exported by ``from pyspark.ml.regression import *``: each estimator is
# listed next to the model class it produces, in alphabetical order.
__all__ = ['AFTSurvivalRegression', 'AFTSurvivalRegressionModel',
           'DecisionTreeRegressor', 'DecisionTreeRegressionModel',
           'GBTRegressor', 'GBTRegressionModel',
           'IsotonicRegression', 'IsotonicRegressionModel',
           'LinearRegression', 'LinearRegressionModel',
           'RandomForestRegressor', 'RandomForestRegressionModel']
@@ -142,6 +143,123 @@ class LinearRegressionModel(JavaModel):
return self._call_java("intercept")
@inherit_doc
class IsotonicRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol,
                         HasWeightCol):
    """
    .. note:: Experimental

    Currently implemented using parallelized pool adjacent violators algorithm.
    Only univariate (single feature) algorithm supported.

    This estimator is backed by the JVM class
    ``org.apache.spark.ml.regression.IsotonicRegression`` and produces an
    :py:class:`IsotonicRegressionModel` when fitted.

    >>> from pyspark.mllib.linalg import Vectors
    >>> df = sqlContext.createDataFrame([
    ...     (1.0, Vectors.dense(1.0)),
    ...     (0.0, Vectors.sparse(1, [], []))], ["label", "features"])
    >>> ir = IsotonicRegression()
    >>> model = ir.fit(df)
    >>> test0 = sqlContext.createDataFrame([(Vectors.dense(-1.0),)], ["features"])
    >>> model.transform(test0).head().prediction
    0.0
    >>> model.boundaries
    DenseVector([0.0, 1.0])
    """

    # a placeholder to make it appear in the generated doc
    # NOTE: the two halves of the doc string are concatenated, so the first
    # half must end with a space to avoid rendering "orantitonic".
    isotonic = \
        Param(Params._dummy(), "isotonic",
              "whether the output sequence should be isotonic/increasing (true) or " +
              "antitonic/decreasing (false).")
    featureIndex = \
        Param(Params._dummy(), "featureIndex",
              "The index of the feature if featuresCol is a vector column, no effect otherwise.")

    @keyword_only
    def __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction",
                 weightCol=None, isotonic=True, featureIndex=0):
        """
        __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
                 weightCol=None, isotonic=True, featureIndex=0):
        """
        super(IsotonicRegression, self).__init__()
        # Create the JVM-side estimator that this Python object wraps.
        self._java_obj = self._new_java_obj(
            "org.apache.spark.ml.regression.IsotonicRegression", self.uid)
        # Instance-level params (parent is ``self``) replace the class-level
        # placeholders declared above, which are parented to a dummy object.
        self.isotonic = \
            Param(self, "isotonic",
                  "whether the output sequence should be isotonic/increasing (true) or " +
                  "antitonic/decreasing (false).")
        self.featureIndex = \
            Param(self, "featureIndex",
                  "The index of the feature if featuresCol is a vector column, no effect " +
                  "otherwise.")
        self._setDefault(isotonic=True, featureIndex=0)
        # ``_input_kwargs`` is read off the decorated method; presumably it is
        # populated by ``@keyword_only`` with the keyword arguments of this call.
        kwargs = self.__init__._input_kwargs
        self.setParams(**kwargs)

    @keyword_only
    def setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction",
                  weightCol=None, isotonic=True, featureIndex=0):
        """
        setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
                 weightCol=None, isotonic=True, featureIndex=0):
        Set the params for IsotonicRegression.
        """
        kwargs = self.setParams._input_kwargs
        return self._set(**kwargs)

    def _create_model(self, java_model):
        """
        Wrap the given Java model object in an :py:class:`IsotonicRegressionModel`.
        """
        return IsotonicRegressionModel(java_model)

    def setIsotonic(self, value):
        """
        Sets the value of :py:attr:`isotonic`.

        Returns this instance so that calls can be chained.
        """
        self._paramMap[self.isotonic] = value
        return self

    def getIsotonic(self):
        """
        Gets the value of isotonic or its default value.
        """
        return self.getOrDefault(self.isotonic)

    def setFeatureIndex(self, value):
        """
        Sets the value of :py:attr:`featureIndex`.

        Returns this instance so that calls can be chained.
        """
        self._paramMap[self.featureIndex] = value
        return self

    def getFeatureIndex(self):
        """
        Gets the value of featureIndex or its default value.
        """
        return self.getOrDefault(self.featureIndex)
+
class IsotonicRegressionModel(JavaModel):
    """
    .. note:: Experimental

    Model produced by fitting an :py:class:`IsotonicRegression` estimator.
    """

    @property
    def boundaries(self):
        """
        Boundaries of the model, as returned by the ``boundaries`` method
        of the wrapped Java model.
        """
        model_boundaries = self._call_java("boundaries")
        return model_boundaries

    @property
    def predictions(self):
        """
        Predictions paired index-by-index with :py:attr:`boundaries`;
        they are monotone because the model comes from isotonic regression.
        """
        model_predictions = self._call_java("predictions")
        return model_predictions
+
class TreeRegressorParams(object):
"""
Private class to track supported impurity measures.