[SPARK-9774] [ML] [PYSPARK] Add python api for ml regression isotonicregression

Add the Python API for isotonicregression. Author: Holden Karau <holden@pigscanfly.ca> Closes #8214 from holdenk/SPARK-9774-add-python-api-for-ml-regression-isotonicregression.
author: Holden Karau <holden@pigscanfly.ca> 2015-10-07 17:50:35 -0700
committer: Joseph K. Bradley <joseph@databricks.com> 2015-10-07 17:50:35 -0700
commit: 3aff0866a8601b4daf760d6bf175f68d5a0c8912 (patch)
tree: 6f9085d991fb04be7d00fb8159692dfe75fbd30f /python/pyspark/ml/regression.py
parent: 1bc435ae3afb7a007b8a8ff00dcad4738a9ff055 (diff)
download: spark-3aff0866a8601b4daf760d6bf175f68d5a0c8912.tar.gz
spark-3aff0866a8601b4daf760d6bf175f68d5a0c8912.tar.bz2
spark-3aff0866a8601b4daf760d6bf175f68d5a0c8912.zip
1 files changed, 118 insertions, 0 deletions
diff --git a/python/pyspark/ml/regression.py b/python/pyspark/ml/regression.py
index e12abeba01..eb5f4bd6d7 100644
--- a/python/pyspark/ml/regression.py
+++ b/python/pyspark/ml/regression.py
@@ -25,6 +25,7 @@ from pyspark.mllib.common import inherit_doc
 __all__ = ['AFTSurvivalRegression', 'AFTSurvivalRegressionModel',
            'DecisionTreeRegressor', 'DecisionTreeRegressionModel',
            'GBTRegressor', 'GBTRegressionModel',
+           'IsotonicRegression', 'IsotonicRegressionModel',
            'LinearRegression', 'LinearRegressionModel',
            'RandomForestRegressor', 'RandomForestRegressionModel']
 
@@ -142,6 +143,123 @@ class LinearRegressionModel(JavaModel):
         return self._call_java("intercept")
 
 
+@inherit_doc
+class IsotonicRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol,
+                         HasWeightCol):
+    """
+    .. note:: Experimental
+
+    Currently implemented using parallelized pool adjacent violators algorithm.
+    Only univariate (single feature) algorithm supported.
+
+    >>> from pyspark.mllib.linalg import Vectors
+    >>> df = sqlContext.createDataFrame([
+    ...     (1.0, Vectors.dense(1.0)),
+    ...     (0.0, Vectors.sparse(1, [], []))], ["label", "features"])
+    >>> ir = IsotonicRegression()
+    >>> model = ir.fit(df)
+    >>> test0 = sqlContext.createDataFrame([(Vectors.dense(-1.0),)], ["features"])
+    >>> model.transform(test0).head().prediction
+    0.0
+    >>> model.boundaries
+    DenseVector([0.0, 1.0])
+    """
+
+    # a placeholder to make it appear in the generated doc
+    isotonic = \
+        Param(Params._dummy(), "isotonic",
+              "whether the output sequence should be isotonic/increasing (true) or" +
+              "antitonic/decreasing (false).")
+    featureIndex = \
+        Param(Params._dummy(), "featureIndex",
+              "The index of the feature if featuresCol is a vector column, no effect otherwise.")
+
+    @keyword_only
+    def __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction",
+                 weightCol=None, isotonic=True, featureIndex=0):
+        """
+        __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
+                 weightCol=None, isotonic=True, featureIndex=0):
+        """
+        super(IsotonicRegression, self).__init__()
+        self._java_obj = self._new_java_obj(
+            "org.apache.spark.ml.regression.IsotonicRegression", self.uid)
+        self.isotonic = \
+            Param(self, "isotonic",
+                  "whether the output sequence should be isotonic/increasing (true) or" +
+                  "antitonic/decreasing (false).")
+        self.featureIndex = \
+            Param(self, "featureIndex",
+                  "The index of the feature if featuresCol is a vector column, no effect " +
+                  "otherwise.")
+        self._setDefault(isotonic=True, featureIndex=0)
+        kwargs = self.__init__._input_kwargs
+        self.setParams(**kwargs)
+
+    @keyword_only
+    def setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction",
+                  weightCol=None, isotonic=True, featureIndex=0):
+        """
+        setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
+                 weightCol=None, isotonic=True, featureIndex=0):
+        Set the params for IsotonicRegression.
+        """
+        kwargs = self.setParams._input_kwargs
+        return self._set(**kwargs)
+
+    def _create_model(self, java_model):
+        return IsotonicRegressionModel(java_model)
+
+    def setIsotonic(self, value):
+        """
+        Sets the value of :py:attr:`isotonic`.
+        """
+        self._paramMap[self.isotonic] = value
+        return self
+
+    def getIsotonic(self):
+        """
+        Gets the value of isotonic or its default value.
+        """
+        return self.getOrDefault(self.isotonic)
+
+    def setFeatureIndex(self, value):
+        """
+        Sets the value of :py:attr:`featureIndex`.
+        """
+        self._paramMap[self.featureIndex] = value
+        return self
+
+    def getFeatureIndex(self):
+        """
+        Gets the value of featureIndex or its default value.
+        """
+        return self.getOrDefault(self.featureIndex)
+
+
+class IsotonicRegressionModel(JavaModel):
+    """
+    .. note:: Experimental
+
+    Model fitted by IsotonicRegression.
+    """
+
+    @property
+    def boundaries(self):
+        """
+        Model boundaries.
+        """
+        return self._call_java("boundaries")
+
+    @property
+    def predictions(self):
+        """
+        Predictions associated with the boundaries at the same index, monotone because of isotonic
+        regression.
+        """
+        return self._call_java("predictions")
+
+
 class TreeRegressorParams(object):
     """
     Private class to track supported impurity measures.
author	Holden Karau <holden@pigscanfly.ca>	2015-10-07 17:50:35 -0700
committer	Joseph K. Bradley <joseph@databricks.com>	2015-10-07 17:50:35 -0700
commit	3aff0866a8601b4daf760d6bf175f68d5a0c8912 (patch)
tree	6f9085d991fb04be7d00fb8159692dfe75fbd30f /python/pyspark/ml/regression.py
parent	1bc435ae3afb7a007b8a8ff00dcad4738a9ff055 (diff)
download	spark-3aff0866a8601b4daf760d6bf175f68d5a0c8912.tar.gz spark-3aff0866a8601b4daf760d6bf175f68d5a0c8912.tar.bz2 spark-3aff0866a8601b4daf760d6bf175f68d5a0c8912.zip