aboutsummaryrefslogtreecommitdiff
path: root/python/pyspark/ml/regression.py
diff options
context:
space:
mode:
author: Xiangrui Meng <meng@databricks.com> 2015-05-18 12:02:18 -0700
committer: Xiangrui Meng <meng@databricks.com> 2015-05-18 12:02:18 -0700
commit: 9c7e802a5a2b8cd3eb77642f84c54a8e976fc996 (patch)
tree: 2e3b7e367f57b64ef46733ee8b64aa258e58cca8 /python/pyspark/ml/regression.py
parent: 56ede88485cfca90974425fcb603b257be47229b (diff)
download: spark-9c7e802a5a2b8cd3eb77642f84c54a8e976fc996.tar.gz
spark-9c7e802a5a2b8cd3eb77642f84c54a8e976fc996.tar.bz2
spark-9c7e802a5a2b8cd3eb77642f84c54a8e976fc996.zip
[SPARK-7380] [MLLIB] pipeline stages should be copyable in Python
This PR makes pipeline stages in Python copyable and hence simplifies some implementations. It also includes the following changes:

1. Rename `paramMap` and `defaultParamMap` to `_paramMap` and `_defaultParamMap`, respectively.
2. Accept a list of param maps in `fit`.
3. Use parent uid and name to identify param.

jkbradley

Author: Xiangrui Meng <meng@databricks.com>
Author: Joseph K. Bradley <joseph@databricks.com>

Closes #6088 from mengxr/SPARK-7380 and squashes the following commits:

413c463 [Xiangrui Meng] remove unnecessary doc
4159f35 [Xiangrui Meng] Merge remote-tracking branch 'apache/master' into SPARK-7380
611c719 [Xiangrui Meng] fix python style
68862b8 [Xiangrui Meng] update _java_obj initialization
927ad19 [Xiangrui Meng] fix ml/tests.py
0138fc3 [Xiangrui Meng] update feature transformers and fix a bug in RegexTokenizer
9ca44fb [Xiangrui Meng] simplify Java wrappers and add tests
c7d84ef [Xiangrui Meng] update ml/tests.py to test copy params
7e0d27f [Xiangrui Meng] merge master
46840fb [Xiangrui Meng] update wrappers
b6db1ed [Xiangrui Meng] update all self.paramMap to self._paramMap
46cb6ed [Xiangrui Meng] merge master
a163413 [Xiangrui Meng] fix style
1042e80 [Xiangrui Meng] Merge remote-tracking branch 'apache/master' into SPARK-7380
9630eae [Xiangrui Meng] fix Identifiable._randomUID
13bd70a [Xiangrui Meng] update ml/tests.py
64a536c [Xiangrui Meng] use _fit/_transform/_evaluate to simplify the impl
02abf13 [Xiangrui Meng] Merge remote-tracking branch 'apache/master' into copyable-python
66ce18c [Joseph K. Bradley] some cleanups before sending to Xiangrui
7431272 [Joseph K. Bradley] Rebased with master
Diffstat (limited to 'python/pyspark/ml/regression.py')
-rw-r--r-- python/pyspark/ml/regression.py | 30
1 files changed, 17 insertions, 13 deletions
diff --git a/python/pyspark/ml/regression.py b/python/pyspark/ml/regression.py
index ef77e19327..ff809cdafd 100644
--- a/python/pyspark/ml/regression.py
+++ b/python/pyspark/ml/regression.py
@@ -62,7 +62,7 @@ class LinearRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPrediction
...
TypeError: Method setParams forces keyword arguments.
"""
- _java_class = "org.apache.spark.ml.regression.LinearRegression"
+
# a placeholder to make it appear in the generated doc
elasticNetParam = \
Param(Params._dummy(), "elasticNetParam",
@@ -77,6 +77,8 @@ class LinearRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPrediction
maxIter=100, regParam=0.0, elasticNetParam=0.0, tol=1e-6)
"""
super(LinearRegression, self).__init__()
+ self._java_obj = self._new_java_obj(
+ "org.apache.spark.ml.regression.LinearRegression", self.uid)
#: param for the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty
# is an L2 penalty. For alpha = 1, it is an L1 penalty.
self.elasticNetParam = \
@@ -105,7 +107,7 @@ class LinearRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPrediction
"""
Sets the value of :py:attr:`elasticNetParam`.
"""
- self.paramMap[self.elasticNetParam] = value
+ self._paramMap[self.elasticNetParam] = value
return self
def getElasticNetParam(self):
@@ -178,7 +180,6 @@ class DecisionTreeRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredi
1.0
"""
- _java_class = "org.apache.spark.ml.regression.DecisionTreeRegressor"
# a placeholder to make it appear in the generated doc
impurity = Param(Params._dummy(), "impurity",
"Criterion used for information gain calculation (case-insensitive). " +
@@ -194,6 +195,8 @@ class DecisionTreeRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredi
maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, impurity="variance")
"""
super(DecisionTreeRegressor, self).__init__()
+ self._java_obj = self._new_java_obj(
+ "org.apache.spark.ml.regression.DecisionTreeRegressor", self.uid)
#: param for Criterion used for information gain calculation (case-insensitive).
self.impurity = \
Param(self, "impurity",
@@ -226,7 +229,7 @@ class DecisionTreeRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredi
"""
Sets the value of :py:attr:`impurity`.
"""
- self.paramMap[self.impurity] = value
+ self._paramMap[self.impurity] = value
return self
def getImpurity(self):
@@ -264,7 +267,6 @@ class RandomForestRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredi
0.5
"""
- _java_class = "org.apache.spark.ml.regression.RandomForestRegressor"
# a placeholder to make it appear in the generated doc
impurity = Param(Params._dummy(), "impurity",
"Criterion used for information gain calculation (case-insensitive). " +
@@ -290,6 +292,8 @@ class RandomForestRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredi
impurity="variance", numTrees=20, featureSubsetStrategy="auto", seed=42)
"""
super(RandomForestRegressor, self).__init__()
+ self._java_obj = self._new_java_obj(
+ "org.apache.spark.ml.regression.RandomForestRegressor", self.uid)
#: param for Criterion used for information gain calculation (case-insensitive).
self.impurity = \
Param(self, "impurity",
@@ -335,7 +339,7 @@ class RandomForestRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredi
"""
Sets the value of :py:attr:`impurity`.
"""
- self.paramMap[self.impurity] = value
+ self._paramMap[self.impurity] = value
return self
def getImpurity(self):
@@ -348,7 +352,7 @@ class RandomForestRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredi
"""
Sets the value of :py:attr:`subsamplingRate`.
"""
- self.paramMap[self.subsamplingRate] = value
+ self._paramMap[self.subsamplingRate] = value
return self
def getSubsamplingRate(self):
@@ -361,7 +365,7 @@ class RandomForestRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredi
"""
Sets the value of :py:attr:`numTrees`.
"""
- self.paramMap[self.numTrees] = value
+ self._paramMap[self.numTrees] = value
return self
def getNumTrees(self):
@@ -374,7 +378,7 @@ class RandomForestRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredi
"""
Sets the value of :py:attr:`featureSubsetStrategy`.
"""
- self.paramMap[self.featureSubsetStrategy] = value
+ self._paramMap[self.featureSubsetStrategy] = value
return self
def getFeatureSubsetStrategy(self):
@@ -412,7 +416,6 @@ class GBTRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol,
1.0
"""
- _java_class = "org.apache.spark.ml.regression.GBTRegressor"
# a placeholder to make it appear in the generated doc
lossType = Param(Params._dummy(), "lossType",
"Loss function which GBT tries to minimize (case-insensitive). " +
@@ -436,6 +439,7 @@ class GBTRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol,
lossType="squared", maxIter=20, stepSize=0.1)
"""
super(GBTRegressor, self).__init__()
+ self._java_obj = self._new_java_obj("org.apache.spark.ml.regression.GBTRegressor", self.uid)
#: param for Loss function which GBT tries to minimize (case-insensitive).
self.lossType = Param(self, "lossType",
"Loss function which GBT tries to minimize (case-insensitive). " +
@@ -477,7 +481,7 @@ class GBTRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol,
"""
Sets the value of :py:attr:`lossType`.
"""
- self.paramMap[self.lossType] = value
+ self._paramMap[self.lossType] = value
return self
def getLossType(self):
@@ -490,7 +494,7 @@ class GBTRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol,
"""
Sets the value of :py:attr:`subsamplingRate`.
"""
- self.paramMap[self.subsamplingRate] = value
+ self._paramMap[self.subsamplingRate] = value
return self
def getSubsamplingRate(self):
@@ -503,7 +507,7 @@ class GBTRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol,
"""
Sets the value of :py:attr:`stepSize`.
"""
- self.paramMap[self.stepSize] = value
+ self._paramMap[self.stepSize] = value
return self
def getStepSize(self):