aboutsummaryrefslogtreecommitdiff
path: root/python/pyspark
diff options
context:
space:
mode:
Diffstat (limited to 'python/pyspark')
-rw-r--r--python/pyspark/ml/classification.py39
-rw-r--r--python/pyspark/ml/feature.py8
-rw-r--r--python/pyspark/ml/recommendation.py8
-rw-r--r--python/pyspark/ml/regression.py38
4 files changed, 45 insertions, 48 deletions
diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py
index 8c9a55e79a..1411d3fd9c 100644
--- a/python/pyspark/ml/classification.py
+++ b/python/pyspark/ml/classification.py
@@ -71,7 +71,7 @@ class LogisticRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredicti
threshold=0.5, probabilityCol="probability"):
"""
__init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
- maxIter=100, regParam=0.1, elasticNetParam=0.0, tol=1e-6, fitIntercept=True,
+ maxIter=100, regParam=0.1, elasticNetParam=0.0, tol=1e-6, fitIntercept=True, \
threshold=0.5, probabilityCol="probability")
"""
super(LogisticRegression, self).__init__()
@@ -96,8 +96,8 @@ class LogisticRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredicti
maxIter=100, regParam=0.1, elasticNetParam=0.0, tol=1e-6, fitIntercept=True,
threshold=0.5, probabilityCol="probability"):
"""
- setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction",
- maxIter=100, regParam=0.1, elasticNetParam=0.0, tol=1e-6, fitIntercept=True,
+ setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
+ maxIter=100, regParam=0.1, elasticNetParam=0.0, tol=1e-6, fitIntercept=True, \
threshold=0.5, probabilityCol="probability")
Sets params for logistic regression.
"""
@@ -220,7 +220,7 @@ class DecisionTreeClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPred
maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, impurity="gini"):
"""
__init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
- maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0,
+ maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \
maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, impurity="gini")
"""
super(DecisionTreeClassifier, self).__init__()
@@ -242,9 +242,8 @@ class DecisionTreeClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPred
impurity="gini"):
"""
setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
- maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0,
- maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10,
- impurity="gini")
+ maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \
+ maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, impurity="gini")
Sets params for the DecisionTreeClassifier.
"""
kwargs = self.setParams._input_kwargs
@@ -320,9 +319,9 @@ class RandomForestClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPred
maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, impurity="gini",
numTrees=20, featureSubsetStrategy="auto", seed=42):
"""
- __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction",
- maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0,
- maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, impurity="gini",
+ __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
+ maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \
+ maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, impurity="gini", \
numTrees=20, featureSubsetStrategy="auto", seed=42)
"""
super(RandomForestClassifier, self).__init__()
@@ -355,9 +354,9 @@ class RandomForestClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPred
maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, seed=42,
impurity="gini", numTrees=20, featureSubsetStrategy="auto"):
"""
- setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction",
- maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0,
- maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, seed=42,
+ setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
+ maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \
+ maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, seed=42, \
impurity="gini", numTrees=20, featureSubsetStrategy="auto")
Sets params for random forest classification.
"""
@@ -471,10 +470,10 @@ class GBTClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol
maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, lossType="logistic",
maxIter=20, stepSize=0.1):
"""
- __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction",
- maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0,
- maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, lossType="logistic",
- maxIter=20, stepSize=0.1)
+ __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
+ maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \
+ maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, \
+ lossType="logistic", maxIter=20, stepSize=0.1)
"""
super(GBTClassifier, self).__init__()
#: param for Loss function which GBT tries to minimize (case-insensitive).
@@ -502,9 +501,9 @@ class GBTClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol
maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10,
lossType="logistic", maxIter=20, stepSize=0.1):
"""
- setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction",
- maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0,
- maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10,
+ setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
+ maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \
+ maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, \
lossType="logistic", maxIter=20, stepSize=0.1)
Sets params for Gradient Boosted Tree Classification.
"""
diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py
index 30e1fd4922..58e22190c7 100644
--- a/python/pyspark/ml/feature.py
+++ b/python/pyspark/ml/feature.py
@@ -481,7 +481,7 @@ class RegexTokenizer(JavaTransformer, HasInputCol, HasOutputCol):
def __init__(self, minTokenLength=1, gaps=False, pattern="\\p{L}+|[^\\p{L}\\s]+",
inputCol=None, outputCol=None):
"""
- __init__(self, minTokenLength=1, gaps=False, pattern="\\p{L}+|[^\\p{L}\\s]+",
+ __init__(self, minTokenLength=1, gaps=False, pattern="\\p{L}+|[^\\p{L}\\s]+", \
inputCol=None, outputCol=None)
"""
super(RegexTokenizer, self).__init__()
@@ -496,7 +496,7 @@ class RegexTokenizer(JavaTransformer, HasInputCol, HasOutputCol):
def setParams(self, minTokenLength=1, gaps=False, pattern="\\p{L}+|[^\\p{L}\\s]+",
inputCol=None, outputCol=None):
"""
- setParams(self, minTokenLength=1, gaps=False, pattern="\\p{L}+|[^\\p{L}\\s]+",
+ setParams(self, minTokenLength=1, gaps=False, pattern="\\p{L}+|[^\\p{L}\\s]+", \
inputCol=None, outputCol=None)
Sets params for this RegexTokenizer.
"""
@@ -869,7 +869,7 @@ class Word2Vec(JavaEstimator, HasStepSize, HasMaxIter, HasSeed, HasInputCol, Has
def __init__(self, vectorSize=100, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1,
seed=42, inputCol=None, outputCol=None):
"""
- __init__(self, vectorSize=100, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1,
+ __init__(self, vectorSize=100, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1, \
seed=42, inputCol=None, outputCol=None)
"""
super(Word2Vec, self).__init__()
@@ -889,7 +889,7 @@ class Word2Vec(JavaEstimator, HasStepSize, HasMaxIter, HasSeed, HasInputCol, Has
def setParams(self, vectorSize=100, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1,
seed=42, inputCol=None, outputCol=None):
"""
- setParams(self, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1, seed=42,
+ setParams(self, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1, seed=42, \
inputCol=None, outputCol=None)
Sets params for this Word2Vec.
"""
diff --git a/python/pyspark/ml/recommendation.py b/python/pyspark/ml/recommendation.py
index 4846b907e8..b2439cbd96 100644
--- a/python/pyspark/ml/recommendation.py
+++ b/python/pyspark/ml/recommendation.py
@@ -92,8 +92,8 @@ class ALS(JavaEstimator, HasCheckpointInterval, HasMaxIter, HasPredictionCol, Ha
implicitPrefs=False, alpha=1.0, userCol="user", itemCol="item", seed=0,
ratingCol="rating", nonnegative=False, checkpointInterval=10):
"""
- __init__(self, rank=10, maxIter=10, regParam=0.1, numUserBlocks=10, numItemBlocks=10,
- implicitPrefs=false, alpha=1.0, userCol="user", itemCol="item", seed=0,
+ __init__(self, rank=10, maxIter=10, regParam=0.1, numUserBlocks=10, numItemBlocks=10, \
+ implicitPrefs=false, alpha=1.0, userCol="user", itemCol="item", seed=0, \
ratingCol="rating", nonnegative=false, checkpointInterval=10)
"""
super(ALS, self).__init__()
@@ -118,8 +118,8 @@ class ALS(JavaEstimator, HasCheckpointInterval, HasMaxIter, HasPredictionCol, Ha
implicitPrefs=False, alpha=1.0, userCol="user", itemCol="item", seed=0,
ratingCol="rating", nonnegative=False, checkpointInterval=10):
"""
- setParams(self, rank=10, maxIter=10, regParam=0.1, numUserBlocks=10, numItemBlocks=10,
- implicitPrefs=False, alpha=1.0, userCol="user", itemCol="item", seed=0,
+ setParams(self, rank=10, maxIter=10, regParam=0.1, numUserBlocks=10, numItemBlocks=10, \
+ implicitPrefs=False, alpha=1.0, userCol="user", itemCol="item", seed=0, \
ratingCol="rating", nonnegative=False, checkpointInterval=10)
Sets params for ALS.
"""
diff --git a/python/pyspark/ml/regression.py b/python/pyspark/ml/regression.py
index 2803864ff4..ef77e19327 100644
--- a/python/pyspark/ml/regression.py
+++ b/python/pyspark/ml/regression.py
@@ -33,8 +33,7 @@ class LinearRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPrediction
Linear regression.
The learning objective is to minimize the squared error, with regularization.
- The specific squared error loss function used is:
- L = 1/2n ||A weights - y||^2^
+ The specific squared error loss function used is: L = 1/2n ||A weights - y||^2^
This supports multiple types of regularization:
- none (a.k.a. ordinary least squares)
@@ -191,7 +190,7 @@ class DecisionTreeRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredi
maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, impurity="variance"):
"""
__init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
- maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0,
+ maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \
maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, impurity="variance")
"""
super(DecisionTreeRegressor, self).__init__()
@@ -213,9 +212,8 @@ class DecisionTreeRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredi
impurity="variance"):
"""
setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
- maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0,
- maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10,
- impurity="variance")
+ maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \
+ maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, impurity="variance")
Sets params for the DecisionTreeRegressor.
"""
kwargs = self.setParams._input_kwargs
@@ -286,10 +284,10 @@ class RandomForestRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredi
maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, impurity="variance",
numTrees=20, featureSubsetStrategy="auto", seed=42):
"""
- __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction",
- maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0,
- maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, impurity="variance",
- numTrees=20, featureSubsetStrategy="auto", seed=42)
+ __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
+ maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \
+ maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, \
+ impurity="variance", numTrees=20, featureSubsetStrategy="auto", seed=42)
"""
super(RandomForestRegressor, self).__init__()
#: param for Criterion used for information gain calculation (case-insensitive).
@@ -321,9 +319,9 @@ class RandomForestRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredi
maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, seed=42,
impurity="variance", numTrees=20, featureSubsetStrategy="auto"):
"""
- setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction",
- maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0,
- maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, seed=42,
+ setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
+ maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \
+ maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, seed=42, \
impurity="variance", numTrees=20, featureSubsetStrategy="auto")
Sets params for random forest regression.
"""
@@ -432,10 +430,10 @@ class GBTRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol,
maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, lossType="squared",
maxIter=20, stepSize=0.1):
"""
- __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction",
- maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0,
- maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, lossType="squared",
- maxIter=20, stepSize=0.1)
+ __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
+ maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \
+ maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, \
+ lossType="squared", maxIter=20, stepSize=0.1)
"""
super(GBTRegressor, self).__init__()
#: param for Loss function which GBT tries to minimize (case-insensitive).
@@ -463,9 +461,9 @@ class GBTRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol,
maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10,
lossType="squared", maxIter=20, stepSize=0.1):
"""
- setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction",
- maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0,
- maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10,
+ setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
+ maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \
+ maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, \
lossType="squared", maxIter=20, stepSize=0.1)
Sets params for Gradient Boosted Tree Regression.
"""