about | summary | refs | log | tree | commit | diff
path: root/python
diff options
context:
space:
mode:
author: =^_^= <maxmoroz@gmail.com> 2016-08-03 04:18:28 -0700
committer: Sean Owen <sowen@cloudera.com> 2016-08-03 04:18:28 -0700
commit: 639df046a250873c26446a037cb832ab28cb5272 (patch)
tree: 04d2ba46953d1d9c3ed0c452c1263d0acec0498f /python
parent: ae226283e19ce396216c73b0ae2470efa122b65b (diff)
download: spark-639df046a250873c26446a037cb832ab28cb5272.tar.gz
spark-639df046a250873c26446a037cb832ab28cb5272.tar.bz2
spark-639df046a250873c26446a037cb832ab28cb5272.zip
[SPARK-16831][PYTHON] Fixed bug in CrossValidator.avgMetrics
## What changes were proposed in this pull request?

avgMetrics was summed, not averaged, across folds.

Author: =^_^= <maxmoroz@gmail.com>

Closes #14456 from pkch/pkch-patch-1.
Diffstat (limited to 'python')
-rw-r--r-- python/pyspark/ml/tuning.py 4
1 file changed, 3 insertions, 1 deletion
diff --git a/python/pyspark/ml/tuning.py b/python/pyspark/ml/tuning.py
index 7f967e5463..2dcc99cef8 100644
--- a/python/pyspark/ml/tuning.py
+++ b/python/pyspark/ml/tuning.py
@@ -166,6 +166,8 @@ class CrossValidator(Estimator, ValidatorParams):
>>> evaluator = BinaryClassificationEvaluator()
>>> cv = CrossValidator(estimator=lr, estimatorParamMaps=grid, evaluator=evaluator)
>>> cvModel = cv.fit(dataset)
+ >>> cvModel.avgMetrics[0]
+ 0.5
>>> evaluator.evaluate(cvModel.transform(dataset))
0.8333...
@@ -234,7 +236,7 @@ class CrossValidator(Estimator, ValidatorParams):
model = est.fit(train, epm[j])
# TODO: duplicate evaluator to take extra params from input
metric = eva.evaluate(model.transform(validation, epm[j]))
- metrics[j] += metric
+ metrics[j] += metric/nFolds
if eva.isLargerBetter():
bestIndex = np.argmax(metrics)