diff options
author | =^_^= <maxmoroz@gmail.com> | 2016-08-03 04:18:28 -0700 |
---|---|---|
committer | Sean Owen <sowen@cloudera.com> | 2016-08-03 04:18:28 -0700 |
commit | 639df046a250873c26446a037cb832ab28cb5272 (patch) | |
tree | 04d2ba46953d1d9c3ed0c452c1263d0acec0498f | |
parent | ae226283e19ce396216c73b0ae2470efa122b65b (diff) | |
download | spark-639df046a250873c26446a037cb832ab28cb5272.tar.gz spark-639df046a250873c26446a037cb832ab28cb5272.tar.bz2 spark-639df046a250873c26446a037cb832ab28cb5272.zip |
[SPARK-16831][PYTHON] Fixed bug in CrossValidator.avgMetrics
## What changes were proposed in this pull request?
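`avgMetrics` was summed, not averaged, across folds. Each fold's metric is now divided by `nFolds` before being accumulated, so `avgMetrics` holds the true per-parameter-map average.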
Author: =^_^= <maxmoroz@gmail.com>
Closes #14456 from pkch/pkch-patch-1.
-rw-r--r-- | python/pyspark/ml/tuning.py | 4 |
1 file changed, 3 insertions, 1 deletion
```diff
diff --git a/python/pyspark/ml/tuning.py b/python/pyspark/ml/tuning.py
index 7f967e5463..2dcc99cef8 100644
--- a/python/pyspark/ml/tuning.py
+++ b/python/pyspark/ml/tuning.py
@@ -166,6 +166,8 @@ class CrossValidator(Estimator, ValidatorParams):
     >>> evaluator = BinaryClassificationEvaluator()
     >>> cv = CrossValidator(estimator=lr, estimatorParamMaps=grid, evaluator=evaluator)
     >>> cvModel = cv.fit(dataset)
+    >>> cvModel.avgMetrics[0]
+    0.5
     >>> evaluator.evaluate(cvModel.transform(dataset))
     0.8333...
 
@@ -234,7 +236,7 @@ class CrossValidator(Estimator, ValidatorParams):
                 model = est.fit(train, epm[j])
                 # TODO: duplicate evaluator to take extra params from input
                 metric = eva.evaluate(model.transform(validation, epm[j]))
-                metrics[j] += metric
+                metrics[j] += metric/nFolds
 
         if eva.isLargerBetter():
             bestIndex = np.argmax(metrics)
```