From 639df046a250873c26446a037cb832ab28cb5272 Mon Sep 17 00:00:00 2001 From: =^_^= Date: Wed, 3 Aug 2016 04:18:28 -0700 Subject: [SPARK-16831][PYTHON] Fixed bug in CrossValidator.avgMetrics ## What changes were proposed in this pull request? avgMetrics was summed, not averaged, across folds Author: =^_^= Closes #14456 from pkch/pkch-patch-1. --- python/pyspark/ml/tuning.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'python') diff --git a/python/pyspark/ml/tuning.py b/python/pyspark/ml/tuning.py index 7f967e5463..2dcc99cef8 100644 --- a/python/pyspark/ml/tuning.py +++ b/python/pyspark/ml/tuning.py @@ -166,6 +166,8 @@ class CrossValidator(Estimator, ValidatorParams): >>> evaluator = BinaryClassificationEvaluator() >>> cv = CrossValidator(estimator=lr, estimatorParamMaps=grid, evaluator=evaluator) >>> cvModel = cv.fit(dataset) + >>> cvModel.avgMetrics[0] + 0.5 >>> evaluator.evaluate(cvModel.transform(dataset)) 0.8333... @@ -234,7 +236,7 @@ class CrossValidator(Estimator, ValidatorParams): model = est.fit(train, epm[j]) # TODO: duplicate evaluator to take extra params from input metric = eva.evaluate(model.transform(validation, epm[j])) - metrics[j] += metric + metrics[j] += metric/nFolds if eva.isLargerBetter(): bestIndex = np.argmax(metrics) -- cgit v1.2.3