aboutsummaryrefslogtreecommitdiff
path: root/python/pyspark
diff options
context:
space:
mode:
authorLiang-Chi Hsieh <viirya@gmail.com>2015-06-20 13:01:59 -0700
committerJoseph K. Bradley <joseph@databricks.com>2015-06-20 13:01:59 -0700
commit0b8995168f02bb55afb0a5b7dbdb941c3c89cb4c (patch)
tree64a27502be793519bed306017f558f1a3fb15044 /python/pyspark
parent1b6fe9b1a70aa3f81448c2705ea3a4b501cbda9d (diff)
downloadspark-0b8995168f02bb55afb0a5b7dbdb941c3c89cb4c.tar.gz
spark-0b8995168f02bb55afb0a5b7dbdb941c3c89cb4c.tar.bz2
spark-0b8995168f02bb55afb0a5b7dbdb941c3c89cb4c.zip
[SPARK-8468] [ML] Take the negative of some metrics in RegressionEvaluator to get correct cross validation
JIRA: https://issues.apache.org/jira/browse/SPARK-8468 Author: Liang-Chi Hsieh <viirya@gmail.com> Closes #6905 from viirya/cv_min and squashes the following commits: 930d3db [Liang-Chi Hsieh] Fix python unit test and add document. d632135 [Liang-Chi Hsieh] Merge remote-tracking branch 'upstream/master' into cv_min 16e3b2c [Liang-Chi Hsieh] Take the negative instead of reciprocal. c3dd8d9 [Liang-Chi Hsieh] For comments. b5f52c1 [Liang-Chi Hsieh] Add param to CrossValidator for choosing whether to maximize evaluation value.
Diffstat (limited to 'python/pyspark')
-rw-r--r--python/pyspark/ml/evaluation.py8
1 file changed, 5 insertions, 3 deletions
diff --git a/python/pyspark/ml/evaluation.py b/python/pyspark/ml/evaluation.py
index d8ddb78c6d..595593a7f2 100644
--- a/python/pyspark/ml/evaluation.py
+++ b/python/pyspark/ml/evaluation.py
@@ -160,13 +160,15 @@ class RegressionEvaluator(JavaEvaluator, HasLabelCol, HasPredictionCol):
...
>>> evaluator = RegressionEvaluator(predictionCol="raw")
>>> evaluator.evaluate(dataset)
- 2.842...
+ -2.842...
>>> evaluator.evaluate(dataset, {evaluator.metricName: "r2"})
0.993...
>>> evaluator.evaluate(dataset, {evaluator.metricName: "mae"})
- 2.649...
+ -2.649...
"""
- # a placeholder to make it appear in the generated doc
+ # Because we will maximize evaluation value (ref: `CrossValidator`),
+ # when we evaluate a metric that is needed to minimize (e.g., `"rmse"`, `"mse"`, `"mae"`),
+ # we take and output the negative of this metric.
metricName = Param(Params._dummy(), "metricName",
"metric name in evaluation (mse|rmse|r2|mae)")