diff options
author | Yanbo Liang <ybliang8@gmail.com> | 2016-06-06 09:36:34 +0100 |
---|---|---|
committer | Sean Owen <sowen@cloudera.com> | 2016-06-06 09:36:34 +0100 |
commit | a95252823e09939b654dd425db38dadc4100bc87 (patch) | |
tree | 3d563d80f9d6c946b882a31145a12383e0b649bf /examples/src/main/python | |
parent | fd8af397132fa1415a4c19d7f5cb5a41aa6ddb27 (diff) | |
download | spark-a95252823e09939b654dd425db38dadc4100bc87.tar.gz spark-a95252823e09939b654dd425db38dadc4100bc87.tar.bz2 spark-a95252823e09939b654dd425db38dadc4100bc87.zip |
[SPARK-15771][ML][EXAMPLES] Use 'accuracy' rather than 'precision' in many ML examples
## What changes were proposed in this pull request?
Since [SPARK-15617](https://issues.apache.org/jira/browse/SPARK-15617) deprecated ```precision``` in ```MulticlassClassificationEvaluator```, many ML examples broken.
```python
pyspark.sql.utils.IllegalArgumentException: u'MulticlassClassificationEvaluator_4c3bb1d73d8cc0cedae6 parameter metricName given invalid value precision.'
```
We should use ```accuracy``` to replace ```precision``` in these examples.
## How was this patch tested?
Offline tests.
Author: Yanbo Liang <ybliang8@gmail.com>
Closes #13519 from yanboliang/spark-15771.
Diffstat (limited to 'examples/src/main/python')
6 files changed, 12 insertions, 12 deletions
diff --git a/examples/src/main/python/ml/decision_tree_classification_example.py b/examples/src/main/python/ml/decision_tree_classification_example.py index 9b40b701ec..708f1af6cc 100644 --- a/examples/src/main/python/ml/decision_tree_classification_example.py +++ b/examples/src/main/python/ml/decision_tree_classification_example.py @@ -66,7 +66,7 @@ if __name__ == "__main__": # Select (prediction, true label) and compute test error evaluator = MulticlassClassificationEvaluator( - labelCol="indexedLabel", predictionCol="prediction", metricName="precision") + labelCol="indexedLabel", predictionCol="prediction", metricName="accuracy") accuracy = evaluator.evaluate(predictions) print("Test Error = %g " % (1.0 - accuracy)) diff --git a/examples/src/main/python/ml/gradient_boosted_tree_classifier_example.py b/examples/src/main/python/ml/gradient_boosted_tree_classifier_example.py index 50026d7b7e..6c2d7e7b81 100644 --- a/examples/src/main/python/ml/gradient_boosted_tree_classifier_example.py +++ b/examples/src/main/python/ml/gradient_boosted_tree_classifier_example.py @@ -66,7 +66,7 @@ if __name__ == "__main__": # Select (prediction, true label) and compute test error evaluator = MulticlassClassificationEvaluator( - labelCol="indexedLabel", predictionCol="prediction", metricName="precision") + labelCol="indexedLabel", predictionCol="prediction", metricName="accuracy") accuracy = evaluator.evaluate(predictions) print("Test Error = %g" % (1.0 - accuracy)) diff --git a/examples/src/main/python/ml/multilayer_perceptron_classification.py b/examples/src/main/python/ml/multilayer_perceptron_classification.py index 8bededc14d..aa33bef5a3 100644 --- a/examples/src/main/python/ml/multilayer_perceptron_classification.py +++ b/examples/src/main/python/ml/multilayer_perceptron_classification.py @@ -43,11 +43,11 @@ if __name__ == "__main__": trainer = MultilayerPerceptronClassifier(maxIter=100, layers=layers, blockSize=128, seed=1234) # train the model model = trainer.fit(train) - # compute precision on the test set + # compute accuracy on the test set result = model.transform(test) predictionAndLabels = result.select("prediction", "label") - evaluator = MulticlassClassificationEvaluator(metricName="precision") - print("Precision:" + str(evaluator.evaluate(predictionAndLabels))) + evaluator = MulticlassClassificationEvaluator(metricName="accuracy") + print("Accuracy: " + str(evaluator.evaluate(predictionAndLabels))) # $example off$ spark.stop() diff --git a/examples/src/main/python/ml/naive_bayes_example.py b/examples/src/main/python/ml/naive_bayes_example.py index 89255a2bae..8bc32222fe 100644 --- a/examples/src/main/python/ml/naive_bayes_example.py +++ b/examples/src/main/python/ml/naive_bayes_example.py @@ -43,11 +43,11 @@ if __name__ == "__main__": # train the model model = nb.fit(train) - # compute precision on the test set + # compute accuracy on the test set result = model.transform(test) predictionAndLabels = result.select("prediction", "label") - evaluator = MulticlassClassificationEvaluator(metricName="precision") - print("Precision:" + str(evaluator.evaluate(predictionAndLabels))) + evaluator = MulticlassClassificationEvaluator(metricName="accuracy") + print("Accuracy: " + str(evaluator.evaluate(predictionAndLabels))) # $example off$ spark.stop() diff --git a/examples/src/main/python/ml/one_vs_rest_example.py b/examples/src/main/python/ml/one_vs_rest_example.py index 971156d0dd..b82087beba 100644 --- a/examples/src/main/python/ml/one_vs_rest_example.py +++ b/examples/src/main/python/ml/one_vs_rest_example.py @@ -58,11 +58,11 @@ if __name__ == "__main__": predictions = ovrModel.transform(test) # obtain evaluator. - evaluator = MulticlassClassificationEvaluator(metricName="precision") + evaluator = MulticlassClassificationEvaluator(metricName="accuracy") # compute the classification error on test data. - precision = evaluator.evaluate(predictions) - print("Test Error : " + str(1 - precision)) + accuracy = evaluator.evaluate(predictions) + print("Test Error : " + str(1 - accuracy)) # $example off$ spark.stop() diff --git a/examples/src/main/python/ml/random_forest_classifier_example.py b/examples/src/main/python/ml/random_forest_classifier_example.py index c618eaf60c..a7fc765318 100644 --- a/examples/src/main/python/ml/random_forest_classifier_example.py +++ b/examples/src/main/python/ml/random_forest_classifier_example.py @@ -66,7 +66,7 @@ if __name__ == "__main__": # Select (prediction, true label) and compute test error evaluator = MulticlassClassificationEvaluator( - labelCol="indexedLabel", predictionCol="prediction", metricName="precision") + labelCol="indexedLabel", predictionCol="prediction", metricName="accuracy") accuracy = evaluator.evaluate(predictions) print("Test Error = %g" % (1.0 - accuracy)) |