diff options
author | Sean Owen <sowen@cloudera.com> | 2016-05-26 14:25:28 -0700 |
---|---|---|
committer | Joseph K. Bradley <joseph@databricks.com> | 2016-05-26 14:25:28 -0700 |
commit | b0a03feef2cf4daa7642ec7f4dc479dbd473b581 (patch) | |
tree | 0d974e9a1badbe412a02c7bae91da2f3e98c2666 /examples/src/main | |
parent | 0f61d6efb45b9ee94fa663f67c4489fbdae2eded (diff) | |
download | spark-b0a03feef2cf4daa7642ec7f4dc479dbd473b581.tar.gz spark-b0a03feef2cf4daa7642ec7f4dc479dbd473b581.tar.bz2 spark-b0a03feef2cf4daa7642ec7f4dc479dbd473b581.zip |
[SPARK-15457][MLLIB][ML] Eliminate some warnings from MLlib about deprecations
## What changes were proposed in this pull request?
Several classes and methods have been deprecated and are creating lots of build warnings in branch-2.0. This issue is to identify and fix those items:
* WithSGD classes: Un-deprecate the class itself, but deprecate its object and its public constructor, so that any public use still requires a deprecated API. We need to keep a non-deprecated private API since we cannot eliminate certain uses: the Python API, streaming algorithms, and examples.
* Use in PythonMLlibAPI: Change to using private constructors
* Streaming algorithms: No warnings after we un-deprecate the classes
* Examples: Deprecate or change ones which use deprecated APIs
* MulticlassMetrics fields (precision, etc.)
* LinearRegressionSummary.model field
## How was this patch tested?
Existing tests. Checked for warnings manually.
Author: Sean Owen <sowen@cloudera.com>
Author: Joseph K. Bradley <joseph@databricks.com>
Closes #13314 from jkbradley/warning-cleanups.
Diffstat (limited to 'examples/src/main')
11 files changed, 19 insertions, 23 deletions
diff --git a/examples/src/main/java/org/apache/spark/examples/mllib/JavaLogisticRegressionWithLBFGSExample.java b/examples/src/main/java/org/apache/spark/examples/mllib/JavaLogisticRegressionWithLBFGSExample.java index 9d8e4a90db..7fc371ec0f 100644 --- a/examples/src/main/java/org/apache/spark/examples/mllib/JavaLogisticRegressionWithLBFGSExample.java +++ b/examples/src/main/java/org/apache/spark/examples/mllib/JavaLogisticRegressionWithLBFGSExample.java @@ -65,8 +65,8 @@ public class JavaLogisticRegressionWithLBFGSExample { // Get evaluation metrics. MulticlassMetrics metrics = new MulticlassMetrics(predictionAndLabels.rdd()); - double precision = metrics.precision(); - System.out.println("Precision = " + precision); + double accuracy = metrics.accuracy(); + System.out.println("Accuracy = " + accuracy); // Save and load model model.save(sc, "target/tmp/javaLogisticRegressionWithLBFGSModel"); diff --git a/examples/src/main/java/org/apache/spark/examples/mllib/JavaMulticlassClassificationMetricsExample.java b/examples/src/main/java/org/apache/spark/examples/mllib/JavaMulticlassClassificationMetricsExample.java index 5247c9c748..e84a3a712d 100644 --- a/examples/src/main/java/org/apache/spark/examples/mllib/JavaMulticlassClassificationMetricsExample.java +++ b/examples/src/main/java/org/apache/spark/examples/mllib/JavaMulticlassClassificationMetricsExample.java @@ -68,9 +68,7 @@ public class JavaMulticlassClassificationMetricsExample { System.out.println("Confusion matrix: \n" + confusion); // Overall statistics - System.out.println("Precision = " + metrics.precision()); - System.out.println("Recall = " + metrics.recall()); - System.out.println("F1 Score = " + metrics.fMeasure()); + System.out.println("Accuracy = " + metrics.accuracy()); // Stats by labels for (int i = 0; i < metrics.labels().length; i++) { diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/DecisionTreeExample.scala 
b/examples/src/main/scala/org/apache/spark/examples/ml/DecisionTreeExample.scala index 5e51dbad76..de4474555d 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/DecisionTreeExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/DecisionTreeExample.scala @@ -321,7 +321,7 @@ object DecisionTreeExample { case None => throw new RuntimeException( "Unknown failure when indexing labels for classification.") } - val accuracy = new MulticlassMetrics(predictions.zip(labels)).precision + val accuracy = new MulticlassMetrics(predictions.zip(labels)).accuracy println(s" Accuracy ($numClasses classes): $accuracy") } diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/DecisionTreeRunner.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/DecisionTreeRunner.scala index ee811d3aa1..a85aa2cac9 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/DecisionTreeRunner.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/DecisionTreeRunner.scala @@ -295,11 +295,10 @@ object DecisionTreeRunner { } if (params.algo == Classification) { val trainAccuracy = - new MulticlassMetrics(training.map(lp => (model.predict(lp.features), lp.label))) - .precision + new MulticlassMetrics(training.map(lp => (model.predict(lp.features), lp.label))).accuracy println(s"Train accuracy = $trainAccuracy") val testAccuracy = - new MulticlassMetrics(test.map(lp => (model.predict(lp.features), lp.label))).precision + new MulticlassMetrics(test.map(lp => (model.predict(lp.features), lp.label))).accuracy println(s"Test accuracy = $testAccuracy") } if (params.algo == Regression) { @@ -322,11 +321,10 @@ object DecisionTreeRunner { println(model) // Print model summary. 
} val trainAccuracy = - new MulticlassMetrics(training.map(lp => (model.predict(lp.features), lp.label))) - .precision + new MulticlassMetrics(training.map(lp => (model.predict(lp.features), lp.label))).accuracy println(s"Train accuracy = $trainAccuracy") val testAccuracy = - new MulticlassMetrics(test.map(lp => (model.predict(lp.features), lp.label))).precision + new MulticlassMetrics(test.map(lp => (model.predict(lp.features), lp.label))).accuracy println(s"Test accuracy = $testAccuracy") } if (params.algo == Regression) { diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/GradientBoostedTreesRunner.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/GradientBoostedTreesRunner.scala index b0144ef533..90e4687c1f 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/GradientBoostedTreesRunner.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/GradientBoostedTreesRunner.scala @@ -120,11 +120,10 @@ object GradientBoostedTreesRunner { println(model) // Print model summary. 
} val trainAccuracy = - new MulticlassMetrics(training.map(lp => (model.predict(lp.features), lp.label))) - .precision + new MulticlassMetrics(training.map(lp => (model.predict(lp.features), lp.label))).accuracy println(s"Train accuracy = $trainAccuracy") val testAccuracy = - new MulticlassMetrics(test.map(lp => (model.predict(lp.features), lp.label))).precision + new MulticlassMetrics(test.map(lp => (model.predict(lp.features), lp.label))).accuracy println(s"Test accuracy = $testAccuracy") } else if (params.algo == "Regression") { val startTime = System.nanoTime() diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/LinearRegression.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/LinearRegression.scala index f87611f5d4..a70203028c 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/LinearRegression.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/LinearRegression.scala @@ -34,6 +34,7 @@ import org.apache.spark.mllib.util.MLUtils * A synthetic dataset can be found at `data/mllib/sample_linear_regression_data.txt`. * If you use it as a template to create your own app, please use `spark-submit` to submit your app. 
*/ +@deprecated("Use ml.regression.LinearRegression or LBFGS", "2.0.0") object LinearRegression { object RegType extends Enumeration { diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/LinearRegressionWithSGDExample.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/LinearRegressionWithSGDExample.scala index 669868787e..d399618094 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/LinearRegressionWithSGDExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/LinearRegressionWithSGDExample.scala @@ -26,6 +26,7 @@ import org.apache.spark.mllib.regression.LinearRegressionModel import org.apache.spark.mllib.regression.LinearRegressionWithSGD // $example off$ +@deprecated("Use ml.regression.LinearRegression or LBFGS", "2.0.0") object LinearRegressionWithSGDExample { def main(args: Array[String]): Unit = { diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/LogisticRegressionWithLBFGSExample.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/LogisticRegressionWithLBFGSExample.scala index 632a2d537e..31ba740ad4 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/LogisticRegressionWithLBFGSExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/LogisticRegressionWithLBFGSExample.scala @@ -54,8 +54,8 @@ object LogisticRegressionWithLBFGSExample { // Get evaluation metrics. 
val metrics = new MulticlassMetrics(predictionAndLabels) - val precision = metrics.precision - println("Precision = " + precision) + val accuracy = metrics.accuracy + println(s"Accuracy = $accuracy") // Save and load model model.save(sc, "target/tmp/scalaLogisticRegressionWithLBFGSModel") diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/MulticlassMetricsExample.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/MulticlassMetricsExample.scala index 4f925ede24..12394c867e 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/MulticlassMetricsExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/MulticlassMetricsExample.scala @@ -59,13 +59,9 @@ object MulticlassMetricsExample { println(metrics.confusionMatrix) // Overall Statistics - val precision = metrics.precision - val recall = metrics.recall // same as true positive rate - val f1Score = metrics.fMeasure + val accuracy = metrics.accuracy println("Summary Statistics") - println(s"Precision = $precision") - println(s"Recall = $recall") - println(s"F1 Score = $f1Score") + println(s"Accuracy = $accuracy") // Precision by label val labels = metrics.labels diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/PCAExample.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/PCAExample.scala index f7a8136953..eb36697d94 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/PCAExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/PCAExample.scala @@ -26,6 +26,7 @@ import org.apache.spark.mllib.linalg.Vectors import org.apache.spark.mllib.regression.{LabeledPoint, LinearRegressionWithSGD} // $example off$ +@deprecated("Deprecated since LinearRegressionWithSGD is deprecated. 
Use ml.feature.PCA", "2.0.0") object PCAExample { def main(args: Array[String]): Unit = { diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/RegressionMetricsExample.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/RegressionMetricsExample.scala index abeaaa00b5..76cfb804e1 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/RegressionMetricsExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/RegressionMetricsExample.scala @@ -25,6 +25,8 @@ import org.apache.spark.mllib.regression.{LabeledPoint, LinearRegressionWithSGD} // $example off$ import org.apache.spark.sql.SparkSession +@deprecated("Use ml.regression.LinearRegression and the resulting model summary for metrics", + "2.0.0") object RegressionMetricsExample { def main(args: Array[String]): Unit = { val spark = SparkSession |