From b0a03feef2cf4daa7642ec7f4dc479dbd473b581 Mon Sep 17 00:00:00 2001 From: Sean Owen Date: Thu, 26 May 2016 14:25:28 -0700 Subject: [SPARK-15457][MLLIB][ML] Eliminate some warnings from MLlib about deprecations ## What changes were proposed in this pull request? Several classes and methods have been deprecated and are creating lots of build warnings in branch-2.0. This issue is to identify and fix those items: * WithSGD classes: Change to make class not deprecated, object deprecated, and public class constructor deprecated. Any public use will require a deprecated API. We need to keep a non-deprecated private API since we cannot eliminate certain uses: Python API, streaming algs, and examples. * Use in PythonMLlibAPI: Change to using private constructors * Streaming algs: No warnings after we un-deprecate the classes * Examples: Deprecate or change ones which use deprecated APIs * MulticlassMetrics fields (precision, etc.) * LinearRegressionSummary.model field ## How was this patch tested? Existing tests. Checked for warnings manually. Author: Sean Owen Author: Joseph K. Bradley Closes #13314 from jkbradley/warning-cleanups. --- .../examples/mllib/JavaLogisticRegressionWithLBFGSExample.java | 4 ++-- .../mllib/JavaMulticlassClassificationMetricsExample.java | 4 +--- .../org/apache/spark/examples/ml/DecisionTreeExample.scala | 2 +- .../org/apache/spark/examples/mllib/DecisionTreeRunner.scala | 10 ++++------ .../spark/examples/mllib/GradientBoostedTreesRunner.scala | 5 ++--- .../org/apache/spark/examples/mllib/LinearRegression.scala | 1 + .../spark/examples/mllib/LinearRegressionWithSGDExample.scala | 1 + .../examples/mllib/LogisticRegressionWithLBFGSExample.scala | 4 ++-- .../apache/spark/examples/mllib/MulticlassMetricsExample.scala | 8 ++------ .../scala/org/apache/spark/examples/mllib/PCAExample.scala | 1 + .../apache/spark/examples/mllib/RegressionMetricsExample.scala | 2 ++ 11 files changed, 19 insertions(+), 23 deletions(-) (limited to 'examples') diff --git a/examples/src/main/java/org/apache/spark/examples/mllib/JavaLogisticRegressionWithLBFGSExample.java b/examples/src/main/java/org/apache/spark/examples/mllib/JavaLogisticRegressionWithLBFGSExample.java index 9d8e4a90db..7fc371ec0f 100644 --- a/examples/src/main/java/org/apache/spark/examples/mllib/JavaLogisticRegressionWithLBFGSExample.java +++ b/examples/src/main/java/org/apache/spark/examples/mllib/JavaLogisticRegressionWithLBFGSExample.java @@ -65,8 +65,8 @@ public class JavaLogisticRegressionWithLBFGSExample { // Get evaluation metrics. MulticlassMetrics metrics = new MulticlassMetrics(predictionAndLabels.rdd()); - double precision = metrics.precision(); - System.out.println("Precision = " + precision); + double accuracy = metrics.accuracy(); + System.out.println("Accuracy = " + accuracy); // Save and load model model.save(sc, "target/tmp/javaLogisticRegressionWithLBFGSModel"); diff --git a/examples/src/main/java/org/apache/spark/examples/mllib/JavaMulticlassClassificationMetricsExample.java b/examples/src/main/java/org/apache/spark/examples/mllib/JavaMulticlassClassificationMetricsExample.java index 5247c9c748..e84a3a712d 100644 --- a/examples/src/main/java/org/apache/spark/examples/mllib/JavaMulticlassClassificationMetricsExample.java +++ b/examples/src/main/java/org/apache/spark/examples/mllib/JavaMulticlassClassificationMetricsExample.java @@ -68,9 +68,7 @@ public class JavaMulticlassClassificationMetricsExample { System.out.println("Confusion matrix: \n" + confusion); // Overall statistics - System.out.println("Precision = " + metrics.precision()); - System.out.println("Recall = " + metrics.recall()); - System.out.println("F1 Score = " + metrics.fMeasure()); + System.out.println("Accuracy = " + metrics.accuracy()); // Stats by labels for (int i = 0; i < metrics.labels().length; i++) { diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/DecisionTreeExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/DecisionTreeExample.scala index 5e51dbad76..de4474555d 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/DecisionTreeExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/DecisionTreeExample.scala @@ -321,7 +321,7 @@ object DecisionTreeExample { case None => throw new RuntimeException( "Unknown failure when indexing labels for classification.") } - val accuracy = new MulticlassMetrics(predictions.zip(labels)).precision + val accuracy = new MulticlassMetrics(predictions.zip(labels)).accuracy println(s" Accuracy ($numClasses classes): $accuracy") } diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/DecisionTreeRunner.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/DecisionTreeRunner.scala index ee811d3aa1..a85aa2cac9 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/DecisionTreeRunner.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/DecisionTreeRunner.scala @@ -295,11 +295,10 @@ object DecisionTreeRunner { } if (params.algo == Classification) { val trainAccuracy = - new MulticlassMetrics(training.map(lp => (model.predict(lp.features), lp.label))) - .precision + new MulticlassMetrics(training.map(lp => (model.predict(lp.features), lp.label))).accuracy println(s"Train accuracy = $trainAccuracy") val testAccuracy = - new MulticlassMetrics(test.map(lp => (model.predict(lp.features), lp.label))).precision + new MulticlassMetrics(test.map(lp => (model.predict(lp.features), lp.label))).accuracy println(s"Test accuracy = $testAccuracy") } if (params.algo == Regression) { @@ -322,11 +321,10 @@ object DecisionTreeRunner { println(model) // Print model summary. } val trainAccuracy = - new MulticlassMetrics(training.map(lp => (model.predict(lp.features), lp.label))) - .precision + new MulticlassMetrics(training.map(lp => (model.predict(lp.features), lp.label))).accuracy println(s"Train accuracy = $trainAccuracy") val testAccuracy = - new MulticlassMetrics(test.map(lp => (model.predict(lp.features), lp.label))).precision + new MulticlassMetrics(test.map(lp => (model.predict(lp.features), lp.label))).accuracy println(s"Test accuracy = $testAccuracy") } if (params.algo == Regression) { diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/GradientBoostedTreesRunner.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/GradientBoostedTreesRunner.scala index b0144ef533..90e4687c1f 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/GradientBoostedTreesRunner.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/GradientBoostedTreesRunner.scala @@ -120,11 +120,10 @@ object GradientBoostedTreesRunner { println(model) // Print model summary. } val trainAccuracy = - new MulticlassMetrics(training.map(lp => (model.predict(lp.features), lp.label))) - .precision + new MulticlassMetrics(training.map(lp => (model.predict(lp.features), lp.label))).accuracy println(s"Train accuracy = $trainAccuracy") val testAccuracy = - new MulticlassMetrics(test.map(lp => (model.predict(lp.features), lp.label))).precision + new MulticlassMetrics(test.map(lp => (model.predict(lp.features), lp.label))).accuracy println(s"Test accuracy = $testAccuracy") } else if (params.algo == "Regression") { val startTime = System.nanoTime() diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/LinearRegression.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/LinearRegression.scala index f87611f5d4..a70203028c 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/LinearRegression.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/LinearRegression.scala @@ -34,6 +34,7 @@ import org.apache.spark.mllib.util.MLUtils * A synthetic dataset can be found at `data/mllib/sample_linear_regression_data.txt`. * If you use it as a template to create your own app, please use `spark-submit` to submit your app. */ +@deprecated("Use ml.regression.LinearRegression or LBFGS", "2.0.0") object LinearRegression { object RegType extends Enumeration { diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/LinearRegressionWithSGDExample.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/LinearRegressionWithSGDExample.scala index 669868787e..d399618094 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/LinearRegressionWithSGDExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/LinearRegressionWithSGDExample.scala @@ -26,6 +26,7 @@ import org.apache.spark.mllib.regression.LinearRegressionModel import org.apache.spark.mllib.regression.LinearRegressionWithSGD // $example off$ +@deprecated("Use ml.regression.LinearRegression or LBFGS", "2.0.0") object LinearRegressionWithSGDExample { def main(args: Array[String]): Unit = { diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/LogisticRegressionWithLBFGSExample.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/LogisticRegressionWithLBFGSExample.scala index 632a2d537e..31ba740ad4 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/LogisticRegressionWithLBFGSExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/LogisticRegressionWithLBFGSExample.scala @@ -54,8 +54,8 @@ object LogisticRegressionWithLBFGSExample { // Get evaluation metrics. val metrics = new MulticlassMetrics(predictionAndLabels) - val precision = metrics.precision - println("Precision = " + precision) + val accuracy = metrics.accuracy + println(s"Accuracy = $accuracy") // Save and load model model.save(sc, "target/tmp/scalaLogisticRegressionWithLBFGSModel") diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/MulticlassMetricsExample.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/MulticlassMetricsExample.scala index 4f925ede24..12394c867e 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/MulticlassMetricsExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/MulticlassMetricsExample.scala @@ -59,13 +59,9 @@ object MulticlassMetricsExample { println(metrics.confusionMatrix) // Overall Statistics - val precision = metrics.precision - val recall = metrics.recall // same as true positive rate - val f1Score = metrics.fMeasure + val accuracy = metrics.accuracy println("Summary Statistics") - println(s"Precision = $precision") - println(s"Recall = $recall") - println(s"F1 Score = $f1Score") + println(s"Accuracy = $accuracy") // Precision by label val labels = metrics.labels diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/PCAExample.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/PCAExample.scala index f7a8136953..eb36697d94 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/PCAExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/PCAExample.scala @@ -26,6 +26,7 @@ import org.apache.spark.mllib.linalg.Vectors import org.apache.spark.mllib.regression.{LabeledPoint, LinearRegressionWithSGD} // $example off$ +@deprecated("Deprecated since LinearRegressionWithSGD is deprecated. Use ml.feature.PCA", "2.0.0") object PCAExample { def main(args: Array[String]): Unit = { diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/RegressionMetricsExample.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/RegressionMetricsExample.scala index abeaaa00b5..76cfb804e1 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/RegressionMetricsExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/RegressionMetricsExample.scala @@ -25,6 +25,8 @@ import org.apache.spark.mllib.regression.{LabeledPoint, LinearRegressionWithSGD} // $example off$ import org.apache.spark.sql.SparkSession +@deprecated("Use ml.regression.LinearRegression and the resulting model summary for metrics", + "2.0.0") object RegressionMetricsExample { def main(args: Array[String]): Unit = { val spark = SparkSession -- cgit v1.2.3