aboutsummaryrefslogtreecommitdiff
path: root/mllib
diff options
context:
space:
mode:
authorJoseph K. Bradley <joseph@databricks.com>2015-11-24 09:54:55 -0800
committerXiangrui Meng <meng@databricks.com>2015-11-24 09:54:55 -0800
commit9e24ba667e43290fbaa3cacb93cf5d9be790f1fd (patch)
tree784c08b162086a0c561b544d04abac8b2df0d656 /mllib
parent56a0aba0a60326ba026056c9a23f3f6ec7258c19 (diff)
downloadspark-9e24ba667e43290fbaa3cacb93cf5d9be790f1fd.tar.gz
spark-9e24ba667e43290fbaa3cacb93cf5d9be790f1fd.tar.bz2
spark-9e24ba667e43290fbaa3cacb93cf5d9be790f1fd.zip
[SPARK-11521][ML][DOC] Document that Logistic, Linear Regression summaries ignore weight col
Doc for 1.6 that the summaries mostly ignore the weight column. To be corrected for 1.7 CC: mengxr thunterdb Author: Joseph K. Bradley <joseph@databricks.com> Closes #9927 from jkbradley/linregsummary-doc.
Diffstat (limited to 'mllib')
-rw-r--r--mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala18
-rw-r--r--mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala15
2 files changed, 33 insertions, 0 deletions
diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
index 418bbdc9a0..d320d64dd9 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
@@ -755,23 +755,35 @@ class BinaryLogisticRegressionSummary private[classification] (
* Returns the receiver operating characteristic (ROC) curve,
* which is a DataFrame having two fields (FPR, TPR)
* with (0.0, 0.0) prepended and (1.0, 1.0) appended to it.
+ *
+ * Note: This ignores instance weights (setting all to 1.0) from [[LogisticRegression.weightCol]].
+ * This will change in later Spark versions.
* @see http://en.wikipedia.org/wiki/Receiver_operating_characteristic
*/
@transient lazy val roc: DataFrame = binaryMetrics.roc().toDF("FPR", "TPR")
/**
* Computes the area under the receiver operating characteristic (ROC) curve.
+ *
+ * Note: This ignores instance weights (setting all to 1.0) from [[LogisticRegression.weightCol]].
+ * This will change in later Spark versions.
*/
lazy val areaUnderROC: Double = binaryMetrics.areaUnderROC()
/**
* Returns the precision-recall curve, which is a DataFrame containing
* two fields recall, precision with (0.0, 1.0) prepended to it.
+ *
+ * Note: This ignores instance weights (setting all to 1.0) from [[LogisticRegression.weightCol]].
+ * This will change in later Spark versions.
*/
@transient lazy val pr: DataFrame = binaryMetrics.pr().toDF("recall", "precision")
/**
* Returns a DataFrame with two fields (threshold, F-Measure) representing the curve with beta = 1.0.
+ *
+ * Note: This ignores instance weights (setting all to 1.0) from [[LogisticRegression.weightCol]].
+ * This will change in later Spark versions.
*/
@transient lazy val fMeasureByThreshold: DataFrame = {
binaryMetrics.fMeasureByThreshold().toDF("threshold", "F-Measure")
@@ -781,6 +793,9 @@ class BinaryLogisticRegressionSummary private[classification] (
* Returns a dataframe with two fields (threshold, precision) curve.
* Every possible probability obtained in transforming the dataset is used
* as a threshold in calculating the precision.
+ *
+ * Note: This ignores instance weights (setting all to 1.0) from [[LogisticRegression.weightCol]].
+ * This will change in later Spark versions.
*/
@transient lazy val precisionByThreshold: DataFrame = {
binaryMetrics.precisionByThreshold().toDF("threshold", "precision")
@@ -790,6 +805,9 @@ class BinaryLogisticRegressionSummary private[classification] (
* Returns a dataframe with two fields (threshold, recall) curve.
* Every possible probability obtained in transforming the dataset is used
* as a threshold in calculating the recall.
+ *
+ * Note: This ignores instance weights (setting all to 1.0) from [[LogisticRegression.weightCol]].
+ * This will change in later Spark versions.
*/
@transient lazy val recallByThreshold: DataFrame = {
binaryMetrics.recallByThreshold().toDF("threshold", "recall")
diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
index 70ccec766c..1db91666f2 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
@@ -540,6 +540,9 @@ class LinearRegressionSummary private[regression] (
* Returns the explained variance regression score.
* explainedVariance = 1 - variance(y - \hat{y}) / variance(y)
* Reference: [[http://en.wikipedia.org/wiki/Explained_variation]]
+ *
+ * Note: This ignores instance weights (setting all to 1.0) from [[LinearRegression.weightCol]].
+ * This will change in later Spark versions.
*/
@Since("1.5.0")
val explainedVariance: Double = metrics.explainedVariance
@@ -547,6 +550,9 @@ class LinearRegressionSummary private[regression] (
/**
* Returns the mean absolute error, which is a risk function corresponding to the
* expected value of the absolute error loss or l1-norm loss.
+ *
+ * Note: This ignores instance weights (setting all to 1.0) from [[LinearRegression.weightCol]].
+ * This will change in later Spark versions.
*/
@Since("1.5.0")
val meanAbsoluteError: Double = metrics.meanAbsoluteError
@@ -554,6 +560,9 @@ class LinearRegressionSummary private[regression] (
/**
* Returns the mean squared error, which is a risk function corresponding to the
* expected value of the squared error loss or quadratic loss.
+ *
+ * Note: This ignores instance weights (setting all to 1.0) from [[LinearRegression.weightCol]].
+ * This will change in later Spark versions.
*/
@Since("1.5.0")
val meanSquaredError: Double = metrics.meanSquaredError
@@ -561,6 +570,9 @@ class LinearRegressionSummary private[regression] (
/**
* Returns the root mean squared error, which is defined as the square root of
* the mean squared error.
+ *
+ * Note: This ignores instance weights (setting all to 1.0) from [[LinearRegression.weightCol]].
+ * This will change in later Spark versions.
*/
@Since("1.5.0")
val rootMeanSquaredError: Double = metrics.rootMeanSquaredError
@@ -568,6 +580,9 @@ class LinearRegressionSummary private[regression] (
/**
* Returns R^2^, the coefficient of determination.
* Reference: [[http://en.wikipedia.org/wiki/Coefficient_of_determination]]
+ *
+ * Note: This ignores instance weights (setting all to 1.0) from [[LinearRegression.weightCol]].
+ * This will change in later Spark versions.
*/
@Since("1.5.0")
val r2: Double = metrics.r2