diff options
author | Yanbo Liang <ybliang8@gmail.com> | 2015-05-07 11:18:32 -0700 |
---|---|---|
committer | Xiangrui Meng <meng@databricks.com> | 2015-05-07 11:18:38 -0700 |
commit | ef835dc526b685886781d454c46e837644d8f446 (patch) | |
tree | 2ee3cdc3d8dd08f870f14a12b1a78418037fa0d9 /mllib | |
parent | 3038b26f1e8ad2c4bd90b630005c75e3cd862e1d (diff) | |
download | spark-ef835dc526b685886781d454c46e837644d8f446.tar.gz spark-ef835dc526b685886781d454c46e837644d8f446.tar.bz2 spark-ef835dc526b685886781d454c46e837644d8f446.zip |
[SPARK-6093] [MLLIB] Add RegressionMetrics in PySpark/MLlib
https://issues.apache.org/jira/browse/SPARK-6093
Author: Yanbo Liang <ybliang8@gmail.com>
Closes #5941 from yanboliang/spark-6093 and squashes the following commits:
6934af3 [Yanbo Liang] change to @property
aac3bc5 [Yanbo Liang] Add RegressionMetrics in PySpark/MLlib
(cherry picked from commit 1712a7c7057bf6dd5da8aea1d7fbecdf96ea4b32)
Signed-off-by: Xiangrui Meng <meng@databricks.com>
Diffstat (limited to 'mllib')
-rw-r--r-- | mllib/src/main/scala/org/apache/spark/mllib/evaluation/RegressionMetrics.scala | 9 |
1 file changed, 9 insertions, 0 deletions
```diff
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RegressionMetrics.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RegressionMetrics.scala
index 693117d820..e577bf87f8 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RegressionMetrics.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RegressionMetrics.scala
@@ -22,6 +22,7 @@ import org.apache.spark.rdd.RDD
 import org.apache.spark.Logging
 import org.apache.spark.mllib.linalg.Vectors
 import org.apache.spark.mllib.stat.{MultivariateStatisticalSummary, MultivariateOnlineSummarizer}
+import org.apache.spark.sql.DataFrame
 
 /**
  * :: Experimental ::
@@ -33,6 +34,14 @@ import org.apache.spark.mllib.stat.{MultivariateStatisticalSummary, Multivariate
 class RegressionMetrics(predictionAndObservations: RDD[(Double, Double)]) extends Logging {
 
   /**
+   * An auxiliary constructor taking a DataFrame.
+   * @param predictionAndObservations a DataFrame with two double columns:
+   *                                  prediction and observation
+   */
+  private[mllib] def this(predictionAndObservations: DataFrame) =
+    this(predictionAndObservations.map(r => (r.getDouble(0), r.getDouble(1))))
+
+  /**
    * Use MultivariateOnlineSummarizer to calculate summary statistics of observations and errors.
    */
   private lazy val summary: MultivariateStatisticalSummary = {
```