diff options
author | Yanbo Liang <ybliang8@gmail.com> | 2015-05-07 11:18:32 -0700 |
---|---|---|
committer | Xiangrui Meng <meng@databricks.com> | 2015-05-07 11:18:32 -0700 |
commit | 1712a7c7057bf6dd5da8aea1d7fbecdf96ea4b32 (patch) | |
tree | a1130a8afd9ca2c9683434791a68a9e2a3379a09 /mllib | |
parent | 068c3158ac0c66e20d90a45e6a2a0b93108e08d5 (diff) | |
download | spark-1712a7c7057bf6dd5da8aea1d7fbecdf96ea4b32.tar.gz spark-1712a7c7057bf6dd5da8aea1d7fbecdf96ea4b32.tar.bz2 spark-1712a7c7057bf6dd5da8aea1d7fbecdf96ea4b32.zip |
[SPARK-6093] [MLLIB] Add RegressionMetrics in PySpark/MLlib
https://issues.apache.org/jira/browse/SPARK-6093
Author: Yanbo Liang <ybliang8@gmail.com>
Closes #5941 from yanboliang/spark-6093 and squashes the following commits:
6934af3 [Yanbo Liang] change to @property
aac3bc5 [Yanbo Liang] Add RegressionMetrics in PySpark/MLlib
Diffstat (limited to 'mllib')
-rw-r--r-- | mllib/src/main/scala/org/apache/spark/mllib/evaluation/RegressionMetrics.scala | 9 |
1 file changed, 9 insertions, 0 deletions
```diff
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RegressionMetrics.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RegressionMetrics.scala
index 693117d820..e577bf87f8 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RegressionMetrics.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RegressionMetrics.scala
@@ -22,6 +22,7 @@ import org.apache.spark.rdd.RDD
 import org.apache.spark.Logging
 import org.apache.spark.mllib.linalg.Vectors
 import org.apache.spark.mllib.stat.{MultivariateStatisticalSummary, MultivariateOnlineSummarizer}
+import org.apache.spark.sql.DataFrame
 
 /**
  * :: Experimental ::
@@ -33,6 +34,14 @@ import org.apache.spark.mllib.stat.{MultivariateStatisticalSummary, Multivariate
 class RegressionMetrics(predictionAndObservations: RDD[(Double, Double)]) extends Logging {
 
   /**
+   * An auxiliary constructor taking a DataFrame.
+   * @param predictionAndObservations a DataFrame with two double columns:
+   *                                  prediction and observation
+   */
+  private[mllib] def this(predictionAndObservations: DataFrame) =
+    this(predictionAndObservations.map(r => (r.getDouble(0), r.getDouble(1))))
+
+  /**
    * Use MultivariateOnlineSummarizer to calculate summary statistics of observations and errors.
    */
   private lazy val summary: MultivariateStatisticalSummary = {
```