about summary refs log tree commit diff
path: root/python
diff options
context:
space:
mode:
authornoelsmith <mail@noelsmith.com>2015-10-20 15:05:02 -0700
committerXiangrui Meng <meng@databricks.com>2015-10-20 15:05:02 -0700
commit82e9d9c81b7a8af94e312035328da872684f6e67 (patch)
tree65a0d99d935b9959950b641452b8d5cf2739d0fe /python
parent06e6b765d0c747b773d7f3be28ddb0543c955a1f (diff)
downloadspark-82e9d9c81b7a8af94e312035328da872684f6e67.tar.gz
spark-82e9d9c81b7a8af94e312035328da872684f6e67.tar.bz2
spark-82e9d9c81b7a8af94e312035328da872684f6e67.zip
[SPARK-10272][PYSPARK][MLLIB] Added @since tags to pyspark.mllib.evaluation
Duplicated the since decorator from pyspark.sql into pyspark (also tweaked to handle functions without docstrings). Added since to public methods + "versionadded::" to classes (derived from the git file history in pyspark). Note - I added also the tags to MultilabelMetrics even though it isn't declared as public in the __all__ statement... if that's incorrect - I'll remove. Author: noelsmith <mail@noelsmith.com> Closes #8628 from noel-smith/SPARK-10272-since-mllib-evalutation.
Diffstat (limited to 'python')
-rw-r--r--python/pyspark/mllib/evaluation.py41
1 file changed, 41 insertions, 0 deletions
diff --git a/python/pyspark/mllib/evaluation.py b/python/pyspark/mllib/evaluation.py
index a90e5c50e5..8c87ee9df2 100644
--- a/python/pyspark/mllib/evaluation.py
+++ b/python/pyspark/mllib/evaluation.py
@@ -15,6 +15,7 @@
# limitations under the License.
#
+from pyspark import since
from pyspark.mllib.common import JavaModelWrapper, callMLlibFunc
from pyspark.sql import SQLContext
from pyspark.sql.types import StructField, StructType, DoubleType, IntegerType, ArrayType
@@ -37,6 +38,8 @@ class BinaryClassificationMetrics(JavaModelWrapper):
>>> metrics.areaUnderPR
0.83...
>>> metrics.unpersist()
+
+ .. versionadded:: 1.4.0
"""
def __init__(self, scoreAndLabels):
@@ -50,6 +53,7 @@ class BinaryClassificationMetrics(JavaModelWrapper):
super(BinaryClassificationMetrics, self).__init__(java_model)
@property
+ @since('1.4.0')
def areaUnderROC(self):
"""
Computes the area under the receiver operating characteristic
@@ -58,12 +62,14 @@ class BinaryClassificationMetrics(JavaModelWrapper):
return self.call("areaUnderROC")
@property
+ @since('1.4.0')
def areaUnderPR(self):
"""
Computes the area under the precision-recall curve.
"""
return self.call("areaUnderPR")
+ @since('1.4.0')
def unpersist(self):
"""
Unpersists intermediate RDDs used in the computation.
@@ -91,6 +97,8 @@ class RegressionMetrics(JavaModelWrapper):
0.61...
>>> metrics.r2
0.94...
+
+ .. versionadded:: 1.4.0
"""
def __init__(self, predictionAndObservations):
@@ -104,6 +112,7 @@ class RegressionMetrics(JavaModelWrapper):
super(RegressionMetrics, self).__init__(java_model)
@property
+ @since('1.4.0')
def explainedVariance(self):
"""
Returns the explained variance regression score.
@@ -112,6 +121,7 @@ class RegressionMetrics(JavaModelWrapper):
return self.call("explainedVariance")
@property
+ @since('1.4.0')
def meanAbsoluteError(self):
"""
Returns the mean absolute error, which is a risk function corresponding to the
@@ -120,6 +130,7 @@ class RegressionMetrics(JavaModelWrapper):
return self.call("meanAbsoluteError")
@property
+ @since('1.4.0')
def meanSquaredError(self):
"""
Returns the mean squared error, which is a risk function corresponding to the
@@ -128,6 +139,7 @@ class RegressionMetrics(JavaModelWrapper):
return self.call("meanSquaredError")
@property
+ @since('1.4.0')
def rootMeanSquaredError(self):
"""
Returns the root mean squared error, which is defined as the square root of
@@ -136,6 +148,7 @@ class RegressionMetrics(JavaModelWrapper):
return self.call("rootMeanSquaredError")
@property
+ @since('1.4.0')
def r2(self):
"""
Returns R^2^, the coefficient of determination.
@@ -178,6 +191,8 @@ class MulticlassMetrics(JavaModelWrapper):
0.66...
>>> metrics.weightedFMeasure(2.0)
0.65...
+
+ .. versionadded:: 1.4.0
"""
def __init__(self, predictionAndLabels):
@@ -190,6 +205,7 @@ class MulticlassMetrics(JavaModelWrapper):
java_model = java_class(df._jdf)
super(MulticlassMetrics, self).__init__(java_model)
+ @since('1.4.0')
def confusionMatrix(self):
"""
Returns confusion matrix: predicted classes are in columns,
@@ -197,18 +213,21 @@ class MulticlassMetrics(JavaModelWrapper):
"""
return self.call("confusionMatrix")
+ @since('1.4.0')
def truePositiveRate(self, label):
"""
Returns true positive rate for a given label (category).
"""
return self.call("truePositiveRate", label)
+ @since('1.4.0')
def falsePositiveRate(self, label):
"""
Returns false positive rate for a given label (category).
"""
return self.call("falsePositiveRate", label)
+ @since('1.4.0')
def precision(self, label=None):
"""
Returns precision or precision for a given label (category) if specified.
@@ -218,6 +237,7 @@ class MulticlassMetrics(JavaModelWrapper):
else:
return self.call("precision", float(label))
+ @since('1.4.0')
def recall(self, label=None):
"""
Returns recall or recall for a given label (category) if specified.
@@ -227,6 +247,7 @@ class MulticlassMetrics(JavaModelWrapper):
else:
return self.call("recall", float(label))
+ @since('1.4.0')
def fMeasure(self, label=None, beta=None):
"""
Returns f-measure or f-measure for a given label (category) if specified.
@@ -243,6 +264,7 @@ class MulticlassMetrics(JavaModelWrapper):
return self.call("fMeasure", label, beta)
@property
+ @since('1.4.0')
def weightedTruePositiveRate(self):
"""
Returns weighted true positive rate.
@@ -251,6 +273,7 @@ class MulticlassMetrics(JavaModelWrapper):
return self.call("weightedTruePositiveRate")
@property
+ @since('1.4.0')
def weightedFalsePositiveRate(self):
"""
Returns weighted false positive rate.
@@ -258,6 +281,7 @@ class MulticlassMetrics(JavaModelWrapper):
return self.call("weightedFalsePositiveRate")
@property
+ @since('1.4.0')
def weightedRecall(self):
"""
Returns weighted averaged recall.
@@ -266,12 +290,14 @@ class MulticlassMetrics(JavaModelWrapper):
return self.call("weightedRecall")
@property
+ @since('1.4.0')
def weightedPrecision(self):
"""
Returns weighted averaged precision.
"""
return self.call("weightedPrecision")
+ @since('1.4.0')
def weightedFMeasure(self, beta=None):
"""
Returns weighted averaged f-measure.
@@ -307,6 +333,7 @@ class RankingMetrics(JavaModelWrapper):
>>> metrics.ndcgAt(10)
0.48...
+ .. versionadded:: 1.4.0
"""
def __init__(self, predictionAndLabels):
@@ -317,6 +344,7 @@ class RankingMetrics(JavaModelWrapper):
java_model = callMLlibFunc("newRankingMetrics", df._jdf)
super(RankingMetrics, self).__init__(java_model)
+ @since('1.4.0')
def precisionAt(self, k):
"""
Compute the average precision of all the queries, truncated at ranking position k.
@@ -331,6 +359,7 @@ class RankingMetrics(JavaModelWrapper):
return self.call("precisionAt", int(k))
@property
+ @since('1.4.0')
def meanAveragePrecision(self):
"""
Returns the mean average precision (MAP) of all the queries.
@@ -339,6 +368,7 @@ class RankingMetrics(JavaModelWrapper):
"""
return self.call("meanAveragePrecision")
+ @since('1.4.0')
def ndcgAt(self, k):
"""
Compute the average NDCG value of all the queries, truncated at ranking position k.
@@ -388,6 +418,8 @@ class MultilabelMetrics(JavaModelWrapper):
0.28...
>>> metrics.accuracy
0.54...
+
+ .. versionadded:: 1.4.0
"""
def __init__(self, predictionAndLabels):
@@ -399,6 +431,7 @@ class MultilabelMetrics(JavaModelWrapper):
java_model = java_class(df._jdf)
super(MultilabelMetrics, self).__init__(java_model)
+ @since('1.4.0')
def precision(self, label=None):
"""
Returns precision or precision for a given label (category) if specified.
@@ -408,6 +441,7 @@ class MultilabelMetrics(JavaModelWrapper):
else:
return self.call("precision", float(label))
+ @since('1.4.0')
def recall(self, label=None):
"""
Returns recall or recall for a given label (category) if specified.
@@ -417,6 +451,7 @@ class MultilabelMetrics(JavaModelWrapper):
else:
return self.call("recall", float(label))
+ @since('1.4.0')
def f1Measure(self, label=None):
"""
Returns f1Measure or f1Measure for a given label (category) if specified.
@@ -427,6 +462,7 @@ class MultilabelMetrics(JavaModelWrapper):
return self.call("f1Measure", float(label))
@property
+ @since('1.4.0')
def microPrecision(self):
"""
Returns micro-averaged label-based precision.
@@ -435,6 +471,7 @@ class MultilabelMetrics(JavaModelWrapper):
return self.call("microPrecision")
@property
+ @since('1.4.0')
def microRecall(self):
"""
Returns micro-averaged label-based recall.
@@ -443,6 +480,7 @@ class MultilabelMetrics(JavaModelWrapper):
return self.call("microRecall")
@property
+ @since('1.4.0')
def microF1Measure(self):
"""
Returns micro-averaged label-based f1-measure.
@@ -451,6 +489,7 @@ class MultilabelMetrics(JavaModelWrapper):
return self.call("microF1Measure")
@property
+ @since('1.4.0')
def hammingLoss(self):
"""
Returns Hamming-loss.
@@ -458,6 +497,7 @@ class MultilabelMetrics(JavaModelWrapper):
return self.call("hammingLoss")
@property
+ @since('1.4.0')
def subsetAccuracy(self):
"""
Returns subset accuracy.
@@ -466,6 +506,7 @@ class MultilabelMetrics(JavaModelWrapper):
return self.call("subsetAccuracy")
@property
+ @since('1.4.0')
def accuracy(self):
"""
Returns accuracy.