author     petz2000 <petz2000@gmail.com>        2015-07-21 08:50:43 -0700
committer  Xiangrui Meng <meng@databricks.com>  2015-07-21 08:50:43 -0700
commit     df4ddb3120be28df381c11a36312620e58034b93
tree       be6b07fdf377f3e4a911a6dff9c8e5231856ad2e
parent     be5c5d3741256697cc76938a8ed6f609eb2d4b11
[SPARK-8915] [DOCUMENTATION, MLLIB] Added @since tags to mllib.classification
Created since tags for methods in mllib.classification

Author: petz2000 <petz2000@gmail.com>

Closes #7371 from petz2000/add_since_mllib.classification and squashes the following commits:

39fe291 [petz2000] Removed whitespace in block comment
c9b1e03 [petz2000] Removed @since tags again from protected and private methods
cd759b6 [petz2000] Added @since tags to methods
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/classification/ClassificationModel.scala |  3
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala  | 17
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala          |  3
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala                 | 16
4 files changed, 39 insertions(+), 0 deletions(-)
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/ClassificationModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/ClassificationModel.scala
index 35a0db76f3..ba73024e3c 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/classification/ClassificationModel.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/ClassificationModel.scala
@@ -36,6 +36,7 @@ trait ClassificationModel extends Serializable {
*
* @param testData RDD representing data points to be predicted
* @return an RDD[Double] where each entry contains the corresponding prediction
+ * @since 0.8.0
*/
def predict(testData: RDD[Vector]): RDD[Double]
@@ -44,6 +45,7 @@ trait ClassificationModel extends Serializable {
*
* @param testData array representing a single data point
* @return predicted category from the trained model
+ * @since 0.8.0
*/
def predict(testData: Vector): Double
@@ -51,6 +53,7 @@ trait ClassificationModel extends Serializable {
* Predict values for examples stored in a JavaRDD.
* @param testData JavaRDD representing data points to be predicted
* @return a JavaRDD[java.lang.Double] where each entry contains the corresponding prediction
+ * @since 0.8.0
*/
def predict(testData: JavaRDD[Vector]): JavaRDD[java.lang.Double] =
predict(testData.rdd).toJavaRDD().asInstanceOf[JavaRDD[java.lang.Double]]
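
For orientation, the two predict overloads tagged above are the trait's entire public surface, so a minimal sketch of calling them through a concrete model may help. The local SparkContext, the toy data, and the choice of NaiveBayes as the implementing model are illustrative assumptions, not part of this commit.

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.mllib.classification.{ClassificationModel, NaiveBayes}
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.regression.LabeledPoint

object PredictSketch {
  def main(args: Array[String]): Unit = {
    // Local context purely for illustration.
    val sc = new SparkContext(
      new SparkConf().setAppName("predict-sketch").setMaster("local[2]"))

    // Toy two-class data with nonnegative (count-like) features.
    val training = sc.parallelize(Seq(
      LabeledPoint(1.0, Vectors.dense(4.0, 5.0)),
      LabeledPoint(0.0, Vectors.dense(0.1, 0.2))))

    // Any trained classifier can be handled through the trait.
    val model: ClassificationModel = NaiveBayes.train(training)

    // predict(Vector): Double -- single point (@since 0.8.0 above).
    println(model.predict(Vectors.dense(3.5, 4.5)))

    // predict(RDD[Vector]): RDD[Double] -- bulk scoring (@since 0.8.0 above).
    model.predict(training.map(_.features)).collect().foreach(println)

    sc.stop()
  }
}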
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala
index 2df4d21e8c..268642ac6a 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala
@@ -85,6 +85,7 @@ class LogisticRegressionModel (
* in Binary Logistic Regression. An example with prediction score greater than or equal to
* this threshold is identified as a positive, and negative otherwise. The default value is 0.5.
* It is only used for binary classification.
+ * @since 1.0.0
*/
@Experimental
def setThreshold(threshold: Double): this.type = {
@@ -96,6 +97,7 @@ class LogisticRegressionModel (
* :: Experimental ::
* Returns the threshold (if any) used for converting raw prediction scores into 0/1 predictions.
* It is only used for binary classification.
+ * @since 1.3.0
*/
@Experimental
def getThreshold: Option[Double] = threshold
@@ -104,6 +106,7 @@ class LogisticRegressionModel (
* :: Experimental ::
* Clears the threshold so that `predict` will output raw prediction scores.
* It is only used for binary classification.
+ * @since 1.0.0
*/
@Experimental
def clearThreshold(): this.type = {
@@ -155,6 +158,9 @@ class LogisticRegressionModel (
}
}
+ /**
+ * @since 1.3.0
+ */
override def save(sc: SparkContext, path: String): Unit = {
GLMClassificationModel.SaveLoadV1_0.save(sc, path, this.getClass.getName,
numFeatures, numClasses, weights, intercept, threshold)
@@ -162,6 +168,9 @@ class LogisticRegressionModel (
override protected def formatVersion: String = "1.0"
+ /**
+ * @since 1.4.0
+ */
override def toString: String = {
s"${super.toString}, numClasses = ${numClasses}, threshold = ${threshold.getOrElse("None")}"
}
@@ -169,6 +178,9 @@ class LogisticRegressionModel (
object LogisticRegressionModel extends Loader[LogisticRegressionModel] {
+ /**
+ * @since 1.3.0
+ */
override def load(sc: SparkContext, path: String): LogisticRegressionModel = {
val (loadedClassName, version, metadata) = Loader.loadMetadata(sc, path)
// Hard-code class name string in case it changes in the future
@@ -249,6 +261,7 @@ object LogisticRegressionWithSGD {
* @param miniBatchFraction Fraction of data to be used per iteration.
* @param initialWeights Initial set of weights to be used. Array should be equal in size to
* the number of features in the data.
+ * @since 1.0.0
*/
def train(
input: RDD[LabeledPoint],
@@ -271,6 +284,7 @@ object LogisticRegressionWithSGD {
* @param stepSize Step size to be used for each iteration of gradient descent.
* @param miniBatchFraction Fraction of data to be used per iteration.
+ * @since 1.0.0
*/
def train(
input: RDD[LabeledPoint],
@@ -292,6 +306,7 @@ object LogisticRegressionWithSGD {
* @param numIterations Number of iterations of gradient descent to run.
* @return a LogisticRegressionModel which has the weights and offset from training.
+ * @since 1.0.0
*/
def train(
input: RDD[LabeledPoint],
@@ -309,6 +324,7 @@ object LogisticRegressionWithSGD {
* @param input RDD of (label, array of features) pairs.
* @param numIterations Number of iterations of gradient descent to run.
* @return a LogisticRegressionModel which has the weights and offset from training.
+ * @since 1.0.0
*/
def train(
input: RDD[LabeledPoint],
@@ -345,6 +361,7 @@ class LogisticRegressionWithLBFGS
* Set the number of possible outcomes for a k-class classification problem in
* Multinomial Logistic Regression.
* By default, it is binary logistic regression so k will be set to 2.
+ * @since 1.3.0
*/
@Experimental
def setNumClasses(numClasses: Int): this.type = {
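
As a quick illustration of the LogisticRegressionModel methods annotated in this file (train, the threshold accessors, save/load, and toString), here is a minimal sketch. The SparkContext setup, toy data, and the /tmp save path are illustrative assumptions, not part of this commit.

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.mllib.classification.{LogisticRegressionModel, LogisticRegressionWithSGD}
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.regression.LabeledPoint

object LogisticRegressionSketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(
      new SparkConf().setAppName("lr-sketch").setMaster("local[2]"))

    val data = sc.parallelize(Seq(
      LabeledPoint(1.0, Vectors.dense(2.0, 3.0)),
      LabeledPoint(0.0, Vectors.dense(-1.0, -2.0))))

    // train(input, numIterations) -- overload tagged @since 1.0.0 above.
    val model = LogisticRegressionWithSGD.train(data, 100)

    // setThreshold / clearThreshold toggle predict between 0/1 labels
    // and raw scores (tagged @since 1.0.0 above).
    model.setThreshold(0.5)
    println(model.predict(Vectors.dense(1.5, 2.5)))  // 0/1 label
    model.clearThreshold()
    println(model.predict(Vectors.dense(1.5, 2.5)))  // raw probability

    // save / load round trip (tagged @since 1.3.0 above); path is illustrative.
    model.save(sc, "/tmp/lr-model")
    val reloaded = LogisticRegressionModel.load(sc, "/tmp/lr-model")
    println(reloaded.toString)  // overridden toString, tagged @since 1.4.0 above

    sc.stop()
  }
}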
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala
index 8cf4e15efe..2df91c0942 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala
@@ -444,6 +444,7 @@ object NaiveBayes {
*
* @param input RDD of `(label, array of features)` pairs. Every vector should be a frequency
* vector or a count vector.
+ * @since 0.9.0
*/
def train(input: RDD[LabeledPoint]): NaiveBayesModel = {
new NaiveBayes().run(input)
@@ -459,6 +460,7 @@ object NaiveBayes {
* @param input RDD of `(label, array of features)` pairs. Every vector should be a frequency
* vector or a count vector.
* @param lambda The smoothing parameter
+ * @since 0.9.0
*/
def train(input: RDD[LabeledPoint], lambda: Double): NaiveBayesModel = {
new NaiveBayes(lambda, Multinomial).run(input)
@@ -481,6 +483,7 @@ object NaiveBayes {
*
* @param modelType The type of NB model to fit from the enumeration NaiveBayesModels, can be
* multinomial or bernoulli
+ * @since 0.9.0
*/
def train(input: RDD[LabeledPoint], lambda: Double, modelType: String): NaiveBayesModel = {
require(supportedModelTypes.contains(modelType),
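
The three train overloads tagged @since 0.9.0 above differ only in how much is defaulted; a minimal sketch of all three follows. The SparkContext setup and the binary toy vectors (chosen so the bernoulli variant is valid) are illustrative assumptions, not part of this commit.

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.mllib.classification.NaiveBayes
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.regression.LabeledPoint

object NaiveBayesSketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(
      new SparkConf().setAppName("nb-sketch").setMaster("local[2]"))

    // Count vectors, as the scaladoc above requires; 0/1 values also
    // satisfy the bernoulli variant.
    val data = sc.parallelize(Seq(
      LabeledPoint(0.0, Vectors.dense(1.0, 0.0, 0.0)),
      LabeledPoint(1.0, Vectors.dense(0.0, 1.0, 1.0))))

    val m1 = NaiveBayes.train(data)                    // default lambda = 1.0
    val m2 = NaiveBayes.train(data, lambda = 0.5)      // explicit smoothing
    val m3 = NaiveBayes.train(data, 0.5, "bernoulli")  // explicit model type

    println(m3.predict(Vectors.dense(0.0, 1.0, 1.0)))
    sc.stop()
  }
}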
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala
index 3484855607..5b54feeb10 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala
@@ -46,6 +46,7 @@ class SVMModel (
* Sets the threshold that separates positive predictions from negative predictions. An example
with prediction score greater than or equal to this threshold is identified as a positive,
* and negative otherwise. The default value is 0.0.
+ * @since 1.3.0
*/
@Experimental
def setThreshold(threshold: Double): this.type = {
@@ -56,6 +57,7 @@ class SVMModel (
/**
* :: Experimental ::
* Returns the threshold (if any) used for converting raw prediction scores into 0/1 predictions.
+ * @since 1.3.0
*/
@Experimental
def getThreshold: Option[Double] = threshold
@@ -63,6 +65,7 @@ class SVMModel (
/**
* :: Experimental ::
* Clears the threshold so that `predict` will output raw prediction scores.
+ * @since 1.0.0
*/
@Experimental
def clearThreshold(): this.type = {
@@ -81,6 +84,9 @@ class SVMModel (
}
}
+ /**
+ * @since 1.3.0
+ */
override def save(sc: SparkContext, path: String): Unit = {
GLMClassificationModel.SaveLoadV1_0.save(sc, path, this.getClass.getName,
numFeatures = weights.size, numClasses = 2, weights, intercept, threshold)
@@ -88,6 +94,9 @@ class SVMModel (
override protected def formatVersion: String = "1.0"
+ /**
+ * @since 1.4.0
+ */
override def toString: String = {
s"${super.toString}, numClasses = 2, threshold = ${threshold.getOrElse("None")}"
}
@@ -95,6 +104,9 @@ class SVMModel (
object SVMModel extends Loader[SVMModel] {
+ /**
+ * @since 1.3.0
+ */
override def load(sc: SparkContext, path: String): SVMModel = {
val (loadedClassName, version, metadata) = Loader.loadMetadata(sc, path)
// Hard-code class name string in case it changes in the future
@@ -173,6 +185,7 @@ object SVMWithSGD {
* @param miniBatchFraction Fraction of data to be used per iteration.
* @param initialWeights Initial set of weights to be used. Array should be equal in size to
* the number of features in the data.
+ * @since 0.8.0
*/
def train(
input: RDD[LabeledPoint],
@@ -196,6 +209,7 @@ object SVMWithSGD {
* @param stepSize Step size to be used for each iteration of gradient descent.
* @param regParam Regularization parameter.
* @param miniBatchFraction Fraction of data to be used per iteration.
+ * @since 0.8.0
*/
def train(
input: RDD[LabeledPoint],
@@ -217,6 +231,7 @@ object SVMWithSGD {
* @param regParam Regularization parameter.
* @param numIterations Number of iterations of gradient descent to run.
* @return a SVMModel which has the weights and offset from training.
+ * @since 0.8.0
*/
def train(
input: RDD[LabeledPoint],
@@ -235,6 +250,7 @@ object SVMWithSGD {
* @param input RDD of (label, array of features) pairs.
* @param numIterations Number of iterations of gradient descent to run.
* @return a SVMModel which has the weights and offset from training.
+ * @since 0.8.0
*/
def train(input: RDD[LabeledPoint], numIterations: Int): SVMModel = {
train(input, numIterations, 1.0, 0.01, 1.0)
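
Finally, a minimal sketch exercising the SVMModel methods annotated in this file: the convenience train overload, the threshold methods, and the save/load pair. As before, the SparkContext setup, toy data, and /tmp path are illustrative assumptions, not part of this commit.

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.mllib.classification.{SVMModel, SVMWithSGD}
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.regression.LabeledPoint

object SVMSketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(
      new SparkConf().setAppName("svm-sketch").setMaster("local[2]"))

    val data = sc.parallelize(Seq(
      LabeledPoint(1.0, Vectors.dense(1.0, 1.0)),
      LabeledPoint(0.0, Vectors.dense(-1.0, -1.0))))

    // train(input, numIterations) -- overload tagged @since 0.8.0 above.
    val model = SVMWithSGD.train(data, numIterations = 100)

    // Raw margins vs. 0/1 labels, via the threshold methods tagged above.
    model.clearThreshold()
    println(model.predict(Vectors.dense(0.5, 0.5)))  // raw margin
    model.setThreshold(0.0)
    println(model.predict(Vectors.dense(0.5, 0.5)))  // 0 or 1

    // Persistence pair tagged @since 1.3.0 above; path is illustrative.
    model.save(sc, "/tmp/svm-model")
    val reloaded = SVMModel.load(sc, "/tmp/svm-model")
    println(reloaded.toString)

    sc.stop()
  }
}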