aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Prayag Chandran <prayagchandran@gmail.com> 2015-08-17 17:26:08 -0700
committer: DB Tsai <dbt@netflix.com> 2015-08-17 17:26:08 -0700
commit: 18523c130548f0438dff8d1f25531fd2ed36e517 (patch)
tree: 4ce59a9913989eeabff818c2a59552c8f6bfc289
parent: 0076e8212334c613599dcbc2ac23f49e9e50cc44 (diff)
download: spark-18523c130548f0438dff8d1f25531fd2ed36e517.tar.gz
spark-18523c130548f0438dff8d1f25531fd2ed36e517.tar.bz2
spark-18523c130548f0438dff8d1f25531fd2ed36e517.zip
SPARK-8916 [Documentation, MLlib] Add @since tags to mllib.regression
Added since tags to mllib.regression Author: Prayag Chandran <prayagchandran@gmail.com> Closes #7518 from prayagchandran/sinceTags and squashes the following commits: fa4dda2 [Prayag Chandran] Re-formatting 6c6d584 [Prayag Chandran] Corrected a few tags. Removed few unnecessary tags 1a0365f [Prayag Chandran] Reformating and adding a few more tags 89fdb66 [Prayag Chandran] SPARK-8916 [Documentation, MLlib] Add @since tags to mllib.regression
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala 26
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala 23
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/regression/LabeledPoint.scala 6
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala 21
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala 21
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/regression/RegressionModel.scala 9
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala 21
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearAlgorithm.scala 32
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionWithSGD.scala 21
9 files changed, 168 insertions, 12 deletions
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala
index 6709bd79bc..2980b94de3 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala
@@ -34,6 +34,8 @@ import org.apache.spark.storage.StorageLevel
*
* @param weights Weights computed for every feature.
* @param intercept Intercept computed for this model.
+ *
+ * @since 0.8.0
*/
@DeveloperApi
abstract class GeneralizedLinearModel(val weights: Vector, val intercept: Double)
@@ -53,6 +55,8 @@ abstract class GeneralizedLinearModel(val weights: Vector, val intercept: Double
*
* @param testData RDD representing data points to be predicted
* @return RDD[Double] where each entry contains the corresponding prediction
+ *
+ * @since 1.0.0
*/
def predict(testData: RDD[Vector]): RDD[Double] = {
// A small optimization to avoid serializing the entire model. Only the weightsMatrix
@@ -71,6 +75,8 @@ abstract class GeneralizedLinearModel(val weights: Vector, val intercept: Double
*
* @param testData array representing a single data point
* @return Double prediction from the trained model
+ *
+ * @since 1.0.0
*/
def predict(testData: Vector): Double = {
predictPoint(testData, weights, intercept)
@@ -88,6 +94,8 @@ abstract class GeneralizedLinearModel(val weights: Vector, val intercept: Double
* :: DeveloperApi ::
* GeneralizedLinearAlgorithm implements methods to train a Generalized Linear Model (GLM).
* This class should be extended with an Optimizer to create a new GLM.
+ *
+ * @since 0.8.0
*/
@DeveloperApi
abstract class GeneralizedLinearAlgorithm[M <: GeneralizedLinearModel]
@@ -95,7 +103,11 @@ abstract class GeneralizedLinearAlgorithm[M <: GeneralizedLinearModel]
protected val validators: Seq[RDD[LabeledPoint] => Boolean] = List()
- /** The optimizer to solve the problem. */
+ /**
+ * The optimizer to solve the problem.
+ *
+ * @since 1.0.0
+ */
def optimizer: Optimizer
/** Whether to add intercept (default: false). */
@@ -130,6 +142,8 @@ abstract class GeneralizedLinearAlgorithm[M <: GeneralizedLinearModel]
/**
* The dimension of training features.
+ *
+ * @since 1.4.0
*/
def getNumFeatures: Int = this.numFeatures
@@ -153,12 +167,16 @@ abstract class GeneralizedLinearAlgorithm[M <: GeneralizedLinearModel]
/**
* Get if the algorithm uses addIntercept
+ *
+ * @since 1.4.0
*/
def isAddIntercept: Boolean = this.addIntercept
/**
* Set if the algorithm should add an intercept. Default false.
* We set the default to false because adding the intercept will cause memory allocation.
+ *
+ * @since 0.8.0
*/
def setIntercept(addIntercept: Boolean): this.type = {
this.addIntercept = addIntercept
@@ -167,6 +185,8 @@ abstract class GeneralizedLinearAlgorithm[M <: GeneralizedLinearModel]
/**
* Set if the algorithm should validate data before training. Default true.
+ *
+ * @since 0.8.0
*/
def setValidateData(validateData: Boolean): this.type = {
this.validateData = validateData
@@ -176,6 +196,8 @@ abstract class GeneralizedLinearAlgorithm[M <: GeneralizedLinearModel]
/**
* Run the algorithm with the configured parameters on an input
* RDD of LabeledPoint entries.
+ *
+ * @since 0.8.0
*/
def run(input: RDD[LabeledPoint]): M = {
if (numFeatures < 0) {
@@ -208,6 +230,8 @@ abstract class GeneralizedLinearAlgorithm[M <: GeneralizedLinearModel]
/**
* Run the algorithm with the configured parameters on an input RDD
* of LabeledPoint entries starting from the initial weights provided.
+ *
+ * @since 1.0.0
*/
def run(input: RDD[LabeledPoint], initialWeights: Vector): M = {
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala
index f3b46c75c0..8995591d9e 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala
@@ -46,6 +46,8 @@ import org.apache.spark.sql.SQLContext
* @param predictions Array of predictions associated to the boundaries at the same index.
* Results of isotonic regression and therefore monotone.
* @param isotonic indicates whether this is isotonic or antitonic.
+ *
+ * @since 1.3.0
*/
@Experimental
class IsotonicRegressionModel (
@@ -59,7 +61,11 @@ class IsotonicRegressionModel (
assertOrdered(boundaries)
assertOrdered(predictions)(predictionOrd)
- /** A Java-friendly constructor that takes two Iterable parameters and one Boolean parameter. */
+ /**
+ * A Java-friendly constructor that takes two Iterable parameters and one Boolean parameter.
+ *
+ * @since 1.4.0
+ */
def this(boundaries: java.lang.Iterable[Double],
predictions: java.lang.Iterable[Double],
isotonic: java.lang.Boolean) = {
@@ -83,6 +89,8 @@ class IsotonicRegressionModel (
*
* @param testData Features to be labeled.
* @return Predicted labels.
+ *
+ * @since 1.3.0
*/
def predict(testData: RDD[Double]): RDD[Double] = {
testData.map(predict)
@@ -94,6 +102,8 @@ class IsotonicRegressionModel (
*
* @param testData Features to be labeled.
* @return Predicted labels.
+ *
+ * @since 1.3.0
*/
def predict(testData: JavaDoubleRDD): JavaDoubleRDD = {
JavaDoubleRDD.fromRDD(predict(testData.rdd.retag.asInstanceOf[RDD[Double]]))
@@ -114,6 +124,8 @@ class IsotonicRegressionModel (
* 3) If testData falls between two values in boundary array then prediction is treated
* as piecewise linear function and interpolated value is returned. In case there are
* multiple values with the same boundary then the same rules as in 2) are used.
+ *
+ * @since 1.3.0
*/
def predict(testData: Double): Double = {
@@ -148,6 +160,9 @@ class IsotonicRegressionModel (
/** A convenient method for boundaries called by the Python API. */
private[mllib] def predictionVector: Vector = Vectors.dense(predictions)
+ /**
+ * @since 1.4.0
+ */
override def save(sc: SparkContext, path: String): Unit = {
IsotonicRegressionModel.SaveLoadV1_0.save(sc, path, boundaries, predictions, isotonic)
}
@@ -155,6 +170,9 @@ class IsotonicRegressionModel (
override protected def formatVersion: String = "1.0"
}
+/**
+ * @since 1.4.0
+ */
object IsotonicRegressionModel extends Loader[IsotonicRegressionModel] {
import org.apache.spark.mllib.util.Loader._
@@ -200,6 +218,9 @@ object IsotonicRegressionModel extends Loader[IsotonicRegressionModel] {
}
}
+ /**
+ * @since 1.4.0
+ */
override def load(sc: SparkContext, path: String): IsotonicRegressionModel = {
implicit val formats = DefaultFormats
val (loadedClassName, version, metadata) = loadMetadata(sc, path)
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/LabeledPoint.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/LabeledPoint.scala
index d5fea822ad..8b51011eeb 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/LabeledPoint.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/LabeledPoint.scala
@@ -28,6 +28,8 @@ import org.apache.spark.SparkException
*
* @param label Label for this data point.
* @param features List of features for this data point.
+ *
+ * @since 0.8.0
*/
@BeanInfo
case class LabeledPoint(label: Double, features: Vector) {
@@ -38,11 +40,15 @@ case class LabeledPoint(label: Double, features: Vector) {
/**
* Parser for [[org.apache.spark.mllib.regression.LabeledPoint]].
+ *
+ * @since 1.1.0
*/
object LabeledPoint {
/**
* Parses a string resulted from `LabeledPoint#toString` into
* an [[org.apache.spark.mllib.regression.LabeledPoint]].
+ *
+ * @since 1.1.0
*/
def parse(s: String): LabeledPoint = {
if (s.startsWith("(")) {
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala
index 4f482384f0..03eb589b05 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala
@@ -30,6 +30,8 @@ import org.apache.spark.rdd.RDD
*
* @param weights Weights computed for every feature.
* @param intercept Intercept computed for this model.
+ *
+ * @since 0.8.0
*/
class LassoModel (
override val weights: Vector,
@@ -44,6 +46,9 @@ class LassoModel (
weightMatrix.toBreeze.dot(dataMatrix.toBreeze) + intercept
}
+ /**
+ * @since 1.3.0
+ */
override def save(sc: SparkContext, path: String): Unit = {
GLMRegressionModel.SaveLoadV1_0.save(sc, path, this.getClass.getName, weights, intercept)
}
@@ -51,8 +56,14 @@ class LassoModel (
override protected def formatVersion: String = "1.0"
}
+/**
+ * @since 1.3.0
+ */
object LassoModel extends Loader[LassoModel] {
+ /**
+ * @since 1.3.0
+ */
override def load(sc: SparkContext, path: String): LassoModel = {
val (loadedClassName, version, metadata) = Loader.loadMetadata(sc, path)
// Hard-code class name string in case it changes in the future
@@ -106,6 +117,8 @@ class LassoWithSGD private (
/**
* Top-level methods for calling Lasso.
+ *
+ * @since 0.8.0
*/
object LassoWithSGD {
@@ -123,6 +136,8 @@ object LassoWithSGD {
* @param miniBatchFraction Fraction of data to be used per iteration.
* @param initialWeights Initial set of weights to be used. Array should be equal in size to
* the number of features in the data.
+ *
+ * @since 1.0.0
*/
def train(
input: RDD[LabeledPoint],
@@ -146,6 +161,8 @@ object LassoWithSGD {
* @param stepSize Step size to be used for each iteration of gradient descent.
* @param regParam Regularization parameter.
* @param miniBatchFraction Fraction of data to be used per iteration.
+ *
+ * @since 0.8.0
*/
def train(
input: RDD[LabeledPoint],
@@ -167,6 +184,8 @@ object LassoWithSGD {
* @param regParam Regularization parameter.
* @param numIterations Number of iterations of gradient descent to run.
* @return a LassoModel which has the weights and offset from training.
+ *
+ * @since 0.8.0
*/
def train(
input: RDD[LabeledPoint],
@@ -185,6 +204,8 @@ object LassoWithSGD {
* matrix A as well as the corresponding right hand side label y
* @param numIterations Number of iterations of gradient descent to run.
* @return a LassoModel which has the weights and offset from training.
+ *
+ * @since 0.8.0
*/
def train(
input: RDD[LabeledPoint],
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala
index 9453c4f66c..fb5c220daa 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala
@@ -30,6 +30,8 @@ import org.apache.spark.rdd.RDD
*
* @param weights Weights computed for every feature.
* @param intercept Intercept computed for this model.
+ *
+ * @since 0.8.0
*/
class LinearRegressionModel (
override val weights: Vector,
@@ -44,6 +46,9 @@ class LinearRegressionModel (
weightMatrix.toBreeze.dot(dataMatrix.toBreeze) + intercept
}
+ /**
+ * @since 1.3.0
+ */
override def save(sc: SparkContext, path: String): Unit = {
GLMRegressionModel.SaveLoadV1_0.save(sc, path, this.getClass.getName, weights, intercept)
}
@@ -51,8 +56,14 @@ class LinearRegressionModel (
override protected def formatVersion: String = "1.0"
}
+/**
+ * @since 1.3.0
+ */
object LinearRegressionModel extends Loader[LinearRegressionModel] {
+ /**
+ * @since 1.3.0
+ */
override def load(sc: SparkContext, path: String): LinearRegressionModel = {
val (loadedClassName, version, metadata) = Loader.loadMetadata(sc, path)
// Hard-code class name string in case it changes in the future
@@ -105,6 +116,8 @@ class LinearRegressionWithSGD private[mllib] (
/**
* Top-level methods for calling LinearRegression.
+ *
+ * @since 0.8.0
*/
object LinearRegressionWithSGD {
@@ -121,6 +134,8 @@ object LinearRegressionWithSGD {
* @param miniBatchFraction Fraction of data to be used per iteration.
* @param initialWeights Initial set of weights to be used. Array should be equal in size to
* the number of features in the data.
+ *
+ * @since 1.0.0
*/
def train(
input: RDD[LabeledPoint],
@@ -142,6 +157,8 @@ object LinearRegressionWithSGD {
* @param numIterations Number of iterations of gradient descent to run.
* @param stepSize Step size to be used for each iteration of gradient descent.
* @param miniBatchFraction Fraction of data to be used per iteration.
+ *
+ * @since 0.8.0
*/
def train(
input: RDD[LabeledPoint],
@@ -161,6 +178,8 @@ object LinearRegressionWithSGD {
* @param stepSize Step size to be used for each iteration of Gradient Descent.
* @param numIterations Number of iterations of gradient descent to run.
* @return a LinearRegressionModel which has the weights and offset from training.
+ *
+ * @since 0.8.0
*/
def train(
input: RDD[LabeledPoint],
@@ -178,6 +197,8 @@ object LinearRegressionWithSGD {
* matrix A as well as the corresponding right hand side label y
* @param numIterations Number of iterations of gradient descent to run.
* @return a LinearRegressionModel which has the weights and offset from training.
+ *
+ * @since 0.8.0
*/
def train(
input: RDD[LabeledPoint],
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/RegressionModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/RegressionModel.scala
index 214ac4d0ed..b097fd38fd 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/RegressionModel.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/RegressionModel.scala
@@ -24,6 +24,9 @@ import org.apache.spark.api.java.JavaRDD
import org.apache.spark.mllib.linalg.Vector
import org.apache.spark.rdd.RDD
+/**
+ * @since 0.8.0
+ */
@Experimental
trait RegressionModel extends Serializable {
/**
@@ -31,6 +34,8 @@ trait RegressionModel extends Serializable {
*
* @param testData RDD representing data points to be predicted
* @return RDD[Double] where each entry contains the corresponding prediction
+ *
+ * @since 1.0.0
*/
def predict(testData: RDD[Vector]): RDD[Double]
@@ -39,6 +44,8 @@ trait RegressionModel extends Serializable {
*
* @param testData array representing a single data point
* @return Double prediction from the trained model
+ *
+ * @since 1.0.0
*/
def predict(testData: Vector): Double
@@ -46,6 +53,8 @@ trait RegressionModel extends Serializable {
* Predict values for examples stored in a JavaRDD.
* @param testData JavaRDD representing data points to be predicted
* @return a JavaRDD[java.lang.Double] where each entry contains the corresponding prediction
+ *
+ * @since 1.0.0
*/
def predict(testData: JavaRDD[Vector]): JavaRDD[java.lang.Double] =
predict(testData.rdd).toJavaRDD().asInstanceOf[JavaRDD[java.lang.Double]]
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala
index 7d28ffad45..5bced6b4b7 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala
@@ -31,6 +31,8 @@ import org.apache.spark.rdd.RDD
*
* @param weights Weights computed for every feature.
* @param intercept Intercept computed for this model.
+ *
+ * @since 0.8.0
*/
class RidgeRegressionModel (
override val weights: Vector,
@@ -45,6 +47,9 @@ class RidgeRegressionModel (
weightMatrix.toBreeze.dot(dataMatrix.toBreeze) + intercept
}
+ /**
+ * @since 1.3.0
+ */
override def save(sc: SparkContext, path: String): Unit = {
GLMRegressionModel.SaveLoadV1_0.save(sc, path, this.getClass.getName, weights, intercept)
}
@@ -52,8 +57,14 @@ class RidgeRegressionModel (
override protected def formatVersion: String = "1.0"
}
+/**
+ * @since 1.3.0
+ */
object RidgeRegressionModel extends Loader[RidgeRegressionModel] {
+ /**
+ * @since 1.3.0
+ */
override def load(sc: SparkContext, path: String): RidgeRegressionModel = {
val (loadedClassName, version, metadata) = Loader.loadMetadata(sc, path)
// Hard-code class name string in case it changes in the future
@@ -108,6 +119,8 @@ class RidgeRegressionWithSGD private (
/**
* Top-level methods for calling RidgeRegression.
+ *
+ * @since 0.8.0
*/
object RidgeRegressionWithSGD {
@@ -124,6 +137,8 @@ object RidgeRegressionWithSGD {
* @param miniBatchFraction Fraction of data to be used per iteration.
* @param initialWeights Initial set of weights to be used. Array should be equal in size to
* the number of features in the data.
+ *
+ * @since 1.0.0
*/
def train(
input: RDD[LabeledPoint],
@@ -146,6 +161,8 @@ object RidgeRegressionWithSGD {
* @param stepSize Step size to be used for each iteration of gradient descent.
* @param regParam Regularization parameter.
* @param miniBatchFraction Fraction of data to be used per iteration.
+ *
+ * @since 0.8.0
*/
def train(
input: RDD[LabeledPoint],
@@ -166,6 +183,8 @@ object RidgeRegressionWithSGD {
* @param regParam Regularization parameter.
* @param numIterations Number of iterations of gradient descent to run.
* @return a RidgeRegressionModel which has the weights and offset from training.
+ *
+ * @since 0.8.0
*/
def train(
input: RDD[LabeledPoint],
@@ -183,6 +202,8 @@ object RidgeRegressionWithSGD {
* @param input RDD of (label, array of features) pairs.
* @param numIterations Number of iterations of gradient descent to run.
* @return a RidgeRegressionModel which has the weights and offset from training.
+ *
+ * @since 0.8.0
*/
def train(
input: RDD[LabeledPoint],
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearAlgorithm.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearAlgorithm.scala
index 141052ba81..a2ab95c474 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearAlgorithm.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearAlgorithm.scala
@@ -53,6 +53,8 @@ import org.apache.spark.streaming.dstream.DStream
* It is also ok to call trainOn on different streams; this will update
* the model using each of the different sources, in sequence.
*
+ *
+ * @since 1.1.0
*/
@DeveloperApi
abstract class StreamingLinearAlgorithm[
@@ -65,7 +67,11 @@ abstract class StreamingLinearAlgorithm[
/** The algorithm to use for updating. */
protected val algorithm: A
- /** Return the latest model. */
+ /**
+ * Return the latest model.
+ *
+ * @since 1.1.0
+ */
def latestModel(): M = {
model.get
}
@@ -77,6 +83,8 @@ abstract class StreamingLinearAlgorithm[
* batch of data from the stream.
*
* @param data DStream containing labeled data
+ *
+ * @since 1.1.0
*/
def trainOn(data: DStream[LabeledPoint]): Unit = {
if (model.isEmpty) {
@@ -95,7 +103,11 @@ abstract class StreamingLinearAlgorithm[
}
}
- /** Java-friendly version of `trainOn`. */
+ /**
+ * Java-friendly version of `trainOn`.
+ *
+ * @since 1.3.0
+ */
def trainOn(data: JavaDStream[LabeledPoint]): Unit = trainOn(data.dstream)
/**
@@ -103,6 +115,8 @@ abstract class StreamingLinearAlgorithm[
*
* @param data DStream containing feature vectors
* @return DStream containing predictions
+ *
+ * @since 1.1.0
*/
def predictOn(data: DStream[Vector]): DStream[Double] = {
if (model.isEmpty) {
@@ -111,7 +125,11 @@ abstract class StreamingLinearAlgorithm[
data.map{x => model.get.predict(x)}
}
- /** Java-friendly version of `predictOn`. */
+ /**
+ * Java-friendly version of `predictOn`.
+ *
+ * @since 1.3.0
+ */
def predictOn(data: JavaDStream[Vector]): JavaDStream[java.lang.Double] = {
JavaDStream.fromDStream(predictOn(data.dstream).asInstanceOf[DStream[java.lang.Double]])
}
@@ -121,6 +139,8 @@ abstract class StreamingLinearAlgorithm[
* @param data DStream containing feature vectors
* @tparam K key type
* @return DStream containing the input keys and the predictions as values
+ *
+ * @since 1.1.0
*/
def predictOnValues[K: ClassTag](data: DStream[(K, Vector)]): DStream[(K, Double)] = {
if (model.isEmpty) {
@@ -130,7 +150,11 @@ abstract class StreamingLinearAlgorithm[
}
- /** Java-friendly version of `predictOnValues`. */
+ /**
+ * Java-friendly version of `predictOnValues`.
+ *
+ * @since 1.3.0
+ */
def predictOnValues[K](data: JavaPairDStream[K, Vector]): JavaPairDStream[K, java.lang.Double] = {
implicit val tag = fakeClassTag[K]
JavaPairDStream.fromPairDStream(
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionWithSGD.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionWithSGD.scala
index c6d04464a1..537a05274e 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionWithSGD.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionWithSGD.scala
@@ -39,7 +39,6 @@ import org.apache.spark.mllib.linalg.Vector
* .setNumIterations(10)
* .setInitialWeights(Vectors.dense(...))
* .trainOn(DStream)
- *
*/
@Experimental
class StreamingLinearRegressionWithSGD private[mllib] (
@@ -61,31 +60,41 @@ class StreamingLinearRegressionWithSGD private[mllib] (
protected var model: Option[LinearRegressionModel] = None
- /** Set the step size for gradient descent. Default: 0.1. */
+ /**
+ * Set the step size for gradient descent. Default: 0.1.
+ */
def setStepSize(stepSize: Double): this.type = {
this.algorithm.optimizer.setStepSize(stepSize)
this
}
- /** Set the number of iterations of gradient descent to run per update. Default: 50. */
+ /**
+ * Set the number of iterations of gradient descent to run per update. Default: 50.
+ */
def setNumIterations(numIterations: Int): this.type = {
this.algorithm.optimizer.setNumIterations(numIterations)
this
}
- /** Set the fraction of each batch to use for updates. Default: 1.0. */
+ /**
+ * Set the fraction of each batch to use for updates. Default: 1.0.
+ */
def setMiniBatchFraction(miniBatchFraction: Double): this.type = {
this.algorithm.optimizer.setMiniBatchFraction(miniBatchFraction)
this
}
- /** Set the initial weights. */
+ /**
+ * Set the initial weights.
+ */
def setInitialWeights(initialWeights: Vector): this.type = {
this.model = Some(algorithm.createModel(initialWeights, 0.0))
this
}
- /** Set the convergence tolerance. */
+ /**
+ * Set the convergence tolerance.
+ */
def setConvergenceTol(tolerance: Double): this.type = {
this.algorithm.optimizer.setConvergenceTol(tolerance)
this