about summary refs log tree commit diff
diff options
context:
space:
mode:
author: Xiangrui Meng <meng@databricks.com> 2015-08-25 22:49:33 -0700
committer: DB Tsai <dbt@netflix.com> 2015-08-25 22:49:46 -0700
commit 08d390f457f80ffdc2dfce61ea579d9026047f12 (patch)
tree b10d23aa0d90d762bc673ea7d122c57f55fefa06
parent 6d8ebc801799714d297c83be6935b37e26dc2df7 (diff)
download: spark-08d390f457f80ffdc2dfce61ea579d9026047f12.tar.gz
spark-08d390f457f80ffdc2dfce61ea579d9026047f12.tar.bz2
spark-08d390f457f80ffdc2dfce61ea579d9026047f12.zip
[SPARK-10235] [MLLIB] update since versions in mllib.regression
Same as #8421 but for `mllib.regression`.

cc freeman-lab dbtsai

Author: Xiangrui Meng <meng@databricks.com>

Closes #8426 from mengxr/SPARK-10235 and squashes the following commits:

6cd28e4 [Xiangrui Meng] update since versions in mllib.regression

(cherry picked from commit 4657fa1f37d41dd4c7240a960342b68c7c591f48)
Signed-off-by: DB Tsai <dbt@netflix.com>
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala | 6
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala | 16
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/regression/LabeledPoint.scala | 5
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala | 9
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala | 9
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala | 12
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearAlgorithm.scala | 8
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionWithSGD.scala | 11
8 files changed, 47 insertions(+), 29 deletions(-)
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala
index 509f6a2d16..7e3b4d5648 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala
@@ -38,7 +38,9 @@ import org.apache.spark.storage.StorageLevel
*/
@Since("0.8.0")
@DeveloperApi
-abstract class GeneralizedLinearModel(val weights: Vector, val intercept: Double)
+abstract class GeneralizedLinearModel @Since("1.0.0") (
+ @Since("1.0.0") val weights: Vector,
+ @Since("0.8.0") val intercept: Double)
extends Serializable {
/**
@@ -107,7 +109,7 @@ abstract class GeneralizedLinearAlgorithm[M <: GeneralizedLinearModel]
* The optimizer to solve the problem.
*
*/
- @Since("1.0.0")
+ @Since("0.8.0")
def optimizer: Optimizer
/** Whether to add intercept (default: false). */
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala
index 31ca7c2f20..877d31ba41 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala
@@ -50,10 +50,10 @@ import org.apache.spark.sql.SQLContext
*/
@Since("1.3.0")
@Experimental
-class IsotonicRegressionModel (
- val boundaries: Array[Double],
- val predictions: Array[Double],
- val isotonic: Boolean) extends Serializable with Saveable {
+class IsotonicRegressionModel @Since("1.3.0") (
+ @Since("1.3.0") val boundaries: Array[Double],
+ @Since("1.3.0") val predictions: Array[Double],
+ @Since("1.3.0") val isotonic: Boolean) extends Serializable with Saveable {
private val predictionOrd = if (isotonic) Ordering[Double] else Ordering[Double].reverse
@@ -63,7 +63,6 @@ class IsotonicRegressionModel (
/**
* A Java-friendly constructor that takes two Iterable parameters and one Boolean parameter.
- *
*/
@Since("1.4.0")
def this(boundaries: java.lang.Iterable[Double],
@@ -214,8 +213,6 @@ object IsotonicRegressionModel extends Loader[IsotonicRegressionModel] {
}
}
- /**
- */
@Since("1.4.0")
override def load(sc: SparkContext, path: String): IsotonicRegressionModel = {
implicit val formats = DefaultFormats
@@ -256,6 +253,7 @@ object IsotonicRegressionModel extends Loader[IsotonicRegressionModel] {
* @see [[http://en.wikipedia.org/wiki/Isotonic_regression Isotonic regression (Wikipedia)]]
*/
@Experimental
+@Since("1.3.0")
class IsotonicRegression private (private var isotonic: Boolean) extends Serializable {
/**
@@ -263,6 +261,7 @@ class IsotonicRegression private (private var isotonic: Boolean) extends Seriali
*
* @return New instance of IsotonicRegression.
*/
+ @Since("1.3.0")
def this() = this(true)
/**
@@ -271,6 +270,7 @@ class IsotonicRegression private (private var isotonic: Boolean) extends Seriali
* @param isotonic Isotonic (increasing) or antitonic (decreasing) sequence.
* @return This instance of IsotonicRegression.
*/
+ @Since("1.3.0")
def setIsotonic(isotonic: Boolean): this.type = {
this.isotonic = isotonic
this
@@ -286,6 +286,7 @@ class IsotonicRegression private (private var isotonic: Boolean) extends Seriali
* the algorithm is executed.
* @return Isotonic regression model.
*/
+ @Since("1.3.0")
def run(input: RDD[(Double, Double, Double)]): IsotonicRegressionModel = {
val preprocessedInput = if (isotonic) {
input
@@ -311,6 +312,7 @@ class IsotonicRegression private (private var isotonic: Boolean) extends Seriali
* the algorithm is executed.
* @return Isotonic regression model.
*/
+ @Since("1.3.0")
def run(input: JavaRDD[(JDouble, JDouble, JDouble)]): IsotonicRegressionModel = {
run(input.rdd.retag.asInstanceOf[RDD[(Double, Double, Double)]])
}
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/LabeledPoint.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/LabeledPoint.scala
index f7fe1b7b21..c284ad2325 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/LabeledPoint.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/LabeledPoint.scala
@@ -29,11 +29,12 @@ import org.apache.spark.SparkException
*
* @param label Label for this data point.
* @param features List of features for this data point.
- *
*/
@Since("0.8.0")
@BeanInfo
-case class LabeledPoint(label: Double, features: Vector) {
+case class LabeledPoint @Since("1.0.0") (
+ @Since("0.8.0") label: Double,
+ @Since("1.0.0") features: Vector) {
override def toString: String = {
s"($label,$features)"
}
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala
index 556411a366..a9aba173fa 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala
@@ -34,9 +34,9 @@ import org.apache.spark.rdd.RDD
*
*/
@Since("0.8.0")
-class LassoModel (
- override val weights: Vector,
- override val intercept: Double)
+class LassoModel @Since("1.1.0") (
+ @Since("1.0.0") override val weights: Vector,
+ @Since("0.8.0") override val intercept: Double)
extends GeneralizedLinearModel(weights, intercept)
with RegressionModel with Serializable with Saveable with PMMLExportable {
@@ -84,6 +84,7 @@ object LassoModel extends Loader[LassoModel] {
* its corresponding right hand side label y.
* See also the documentation for the precise formulation.
*/
+@Since("0.8.0")
class LassoWithSGD private (
private var stepSize: Double,
private var numIterations: Int,
@@ -93,6 +94,7 @@ class LassoWithSGD private (
private val gradient = new LeastSquaresGradient()
private val updater = new L1Updater()
+ @Since("0.8.0")
override val optimizer = new GradientDescent(gradient, updater)
.setStepSize(stepSize)
.setNumIterations(numIterations)
@@ -103,6 +105,7 @@ class LassoWithSGD private (
* Construct a Lasso object with default parameters: {stepSize: 1.0, numIterations: 100,
* regParam: 0.01, miniBatchFraction: 1.0}.
*/
+ @Since("0.8.0")
def this() = this(1.0, 100, 0.01, 1.0)
override protected def createModel(weights: Vector, intercept: Double) = {
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala
index 00ab06e3ba..4996ace5df 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala
@@ -34,9 +34,9 @@ import org.apache.spark.rdd.RDD
*
*/
@Since("0.8.0")
-class LinearRegressionModel (
- override val weights: Vector,
- override val intercept: Double)
+class LinearRegressionModel @Since("1.1.0") (
+ @Since("1.0.0") override val weights: Vector,
+ @Since("0.8.0") override val intercept: Double)
extends GeneralizedLinearModel(weights, intercept) with RegressionModel with Serializable
with Saveable with PMMLExportable {
@@ -85,6 +85,7 @@ object LinearRegressionModel extends Loader[LinearRegressionModel] {
* its corresponding right hand side label y.
* See also the documentation for the precise formulation.
*/
+@Since("0.8.0")
class LinearRegressionWithSGD private[mllib] (
private var stepSize: Double,
private var numIterations: Int,
@@ -93,6 +94,7 @@ class LinearRegressionWithSGD private[mllib] (
private val gradient = new LeastSquaresGradient()
private val updater = new SimpleUpdater()
+ @Since("0.8.0")
override val optimizer = new GradientDescent(gradient, updater)
.setStepSize(stepSize)
.setNumIterations(numIterations)
@@ -102,6 +104,7 @@ class LinearRegressionWithSGD private[mllib] (
* Construct a LinearRegression object with default parameters: {stepSize: 1.0,
* numIterations: 100, miniBatchFraction: 1.0}.
*/
+ @Since("0.8.0")
def this() = this(1.0, 100, 1.0)
override protected[mllib] def createModel(weights: Vector, intercept: Double) = {
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala
index 21a791d98b..0a44ff559d 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala
@@ -35,9 +35,9 @@ import org.apache.spark.rdd.RDD
*
*/
@Since("0.8.0")
-class RidgeRegressionModel (
- override val weights: Vector,
- override val intercept: Double)
+class RidgeRegressionModel @Since("1.1.0") (
+ @Since("1.0.0") override val weights: Vector,
+ @Since("0.8.0") override val intercept: Double)
extends GeneralizedLinearModel(weights, intercept)
with RegressionModel with Serializable with Saveable with PMMLExportable {
@@ -85,6 +85,7 @@ object RidgeRegressionModel extends Loader[RidgeRegressionModel] {
* its corresponding right hand side label y.
* See also the documentation for the precise formulation.
*/
+@Since("0.8.0")
class RidgeRegressionWithSGD private (
private var stepSize: Double,
private var numIterations: Int,
@@ -94,7 +95,7 @@ class RidgeRegressionWithSGD private (
private val gradient = new LeastSquaresGradient()
private val updater = new SquaredL2Updater()
-
+ @Since("0.8.0")
override val optimizer = new GradientDescent(gradient, updater)
.setStepSize(stepSize)
.setNumIterations(numIterations)
@@ -105,6 +106,7 @@ class RidgeRegressionWithSGD private (
* Construct a RidgeRegression object with default parameters: {stepSize: 1.0, numIterations: 100,
* regParam: 0.01, miniBatchFraction: 1.0}.
*/
+ @Since("0.8.0")
def this() = this(1.0, 100, 0.01, 1.0)
override protected def createModel(weights: Vector, intercept: Double) = {
@@ -134,7 +136,7 @@ object RidgeRegressionWithSGD {
* the number of features in the data.
*
*/
- @Since("0.8.0")
+ @Since("1.0.0")
def train(
input: RDD[LabeledPoint],
numIterations: Int,
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearAlgorithm.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearAlgorithm.scala
index cd3ed8a154..73948b2d98 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearAlgorithm.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearAlgorithm.scala
@@ -22,7 +22,7 @@ import scala.reflect.ClassTag
import org.apache.spark.Logging
import org.apache.spark.annotation.{DeveloperApi, Since}
import org.apache.spark.api.java.JavaSparkContext.fakeClassTag
-import org.apache.spark.mllib.linalg.{Vector, Vectors}
+import org.apache.spark.mllib.linalg.Vector
import org.apache.spark.streaming.api.java.{JavaDStream, JavaPairDStream}
import org.apache.spark.streaming.dstream.DStream
@@ -83,9 +83,8 @@ abstract class StreamingLinearAlgorithm[
* batch of data from the stream.
*
* @param data DStream containing labeled data
- *
*/
- @Since("1.3.0")
+ @Since("1.1.0")
def trainOn(data: DStream[LabeledPoint]): Unit = {
if (model.isEmpty) {
throw new IllegalArgumentException("Model must be initialized before starting training.")
@@ -105,7 +104,6 @@ abstract class StreamingLinearAlgorithm[
/**
* Java-friendly version of `trainOn`.
- *
*/
@Since("1.3.0")
def trainOn(data: JavaDStream[LabeledPoint]): Unit = trainOn(data.dstream)
@@ -129,7 +127,7 @@ abstract class StreamingLinearAlgorithm[
* Java-friendly version of `predictOn`.
*
*/
- @Since("1.1.0")
+ @Since("1.3.0")
def predictOn(data: JavaDStream[Vector]): JavaDStream[java.lang.Double] = {
JavaDStream.fromDStream(predictOn(data.dstream).asInstanceOf[DStream[java.lang.Double]])
}
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionWithSGD.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionWithSGD.scala
index 26654e4a06..fe1d487cdd 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionWithSGD.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionWithSGD.scala
@@ -17,7 +17,7 @@
package org.apache.spark.mllib.regression
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
import org.apache.spark.mllib.linalg.Vector
/**
@@ -41,6 +41,7 @@ import org.apache.spark.mllib.linalg.Vector
* .trainOn(DStream)
*/
@Experimental
+@Since("1.1.0")
class StreamingLinearRegressionWithSGD private[mllib] (
private var stepSize: Double,
private var numIterations: Int,
@@ -54,8 +55,10 @@ class StreamingLinearRegressionWithSGD private[mllib] (
* Initial weights must be set before using trainOn or predictOn
* (see `StreamingLinearAlgorithm`)
*/
+ @Since("1.1.0")
def this() = this(0.1, 50, 1.0)
+ @Since("1.1.0")
val algorithm = new LinearRegressionWithSGD(stepSize, numIterations, miniBatchFraction)
protected var model: Option[LinearRegressionModel] = None
@@ -63,6 +66,7 @@ class StreamingLinearRegressionWithSGD private[mllib] (
/**
* Set the step size for gradient descent. Default: 0.1.
*/
+ @Since("1.1.0")
def setStepSize(stepSize: Double): this.type = {
this.algorithm.optimizer.setStepSize(stepSize)
this
@@ -71,6 +75,7 @@ class StreamingLinearRegressionWithSGD private[mllib] (
/**
* Set the number of iterations of gradient descent to run per update. Default: 50.
*/
+ @Since("1.1.0")
def setNumIterations(numIterations: Int): this.type = {
this.algorithm.optimizer.setNumIterations(numIterations)
this
@@ -79,6 +84,7 @@ class StreamingLinearRegressionWithSGD private[mllib] (
/**
* Set the fraction of each batch to use for updates. Default: 1.0.
*/
+ @Since("1.1.0")
def setMiniBatchFraction(miniBatchFraction: Double): this.type = {
this.algorithm.optimizer.setMiniBatchFraction(miniBatchFraction)
this
@@ -87,6 +93,7 @@ class StreamingLinearRegressionWithSGD private[mllib] (
/**
* Set the initial weights.
*/
+ @Since("1.1.0")
def setInitialWeights(initialWeights: Vector): this.type = {
this.model = Some(algorithm.createModel(initialWeights, 0.0))
this
@@ -95,9 +102,9 @@ class StreamingLinearRegressionWithSGD private[mllib] (
/**
* Set the convergence tolerance. Default: 0.001.
*/
+ @Since("1.5.0")
def setConvergenceTol(tolerance: Double): this.type = {
this.algorithm.optimizer.setConvergenceTol(tolerance)
this
}
-
}