From 18faa588ca11190890d2eb569d7497fbb25eee5c Mon Sep 17 00:00:00 2001
From: Nick Pentreath
Date: Wed, 22 Jun 2016 10:05:25 -0700
Subject: [SPARK-16127][ML][PYSPARK] Audit @Since annotations related to
 ml.linalg

## What changes were proposed in this pull request?

[SPARK-14615](https://issues.apache.org/jira/browse/SPARK-14615) and #12627 changed `spark.ml` pipelines to use the new `ml.linalg` classes for `Vector`/`Matrix`. Some `Since` annotations for public methods/vals were not updated accordingly to `2.0.0`. This PR updates them.

## How was this patch tested?

Existing unit tests.

Author: Nick Pentreath

Closes #13840 from MLnick/SPARK-16127-ml-linalg-since.
---
 .../apache/spark/ml/classification/LogisticRegression.scala |  2 +-
 .../ml/classification/MultilayerPerceptronClassifier.scala  |  2 +-
 .../org/apache/spark/ml/classification/NaiveBayes.scala     |  4 ++--
 .../main/scala/org/apache/spark/ml/clustering/KMeans.scala  |  2 +-
 .../src/main/scala/org/apache/spark/ml/clustering/LDA.scala |  4 ++--
 .../org/apache/spark/ml/feature/ElementwiseProduct.scala    |  6 +++---
 .../main/scala/org/apache/spark/ml/feature/Normalizer.scala | 12 ++++++------
 .../org/apache/spark/ml/feature/PolynomialExpansion.scala   | 12 ++++++------
 .../main/scala/org/apache/spark/ml/feature/Word2Vec.scala   |  2 +-
 .../apache/spark/ml/regression/AFTSurvivalRegression.scala  |  6 +++---
 .../org/apache/spark/ml/regression/IsotonicRegression.scala |  4 ++--
 .../org/apache/spark/ml/regression/LinearRegression.scala   |  6 +++---
 python/pyspark/ml/classification.py                         |  8 ++++----
 python/pyspark/ml/regression.py                             |  8 ++++++--
 14 files changed, 41 insertions(+), 37 deletions(-)

diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
index 2fa8fbcc76..be69d46eeb 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
@@ -482,7 +482,7 @@ object LogisticRegression extends DefaultParamsReadable[LogisticRegression] {
 @Experimental
 class LogisticRegressionModel private[spark] (
     @Since("1.4.0") override val uid: String,
-    @Since("1.6.0") val coefficients: Vector,
+    @Since("2.0.0") val coefficients: Vector,
     @Since("1.3.0") val intercept: Double)
   extends ProbabilisticClassificationModel[Vector, LogisticRegressionModel]
   with LogisticRegressionParams with MLWritable {
diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifier.scala
index 700542117e..76ef32aa3d 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifier.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifier.scala
@@ -296,7 +296,7 @@ object MultilayerPerceptronClassifier
 class MultilayerPerceptronClassificationModel private[ml] (
     @Since("1.5.0") override val uid: String,
     @Since("1.5.0") val layers: Array[Int],
-    @Since("1.5.0") val weights: Vector)
+    @Since("2.0.0") val weights: Vector)
   extends PredictionModel[Vector, MultilayerPerceptronClassificationModel]
   with Serializable with MLWritable {
 
diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/NaiveBayes.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/NaiveBayes.scala
index a9d493032b..7c340312df 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/NaiveBayes.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/NaiveBayes.scala
@@ -130,8 +130,8 @@ object NaiveBayes extends DefaultParamsReadable[NaiveBayes] {
 @Experimental
 class NaiveBayesModel private[ml] (
     @Since("1.5.0") override val uid: String,
-    @Since("1.5.0") val pi: Vector,
-    @Since("1.5.0") val theta: Matrix)
+    @Since("2.0.0") val pi: Vector,
+    @Since("2.0.0") val theta: Matrix)
   extends ProbabilisticClassificationModel[Vector, NaiveBayesModel]
   with NaiveBayesParams with MLWritable {
 
diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala
index 6f63d04818..9fb7d6a9a2 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala
@@ -131,7 +131,7 @@ class KMeansModel private[ml] (
 
   private[clustering] def predict(features: Vector): Int = parentModel.predict(features)
 
-  @Since("1.5.0")
+  @Since("2.0.0")
   def clusterCenters: Array[Vector] = parentModel.clusterCenters.map(_.asML)
 
   /**
diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/LDA.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/LDA.scala
index 609e50eb49..b333d59258 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/clustering/LDA.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/LDA.scala
@@ -432,7 +432,7 @@ sealed abstract class LDAModel private[ml] (
    * If Online LDA was used and [[optimizeDocConcentration]] was set to false,
    * then this returns the fixed (given) value for the [[docConcentration]] parameter.
    */
-  @Since("1.6.0")
+  @Since("2.0.0")
   def estimatedDocConcentration: Vector = getModel.docConcentration
 
   /**
@@ -444,7 +444,7 @@ sealed abstract class LDAModel private[ml] (
    * the Expectation-Maximization ("em") [[optimizer]], then this method could involve
    * collecting a large amount of data to the driver (on the order of vocabSize x k).
    */
-  @Since("1.6.0")
+  @Since("2.0.0")
   def topicsMatrix: Matrix = oldLocalModel.topicsMatrix.asML
 
   /** Indicates whether this instance is of type [[DistributedLDAModel]] */
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala
index 92fefb1e6c..d07833e580 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala
@@ -33,11 +33,11 @@ import org.apache.spark.sql.types.DataType
  * multiplier.
  */
 @Experimental
-@Since("2.0.0")
-class ElementwiseProduct @Since("2.0.0") (@Since("2.0.0") override val uid: String)
+@Since("1.4.0")
+class ElementwiseProduct @Since("1.4.0") (@Since("1.4.0") override val uid: String)
   extends UnaryTransformer[Vector, Vector, ElementwiseProduct] with DefaultParamsWritable {
 
-  @Since("2.0.0")
+  @Since("1.4.0")
   def this() = this(Identifiable.randomUID("elemProd"))
 
   /**
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Normalizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Normalizer.scala
index 9a4e682890..f9cbad90c9 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/Normalizer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Normalizer.scala
@@ -31,11 +31,11 @@ import org.apache.spark.sql.types.DataType
  * Normalize a vector to have unit norm using the given p-norm.
  */
 @Experimental
-@Since("2.0.0")
-class Normalizer @Since("2.0.0") (@Since("2.0.0") override val uid: String)
+@Since("1.4.0")
+class Normalizer @Since("1.4.0") (@Since("1.4.0") override val uid: String)
   extends UnaryTransformer[Vector, Vector, Normalizer] with DefaultParamsWritable {
 
-  @Since("2.0.0")
+  @Since("1.4.0")
   def this() = this(Identifiable.randomUID("normalizer"))
 
   /**
@@ -43,17 +43,17 @@ class Normalizer @Since("2.0.0") (@Since("2.0.0") override val uid: String)
    * (default: p = 2)
    * @group param
    */
-  @Since("2.0.0")
+  @Since("1.4.0")
   val p = new DoubleParam(this, "p", "the p norm value", ParamValidators.gtEq(1))
 
   setDefault(p -> 2.0)
 
   /** @group getParam */
-  @Since("2.0.0")
+  @Since("1.4.0")
   def getP: Double = $(p)
 
   /** @group setParam */
-  @Since("2.0.0")
+  @Since("1.4.0")
   def setP(value: Double): this.type = set(p, value)
 
   override protected def createTransformFunc: Vector => Vector = {
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala
index 026014c7d6..7b35fdeaf4 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala
@@ -35,11 +35,11 @@ import org.apache.spark.sql.types.DataType
  * `(x, y)`, if we want to expand it with degree 2, then we get `(x, x * x, y, x * y, y * y)`.
  */
 @Experimental
-@Since("2.0.0")
-class PolynomialExpansion @Since("2.0.0") (@Since("2.0.0") override val uid: String)
+@Since("1.4.0")
+class PolynomialExpansion @Since("1.4.0") (@Since("1.4.0") override val uid: String)
   extends UnaryTransformer[Vector, Vector, PolynomialExpansion] with DefaultParamsWritable {
 
-  @Since("2.0.0")
+  @Since("1.4.0")
   def this() = this(Identifiable.randomUID("poly"))
 
   /**
@@ -47,18 +47,18 @@ class PolynomialExpansion @Since("2.0.0") (@Since("2.0.0") override val uid: Str
    * Default: 2
    * @group param
    */
-  @Since("2.0.0")
+  @Since("1.4.0")
   val degree = new IntParam(this, "degree", "the polynomial degree to expand (>= 1)",
     ParamValidators.gtEq(1))
 
   setDefault(degree -> 2)
 
   /** @group getParam */
-  @Since("2.0.0")
+  @Since("1.4.0")
   def getDegree: Int = $(degree)
 
   /** @group setParam */
-  @Since("2.0.0")
+  @Since("1.4.0")
   def setDegree(value: Int): this.type = set(degree, value)
 
   override protected def createTransformFunc: Vector => Vector = { v =>
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala
index a74d31ff9d..0cac3fa2d7 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala
@@ -240,7 +240,7 @@ class Word2VecModel private[ml] (
    * of the word. Returns a dataframe with the words and the cosine similarities between the
    * synonyms and the given word vector.
    */
-  @Since("1.5.0")
+  @Since("2.0.0")
   def findSynonyms(word: Vector, num: Int): DataFrame = {
     val spark = SparkSession.builder().getOrCreate()
     spark.createDataFrame(wordVectors.findSynonyms(word, num)).toDF("word", "similarity")
diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala
index fe65e3e810..2dbac49ccf 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala
@@ -286,7 +286,7 @@ object AFTSurvivalRegression extends DefaultParamsReadable[AFTSurvivalRegression
 @Since("1.6.0")
 class AFTSurvivalRegressionModel private[ml] (
     @Since("1.6.0") override val uid: String,
-    @Since("1.6.0") val coefficients: Vector,
+    @Since("2.0.0") val coefficients: Vector,
     @Since("1.6.0") val intercept: Double,
     @Since("1.6.0") val scale: Double)
   extends Model[AFTSurvivalRegressionModel] with AFTSurvivalRegressionParams with MLWritable {
@@ -307,7 +307,7 @@ class AFTSurvivalRegressionModel private[ml] (
   @Since("1.6.0")
   def setQuantilesCol(value: String): this.type = set(quantilesCol, value)
 
-  @Since("1.6.0")
+  @Since("2.0.0")
   def predictQuantiles(features: Vector): Vector = {
     // scale parameter for the Weibull distribution of lifetime
     val lambda = math.exp(BLAS.dot(coefficients, features) + intercept)
@@ -319,7 +319,7 @@ class AFTSurvivalRegressionModel private[ml] (
     Vectors.dense(quantiles)
   }
 
-  @Since("1.6.0")
+  @Since("2.0.0")
   def predict(features: Vector): Double = {
     math.exp(BLAS.dot(coefficients, features) + intercept)
   }
diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala
index f05b47eda7..9b9429a328 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala
@@ -221,14 +221,14 @@ class IsotonicRegressionModel private[ml] (
   def setFeatureIndex(value: Int): this.type = set(featureIndex, value)
 
   /** Boundaries in increasing order for which predictions are known. */
-  @Since("1.5.0")
+  @Since("2.0.0")
   def boundaries: Vector = Vectors.dense(oldModel.boundaries)
 
   /**
    * Predictions associated with the boundaries at the same index, monotone because of isotonic
    * regression.
    */
-  @Since("1.5.0")
+  @Since("2.0.0")
   def predictions: Vector = Vectors.dense(oldModel.predictions)
 
   @Since("1.5.0")
diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
index 5e8ef1b375..2723f74724 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
@@ -387,9 +387,9 @@ object LinearRegression extends DefaultParamsReadable[LinearRegression] {
 @Since("1.3.0")
 @Experimental
 class LinearRegressionModel private[ml] (
-    override val uid: String,
-    val coefficients: Vector,
-    val intercept: Double)
+    @Since("1.4.0") override val uid: String,
+    @Since("2.0.0") val coefficients: Vector,
+    @Since("1.3.0") val intercept: Double)
   extends RegressionModel[Vector, LinearRegressionModel]
   with LinearRegressionParams with MLWritable {
 
diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py
index d6d713ca53..c035942f73 100644
--- a/python/pyspark/ml/classification.py
+++ b/python/pyspark/ml/classification.py
@@ -224,7 +224,7 @@ class LogisticRegressionModel(JavaModel, JavaMLWritable, JavaMLReadable):
     """
 
     @property
-    @since("1.6.0")
+    @since("2.0.0")
     def coefficients(self):
         """
         Model coefficients.
@@ -1051,7 +1051,7 @@ class NaiveBayesModel(JavaModel, JavaMLWritable, JavaMLReadable):
     """
 
     @property
-    @since("1.5.0")
+    @since("2.0.0")
     def pi(self):
         """
         log of class priors.
@@ -1059,7 +1059,7 @@ class NaiveBayesModel(JavaModel, JavaMLWritable, JavaMLReadable):
         return self._call_java("pi")
 
     @property
-    @since("1.5.0")
+    @since("2.0.0")
     def theta(self):
         """
         log of class conditional probabilities.
@@ -1260,7 +1260,7 @@ class MultilayerPerceptronClassificationModel(JavaModel, JavaMLWritable, JavaMLR
         return self._call_java("javaLayers")
 
     @property
-    @since("1.6.0")
+    @since("2.0.0")
     def weights(self):
         """
         vector of initial weights for the model that consists of the weights of layers.
diff --git a/python/pyspark/ml/regression.py b/python/pyspark/ml/regression.py
index 29efd6a852..8de9ad8531 100644
--- a/python/pyspark/ml/regression.py
+++ b/python/pyspark/ml/regression.py
@@ -138,7 +138,7 @@ class LinearRegressionModel(JavaModel, JavaMLWritable, JavaMLReadable):
     """
 
     @property
-    @since("1.6.0")
+    @since("2.0.0")
     def coefficients(self):
         """
         Model coefficients.
@@ -511,6 +511,7 @@ class IsotonicRegressionModel(JavaModel, JavaMLWritable, JavaMLReadable):
     """
 
     @property
+    @since("2.0.0")
     def boundaries(self):
         """
         Boundaries in increasing order for which predictions are known.
@@ -518,6 +519,7 @@ class IsotonicRegressionModel(JavaModel, JavaMLWritable, JavaMLReadable):
         return self._call_java("boundaries")
 
     @property
+    @since("2.0.0")
     def predictions(self):
         """
         Predictions associated with the boundaries at the same index, monotone because of isotonic
@@ -1248,7 +1250,7 @@ class AFTSurvivalRegressionModel(JavaModel, JavaMLWritable, JavaMLReadable):
     """
 
     @property
-    @since("1.6.0")
+    @since("2.0.0")
    def coefficients(self):
         """
         Model coefficients.
@@ -1271,12 +1273,14 @@ class AFTSurvivalRegressionModel(JavaModel, JavaMLWritable, JavaMLReadable):
         """
         return self._call_java("scale")
 
+    @since("2.0.0")
     def predictQuantiles(self, features):
         """
         Predicted Quantiles
         """
         return self._call_java("predictQuantiles", features)
 
+    @since("2.0.0")
     def predict(self, features):
         """
         Predicted value
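
For readers auditing similar changes, the convention this patch enforces can be summarized as: a public member whose signature switched to the new `ml.linalg` types in 2.0.0 gets `@Since("2.0.0")`, while members with unchanged signatures keep the version in which they first appeared. Below is a minimal, self-contained Scala sketch of that rule; it is illustrative only and not part of the patch. `ExampleModel` is hypothetical, the `Vector` type assumes the `spark-mllib-local` artifact on the classpath, and because Spark's own `org.apache.spark.annotation.Since` is internal to the Spark codebase (`private[spark]`), the sketch declares a local stand-in annotation so it compiles standalone.

```scala
import scala.annotation.StaticAnnotation
import org.apache.spark.ml.linalg.{Vector, Vectors}

// Local stand-in for org.apache.spark.annotation.Since, which is internal
// to Spark; declared here only so this sketch compiles outside the Spark
// source tree.
class Since(version: String) extends StaticAnnotation

// Hypothetical model mirroring the pattern in this patch: `coefficients`
// switched to the new ml.linalg Vector type in 2.0.0, so its annotation is
// bumped to 2.0.0; `uid` and `intercept` keep their original versions.
class ExampleModel(
    @Since("1.4.0") val uid: String,
    @Since("2.0.0") val coefficients: Vector,  // ml.linalg type => 2.0.0
    @Since("1.3.0") val intercept: Double) {

  @Since("2.0.0")  // takes the new Vector type, so also annotated 2.0.0
  def predict(features: Vector): Double = {
    // Toy dot-product prediction, just to make the sketch runnable.
    features.toArray.zip(coefficients.toArray).map { case (x, w) => x * w }.sum + intercept
  }
}

object ExampleModel {
  def main(args: Array[String]): Unit = {
    val model = new ExampleModel("example_uid", Vectors.dense(0.5, -0.25), 1.0)
    println(model.predict(Vectors.dense(2.0, 4.0)))  // prints 1.0
  }
}
```

The annotation is purely documentary: it feeds the "Since" labels in the generated API docs, so a stale version silently misreports when a signature became available, which is why this kind of audit follows a type migration.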