author    Zheng RuiFeng <ruifengz@foxmail.com>    2016-06-05 11:55:25 -0700
committer Reynold Xin <rxin@databricks.com>       2016-06-05 11:55:25 -0700
commit    372fa61f511843f53498b9e843a84cfdd76fa2b2 (patch)
tree      d2b2f2a58d4f5441f984630b1bc2758d8e3874a3 /mllib
parent    4e767d0f9042bfea6074c2637438859699ec4dc3 (diff)
[SPARK-15770][ML] Annotation audit for Experimental and DeveloperApi
## What changes were proposed in this pull request?

1. Remove `:: Experimental ::` comments from non-experimental APIs.
2. Add `:: Experimental ::` comments to experimental APIs.
3. Add `:: DeveloperApi ::` comments to DeveloperApi APIs.

## How was this patch tested?

Manual tests.

Author: Zheng RuiFeng <ruifengz@foxmail.com>

Closes #13514 from zhengruifeng/del_experimental.
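For context, the convention this audit enforces is that the scaladoc marker mirrors the annotation on the same declaration, so generated docs and the compiler-visible annotation tell the same story. A minimal sketch (the object name here is hypothetical):

```scala
import org.apache.spark.annotation.Experimental

/**
 * :: Experimental ::
 *
 * The scaladoc marker above must match the annotation below.
 */
@Experimental
object MyExperimentalApi  // hypothetical declaration for illustration
```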
Diffstat (limited to 'mllib')
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/attribute/AttributeType.scala | 3
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/clustering/LDA.scala | 4
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeans.scala | 4
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeansModel.scala | 2
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala | 17
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala | 1
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/evaluation/RankingMetrics.scala | 1
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/feature/StandardScaler.scala | 6
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/random/RandomRDDs.scala | 5
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctions.scala | 4
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/rdd/RDDFunctions.scala | 4
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala | 1
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/tree/model/Predict.scala | 1
13 files changed, 50 insertions(+), 3 deletions(-)
diff --git a/mllib/src/main/scala/org/apache/spark/ml/attribute/AttributeType.scala b/mllib/src/main/scala/org/apache/spark/ml/attribute/AttributeType.scala
index 5c7089b491..078fecf088 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/attribute/AttributeType.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/attribute/AttributeType.scala
@@ -27,6 +27,9 @@ import org.apache.spark.annotation.DeveloperApi
@DeveloperApi
sealed abstract class AttributeType(val name: String)
+/**
+ * :: DeveloperApi ::
+ */
@DeveloperApi
object AttributeType {
diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/LDA.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/LDA.scala
index 5aec692c98..609e50eb49 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/clustering/LDA.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/LDA.scala
@@ -675,6 +675,8 @@ class DistributedLDAModel private[ml] (
private var _checkpointFiles: Array[String] = oldDistributedModel.checkpointFiles
/**
+ * :: DeveloperApi ::
+ *
* If using checkpointing and [[LDA.keepLastCheckpoint]] is set to true, then there may be
* saved checkpoint files. This method is provided so that users can manage those files.
*
@@ -689,6 +691,8 @@ class DistributedLDAModel private[ml] (
def getCheckpointFiles: Array[String] = _checkpointFiles
/**
+ * :: DeveloperApi ::
+ *
* Remove any remaining checkpoint files from training.
*
* @see [[getCheckpointFiles]]
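For reference, a usage sketch of the two checkpoint-file methods documented above (assuming `dataset` is an existing DataFrame with a column of feature vectors, and a checkpoint directory has been set on the SparkContext so checkpointing actually occurs):

```scala
import org.apache.spark.ml.clustering.{DistributedLDAModel, LDA}

val lda = new LDA()
  .setK(10)
  .setOptimizer("em")            // the EM optimizer produces a DistributedLDAModel
  .setKeepLastCheckpoint(true)
val model = lda.fit(dataset).asInstanceOf[DistributedLDAModel]

model.getCheckpointFiles.foreach(println)  // list any leftover checkpoint files
model.deleteCheckpointFiles()              // then remove them from disk
```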
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeans.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeans.scala
index e4bd0dc25e..91edcf2a79 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeans.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeans.scala
@@ -31,6 +31,8 @@ import org.apache.spark.rdd.RDD
import org.apache.spark.storage.StorageLevel
/**
+ * :: Experimental ::
+ *
* A bisecting k-means algorithm based on the paper "A comparison of document clustering techniques"
* by Steinbach, Karypis, and Kumar, with modification to fit Spark.
* The algorithm starts from a single cluster that contains all points.
@@ -396,6 +398,8 @@ private object BisectingKMeans extends Serializable {
}
/**
+ * :: Experimental ::
+ *
* Represents a node in a clustering tree.
*
* @param index node index, negative for internal nodes and non-negative for leaf nodes
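A minimal training sketch for the now-experimental `BisectingKMeans` (assuming `sc` is an active SparkContext):

```scala
import org.apache.spark.mllib.clustering.BisectingKMeans
import org.apache.spark.mllib.linalg.Vectors

val points = sc.parallelize(Seq(
  Vectors.dense(0.0, 0.0), Vectors.dense(1.0, 1.0),
  Vectors.dense(9.0, 8.0), Vectors.dense(8.0, 9.0)))

// Repeatedly bisects clusters until k leaf clusters remain.
val model = new BisectingKMeans().setK(2).run(points)
model.clusterCenters.foreach(println)
```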
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeansModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeansModel.scala
index b3546a1ee3..11fd940b8b 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeansModel.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeansModel.scala
@@ -32,6 +32,8 @@ import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{Row, SparkSession}
/**
+ * :: Experimental ::
+ *
* Clustering model produced by [[BisectingKMeans]].
* The prediction is done level-by-level from the root node to a leaf node, and at each node among
* its children the closest to the input point is selected.
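Prediction on the resulting model walks the tree level-by-level as described above; a short sketch reusing the `model` trained in the previous example:

```scala
import org.apache.spark.mllib.linalg.Vectors

// At each internal node, descend into the child whose center is closest.
val cluster = model.predict(Vectors.dense(0.5, 0.5))
println(s"point assigned to leaf cluster $cluster")
```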
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala
index 4f07236225..d295826300 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala
@@ -425,6 +425,13 @@ class LocalLDAModel private[spark] (
}
+/**
+ * :: Experimental ::
+ *
+ * Local (non-distributed) model fitted by [[LDA]].
+ *
+ * This model stores the inferred topics only; it does not store info about the training dataset.
+ */
@Experimental
@Since("1.5.0")
object LocalLDAModel extends Loader[LocalLDAModel] {
@@ -814,7 +821,15 @@ class DistributedLDAModel private[clustering] (
}
}
-
+/**
+ * :: Experimental ::
+ *
+ * Distributed model fitted by [[LDA]].
+ * This type of model is currently only produced by Expectation-Maximization (EM).
+ *
+ * This model stores the inferred topics, the full training dataset, and the topic distribution
+ * for each training document.
+ */
@Experimental
@Since("1.5.0")
object DistributedLDAModel extends Loader[DistributedLDAModel] {
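Both loader objects documented above follow the standard `Loader` pattern. A sketch (assuming `sc` is an active SparkContext; the paths are hypothetical):

```scala
import org.apache.spark.mllib.clustering.{DistributedLDAModel, LocalLDAModel}

val localModel = LocalLDAModel.load(sc, "/models/lda-online")
val emModel    = DistributedLDAModel.load(sc, "/models/lda-em")

// A distributed model can be collapsed to a local one when only the
// inferred topics are needed, discarding the per-document statistics.
val collapsed = emModel.toLocal
```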
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala
index 719695a338..9a6a8dbdcc 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala
@@ -25,7 +25,6 @@ import org.apache.spark.rdd.RDD
import org.apache.spark.sql.DataFrame
/**
- * ::Experimental::
* Evaluator for multiclass classification.
*
* @param predictionAndLabels an RDD of (prediction, label) pairs.
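With the annotation removed, this evaluator is a stable API. A minimal sketch (assuming `sc` is an active SparkContext):

```scala
import org.apache.spark.mllib.evaluation.MulticlassMetrics

// Each pair is (prediction, label), both encoded as doubles.
val predictionAndLabels = sc.parallelize(Seq(
  (0.0, 0.0), (1.0, 1.0), (1.0, 0.0), (2.0, 2.0)))

val metrics = new MulticlassMetrics(predictionAndLabels)
println(metrics.accuracy)
println(metrics.confusionMatrix)
```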
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RankingMetrics.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RankingMetrics.scala
index 4ed4a05894..e29b51c3a1 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RankingMetrics.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RankingMetrics.scala
@@ -28,7 +28,6 @@ import org.apache.spark.internal.Logging
import org.apache.spark.rdd.RDD
/**
- * ::Experimental::
* Evaluator for ranking algorithms.
*
* Java users should use [[RankingMetrics$.of]] to create a [[RankingMetrics]] instance.
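A Scala usage sketch for this evaluator (assuming `sc` is an active SparkContext):

```scala
import org.apache.spark.mllib.evaluation.RankingMetrics

// Each pair is (predicted ranking, ground-truth set of relevant ids).
val relevance = sc.parallelize(Seq(
  (Array(1, 2, 3, 4), Array(1, 3)),
  (Array(5, 6), Array(6))))

val metrics = new RankingMetrics(relevance)
println(metrics.precisionAt(2))
println(metrics.meanAveragePrecision)
```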
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/StandardScaler.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/StandardScaler.scala
index ee97045f34..b7d6c60568 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/feature/StandardScaler.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/StandardScaler.scala
@@ -97,6 +97,9 @@ class StandardScalerModel @Since("1.3.0") (
@Since("1.3.0")
def this(std: Vector) = this(std, null)
+ /**
+ * :: DeveloperApi ::
+ */
@Since("1.3.0")
@DeveloperApi
def setWithMean(withMean: Boolean): this.type = {
@@ -105,6 +108,9 @@ class StandardScalerModel @Since("1.3.0") (
this
}
+ /**
+ * :: DeveloperApi ::
+ */
@Since("1.3.0")
@DeveloperApi
def setWithStd(withStd: Boolean): this.type = {
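The two setters documented above let developers build a `StandardScalerModel` directly from precomputed statistics and toggle its behavior. A sketch:

```scala
import org.apache.spark.mllib.feature.StandardScalerModel
import org.apache.spark.mllib.linalg.Vectors

// Constructor takes (std, mean); both flags enabled means (x - mean) / std.
val model = new StandardScalerModel(Vectors.dense(2.0, 0.5), Vectors.dense(1.0, -1.0))
  .setWithMean(true)
  .setWithStd(true)

println(model.transform(Vectors.dense(3.0, 1.0)))  // ((3-1)/2, (1+1)/0.5) = (1.0, 4.0)
```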
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/random/RandomRDDs.scala b/mllib/src/main/scala/org/apache/spark/mllib/random/RandomRDDs.scala
index b0a716936a..c2bc1f17cc 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/random/RandomRDDs.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/random/RandomRDDs.scala
@@ -418,6 +418,7 @@ object RandomRDDs {
}
/**
+ * :: DeveloperApi ::
* [[RandomRDDs#randomJavaRDD]] with the default seed.
*/
@DeveloperApi
@@ -431,6 +432,7 @@ object RandomRDDs {
}
/**
+ * :: DeveloperApi ::
* [[RandomRDDs#randomJavaRDD]] with the default seed & numPartitions
*/
@DeveloperApi
@@ -854,6 +856,7 @@ object RandomRDDs {
}
/**
+ * :: DeveloperApi ::
* Java-friendly version of [[RandomRDDs#randomVectorRDD]].
*/
@DeveloperApi
@@ -869,6 +872,7 @@ object RandomRDDs {
}
/**
+ * :: DeveloperApi ::
* [[RandomRDDs#randomJavaVectorRDD]] with the default seed.
*/
@DeveloperApi
@@ -883,6 +887,7 @@ object RandomRDDs {
}
/**
+ * :: DeveloperApi ::
* [[RandomRDDs#randomJavaVectorRDD]] with the default number of partitions and the default seed.
*/
@DeveloperApi
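The Java-friendly overloads annotated above wrap the Scala generators. A minimal Scala sketch of the underlying API (assuming `sc` is an active SparkContext):

```scala
import org.apache.spark.mllib.random.RandomRDDs

val normals = RandomRDDs.normalRDD(sc, 1000L)          // 1000 draws from N(0, 1)
val vectors = RandomRDDs.normalVectorRDD(sc, 100L, 3)  // 100 rows of 3 i.i.d. draws

println(normals.mean())
```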
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctions.scala b/mllib/src/main/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctions.scala
index 1b93e2d764..af4bc9f290 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctions.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctions.scala
@@ -25,6 +25,7 @@ import org.apache.spark.rdd.RDD
import org.apache.spark.util.BoundedPriorityQueue
/**
+ * :: DeveloperApi ::
* Machine learning specific Pair RDD functions.
*/
@DeveloperApi
@@ -50,6 +51,9 @@ class MLPairRDDFunctions[K: ClassTag, V: ClassTag](self: RDD[(K, V)]) extends Se
}
}
+/**
+ * :: DeveloperApi ::
+ */
@DeveloperApi
object MLPairRDDFunctions {
/** Implicit conversion from a pair RDD to MLPairRDDFunctions. */
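A usage sketch of `topByKey`, the main method these pair-RDD functions provide (assuming `sc` is an active SparkContext):

```scala
import org.apache.spark.mllib.rdd.MLPairRDDFunctions._

val pairs = sc.parallelize(Seq(("a", 3), ("a", 1), ("a", 2), ("b", 5)))

// For each key, keep only the 2 largest values (via a bounded priority queue).
val top2 = pairs.topByKey(2)
top2.collect().foreach { case (k, vs) => println(s"$k -> ${vs.mkString(",")}") }
```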
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/rdd/RDDFunctions.scala b/mllib/src/main/scala/org/apache/spark/mllib/rdd/RDDFunctions.scala
index e8a937ffcb..0f7857b8d8 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/rdd/RDDFunctions.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/rdd/RDDFunctions.scala
@@ -24,6 +24,7 @@ import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.rdd.RDD
/**
+ * :: DeveloperApi ::
* Machine learning specific RDD functions.
*/
@DeveloperApi
@@ -53,6 +54,9 @@ class RDDFunctions[T: ClassTag](self: RDD[T]) extends Serializable {
}
+/**
+ * :: DeveloperApi ::
+ */
@DeveloperApi
object RDDFunctions {
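A usage sketch of `sliding`, the signature method of these RDD functions (assuming `sc` is an active SparkContext):

```scala
import org.apache.spark.mllib.rdd.RDDFunctions._

val xs = sc.parallelize(1 to 6, 2)

// Overlapping windows of size 3: [1 2 3], [2 3 4], [3 4 5], [4 5 6].
xs.sliding(3).collect().foreach(w => println(w.mkString(" ")))
```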
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala
index 467cb83cd1..cc9ee15738 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala
@@ -216,6 +216,7 @@ class ALS private (
}
/**
+ * :: DeveloperApi ::
* Set period (in iterations) between checkpoints (default = 10). Checkpointing helps with
* recovery (when nodes fail) and StackOverflow exceptions caused by long lineage. It also helps
* with eliminating temporary shuffle files on disk, which can be important when there are many
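A sketch of setting the checkpoint interval described above (assuming `sc` is an active SparkContext; the checkpoint path is hypothetical, and checkpointing only takes effect once a directory is set):

```scala
import org.apache.spark.mllib.recommendation.{ALS, Rating}

sc.setCheckpointDir("/tmp/als-checkpoints")  // required for checkpointing to occur

val ratings = sc.parallelize(Seq(
  Rating(1, 1, 5.0), Rating(1, 2, 1.0), Rating(2, 1, 4.0)))

val model = new ALS()
  .setRank(8)
  .setIterations(20)
  .setCheckpointInterval(10)  // truncate lineage every 10 iterations
  .run(ratings)
```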
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Predict.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Predict.scala
index 06ceff19d8..1dbdd2d860 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Predict.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Predict.scala
@@ -20,6 +20,7 @@ package org.apache.spark.mllib.tree.model
import org.apache.spark.annotation.{DeveloperApi, Since}
/**
+ * :: DeveloperApi ::
* Predicted value for a node
* @param predict predicted value
* @param prob probability of the label (classification only)
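Since `Predict` is a public `DeveloperApi` class, it can be constructed directly; a minimal sketch:

```scala
import org.apache.spark.mllib.tree.model.Predict

// A leaf predicting class 1.0 with probability 0.8 (prob is meaningful
// for classification only and defaults to 0.0).
val leaf = new Predict(predict = 1.0, prob = 0.8)
println(leaf)
```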