From 0adc932add413a1754107b21d5ecfb38c0c3a4eb Mon Sep 17 00:00:00 2001 From: Xiangrui Meng Date: Wed, 9 Apr 2014 17:08:17 -0700 Subject: [SPARK-1357 (fix)] remove empty line after :: DeveloperApi/Experimental :: Remove empty line after :: DeveloperApi/Experimental :: in comments to make the original doc show up in the preview of the generated html docs. Thanks @andrewor14 ! Author: Xiangrui Meng Closes #373 from mengxr/api and squashes the following commits: 9c35bdc [Xiangrui Meng] remove the empty line after :: DeveloperApi/Experimental :: --- .../spark/mllib/api/python/PythonMLLibAPI.scala | 1 - .../spark/mllib/classification/NaiveBayes.scala | 1 - .../org/apache/spark/mllib/clustering/KMeans.scala | 4 -- .../linalg/distributed/CoordinateMatrix.scala | 1 - .../linalg/distributed/IndexedRowMatrix.scala | 2 - .../spark/mllib/linalg/distributed/RowMatrix.scala | 1 - .../apache/spark/mllib/optimization/Gradient.scala | 4 -- .../spark/mllib/optimization/GradientDescent.scala | 2 - .../spark/mllib/optimization/Optimizer.scala | 1 - .../apache/spark/mllib/optimization/Updater.scala | 4 -- .../apache/spark/mllib/recommendation/ALS.scala | 1 - .../recommendation/MatrixFactorizationModel.scala | 1 - .../regression/GeneralizedLinearAlgorithm.scala | 1 - .../org/apache/spark/mllib/tree/DecisionTree.scala | 1 - .../spark/mllib/tree/configuration/Algo.scala | 1 - .../mllib/tree/configuration/FeatureType.scala | 1 - .../tree/configuration/QuantileStrategy.scala | 1 - .../spark/mllib/tree/configuration/Strategy.scala | 1 - .../apache/spark/mllib/tree/impurity/Entropy.scala | 2 - .../apache/spark/mllib/tree/impurity/Gini.scala | 2 - .../spark/mllib/tree/impurity/Impurity.scala | 3 -- .../spark/mllib/tree/impurity/Variance.scala | 2 - .../spark/mllib/tree/model/DecisionTreeModel.scala | 1 - .../mllib/tree/model/InformationGainStats.scala | 1 - .../org/apache/spark/mllib/tree/model/Node.scala | 1 - .../org/apache/spark/mllib/tree/model/Split.scala | 1 - .../apache/spark/mllib/util/DataValidators.scala | 1 - .../spark/mllib/util/KMeansDataGenerator.scala | 1 - .../spark/mllib/util/LinearDataGenerator.scala | 1 - .../util/LogisticRegressionDataGenerator.scala | 1 - .../apache/spark/mllib/util/MFDataGenerator.scala | 43 +++++++++++----------- .../org/apache/spark/mllib/util/MLUtils.scala | 2 - .../apache/spark/mllib/util/SVMDataGenerator.scala | 1 - 33 files changed, 21 insertions(+), 71 deletions(-) (limited to 'mllib') diff --git a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala index ae27c57799..a6c049e517 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala @@ -30,7 +30,6 @@ import org.apache.spark.rdd.RDD /** * :: DeveloperApi :: - * * The Java stubs necessary for the Python mllib bindings. */ @DeveloperApi diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala index 5a45f12f1a..18658850a2 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala @@ -29,7 +29,6 @@ import org.apache.spark.rdd.RDD /** * :: Experimental :: - * * Model for Naive Bayes Classifiers. * * @param labels list of labels diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala index 8f565eb60a..90cf8525df 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala @@ -78,7 +78,6 @@ class KMeans private ( /** * :: Experimental :: - * * Set the number of runs of the algorithm to execute in parallel. We initialize the algorithm * this many times with random starting conditions (configured by the initialization mode), then * return the best clustering found over any run. Default: 1. @@ -398,9 +397,6 @@ object KMeans { MLUtils.fastSquaredDistance(v1.vector, v1.norm, v2.vector, v2.norm) } - /** - * :: Experimental :: - */ @Experimental def main(args: Array[String]) { if (args.length < 4) { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrix.scala index 89d5c03d76..56b8fdcda6 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrix.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrix.scala @@ -34,7 +34,6 @@ case class MatrixEntry(i: Long, j: Long, value: Double) /** * :: Experimental :: - * * Represents a matrix in coordinate format. * * @param entries matrix entries diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala index 24c123ab7e..132b3af72d 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala @@ -26,7 +26,6 @@ import org.apache.spark.mllib.linalg.SingularValueDecomposition /** * :: Experimental :: - * * Represents a row of [[org.apache.spark.mllib.linalg.distributed.IndexedRowMatrix]]. */ @Experimental @@ -34,7 +33,6 @@ case class IndexedRow(index: Long, vector: Vector) /** * :: Experimental :: - * * Represents a row-oriented [[org.apache.spark.mllib.linalg.distributed.DistributedMatrix]] with * indexed rows. * diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala index 8d32c1a6db..f65f43dd30 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala @@ -30,7 +30,6 @@ import org.apache.spark.Logging /** * :: Experimental :: - * * Represents a row-oriented distributed Matrix with no meaningful row indices. * * @param rows rows stored as an RDD[Vector] diff --git a/mllib/src/main/scala/org/apache/spark/mllib/optimization/Gradient.scala b/mllib/src/main/scala/org/apache/spark/mllib/optimization/Gradient.scala index 1176dc9dbc..679842f831 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/optimization/Gradient.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/optimization/Gradient.scala @@ -24,7 +24,6 @@ import org.apache.spark.mllib.linalg.{Vectors, Vector} /** * :: DeveloperApi :: - * * Class used to compute the gradient for a loss function, given a single data point. */ @DeveloperApi @@ -56,7 +55,6 @@ abstract class Gradient extends Serializable { /** * :: DeveloperApi :: - * * Compute gradient and loss for a logistic loss function, as used in binary classification. * See also the documentation for the precise formulation. */ @@ -100,7 +98,6 @@ class LogisticGradient extends Gradient { /** * :: DeveloperApi :: - * * Compute gradient and loss for a Least-squared loss function, as used in linear regression. * This is correct for the averaged least squares loss function (mean squared error) * L = 1/n ||A weights-y||^2 @@ -135,7 +132,6 @@ class LeastSquaresGradient extends Gradient { /** * :: DeveloperApi :: - * * Compute gradient and loss for a Hinge loss function, as used in SVM binary classification. * See also the documentation for the precise formulation. * NOTE: This assumes that the labels are {0,1} diff --git a/mllib/src/main/scala/org/apache/spark/mllib/optimization/GradientDescent.scala b/mllib/src/main/scala/org/apache/spark/mllib/optimization/GradientDescent.scala index 04267d967d..f60417f21d 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/optimization/GradientDescent.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/optimization/GradientDescent.scala @@ -28,7 +28,6 @@ import org.apache.spark.mllib.linalg.{Vectors, Vector} /** * :: DeveloperApi :: - * * Class used to solve an optimization problem using Gradient Descent. * @param gradient Gradient function to be used. * @param updater Updater to be used to update weights after every iteration. @@ -113,7 +112,6 @@ class GradientDescent(private var gradient: Gradient, private var updater: Updat /** * :: DeveloperApi :: - * * Top-level method to run gradient descent. */ @DeveloperApi diff --git a/mllib/src/main/scala/org/apache/spark/mllib/optimization/Optimizer.scala b/mllib/src/main/scala/org/apache/spark/mllib/optimization/Optimizer.scala index 0a313f3104..e41d9bbe18 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/optimization/Optimizer.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/optimization/Optimizer.scala @@ -24,7 +24,6 @@ import org.apache.spark.mllib.linalg.Vector /** * :: DeveloperApi :: - * * Trait for optimization problem solvers. */ @DeveloperApi diff --git a/mllib/src/main/scala/org/apache/spark/mllib/optimization/Updater.scala b/mllib/src/main/scala/org/apache/spark/mllib/optimization/Updater.scala index e67816796c..3ed3a5b9b3 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/optimization/Updater.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/optimization/Updater.scala @@ -26,7 +26,6 @@ import org.apache.spark.mllib.linalg.{Vectors, Vector} /** * :: DeveloperApi :: - * * Class used to perform steps (weight update) using Gradient Descent methods. * * For general minimization problems, or for regularized problems of the form @@ -64,7 +63,6 @@ abstract class Updater extends Serializable { /** * :: DeveloperApi :: - * * A simple updater for gradient descent *without* any regularization. * Uses a step-size decreasing with the square root of the number of iterations. */ @@ -86,7 +84,6 @@ class SimpleUpdater extends Updater { /** * :: DeveloperApi :: - * * Updater for L1 regularized problems. * R(w) = ||w||_1 * Uses a step-size decreasing with the square root of the number of iterations. @@ -131,7 +128,6 @@ class L1Updater extends Updater { /** * :: DeveloperApi :: - * * Updater for L2 regularized problems. * R(w) = 1/2 ||w||^2 * Uses a step-size decreasing with the square root of the number of iterations. diff --git a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala index 60cbb1c1e1..5cc47de8ff 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala @@ -140,7 +140,6 @@ class ALS private ( /** * :: Experimental :: - * * Sets the constant used in computing confidence in implicit ALS. Default: 1.0. */ @Experimental diff --git a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModel.scala index e05224fc7c..471546cd82 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModel.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModel.scala @@ -70,7 +70,6 @@ class MatrixFactorizationModel( /** * :: DeveloperApi :: - * * Predict the rating of many users for many products. * This is a Java stub for python predictAll() * diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala index c24f5afb99..3bd0017aa1 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala @@ -103,7 +103,6 @@ abstract class GeneralizedLinearAlgorithm[M <: GeneralizedLinearModel] /** * :: Experimental :: - * * Set if the algorithm should validate data before training. Default true. */ @Experimental diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala index c8a966cd5f..3019447ce4 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala @@ -35,7 +35,6 @@ import org.apache.spark.mllib.linalg.{Vector, Vectors} /** * :: Experimental :: - * * A class that implements a decision tree algorithm for classification and regression. It * supports both continuous and categorical features. * @param strategy The configuration parameters for the tree algorithm which specify the type diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Algo.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Algo.scala index 017f84f3b9..79a01f5831 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Algo.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Algo.scala @@ -21,7 +21,6 @@ import org.apache.spark.annotation.Experimental /** * :: Experimental :: - * * Enum to select the algorithm for the decision tree */ @Experimental diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/FeatureType.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/FeatureType.scala index c0254c32c2..f4c8772327 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/FeatureType.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/FeatureType.scala @@ -21,7 +21,6 @@ import org.apache.spark.annotation.Experimental /** * :: Experimental :: - * * Enum to describe whether a feature is "continuous" or "categorical" */ @Experimental diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/QuantileStrategy.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/QuantileStrategy.scala index b3e8b224be..7da976e55a 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/QuantileStrategy.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/QuantileStrategy.scala @@ -21,7 +21,6 @@ import org.apache.spark.annotation.Experimental /** * :: Experimental :: - * * Enum for selecting the quantile calculation strategy */ @Experimental diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Strategy.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Strategy.scala index 482faaa9e7..8767aca47c 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Strategy.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Strategy.scala @@ -24,7 +24,6 @@ import org.apache.spark.mllib.tree.configuration.QuantileStrategy._ /** * :: Experimental :: - * * Stores all the configuration options for tree construction * @param algo classification or regression * @param impurity criterion used for information gain calculation diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Entropy.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Entropy.scala index 55c43f2fcf..60f43e9278 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Entropy.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Entropy.scala @@ -21,7 +21,6 @@ import org.apache.spark.annotation.{DeveloperApi, Experimental} /** * :: Experimental :: - * * Class for calculating [[http://en.wikipedia.org/wiki/Binary_entropy_function entropy]] during * binary classification. */ @@ -32,7 +31,6 @@ object Entropy extends Impurity { /** * :: DeveloperApi :: - * * entropy calculation * @param c0 count of instances with label 0 * @param c1 count of instances with label 1 diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Gini.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Gini.scala index c923b8e8f4..c51d76d9b4 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Gini.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Gini.scala @@ -21,7 +21,6 @@ import org.apache.spark.annotation.{DeveloperApi, Experimental} /** * :: Experimental :: - * * Class for calculating the * [[http://en.wikipedia.org/wiki/Decision_tree_learning#Gini_impurity Gini impurity]] * during binary classification. @@ -31,7 +30,6 @@ object Gini extends Impurity { /** * :: DeveloperApi :: - * * Gini coefficient calculation * @param c0 count of instances with label 0 * @param c1 count of instances with label 1 diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala index f407796596..8eab247cf0 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala @@ -21,7 +21,6 @@ import org.apache.spark.annotation.{DeveloperApi, Experimental} /** * :: Experimental :: - * * Trait for calculating information gain. */ @Experimental @@ -29,7 +28,6 @@ trait Impurity extends Serializable { /** * :: DeveloperApi :: - * * information calculation for binary classification * @param c0 count of instances with label 0 * @param c1 count of instances with label 1 @@ -40,7 +38,6 @@ trait Impurity extends Serializable { /** * :: DeveloperApi :: - * * information calculation for regression * @param count number of instances * @param sum sum of labels diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Variance.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Variance.scala index 2c64644f4e..47d07122af 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Variance.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Variance.scala @@ -21,7 +21,6 @@ import org.apache.spark.annotation.{DeveloperApi, Experimental} /** * :: Experimental :: - * * Class for calculating variance during regression */ @Experimental @@ -31,7 +30,6 @@ object Variance extends Impurity { /** * :: DeveloperApi :: - * * variance calculation * @param count number of instances * @param sum sum of labels diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala index 0f76f4a049..bf692ca8c4 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala @@ -24,7 +24,6 @@ import org.apache.spark.mllib.linalg.Vector /** * :: Experimental :: - * * Model to store the decision tree parameters * @param topNode root node * @param algo algorithm type -- classification or regression diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/InformationGainStats.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/InformationGainStats.scala index d36b58e92c..cc8a24cce9 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/InformationGainStats.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/InformationGainStats.scala @@ -21,7 +21,6 @@ import org.apache.spark.annotation.DeveloperApi /** * :: DeveloperApi :: - * * Information gain statistics for each split * @param gain information gain value * @param impurity current node impurity diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Node.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Node.scala index 3399721414..682f213f41 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Node.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Node.scala @@ -24,7 +24,6 @@ import org.apache.spark.mllib.linalg.Vector /** * :: DeveloperApi :: - * * Node in a decision tree * @param id integer node id * @param predict predicted value at the node diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Split.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Split.scala index 8bbb343079..d7ffd386c0 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Split.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Split.scala @@ -22,7 +22,6 @@ import org.apache.spark.mllib.tree.configuration.FeatureType.FeatureType /** * :: DeveloperApi :: - * * Split applied to a feature * @param feature feature index * @param threshold threshold for continuous feature diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/DataValidators.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/DataValidators.scala index 230c409e1b..45f95482a1 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/util/DataValidators.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/util/DataValidators.scala @@ -24,7 +24,6 @@ import org.apache.spark.mllib.regression.LabeledPoint /** * :: DeveloperApi :: - * * A collection of methods used to validate data before applying ML algorithms. */ @DeveloperApi diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/KMeansDataGenerator.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/KMeansDataGenerator.scala index e693d13703..6eaebaf7db 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/util/KMeansDataGenerator.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/util/KMeansDataGenerator.scala @@ -25,7 +25,6 @@ import org.apache.spark.rdd.RDD /** * :: DeveloperApi :: - * * Generate test data for KMeans. This class first chooses k cluster centers * from a d-dimensional Gaussian distribution scaled by factor r and then creates a Gaussian * cluster with scale 1 around each center. diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/LinearDataGenerator.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/LinearDataGenerator.scala index 140ff92869..c8e160d00c 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/util/LinearDataGenerator.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/util/LinearDataGenerator.scala @@ -30,7 +30,6 @@ import org.apache.spark.mllib.regression.LabeledPoint /** * :: DeveloperApi :: - * * Generate sample data used for Linear Data. This class generates * uniformly random values for every feature and adds Gaussian noise with mean `eps` to the * response variable `Y`. diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/LogisticRegressionDataGenerator.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/LogisticRegressionDataGenerator.scala index ca06b9ad58..c82cd8fd46 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/util/LogisticRegressionDataGenerator.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/util/LogisticRegressionDataGenerator.scala @@ -27,7 +27,6 @@ import org.apache.spark.mllib.linalg.Vectors /** * :: DeveloperApi :: - * * Generate test data for LogisticRegression. This class chooses positive labels * with probability `probOne` and scales features for positive examples by `eps`. */ diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/MFDataGenerator.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/MFDataGenerator.scala index 3bd86d6813..3f413faca6 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/util/MFDataGenerator.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/util/MFDataGenerator.scala @@ -27,29 +27,28 @@ import org.apache.spark.rdd.RDD /** * :: DeveloperApi :: + * Generate RDD(s) containing data for Matrix Factorization. * -* Generate RDD(s) containing data for Matrix Factorization. -* -* This method samples training entries according to the oversampling factor -* 'trainSampFact', which is a multiplicative factor of the number of -* degrees of freedom of the matrix: rank*(m+n-rank). -* -* It optionally samples entries for a testing matrix using -* 'testSampFact', the percentage of the number of training entries -* to use for testing. -* -* This method takes the following inputs: -* sparkMaster (String) The master URL. -* outputPath (String) Directory to save output. -* m (Int) Number of rows in data matrix. -* n (Int) Number of columns in data matrix. -* rank (Int) Underlying rank of data matrix. -* trainSampFact (Double) Oversampling factor. -* noise (Boolean) Whether to add gaussian noise to training data. -* sigma (Double) Standard deviation of added gaussian noise. -* test (Boolean) Whether to create testing RDD. -* testSampFact (Double) Percentage of training data to use as test data. -*/ + * This method samples training entries according to the oversampling factor + * 'trainSampFact', which is a multiplicative factor of the number of + * degrees of freedom of the matrix: rank*(m+n-rank). + * + * It optionally samples entries for a testing matrix using + * 'testSampFact', the percentage of the number of training entries + * to use for testing. + * + * This method takes the following inputs: + * sparkMaster (String) The master URL. + * outputPath (String) Directory to save output. + * m (Int) Number of rows in data matrix. + * n (Int) Number of columns in data matrix. + * rank (Int) Underlying rank of data matrix. + * trainSampFact (Double) Oversampling factor. + * noise (Boolean) Whether to add gaussian noise to training data. + * sigma (Double) Standard deviation of added gaussian noise. + * test (Boolean) Whether to create testing RDD. + * testSampFact (Double) Percentage of training data to use as test data. + */ @DeveloperApi object MFDataGenerator { def main(args: Array[String]) { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala index 7f9804deaf..ac2360c429 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala @@ -124,7 +124,6 @@ object MLUtils { /** * :: Experimental :: - * * Load labeled data from a file. The data format used here is * , ... * where , are feature values in Double and is the corresponding label as Double. @@ -146,7 +145,6 @@ object MLUtils { /** * :: Experimental :: - * * Save labeled data to a file. The data format used here is * , ... * where , are feature values in Double and is the corresponding label as Double. diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/SVMDataGenerator.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/SVMDataGenerator.scala index 87a6f2a0c3..ba8190b0e0 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/util/SVMDataGenerator.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/util/SVMDataGenerator.scala @@ -29,7 +29,6 @@ import org.apache.spark.mllib.regression.LabeledPoint /** * :: DeveloperApi :: - * * Generate sample data used for SVM. This class generates uniform random values * for the features and adds Gaussian noise with weight 0.1 to generate labels. */ -- cgit v1.2.3