-rw-r--r-- core/src/main/scala/org/apache/spark/SSLOptions.scala | 4
-rw-r--r-- core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala | 6
-rw-r--r-- core/src/main/scala/org/apache/spark/api/java/JavaRDD.scala | 10
-rw-r--r-- core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala | 14
-rw-r--r-- core/src/main/scala/org/apache/spark/io/CompressionCodec.scala | 2
-rw-r--r-- core/src/main/scala/org/apache/spark/rdd/RDD.scala | 18
-rw-r--r-- core/src/main/scala/org/apache/spark/security/CryptoStreamUtils.scala | 4
-rw-r--r-- core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala | 3
-rw-r--r-- core/src/main/scala/org/apache/spark/storage/BlockReplicationPolicy.scala | 7
-rw-r--r-- core/src/main/scala/org/apache/spark/ui/UIUtils.scala | 4
-rw-r--r-- core/src/main/scala/org/apache/spark/util/AccumulatorV2.scala | 2
-rw-r--r-- core/src/main/scala/org/apache/spark/util/RpcUtils.scala | 2
-rw-r--r-- core/src/main/scala/org/apache/spark/util/StatCounter.scala | 4
-rw-r--r-- core/src/main/scala/org/apache/spark/util/ThreadUtils.scala | 6
-rw-r--r-- core/src/main/scala/org/apache/spark/util/Utils.scala | 10
-rw-r--r-- core/src/main/scala/org/apache/spark/util/io/ChunkedByteBuffer.scala | 2
-rw-r--r-- graphx/src/main/scala/org/apache/spark/graphx/Graph.scala | 4
-rw-r--r-- graphx/src/main/scala/org/apache/spark/graphx/GraphLoader.scala | 2
-rw-r--r-- graphx/src/main/scala/org/apache/spark/graphx/impl/EdgeRDDImpl.scala | 2
-rw-r--r-- graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala | 4
-rw-r--r-- graphx/src/main/scala/org/apache/spark/graphx/lib/SVDPlusPlus.scala | 3
-rw-r--r-- graphx/src/main/scala/org/apache/spark/graphx/lib/TriangleCount.scala | 2
-rw-r--r-- mllib-local/src/main/scala/org/apache/spark/ml/stat/distribution/MultivariateGaussian.scala | 3
-rw-r--r-- mllib/src/main/scala/org/apache/spark/ml/Predictor.scala | 2
-rw-r--r-- mllib/src/main/scala/org/apache/spark/ml/attribute/AttributeGroup.scala | 2
-rw-r--r-- mllib/src/main/scala/org/apache/spark/ml/attribute/attributes.scala | 4
-rw-r--r-- mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala | 74
-rw-r--r-- mllib/src/main/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifier.scala | 1
-rw-r--r-- mllib/src/main/scala/org/apache/spark/ml/classification/NaiveBayes.scala | 8
-rw-r--r-- mllib/src/main/scala/org/apache/spark/ml/classification/RandomForestClassifier.scala | 6
-rw-r--r-- mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala | 14
-rw-r--r-- mllib/src/main/scala/org/apache/spark/ml/clustering/ClusteringSummary.scala | 2
-rw-r--r-- mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala | 6
-rw-r--r-- mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala | 8
-rw-r--r-- mllib/src/main/scala/org/apache/spark/ml/clustering/LDA.scala | 42
-rw-r--r-- mllib/src/main/scala/org/apache/spark/ml/feature/DCT.scala | 3
-rw-r--r-- mllib/src/main/scala/org/apache/spark/ml/feature/MinHash.scala | 5
-rw-r--r-- mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala | 4
-rw-r--r-- mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala | 14
-rw-r--r-- mllib/src/main/scala/org/apache/spark/ml/feature/RandomProjection.scala | 4
-rw-r--r-- mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala | 4
-rwxr-xr-x mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala | 5
-rw-r--r-- mllib/src/main/scala/org/apache/spark/ml/feature/package.scala | 3
-rw-r--r-- mllib/src/main/scala/org/apache/spark/ml/optim/IterativelyReweightedLeastSquares.scala | 7
-rw-r--r-- mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala | 12
-rw-r--r-- mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala | 27
-rw-r--r-- mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala | 4
-rw-r--r-- mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala | 4
-rw-r--r-- mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala | 12
-rw-r--r-- mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala | 38
-rw-r--r-- mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala | 5
-rw-r--r-- mllib/src/main/scala/org/apache/spark/ml/source/libsvm/LibSVMDataSource.scala | 13
-rw-r--r-- mllib/src/main/scala/org/apache/spark/ml/tree/impl/GradientBoostedTrees.scala | 10
-rw-r--r-- mllib/src/main/scala/org/apache/spark/ml/tree/impl/RandomForest.scala | 2
-rw-r--r-- mllib/src/main/scala/org/apache/spark/ml/tree/treeParams.scala | 6
-rw-r--r-- mllib/src/main/scala/org/apache/spark/ml/tuning/CrossValidator.scala | 4
-rw-r--r-- mllib/src/main/scala/org/apache/spark/ml/util/ReadWrite.scala | 10
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala | 28
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeans.scala | 21
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeansModel.scala | 4
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixture.scala | 6
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala | 2
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/clustering/LDA.scala | 24
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala | 2
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala | 2
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/clustering/PowerIterationClustering.scala | 13
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/clustering/StreamingKMeans.scala | 4
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/evaluation/RegressionMetrics.scala | 10
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/fpm/FPGrowth.scala | 12
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/fpm/PrefixSpan.scala | 7
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala | 20
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrix.scala | 4
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala | 4
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala | 2
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/optimization/Gradient.scala | 24
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/optimization/GradientDescent.scala | 4
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/optimization/LBFGS.scala | 7
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/optimization/NNLS.scala | 2
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/optimization/Updater.scala | 6
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/package.scala | 4
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/rdd/RDDFunctions.scala | 2
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala | 7
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModel.scala | 6
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala | 9
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizer.scala | 7
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/stat/Statistics.scala | 11
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/stat/distribution/MultivariateGaussian.scala | 3
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/tree/GradientBoostedTrees.scala | 2
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/tree/RandomForest.scala | 8
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/tree/model/Split.scala | 2
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala | 10
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/util/modelSaveLoad.scala | 2
-rw-r--r-- pom.xml | 12
-rw-r--r-- project/SparkBuild.scala | 5
-rw-r--r-- sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala | 2
-rw-r--r-- sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/CentralMomentAgg.scala | 4
-rw-r--r-- sql/catalyst/src/main/scala/org/apache/spark/sql/types/BinaryType.scala | 2
-rw-r--r-- sql/catalyst/src/main/scala/org/apache/spark/sql/types/BooleanType.scala | 2
-rw-r--r-- sql/catalyst/src/main/scala/org/apache/spark/sql/types/ByteType.scala | 2
-rw-r--r-- sql/catalyst/src/main/scala/org/apache/spark/sql/types/CalendarIntervalType.scala | 2
-rw-r--r-- sql/catalyst/src/main/scala/org/apache/spark/sql/types/DateType.scala | 2
-rw-r--r-- sql/catalyst/src/main/scala/org/apache/spark/sql/types/DecimalType.scala | 4
-rw-r--r-- sql/catalyst/src/main/scala/org/apache/spark/sql/types/DoubleType.scala | 2
-rw-r--r-- sql/catalyst/src/main/scala/org/apache/spark/sql/types/FloatType.scala | 2
-rw-r--r-- sql/catalyst/src/main/scala/org/apache/spark/sql/types/IntegerType.scala | 2
-rw-r--r-- sql/catalyst/src/main/scala/org/apache/spark/sql/types/LongType.scala | 2
-rw-r--r-- sql/catalyst/src/main/scala/org/apache/spark/sql/types/MapType.scala | 2
-rw-r--r-- sql/catalyst/src/main/scala/org/apache/spark/sql/types/NullType.scala | 2
-rw-r--r-- sql/catalyst/src/main/scala/org/apache/spark/sql/types/ShortType.scala | 2
-rw-r--r-- sql/catalyst/src/main/scala/org/apache/spark/sql/types/StringType.scala | 2
-rw-r--r-- sql/catalyst/src/main/scala/org/apache/spark/sql/types/TimestampType.scala | 2
-rw-r--r-- sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala | 17
-rw-r--r-- sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala | 16
-rw-r--r-- sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala | 4
-rw-r--r-- sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala | 62
-rw-r--r-- sql/core/src/main/scala/org/apache/spark/sql/execution/stat/FrequentItems.scala | 3
-rw-r--r-- sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala | 4
-rw-r--r-- sql/core/src/main/scala/org/apache/spark/sql/expressions/Aggregator.scala | 8
-rw-r--r-- sql/core/src/main/scala/org/apache/spark/sql/expressions/UserDefinedFunction.scala | 2
-rw-r--r-- sql/core/src/main/scala/org/apache/spark/sql/expressions/Window.scala | 16
-rw-r--r-- sql/core/src/main/scala/org/apache/spark/sql/expressions/WindowSpec.scala | 16
-rw-r--r-- sql/core/src/main/scala/org/apache/spark/sql/expressions/scalalang/typed.scala | 2
-rw-r--r-- sql/core/src/main/scala/org/apache/spark/sql/expressions/udaf.scala | 24
-rw-r--r-- sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala | 6
-rw-r--r-- sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala | 20
-rw-r--r-- sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala | 8
-rw-r--r-- sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQuery.scala | 10
-rw-r--r-- sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryManager.scala | 8
-rw-r--r-- sql/core/src/main/scala/org/apache/spark/sql/util/QueryExecutionListener.scala | 2
-rw-r--r-- sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala | 4
-rw-r--r-- sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala | 4
-rw-r--r-- sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileOperator.scala | 2
132 files changed, 558 insertions, 499 deletions
diff --git a/core/src/main/scala/org/apache/spark/SSLOptions.scala b/core/src/main/scala/org/apache/spark/SSLOptions.scala
index be19179b00..5f14102c3c 100644
--- a/core/src/main/scala/org/apache/spark/SSLOptions.scala
+++ b/core/src/main/scala/org/apache/spark/SSLOptions.scala
@@ -150,8 +150,8 @@ private[spark] object SSLOptions extends Logging {
* $ - `[ns].enabledAlgorithms` - a comma separated list of ciphers
*
* For a list of protocols and ciphers supported by particular Java versions, you may go to
- * [[https://blogs.oracle.com/java-platform-group/entry/diagnosing_tls_ssl_and_https Oracle
- * blog page]].
+ * <a href="https://blogs.oracle.com/java-platform-group/entry/diagnosing_tls_ssl_and_https">
+ * Oracle blog page</a>.
*
* You can optionally specify the default configuration. If you do, for each setting which is
* missing in SparkConf, the corresponding setting is used from the default configuration.
diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala b/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala
index bff5a29bb6..d7e3a1b1be 100644
--- a/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala
+++ b/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala
@@ -405,7 +405,7 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])
* partitioning of the resulting key-value pair RDD by passing a Partitioner.
*
* @note If you are grouping in order to perform an aggregation (such as a sum or average) over
- * each key, using [[JavaPairRDD.reduceByKey]] or [[JavaPairRDD.combineByKey]]
+ * each key, using `JavaPairRDD.reduceByKey` or `JavaPairRDD.combineByKey`
* will provide much better performance.
*/
def groupByKey(partitioner: Partitioner): JavaPairRDD[K, JIterable[V]] =
@@ -416,7 +416,7 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])
 * resulting RDD into `numPartitions` partitions.
*
* @note If you are grouping in order to perform an aggregation (such as a sum or average) over
- * each key, using [[JavaPairRDD.reduceByKey]] or [[JavaPairRDD.combineByKey]]
+ * each key, using `JavaPairRDD.reduceByKey` or `JavaPairRDD.combineByKey`
* will provide much better performance.
*/
def groupByKey(numPartitions: Int): JavaPairRDD[K, JIterable[V]] =
@@ -546,7 +546,7 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])
* resulting RDD with the existing partitioner/parallelism level.
*
* @note If you are grouping in order to perform an aggregation (such as a sum or average) over
- * each key, using [[JavaPairRDD.reduceByKey]] or [[JavaPairRDD.combineByKey]]
+ * each key, using `JavaPairRDD.reduceByKey` or `JavaPairRDD.combineByKey`
* will provide much better performance.
*/
def groupByKey(): JavaPairRDD[K, JIterable[V]] =
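The note repeated in the hunks above steers per-key aggregations toward `reduceByKey`/`combineByKey` rather than `groupByKey`. A minimal sketch of the difference, assuming a local `SparkContext`; the object name and sample data are illustrative only:
{{{
import org.apache.spark.{SparkConf, SparkContext}

object GroupVsReduce {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("GroupVsReduce").setMaster("local[*]"))
    val pairs = sc.parallelize(Seq(("a", 1), ("b", 2), ("a", 3)))

    // groupByKey shuffles every value, then sums on the reduce side.
    val viaGroup = pairs.groupByKey().mapValues(_.sum)

    // reduceByKey pre-aggregates within each partition before the shuffle,
    // which is why the docs above recommend it for sums and averages.
    val viaReduce = pairs.reduceByKey(_ + _)

    println(viaGroup.collect().toMap)   // Map(a -> 4, b -> 2)
    println(viaReduce.collect().toMap)
    sc.stop()
  }
}
}}}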
diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaRDD.scala b/core/src/main/scala/org/apache/spark/api/java/JavaRDD.scala
index ccd94f876e..a20d264be5 100644
--- a/core/src/main/scala/org/apache/spark/api/java/JavaRDD.scala
+++ b/core/src/main/scala/org/apache/spark/api/java/JavaRDD.scala
@@ -103,10 +103,10 @@ class JavaRDD[T](val rdd: RDD[T])(implicit val classTag: ClassTag[T])
* @param withReplacement can elements be sampled multiple times (replaced when sampled out)
* @param fraction expected size of the sample as a fraction of this RDD's size
* without replacement: probability that each element is chosen; fraction must be [0, 1]
- * with replacement: expected number of times each element is chosen; fraction must be >= 0
+ * with replacement: expected number of times each element is chosen; fraction must be &gt;= 0
*
* @note This is NOT guaranteed to provide exactly the fraction of the count
- * of the given [[RDD]].
+ * of the given `RDD`.
*/
def sample(withReplacement: Boolean, fraction: Double): JavaRDD[T] =
sample(withReplacement, fraction, Utils.random.nextLong)
@@ -117,11 +117,11 @@ class JavaRDD[T](val rdd: RDD[T])(implicit val classTag: ClassTag[T])
* @param withReplacement can elements be sampled multiple times (replaced when sampled out)
* @param fraction expected size of the sample as a fraction of this RDD's size
* without replacement: probability that each element is chosen; fraction must be [0, 1]
- * with replacement: expected number of times each element is chosen; fraction must be >= 0
+ * with replacement: expected number of times each element is chosen; fraction must be &gt;= 0
* @param seed seed for the random number generator
*
* @note This is NOT guaranteed to provide exactly the fraction of the count
- * of the given [[RDD]].
+ * of the given `RDD`.
*/
def sample(withReplacement: Boolean, fraction: Double, seed: Long): JavaRDD[T] =
wrapRDD(rdd.sample(withReplacement, fraction, seed))
@@ -167,7 +167,7 @@ class JavaRDD[T](val rdd: RDD[T])(implicit val classTag: ClassTag[T])
* Return an RDD with the elements from `this` that are not in `other`.
*
* Uses `this` partitioner/partition size, because even if `other` is huge, the resulting
- * RDD will be <= us.
+ * RDD will be &lt;= us.
*/
def subtract(other: JavaRDD[T]): JavaRDD[T] = wrapRDD(rdd.subtract(other))
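As the updated comments stress, `sample` treats `fraction` as an expectation rather than an exact size. A small sketch, assuming an existing `SparkContext` named `sc`:
{{{
// Without replacement: fraction is the probability each element is kept (in [0, 1]).
// With replacement: fraction is the expected number of copies of each element (>= 0).
val rdd = sc.parallelize(1 to 1000)
val roughlyTenPercent = rdd.sample(withReplacement = false, fraction = 0.1, seed = 42L)
val roughlyDoubled = rdd.sample(withReplacement = true, fraction = 2.0, seed = 42L)
println(roughlyTenPercent.count())  // close to 100, but not guaranteed to be exactly 100
println(roughlyDoubled.count())     // close to 2000
}}}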
diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala b/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala
index 38d347aeab..9481156bc9 100644
--- a/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala
+++ b/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala
@@ -238,7 +238,9 @@ class JavaSparkContext(val sc: SparkContext)
* }}}
*
* Do
- * `JavaPairRDD<String, byte[]> rdd = sparkContext.dataStreamFiles("hdfs://a-hdfs-path")`,
+ * {{{
+ * JavaPairRDD<String, byte[]> rdd = sparkContext.dataStreamFiles("hdfs://a-hdfs-path")
+ * }}}
*
* then `rdd` contains
* {{{
@@ -270,7 +272,9 @@ class JavaSparkContext(val sc: SparkContext)
* }}}
*
* Do
- * `JavaPairRDD<String, byte[]> rdd = sparkContext.dataStreamFiles("hdfs://a-hdfs-path")`,
+ * {{{
+ * JavaPairRDD<String, byte[]> rdd = sparkContext.dataStreamFiles("hdfs://a-hdfs-path")
+ * }}},
*
* then `rdd` contains
* {{{
@@ -749,7 +753,7 @@ class JavaSparkContext(val sc: SparkContext)
/**
* Get a local property set in this thread, or null if it is missing. See
- * [[org.apache.spark.api.java.JavaSparkContext.setLocalProperty]].
+ * `org.apache.spark.api.java.JavaSparkContext.setLocalProperty`.
*/
def getLocalProperty(key: String): String = sc.getLocalProperty(key)
@@ -769,7 +773,7 @@ class JavaSparkContext(val sc: SparkContext)
* Application programmers can use this method to group all those jobs together and give a
* group description. Once set, the Spark web UI will associate such jobs with this group.
*
- * The application can also use [[org.apache.spark.api.java.JavaSparkContext.cancelJobGroup]]
+ * The application can also use `org.apache.spark.api.java.JavaSparkContext.cancelJobGroup`
* to cancel all running jobs in this group. For example,
* {{{
* // In the main thread:
@@ -802,7 +806,7 @@ class JavaSparkContext(val sc: SparkContext)
/**
* Cancel active jobs for the specified group. See
- * [[org.apache.spark.api.java.JavaSparkContext.setJobGroup]] for more information.
+ * `org.apache.spark.api.java.JavaSparkContext.setJobGroup` for more information.
*/
def cancelJobGroup(groupId: String): Unit = sc.cancelJobGroup(groupId)
diff --git a/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala b/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala
index 6ba79e506a..2e991ce394 100644
--- a/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala
+++ b/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala
@@ -172,7 +172,7 @@ private final object SnappyCompressionCodec {
}
/**
- * Wrapper over [[SnappyOutputStream]] which guards against write-after-close and double-close
+ * Wrapper over `SnappyOutputStream` which guards against write-after-close and double-close
* issues. See SPARK-7660 for more details. This wrapping can be removed if we upgrade to a version
* of snappy-java that contains the fix for https://github.com/xerial/snappy-java/issues/107.
*/
diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala
index bff2b8f1d0..8e67344758 100644
--- a/core/src/main/scala/org/apache/spark/rdd/RDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala
@@ -70,8 +70,8 @@ import org.apache.spark.util.random.{BernoulliCellSampler, BernoulliSampler, Poi
* All of the scheduling and execution in Spark is done based on these methods, allowing each RDD
* to implement its own way of computing itself. Indeed, users can implement custom RDDs (e.g. for
* reading data from a new storage system) by overriding these functions. Please refer to the
- * [[http://people.csail.mit.edu/matei/papers/2012/nsdi_spark.pdf Spark paper]] for more details
- * on RDD internals.
+ * <a href="http://people.csail.mit.edu/matei/papers/2012/nsdi_spark.pdf">Spark paper</a>
+ * for more details on RDD internals.
*/
abstract class RDD[T: ClassTag](
@transient private var _sc: SparkContext,
@@ -469,7 +469,7 @@ abstract class RDD[T: ClassTag](
* @param withReplacement can elements be sampled multiple times (replaced when sampled out)
* @param fraction expected size of the sample as a fraction of this RDD's size
* without replacement: probability that each element is chosen; fraction must be [0, 1]
- * with replacement: expected number of times each element is chosen; fraction must be >= 0
+ * with replacement: expected number of times each element is chosen; fraction must be &gt;= 0
* @param seed seed for the random number generator
*
* @note This is NOT guaranteed to provide exactly the fraction of the count
@@ -675,8 +675,8 @@ abstract class RDD[T: ClassTag](
* may even differ each time the resulting RDD is evaluated.
*
* @note This operation may be very expensive. If you are grouping in order to perform an
- * aggregation (such as a sum or average) over each key, using [[PairRDDFunctions.aggregateByKey]]
- * or [[PairRDDFunctions.reduceByKey]] will provide much better performance.
+ * aggregation (such as a sum or average) over each key, using `PairRDDFunctions.aggregateByKey`
+ * or `PairRDDFunctions.reduceByKey` will provide much better performance.
*/
def groupBy[K](f: T => K)(implicit kt: ClassTag[K]): RDD[(K, Iterable[T])] = withScope {
groupBy[K](f, defaultPartitioner(this))
@@ -688,8 +688,8 @@ abstract class RDD[T: ClassTag](
* may even differ each time the resulting RDD is evaluated.
*
* @note This operation may be very expensive. If you are grouping in order to perform an
- * aggregation (such as a sum or average) over each key, using [[PairRDDFunctions.aggregateByKey]]
- * or [[PairRDDFunctions.reduceByKey]] will provide much better performance.
+ * aggregation (such as a sum or average) over each key, using `PairRDDFunctions.aggregateByKey`
+ * or `PairRDDFunctions.reduceByKey` will provide much better performance.
*/
def groupBy[K](
f: T => K,
@@ -703,8 +703,8 @@ abstract class RDD[T: ClassTag](
* may even differ each time the resulting RDD is evaluated.
*
* @note This operation may be very expensive. If you are grouping in order to perform an
- * aggregation (such as a sum or average) over each key, using [[PairRDDFunctions.aggregateByKey]]
- * or [[PairRDDFunctions.reduceByKey]] will provide much better performance.
+ * aggregation (such as a sum or average) over each key, using `PairRDDFunctions.aggregateByKey`
+ * or `PairRDDFunctions.reduceByKey` will provide much better performance.
*/
def groupBy[K](f: T => K, p: Partitioner)(implicit kt: ClassTag[K], ord: Ordering[K] = null)
: RDD[(K, Iterable[T])] = withScope {
diff --git a/core/src/main/scala/org/apache/spark/security/CryptoStreamUtils.scala b/core/src/main/scala/org/apache/spark/security/CryptoStreamUtils.scala
index 8f15f50bee..f41fc38be2 100644
--- a/core/src/main/scala/org/apache/spark/security/CryptoStreamUtils.scala
+++ b/core/src/main/scala/org/apache/spark/security/CryptoStreamUtils.scala
@@ -46,7 +46,7 @@ private[spark] object CryptoStreamUtils extends Logging {
val COMMONS_CRYPTO_CONF_PREFIX = "commons.crypto."
/**
- * Helper method to wrap [[OutputStream]] with [[CryptoOutputStream]] for encryption.
+ * Helper method to wrap `OutputStream` with `CryptoOutputStream` for encryption.
*/
def createCryptoOutputStream(
os: OutputStream,
@@ -62,7 +62,7 @@ private[spark] object CryptoStreamUtils extends Logging {
}
/**
- * Helper method to wrap [[InputStream]] with [[CryptoInputStream]] for decryption.
+ * Helper method to wrap `InputStream` with `CryptoInputStream` for decryption.
*/
def createCryptoInputStream(
is: InputStream,
diff --git a/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala b/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala
index 19e020c968..7eb2da1c27 100644
--- a/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala
+++ b/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala
@@ -43,7 +43,8 @@ import org.apache.spark.util.{BoundedPriorityQueue, SerializableConfiguration, S
import org.apache.spark.util.collection.CompactBuffer
/**
- * A Spark serializer that uses the [[https://code.google.com/p/kryo/ Kryo serialization library]].
+ * A Spark serializer that uses the <a href="https://code.google.com/p/kryo/">
+ * Kryo serialization library</a>.
*
* @note This serializer is not guaranteed to be wire-compatible across different versions of
* Spark. It is intended to be used to serialize/de-serialize data within a single
diff --git a/core/src/main/scala/org/apache/spark/storage/BlockReplicationPolicy.scala b/core/src/main/scala/org/apache/spark/storage/BlockReplicationPolicy.scala
index bf087af16a..bb8a684b4c 100644
--- a/core/src/main/scala/org/apache/spark/storage/BlockReplicationPolicy.scala
+++ b/core/src/main/scala/org/apache/spark/storage/BlockReplicationPolicy.scala
@@ -89,17 +89,18 @@ class RandomBlockReplicationPolicy
prioritizedPeers
}
+ // scalastyle:off line.size.limit
/**
* Uses sampling algorithm by Robert Floyd. Finds a random sample in O(n) while
- * minimizing space usage
- * [[http://math.stackexchange.com/questions/178690/
- * whats-the-proof-of-correctness-for-robert-floyds-algorithm-for-selecting-a-sin]]
+ * minimizing space usage. Please see <a href="http://math.stackexchange.com/questions/178690/whats-the-proof-of-correctness-for-robert-floyds-algorithm-for-selecting-a-sin">
+ * here</a>.
*
* @param n total number of indices
* @param m number of samples needed
* @param r random number generator
* @return list of m random unique indices
*/
+ // scalastyle:on line.size.limit
private def getSampleIds(n: Int, m: Int, r: Random): List[Int] = {
val indices = (n - m + 1 to n).foldLeft(Set.empty[Int]) {case (set, i) =>
val t = r.nextInt(i) + 1
diff --git a/core/src/main/scala/org/apache/spark/ui/UIUtils.scala b/core/src/main/scala/org/apache/spark/ui/UIUtils.scala
index 57f6f2f0a9..dbeb970c81 100644
--- a/core/src/main/scala/org/apache/spark/ui/UIUtils.scala
+++ b/core/src/main/scala/org/apache/spark/ui/UIUtils.scala
@@ -422,8 +422,8 @@ private[spark] object UIUtils extends Logging {
 * the whole string will be rendered as simple escaped text.
*
* Note: In terms of security, only anchor tags with root relative links are supported. So any
- * attempts to embed links outside Spark UI, or other tags like <script> will cause in the whole
- * description to be treated as plain text.
+ * attempts to embed links outside Spark UI, or other tags like &lt;script&gt; will cause
+ * the whole description to be treated as plain text.
*
* @param desc the original job or stage description string, which may contain html tags.
* @param basePathUri with which to prepend the relative links; this is used when plainText is
diff --git a/core/src/main/scala/org/apache/spark/util/AccumulatorV2.scala b/core/src/main/scala/org/apache/spark/util/AccumulatorV2.scala
index 1326f0977c..00e0cf257c 100644
--- a/core/src/main/scala/org/apache/spark/util/AccumulatorV2.scala
+++ b/core/src/main/scala/org/apache/spark/util/AccumulatorV2.scala
@@ -224,7 +224,7 @@ private[spark] object AccumulatorContext {
* Registers an [[AccumulatorV2]] created on the driver such that it can be used on the executors.
*
* All accumulators registered here can later be used as a container for accumulating partial
- * values across multiple tasks. This is what [[org.apache.spark.scheduler.DAGScheduler]] does.
+ * values across multiple tasks. This is what `org.apache.spark.scheduler.DAGScheduler` does.
* Note: if an accumulator is registered here, it should also be registered with the active
* context cleaner for cleanup so as to avoid memory leaks.
*
diff --git a/core/src/main/scala/org/apache/spark/util/RpcUtils.scala b/core/src/main/scala/org/apache/spark/util/RpcUtils.scala
index e3b588374c..46a5cb2cff 100644
--- a/core/src/main/scala/org/apache/spark/util/RpcUtils.scala
+++ b/core/src/main/scala/org/apache/spark/util/RpcUtils.scala
@@ -23,7 +23,7 @@ import org.apache.spark.rpc.{RpcAddress, RpcEndpointRef, RpcEnv, RpcTimeout}
private[spark] object RpcUtils {
/**
- * Retrieve a [[RpcEndpointRef]] which is located in the driver via its name.
+ * Retrieve a `RpcEndpointRef` which is located in the driver via its name.
*/
def makeDriverRef(name: String, conf: SparkConf, rpcEnv: RpcEnv): RpcEndpointRef = {
val driverHost: String = conf.get("spark.driver.host", "localhost")
diff --git a/core/src/main/scala/org/apache/spark/util/StatCounter.scala b/core/src/main/scala/org/apache/spark/util/StatCounter.scala
index 45381365f1..1e02638591 100644
--- a/core/src/main/scala/org/apache/spark/util/StatCounter.scala
+++ b/core/src/main/scala/org/apache/spark/util/StatCounter.scala
@@ -22,8 +22,8 @@ import org.apache.spark.annotation.Since
/**
* A class for tracking the statistics of a set of numbers (count, mean and variance) in a
* numerically robust way. Includes support for merging two StatCounters. Based on Welford
- * and Chan's [[http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance algorithms]]
- * for running variance.
+ * and Chan's <a href="http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance">
+ * algorithms</a> for running variance.
*
* @constructor Initialize the StatCounter with the given values.
*/
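The merge step behind this class follows Chan's parallel variance formula. A self-contained sketch (not `StatCounter` itself) that tracks the count, the mean, and `m2`, the sum of squared deviations from the mean:
{{{
final case class Summary(n: Long, mean: Double, m2: Double) {
  def variance: Double = if (n == 0L) Double.NaN else m2 / n
  def merge(other: Summary): Summary =
    if (n == 0L) other
    else if (other.n == 0L) this
    else {
      val delta = other.mean - mean
      val total = n + other.n
      // Chan et al.: shift the mean by delta weighted by the other count, and correct m2
      // by the squared mean difference scaled by n * other.n / total.
      Summary(
        total,
        mean + delta * other.n / total,
        m2 + other.m2 + delta * delta * n * other.n / total)
    }
}
}}}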
diff --git a/core/src/main/scala/org/apache/spark/util/ThreadUtils.scala b/core/src/main/scala/org/apache/spark/util/ThreadUtils.scala
index d093e7bfc3..60a6e82c6f 100644
--- a/core/src/main/scala/org/apache/spark/util/ThreadUtils.scala
+++ b/core/src/main/scala/org/apache/spark/util/ThreadUtils.scala
@@ -180,8 +180,8 @@ private[spark] object ThreadUtils {
// scalastyle:off awaitresult
/**
- * Preferred alternative to [[Await.result()]]. This method wraps and re-throws any exceptions
- * thrown by the underlying [[Await]] call, ensuring that this thread's stack trace appears in
+ * Preferred alternative to `Await.result()`. This method wraps and re-throws any exceptions
+ * thrown by the underlying `Await` call, ensuring that this thread's stack trace appears in
* logs.
*/
@throws(classOf[SparkException])
@@ -196,7 +196,7 @@ private[spark] object ThreadUtils {
}
/**
- * Calls [[Awaitable.result]] directly to avoid using `ForkJoinPool`'s `BlockingContext`, wraps
+ * Calls `Awaitable.result` directly to avoid using `ForkJoinPool`'s `BlockingContext`, wraps
 * and re-throws any exceptions with a nice stack trace.
 *
 * Code running in the user's thread may be in a thread of Scala ForkJoinPool. As concurrent
diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala
index 748d729554..f051860a23 100644
--- a/core/src/main/scala/org/apache/spark/util/Utils.scala
+++ b/core/src/main/scala/org/apache/spark/util/Utils.scala
@@ -1673,8 +1673,8 @@ private[spark] object Utils extends Logging {
}
/**
- * NaN-safe version of [[java.lang.Double.compare()]] which allows NaN values to be compared
- * according to semantics where NaN == NaN and NaN > any non-NaN double.
+ * NaN-safe version of `java.lang.Double.compare()` which allows NaN values to be compared
+ * according to semantics where NaN == NaN and NaN &gt; any non-NaN double.
*/
def nanSafeCompareDoubles(x: Double, y: Double): Int = {
val xIsNan: Boolean = java.lang.Double.isNaN(x)
@@ -1687,8 +1687,8 @@ private[spark] object Utils extends Logging {
}
/**
- * NaN-safe version of [[java.lang.Float.compare()]] which allows NaN values to be compared
- * according to semantics where NaN == NaN and NaN > any non-NaN float.
+ * NaN-safe version of `java.lang.Float.compare()` which allows NaN values to be compared
+ * according to semantics where NaN == NaN and NaN &gt; any non-NaN float.
*/
def nanSafeCompareFloats(x: Float, y: Float): Int = {
val xIsNan: Boolean = java.lang.Float.isNaN(x)
@@ -2354,7 +2354,7 @@ private[spark] object Utils extends Logging {
 * A spark url (`spark://host:port`) is a special URI whose scheme is `spark` and which only contains
* host and port.
*
- * @throws SparkException if `sparkUrl` is invalid.
+ * @note Throws `SparkException` if sparkUrl is invalid.
*/
def extractHostPortFromSparkUrl(sparkUrl: String): (String, Int) = {
try {
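A sketch of the NaN-safe ordering these comments describe, where NaN equals NaN and sorts above every non-NaN value; this mirrors the documented semantics rather than reproducing the Spark utility itself:
{{{
def nanSafeCompare(x: Double, y: Double): Int = {
  val xIsNan = java.lang.Double.isNaN(x)
  val yIsNan = java.lang.Double.isNaN(y)
  if (xIsNan && yIsNan) 0          // NaN == NaN
  else if (xIsNan) 1               // NaN > any non-NaN double
  else if (yIsNan) -1
  else java.lang.Double.compare(x, y)
}

// nanSafeCompare(Double.NaN, Double.NaN) == 0
// nanSafeCompare(Double.NaN, Double.PositiveInfinity) > 0
}}}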
diff --git a/core/src/main/scala/org/apache/spark/util/io/ChunkedByteBuffer.scala b/core/src/main/scala/org/apache/spark/util/io/ChunkedByteBuffer.scala
index 89b0874e38..da08661d13 100644
--- a/core/src/main/scala/org/apache/spark/util/io/ChunkedByteBuffer.scala
+++ b/core/src/main/scala/org/apache/spark/util/io/ChunkedByteBuffer.scala
@@ -148,7 +148,7 @@ private[spark] class ChunkedByteBuffer(var chunks: Array[ByteBuffer]) {
/**
* Reads data from a ChunkedByteBuffer.
*
- * @param dispose if true, [[ChunkedByteBuffer.dispose()]] will be called at the end of the stream
+ * @param dispose if true, `ChunkedByteBuffer.dispose()` will be called at the end of the stream
* in order to close any memory-mapped files which back the buffer.
*/
private class ChunkedByteBufferInputStream(
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala b/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala
index 922ec7955f..c55a5885ba 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala
@@ -54,8 +54,8 @@ abstract class Graph[VD: ClassTag, ED: ClassTag] protected () extends Serializab
*
* @return an RDD containing the edges in this graph
*
- * @see [[Edge]] for the edge type.
- * @see [[Graph#triplets]] to get an RDD which contains all the edges
+ * @see `Edge` for the edge type.
+ * @see `Graph#triplets` to get an RDD which contains all the edges
* along with their vertex data.
*
*/
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/GraphLoader.scala b/graphx/src/main/scala/org/apache/spark/graphx/GraphLoader.scala
index f678e5f123..add21f41ea 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/GraphLoader.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/GraphLoader.scala
@@ -32,7 +32,7 @@ object GraphLoader extends Logging {
* id and a target id. Skips lines that begin with `#`.
*
* If desired the edges can be automatically oriented in the positive
- * direction (source Id < target Id) by setting `canonicalOrientation` to
+ * direction (source Id &lt; target Id) by setting `canonicalOrientation` to
* true.
*
* @example Loads a file in the following format:
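A hedged usage sketch of the loader described above; the path is a placeholder, and `canonicalOrientation = true` flips each edge so that the smaller vertex id becomes the source:
{{{
import org.apache.spark.graphx.GraphLoader

// Assumes an existing SparkContext `sc` and an edge-list file of "srcId dstId" lines.
val graph = GraphLoader.edgeListFile(sc, "data/graphx/followers.txt", canonicalOrientation = true)
println(graph.numEdges)
}}}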
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgeRDDImpl.scala b/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgeRDDImpl.scala
index 98e082cc44..faa985594e 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgeRDDImpl.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgeRDDImpl.scala
@@ -41,7 +41,7 @@ class EdgeRDDImpl[ED: ClassTag, VD: ClassTag] private[graphx] (
/**
* If `partitionsRDD` already has a partitioner, use it. Otherwise assume that the
- * [[PartitionID]]s in `partitionsRDD` correspond to the actual partitions and create a new
+ * `PartitionID`s in `partitionsRDD` correspond to the actual partitions and create a new
* partitioner that allows co-partitioning with `partitionsRDD`.
*/
override val partitioner =
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala b/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala
index f926984aa6..feb3f47667 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala
@@ -28,7 +28,7 @@ import org.apache.spark.ml.linalg.{Vector, Vectors}
/**
* PageRank algorithm implementation. There are two implementations of PageRank implemented.
*
- * The first implementation uses the standalone [[Graph]] interface and runs PageRank
+ * The first implementation uses the standalone `Graph` interface and runs PageRank
* for a fixed number of iterations:
* {{{
* var PR = Array.fill(n)( 1.0 )
@@ -41,7 +41,7 @@ import org.apache.spark.ml.linalg.{Vector, Vectors}
* }
* }}}
*
- * The second implementation uses the [[Pregel]] interface and runs PageRank until
+ * The second implementation uses the `Pregel` interface and runs PageRank until
* convergence:
*
* {{{
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/lib/SVDPlusPlus.scala b/graphx/src/main/scala/org/apache/spark/graphx/lib/SVDPlusPlus.scala
index bb2ffab0f6..59fdd855e6 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/lib/SVDPlusPlus.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/lib/SVDPlusPlus.scala
@@ -42,7 +42,8 @@ object SVDPlusPlus {
/**
* Implement SVD++ based on "Factorization Meets the Neighborhood:
* a Multifaceted Collaborative Filtering Model",
- * available at [[http://public.research.att.com/~volinsky/netflix/kdd08koren.pdf]].
+ * available at <a href="http://public.research.att.com/~volinsky/netflix/kdd08koren.pdf">
+ * here</a>.
*
* The prediction rule is rui = u + bu + bi + qi*(pu + |N(u)|^^-0.5^^*sum(y)),
* see the details on page 6.
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/lib/TriangleCount.scala b/graphx/src/main/scala/org/apache/spark/graphx/lib/TriangleCount.scala
index 34e9e22c3a..21b22968a1 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/lib/TriangleCount.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/lib/TriangleCount.scala
@@ -36,7 +36,7 @@ import org.apache.spark.graphx._
* self cycles and canonicalizes the graph to ensure that the following conditions hold:
* <ul>
* <li> There are no self edges</li>
- * <li> All edges are oriented src > dst</li>
+ * <li> All edges are oriented src &gt; dst</li>
* <li> There are no duplicate edges</li>
* </ul>
* However, the canonicalization procedure is costly as it requires repartitioning the graph.
diff --git a/mllib-local/src/main/scala/org/apache/spark/ml/stat/distribution/MultivariateGaussian.scala b/mllib-local/src/main/scala/org/apache/spark/ml/stat/distribution/MultivariateGaussian.scala
index 0be28677ef..3167e0c286 100644
--- a/mllib-local/src/main/scala/org/apache/spark/ml/stat/distribution/MultivariateGaussian.scala
+++ b/mllib-local/src/main/scala/org/apache/spark/ml/stat/distribution/MultivariateGaussian.scala
@@ -28,7 +28,8 @@ import org.apache.spark.ml.linalg.{Matrices, Matrix, Vector, Vectors}
* This class provides basic functionality for a Multivariate Gaussian (Normal) Distribution. In
* the event that the covariance matrix is singular, the density will be computed in a
* reduced dimensional subspace under which the distribution is supported.
- * (see [[http://en.wikipedia.org/wiki/Multivariate_normal_distribution#Degenerate_case]])
+ * (see <a href="http://en.wikipedia.org/wiki/Multivariate_normal_distribution#Degenerate_case">
+ * here</a>)
*
* @param mean The mean vector of the distribution
* @param cov The covariance matrix of the distribution
diff --git a/mllib/src/main/scala/org/apache/spark/ml/Predictor.scala b/mllib/src/main/scala/org/apache/spark/ml/Predictor.scala
index aa92edde7a..4b43a3aa5b 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/Predictor.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/Predictor.scala
@@ -59,7 +59,7 @@ private[ml] trait PredictorParams extends Params
/**
* :: DeveloperApi ::
* Abstraction for prediction problems (regression and classification). It accepts all NumericType
- * labels and will automatically cast it to DoubleType in [[fit()]].
+ * labels and will automatically cast it to DoubleType in `fit()`.
*
* @tparam FeaturesType Type of features.
* E.g., [[org.apache.spark.mllib.linalg.VectorUDT]] for vector features.
diff --git a/mllib/src/main/scala/org/apache/spark/ml/attribute/AttributeGroup.scala b/mllib/src/main/scala/org/apache/spark/ml/attribute/AttributeGroup.scala
index 12b9732a4c..527cb2d547 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/attribute/AttributeGroup.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/attribute/AttributeGroup.scala
@@ -239,7 +239,7 @@ object AttributeGroup {
}
}
- /** Creates an attribute group from a [[StructField]] instance. */
+ /** Creates an attribute group from a `StructField` instance. */
def fromStructField(field: StructField): AttributeGroup = {
require(field.dataType == new VectorUDT)
if (field.metadata.contains(ML_ATTR)) {
diff --git a/mllib/src/main/scala/org/apache/spark/ml/attribute/attributes.scala b/mllib/src/main/scala/org/apache/spark/ml/attribute/attributes.scala
index 27554acdf3..cc7e8bc301 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/attribute/attributes.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/attribute/attributes.scala
@@ -98,7 +98,7 @@ sealed abstract class Attribute extends Serializable {
def toMetadata(): Metadata = toMetadata(Metadata.empty)
/**
- * Converts to a [[StructField]] with some existing metadata.
+ * Converts to a `StructField` with some existing metadata.
* @param existingMetadata existing metadata to carry over
*/
def toStructField(existingMetadata: Metadata): StructField = {
@@ -109,7 +109,7 @@ sealed abstract class Attribute extends Serializable {
StructField(name.get, DoubleType, nullable = false, newMetadata)
}
- /** Converts to a [[StructField]]. */
+ /** Converts to a `StructField`. */
def toStructField(): StructField = toStructField(Metadata.empty)
override def toString: String = toMetadataImpl(withType = true).toString
diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
index d07b4adebb..fe29926e0d 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
@@ -56,13 +56,13 @@ private[classification] trait LogisticRegressionParams extends ProbabilisticClas
/**
* Set threshold in binary classification, in range [0, 1].
*
- * If the estimated probability of class label 1 is > threshold, then predict 1, else 0.
+ * If the estimated probability of class label 1 is &gt; threshold, then predict 1, else 0.
* A high threshold encourages the model to predict 0 more often;
* a low threshold encourages the model to predict 1 more often.
*
* Note: Calling this with threshold p is equivalent to calling `setThresholds(Array(1-p, p))`.
- * When [[setThreshold()]] is called, any user-set value for [[thresholds]] will be cleared.
- * If both [[threshold]] and [[thresholds]] are set in a ParamMap, then they must be
+ * When `setThreshold()` is called, any user-set value for `thresholds` will be cleared.
+ * If both `threshold` and `thresholds` are set in a ParamMap, then they must be
* equivalent.
*
* Default is 0.5.
@@ -101,12 +101,12 @@ private[classification] trait LogisticRegressionParams extends ProbabilisticClas
/**
* Get threshold for binary classification.
*
- * If [[thresholds]] is set with length 2 (i.e., binary classification),
+ * If `thresholds` is set with length 2 (i.e., binary classification),
* this returns the equivalent threshold: {{{1 / (1 + thresholds(0) / thresholds(1))}}}.
- * Otherwise, returns [[threshold]] if set, or its default value if unset.
+ * Otherwise, returns `threshold` if set, or its default value if unset.
*
* @group getParam
- * @throws IllegalArgumentException if [[thresholds]] is set to an array of length other than 2.
+ * @throws IllegalArgumentException if `thresholds` is set to an array of length other than 2.
*/
override def getThreshold: Double = {
checkThresholdConsistency()
@@ -122,13 +122,13 @@ private[classification] trait LogisticRegressionParams extends ProbabilisticClas
/**
* Set thresholds in multiclass (or binary) classification to adjust the probability of
- * predicting each class. Array must have length equal to the number of classes, with values > 0,
- * excepting that at most one value may be 0.
+ * predicting each class. Array must have length equal to the number of classes,
+ * with values &gt; 0, excepting that at most one value may be 0.
* The class with largest value p/t is predicted, where p is the original probability of that
* class and t is the class's threshold.
*
- * Note: When [[setThresholds()]] is called, any user-set value for [[threshold]] will be cleared.
- * If both [[threshold]] and [[thresholds]] are set in a ParamMap, then they must be
+ * Note: When `setThresholds()` is called, any user-set value for `threshold` will be cleared.
+ * If both `threshold` and `thresholds` are set in a ParamMap, then they must be
* equivalent.
*
* @group setParam
@@ -141,8 +141,8 @@ private[classification] trait LogisticRegressionParams extends ProbabilisticClas
/**
* Get thresholds for binary or multiclass classification.
*
- * If [[thresholds]] is set, return its value.
- * Otherwise, if [[threshold]] is set, return the equivalent thresholds for binary
+ * If `thresholds` is set, return its value.
+ * Otherwise, if `threshold` is set, return the equivalent thresholds for binary
* classification: (1-threshold, threshold).
* If neither are set, throw an exception.
*
@@ -159,9 +159,9 @@ private[classification] trait LogisticRegressionParams extends ProbabilisticClas
}
/**
- * If [[threshold]] and [[thresholds]] are both set, ensures they are consistent.
+ * If `threshold` and `thresholds` are both set, ensures they are consistent.
*
- * @throws IllegalArgumentException if [[threshold]] and [[thresholds]] are not equivalent
+ * @throws IllegalArgumentException if `threshold` and `thresholds` are not equivalent
*/
protected def checkThresholdConsistency(): Unit = {
if (isSet(threshold) && isSet(thresholds)) {
@@ -207,7 +207,7 @@ class LogisticRegression @Since("1.2.0") (
/**
* Set the ElasticNet mixing parameter.
* For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.
- * For 0 < alpha < 1, the penalty is a combination of L1 and L2.
+ * For 0 &lt; alpha &lt; 1, the penalty is a combination of L1 and L2.
* Default is 0.0 which is an L2 penalty.
*
* @group setParam
@@ -294,7 +294,7 @@ class LogisticRegression @Since("1.2.0") (
override def getThresholds: Array[Double] = super.getThresholds
/**
- * Suggested depth for treeAggregate (>= 2).
+ * Suggested depth for treeAggregate (&gt;= 2).
* If the dimensions of features or the number of partitions are large,
* this param could be adjusted to a larger size.
* Default is 2.
@@ -815,7 +815,7 @@ class LogisticRegressionModel private[spark] (
/**
* Predict label for the given feature vector.
- * The behavior of this can be adjusted using [[thresholds]].
+ * The behavior of this can be adjusted using `thresholds`.
*/
override protected def predict(features: Vector): Double = if (isMultinomial) {
super.predict(features)
@@ -1274,7 +1274,7 @@ class BinaryLogisticRegressionSummary private[classification] (
*
* The probability of the multinomial outcome $y$ taking on any of the K possible outcomes is:
*
- * <p><blockquote>
+ * <blockquote>
* $$
* P(y_i=0|\vec{x}_i, \beta) = \frac{e^{\vec{x}_i^T \vec{\beta}_0}}{\sum_{k=0}^{K-1}
* e^{\vec{x}_i^T \vec{\beta}_k}} \\
@@ -1283,7 +1283,7 @@ class BinaryLogisticRegressionSummary private[classification] (
* P(y_i=K-1|\vec{x}_i, \beta) = \frac{e^{\vec{x}_i^T \vec{\beta}_{K-1}}\,}{\sum_{k=0}^{K-1}
* e^{\vec{x}_i^T \vec{\beta}_k}}
* $$
- * </blockquote></p>
+ * </blockquote>
*
* The model coefficients $\beta = (\beta_0, \beta_1, \beta_2, ..., \beta_{K-1})$ become a matrix
* which has dimension of $K \times (N+1)$ if the intercepts are added. If the intercepts are not
@@ -1292,7 +1292,7 @@ class BinaryLogisticRegressionSummary private[classification] (
* Note that the coefficients in the model above lack identifiability. That is, any constant scalar
* can be added to all of the coefficients and the probabilities remain the same.
*
- * <p><blockquote>
+ * <blockquote>
* $$
* \begin{align}
* \frac{e^{\vec{x}_i^T \left(\vec{\beta}_0 + \vec{c}\right)}}{\sum_{k=0}^{K-1}
@@ -1302,7 +1302,7 @@ class BinaryLogisticRegressionSummary private[classification] (
* = \frac{e^{\vec{x}_i^T \vec{\beta}_0}}{\sum_{k=0}^{K-1} e^{\vec{x}_i^T \vec{\beta}_k}}
* \end{align}
* $$
- * </blockquote></p>
+ * </blockquote>
*
* However, when regularization is added to the loss function, the coefficients are indeed
* identifiable because there is only one set of coefficients which minimizes the regularization
@@ -1314,7 +1314,7 @@ class BinaryLogisticRegressionSummary private[classification] (
* The loss of objective function for a single instance of data (we do not include the
* regularization term here for simplicity) can be written as
*
- * <p><blockquote>
+ * <blockquote>
* $$
* \begin{align}
* \ell\left(\beta, x_i\right) &= -log{P\left(y_i \middle| \vec{x}_i, \beta\right)} \\
@@ -1322,14 +1322,14 @@ class BinaryLogisticRegressionSummary private[classification] (
* &= log\left(\sum_{k=0}^{K-1} e^{margins_k}\right) - margins_y
* \end{align}
* $$
- * </blockquote></p>
+ * </blockquote>
*
* where ${margins}_k = \vec{x}_i^T \vec{\beta}_k$.
*
* For optimization, we have to calculate the first derivative of the loss function, and a simple
* calculation shows that
*
- * <p><blockquote>
+ * <blockquote>
* $$
* \begin{align}
* \frac{\partial \ell(\beta, \vec{x}_i, w_i)}{\partial \beta_{j, k}}
@@ -1338,54 +1338,54 @@ class BinaryLogisticRegressionSummary private[classification] (
* &= x_{i, j} \cdot w_i \cdot multiplier_k
* \end{align}
* $$
- * </blockquote></p>
+ * </blockquote>
*
* where $w_i$ is the sample weight, $I_{y=k}$ is an indicator function
*
- * <p><blockquote>
+ * <blockquote>
* $$
* I_{y=k} = \begin{cases}
* 1 & y = k \\
* 0 & else
* \end{cases}
* $$
- * </blockquote></p>
+ * </blockquote>
*
* and
*
- * <p><blockquote>
+ * <blockquote>
* $$
* multiplier_k = \left(\frac{e^{\vec{x}_i \cdot \vec{\beta}_k}}{\sum_{k=0}^{K-1}
* e^{\vec{x}_i \cdot \vec{\beta}_k}} - I_{y=k}\right)
* $$
- * </blockquote></p>
+ * </blockquote>
*
* If any of margins is larger than 709.78, the numerical computation of multiplier and loss
* function will suffer from arithmetic overflow. This issue occurs when there are outliers in
* data which are far away from the hyperplane, and this will cause the failing of training once
- * infinity is introduced. Note that this is only a concern when max(margins) > 0.
+ * infinity is introduced. Note that this is only a concern when max(margins) &gt; 0.
*
- * Fortunately, when max(margins) = maxMargin > 0, the loss function and the multiplier can easily
- * be rewritten into the following equivalent numerically stable formula.
+ * Fortunately, when max(margins) = maxMargin &gt; 0, the loss function and the multiplier can
+ * easily be rewritten into the following equivalent numerically stable formula.
*
- * <p><blockquote>
+ * <blockquote>
* $$
* \ell\left(\beta, x\right) = log\left(\sum_{k=0}^{K-1} e^{margins_k - maxMargin}\right) -
* margins_{y} + maxMargin
* $$
- * </blockquote></p>
+ * </blockquote>
*
* Note that each term, $(margins_k - maxMargin)$ in the exponential is no greater than zero; as a
* result, overflow will not happen with this formula.
*
* For $multiplier$, a similar trick can be applied as the following,
*
- * <p><blockquote>
+ * <blockquote>
* $$
* multiplier_k = \left(\frac{e^{\vec{x}_i \cdot \vec{\beta}_k - maxMargin}}{\sum_{k'=0}^{K-1}
* e^{\vec{x}_i \cdot \vec{\beta}_{k'} - maxMargin}} - I_{y=k}\right)
* $$
- * </blockquote></p>
+ * </blockquote>
*
* @param bcCoefficients The broadcast coefficients corresponding to the features.
* @param bcFeaturesStd The broadcast standard deviation values of the features.
@@ -1513,7 +1513,7 @@ private class LogisticAggregator(
}
/**
- * When maxMargin > 0, the original formula could cause overflow.
+ * When maxMargin &gt; 0, the original formula could cause overflow.
* We address this by subtracting maxMargin from all the margins, so it's guaranteed
* that all of the new margins will be smaller than zero to prevent arithmetic overflow.
*/
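The maxMargin trick documented above can be illustrated with a short sketch (not the `LogisticAggregator` code itself): shifting every margin by the maximum keeps all exponents at or below zero, so `exp` cannot overflow, while the loss and multipliers are algebraically unchanged.
{{{
def stableLossAndMultipliers(margins: Array[Double], label: Int): (Double, Array[Double]) = {
  val maxMargin = margins.max
  val shifted = margins.map(_ - maxMargin)           // every entry <= 0
  val sumExp = shifted.map(math.exp).sum
  val loss = math.log(sumExp) - shifted(label)       // == log(sum_k e^{margins_k}) - margins_label
  val multipliers = shifted.zipWithIndex.map { case (m, k) =>
    math.exp(m) / sumExp - (if (k == label) 1.0 else 0.0)
  }
  (loss, multipliers)
}
}}}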
diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifier.scala
index 88fe7cb4a6..1b45eafbac 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifier.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifier.scala
@@ -289,7 +289,6 @@ object MultilayerPerceptronClassifier
* @param uid uid
* @param layers array of layer sizes including input and output layers
* @param weights the weights of layers
- * @return prediction model
*/
@Since("1.5.0")
@Experimental
diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/NaiveBayes.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/NaiveBayes.scala
index f1a7676c74..a2ac700000 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/NaiveBayes.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/NaiveBayes.scala
@@ -60,16 +60,20 @@ private[ml] trait NaiveBayesParams extends PredictorParams with HasWeightCol {
final def getModelType: String = $(modelType)
}
+// scalastyle:off line.size.limit
/**
* Naive Bayes Classifiers.
* It supports Multinomial NB
- * ([[http://nlp.stanford.edu/IR-book/html/htmledition/naive-bayes-text-classification-1.html]])
+ * (see <a href="http://nlp.stanford.edu/IR-book/html/htmledition/naive-bayes-text-classification-1.html">
+ * here</a>)
* which can handle finitely supported discrete data. For example, by converting documents into
* TF-IDF vectors, it can be used for document classification. By making every vector a
* binary (0/1) data, it can also be used as Bernoulli NB
- * ([[http://nlp.stanford.edu/IR-book/html/htmledition/the-bernoulli-model-1.html]]).
+ * (see <a href="http://nlp.stanford.edu/IR-book/html/htmledition/the-bernoulli-model-1.html">
+ * here</a>).
* The input feature values must be nonnegative.
*/
+// scalastyle:on line.size.limit
@Since("1.5.0")
class NaiveBayes @Since("1.5.0") (
@Since("1.5.0") override val uid: String)
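A minimal usage sketch of the estimator documented above, assuming DataFrames `training` and `test` with a `label` column and nonnegative `features` vectors (the DataFrame names are illustrative):
{{{
import org.apache.spark.ml.classification.NaiveBayes

val nb = new NaiveBayes()
  .setModelType("multinomial")   // or "bernoulli" for binary 0/1 features
  .setSmoothing(1.0)
val model = nb.fit(training)
model.transform(test).select("features", "prediction").show(5)
}}}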
diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/RandomForestClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/RandomForestClassifier.scala
index 52345b0626..907c73e2e4 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/RandomForestClassifier.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/RandomForestClassifier.scala
@@ -36,7 +36,7 @@ import org.apache.spark.sql.functions._
/**
- * [[http://en.wikipedia.org/wiki/Random_forest Random Forest]] learning algorithm for
+ * <a href="http://en.wikipedia.org/wiki/Random_forest">Random Forest</a> learning algorithm for
* classification.
* It supports both binary and multiclass labels, as well as both continuous and categorical
* features.
@@ -144,7 +144,7 @@ object RandomForestClassifier extends DefaultParamsReadable[RandomForestClassifi
}
/**
- * [[http://en.wikipedia.org/wiki/Random_forest Random Forest]] model for classification.
+ * <a href="http://en.wikipedia.org/wiki/Random_forest">Random Forest</a> model for classification.
* It supports both binary and multiclass labels, as well as both continuous and categorical
* features.
*
@@ -249,7 +249,7 @@ class RandomForestClassificationModel private[ml] (
* (Hastie, Tibshirani, Friedman. "The Elements of Statistical Learning, 2nd Edition." 2001.)
* and follows the implementation from scikit-learn.
*
- * @see [[DecisionTreeClassificationModel.featureImportances]]
+ * @see `DecisionTreeClassificationModel.featureImportances`
*/
@Since("1.5.0")
lazy val featureImportances: Vector = TreeEnsembleModel.featureImportances(trees, numFeatures)
diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala
index cf11ba37ab..c7a170ddc7 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala
@@ -42,7 +42,7 @@ private[clustering] trait BisectingKMeansParams extends Params
with HasMaxIter with HasFeaturesCol with HasSeed with HasPredictionCol {
/**
- * The desired number of leaf clusters. Must be > 1. Default: 4.
+ * The desired number of leaf clusters. Must be &gt; 1. Default: 4.
* The actual number could be smaller if there are no divisible leaf clusters.
* @group param
*/
@@ -55,8 +55,8 @@ private[clustering] trait BisectingKMeansParams extends Params
def getK: Int = $(k)
/**
- * The minimum number of points (if >= 1.0) or the minimum proportion
- * of points (if < 1.0) of a divisible cluster (default: 1.0).
+ * The minimum number of points (if &gt;= 1.0) or the minimum proportion
+ * of points (if &lt; 1.0) of a divisible cluster (default: 1.0).
* @group expertParam
*/
@Since("2.0.0")
@@ -208,9 +208,9 @@ object BisectingKMeansModel extends MLReadable[BisectingKMeansModel] {
 * If bisecting all divisible clusters on the bottom level would result in more than `k` leaf clusters,
* larger clusters get higher priority.
*
- * @see [[http://glaros.dtc.umn.edu/gkhome/fetch/papers/docclusterKDDTMW00.pdf
- * Steinbach, Karypis, and Kumar, A comparison of document clustering techniques,
- * KDD Workshop on Text Mining, 2000.]]
+ * @see <a href="http://glaros.dtc.umn.edu/gkhome/fetch/papers/docclusterKDDTMW00.pdf">
+ * Steinbach, Karypis, and Kumar, A comparison of document clustering techniques,
+ * KDD Workshop on Text Mining, 2000.</a>
*/
@Since("2.0.0")
@Experimental
@@ -296,7 +296,7 @@ object BisectingKMeans extends DefaultParamsReadable[BisectingKMeans] {
* :: Experimental ::
* Summary of BisectingKMeans.
*
- * @param predictions [[DataFrame]] produced by [[BisectingKMeansModel.transform()]].
+ * @param predictions `DataFrame` produced by `BisectingKMeansModel.transform()`.
* @param predictionCol Name for column of predicted clusters in `predictions`.
* @param featuresCol Name for column of features in `predictions`.
* @param k Number of clusters.
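For context, a minimal fit/summary sketch of the estimator whose parameters are documented above (toy data and a local SparkSession, invented for illustration):
{{{
import org.apache.spark.ml.clustering.BisectingKMeans
import org.apache.spark.ml.linalg.Vectors
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().master("local[*]").appName("bkm-sketch").getOrCreate()
val data = spark.createDataFrame(Seq(
  Tuple1(Vectors.dense(0.0, 0.0)), Tuple1(Vectors.dense(0.1, 0.1)),
  Tuple1(Vectors.dense(9.0, 9.0)), Tuple1(Vectors.dense(9.1, 9.1))
)).toDF("features")

val model = new BisectingKMeans()
  .setK(2)                         // desired number of leaf clusters, must be > 1
  .setMinDivisibleClusterSize(1.0) // expert parameter described above
  .fit(data)
println(model.computeCost(data))   // sum of squared distances to the nearest centers
model.transform(data).show()
}}}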
diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/ClusteringSummary.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/ClusteringSummary.scala
index 8b5f525194..44e832b058 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/clustering/ClusteringSummary.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/ClusteringSummary.scala
@@ -24,7 +24,7 @@ import org.apache.spark.sql.{DataFrame, Row}
* :: Experimental ::
* Summary of clustering algorithms.
*
- * @param predictions [[DataFrame]] produced by model.transform().
+ * @param predictions `DataFrame` produced by model.transform().
* @param predictionCol Name for column of predicted clusters in `predictions`.
* @param featuresCol Name for column of features in `predictions`.
* @param k Number of clusters.
diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala
index 19998ca44b..74109344aa 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala
@@ -44,7 +44,7 @@ private[clustering] trait GaussianMixtureParams extends Params with HasMaxIter w
with HasSeed with HasPredictionCol with HasProbabilityCol with HasTol {
/**
- * Number of independent Gaussians in the mixture model. Must be > 1. Default: 2.
+ * Number of independent Gaussians in the mixture model. Must be &gt; 1. Default: 2.
* @group param
*/
@Since("2.0.0")
@@ -76,7 +76,7 @@ private[clustering] trait GaussianMixtureParams extends Params with HasMaxIter w
* @param weights Weight for each Gaussian distribution in the mixture.
* This is a multinomial probability distribution over the k Gaussians,
* where weights(i) is the weight for Gaussian i, and weights sum to 1.
- * @param gaussians Array of [[MultivariateGaussian]] where gaussians(i) represents
+ * @param gaussians Array of `MultivariateGaussian` where gaussians(i) represents
* the Multivariate Gaussian (Normal) Distribution for Gaussian i
*/
@Since("2.0.0")
@@ -374,7 +374,7 @@ object GaussianMixture extends DefaultParamsReadable[GaussianMixture] {
* :: Experimental ::
* Summary of GaussianMixture.
*
- * @param predictions [[DataFrame]] produced by [[GaussianMixtureModel.transform()]].
+ * @param predictions `DataFrame` produced by `GaussianMixtureModel.transform()`.
* @param predictionCol Name for column of predicted clusters in `predictions`.
* @param probabilityCol Name for column of predicted probability of each cluster
* in `predictions`.
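A minimal sketch showing how the `weights` and `gaussians` parameters described above surface on a fitted model (toy one-dimensional data, invented for illustration):
{{{
import org.apache.spark.ml.clustering.GaussianMixture
import org.apache.spark.ml.linalg.Vectors
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().master("local[*]").appName("gmm-sketch").getOrCreate()
val data = spark.createDataFrame(Seq(
  Tuple1(Vectors.dense(-0.1)), Tuple1(Vectors.dense(0.1)),
  Tuple1(Vectors.dense(9.9)), Tuple1(Vectors.dense(10.1))
)).toDF("features")

val model = new GaussianMixture().setK(2).setMaxIter(50).fit(data)
// weights(i) and gaussians(i) describe mixture component i, as documented above.
model.weights.zip(model.gaussians).foreach { case (w, g) =>
  println(s"weight=$w mean=${g.mean} cov=${g.cov}")
}
}}}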
diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala
index 152bd13b7a..6e124eb6dd 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala
@@ -42,7 +42,7 @@ private[clustering] trait KMeansParams extends Params with HasMaxIter with HasFe
with HasSeed with HasPredictionCol with HasTol {
/**
- * The number of clusters to create (k). Must be > 1. Note that it is possible for fewer than
+ * The number of clusters to create (k). Must be &gt; 1. Note that it is possible for fewer than
* k clusters to be returned, for example, if there are fewer than k distinct points to cluster.
* Default: 2.
* @group param
@@ -72,7 +72,7 @@ private[clustering] trait KMeansParams extends Params with HasMaxIter with HasFe
/**
* Param for the number of steps for the k-means|| initialization mode. This is an advanced
- * setting -- the default of 2 is almost always enough. Must be > 0. Default: 2.
+ * setting -- the default of 2 is almost always enough. Must be &gt; 0. Default: 2.
* @group expertParam
*/
@Since("1.5.0")
@@ -250,7 +250,7 @@ object KMeansModel extends MLReadable[KMeansModel] {
* :: Experimental ::
* K-means clustering with support for k-means|| initialization proposed by Bahmani et al.
*
- * @see [[http://dx.doi.org/10.14778/2180912.2180915 Bahmani et al., Scalable k-means++.]]
+ * @see <a href="http://dx.doi.org/10.14778/2180912.2180915">Bahmani et al., Scalable k-means++.</a>
*/
@Since("1.5.0")
@Experimental
@@ -346,7 +346,7 @@ object KMeans extends DefaultParamsReadable[KMeans] {
* :: Experimental ::
* Summary of KMeans.
*
- * @param predictions [[DataFrame]] produced by [[KMeansModel.transform()]].
+ * @param predictions `DataFrame` produced by `KMeansModel.transform()`.
* @param predictionCol Name for column of predicted clusters in `predictions`.
* @param featuresCol Name for column of features in `predictions`.
* @param k Number of clusters.
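A minimal sketch of the k-means|| settings mentioned above (toy data and a local SparkSession, invented for illustration):
{{{
import org.apache.spark.ml.clustering.KMeans
import org.apache.spark.ml.linalg.Vectors
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().master("local[*]").appName("kmeans-sketch").getOrCreate()
val data = spark.createDataFrame(Seq(
  Tuple1(Vectors.dense(0.0, 0.0)), Tuple1(Vectors.dense(1.0, 1.0)),
  Tuple1(Vectors.dense(9.0, 8.0)), Tuple1(Vectors.dense(8.0, 9.0))
)).toDF("features")

val model = new KMeans()
  .setK(2)                  // must be > 1
  .setInitMode("k-means||") // Bahmani et al. initialization referenced above
  .setInitSteps(2)          // expert param; the default of 2 is almost always enough
  .setSeed(1L)
  .fit(data)
model.clusterCenters.foreach(println)
}}}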
diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/LDA.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/LDA.scala
index 7773802854..6032ab3db9 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/clustering/LDA.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/LDA.scala
@@ -50,7 +50,7 @@ private[clustering] trait LDAParams extends Params with HasFeaturesCol with HasM
with HasSeed with HasCheckpointInterval {
/**
- * Param for the number of topics (clusters) to infer. Must be > 1. Default: 10.
+ * Param for the number of topics (clusters) to infer. Must be &gt; 1. Default: 10.
*
* @group param
*/
@@ -78,13 +78,13 @@ private[clustering] trait LDAParams extends Params with HasFeaturesCol with HasM
* - EM
* - Currently only supports symmetric distributions, so all values in the vector should be
* the same.
- * - Values should be > 1.0
+ * - Values should be &gt; 1.0
* - default = uniformly (50 / k) + 1, where 50/k is common in LDA libraries and +1 follows
* from Asuncion et al. (2009), who recommend a +1 adjustment for EM.
* - Online
- * - Values should be >= 0
+ * - Values should be &gt;= 0
* - default = uniformly (1.0 / k), following the implementation from
- * [[https://github.com/Blei-Lab/onlineldavb]].
+ * <a href="https://github.com/Blei-Lab/onlineldavb">here</a>.
*
* @group param
*/
@@ -120,13 +120,13 @@ private[clustering] trait LDAParams extends Params with HasFeaturesCol with HasM
*
* Optimizer-specific parameter settings:
* - EM
- * - Value should be > 1.0
+ * - Value should be &gt; 1.0
* - default = 0.1 + 1, where 0.1 gives a small amount of smoothing and +1 follows
* Asuncion et al. (2009), who recommend a +1 adjustment for EM.
* - Online
- * - Value should be >= 0
+ * - Value should be &gt;= 0
* - default = (1.0 / k), following the implementation from
- * [[https://github.com/Blei-Lab/onlineldavb]].
+ * <a href="https://github.com/Blei-Lab/onlineldavb">here</a>.
*
* @group param
*/
@@ -162,11 +162,11 @@ private[clustering] trait LDAParams extends Params with HasFeaturesCol with HasM
* - Online LDA:
* Hoffman, Blei and Bach. "Online Learning for Latent Dirichlet Allocation."
* Neural Information Processing Systems, 2010.
- * [[http://www.cs.columbia.edu/~blei/papers/HoffmanBleiBach2010b.pdf]]
+ * See <a href="http://www.cs.columbia.edu/~blei/papers/HoffmanBleiBach2010b.pdf">here</a>
* - EM:
* Asuncion et al. "On Smoothing and Inference for Topic Models."
* Uncertainty in Artificial Intelligence, 2009.
- * [[http://arxiv.org/pdf/1205.2662.pdf]]
+ * See <a href="http://arxiv.org/pdf/1205.2662.pdf">here</a>
*
* @group param
*/
@@ -245,9 +245,9 @@ private[clustering] trait LDAParams extends Params with HasFeaturesCol with HasM
* Fraction of the corpus to be sampled and used in each iteration of mini-batch gradient descent,
* in range (0, 1].
*
- * Note that this should be adjusted in synch with [[LDA.maxIter]]
+ * Note that this should be adjusted in sync with `LDA.maxIter`
* so the entire corpus is used. Specifically, set both so that
- * maxIterations * miniBatchFraction >= 1.
+ * maxIterations * miniBatchFraction &gt;= 1.
*
* Note: This is the same as the `miniBatchFraction` parameter in
* [[org.apache.spark.mllib.clustering.OnlineLDAOptimizer]].
@@ -293,8 +293,8 @@ private[clustering] trait LDAParams extends Params with HasFeaturesCol with HasM
* cause failures if a data partition is lost, so set this bit with care.
* Note that checkpoints will be cleaned up via reference counting, regardless.
*
- * See [[DistributedLDAModel.getCheckpointFiles]] for getting remaining checkpoints and
- * [[DistributedLDAModel.deleteCheckpointFiles]] for removing remaining checkpoints.
+ * See `DistributedLDAModel.getCheckpointFiles` for getting remaining checkpoints and
+ * `DistributedLDAModel.deleteCheckpointFiles` for removing remaining checkpoints.
*
* Default: true
*
@@ -431,7 +431,7 @@ sealed abstract class LDAModel private[ml] (
private[ml] def getEffectiveTopicConcentration: Double = getModel.topicConcentration
/**
- * The features for LDA should be a [[Vector]] representing the word counts in a document.
+ * The features for LDA should be a `Vector` representing the word counts in a document.
* The vector should be of length vocabSize, with counts for each term (word).
*
* @group setParam
@@ -650,7 +650,7 @@ object LocalLDAModel extends MLReadable[LocalLDAModel] {
* for each training document.
*
* @param oldLocalModelOption Used to implement [[oldLocalModel]] as a lazy val, but keeping
- * [[copy()]] cheap.
+ * `copy()` cheap.
*/
@Since("1.6.0")
@Experimental
@@ -701,7 +701,7 @@ class DistributedLDAModel private[ml] (
* - Even with [[logPrior]], this is NOT the same as the data log likelihood given the
* hyperparameters.
* - This is computed from the topic distributions computed during training. If you call
- * [[logLikelihood()]] on the same training dataset, the topic distributions will be computed
+ * `logLikelihood()` on the same training dataset, the topic distributions will be computed
* again, possibly giving different results.
*/
@Since("1.6.0")
@@ -719,7 +719,7 @@ class DistributedLDAModel private[ml] (
/**
* :: DeveloperApi ::
*
- * If using checkpointing and [[LDA.keepLastCheckpoint]] is set to true, then there may be
+ * If using checkpointing and `LDA.keepLastCheckpoint` is set to true, then there may be
* saved checkpoint files. This method is provided so that users can manage those files.
*
* Note that removing the checkpoints can cause failures if a partition is lost and is needed
@@ -804,13 +804,13 @@ object DistributedLDAModel extends MLReadable[DistributedLDAModel] {
*
* Input data (featuresCol):
* LDA is given a collection of documents as input data, via the featuresCol parameter.
- * Each document is specified as a [[Vector]] of length vocabSize, where each entry is the
+ * Each document is specified as a `Vector` of length vocabSize, where each entry is the
* count for the corresponding term (word) in the document. Feature transformers such as
* [[org.apache.spark.ml.feature.Tokenizer]] and [[org.apache.spark.ml.feature.CountVectorizer]]
* can be useful for converting text to word count vectors.
*
- * @see [[http://en.wikipedia.org/wiki/Latent_Dirichlet_allocation Latent Dirichlet allocation
- * (Wikipedia)]]
+ * @see <a href="http://en.wikipedia.org/wiki/Latent_Dirichlet_allocation">
+ * Latent Dirichlet allocation (Wikipedia)</a>
*/
@Since("1.6.0")
@Experimental
@@ -826,7 +826,7 @@ class LDA @Since("1.6.0") (
optimizeDocConcentration -> true, keepLastCheckpoint -> true)
/**
- * The features for LDA should be a [[Vector]] representing the word counts in a document.
+ * The features for LDA should be a `Vector` representing the word counts in a document.
* The vector should be of length vocabSize, with counts for each term (word).
*
* @group setParam
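A minimal sketch of the word-count input and online optimizer described above; the tiny corpus (vocabSize = 4) is invented for illustration:
{{{
import org.apache.spark.ml.clustering.LDA
import org.apache.spark.ml.linalg.Vectors
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().master("local[*]").appName("lda-sketch").getOrCreate()
// Each row is a vocabSize-length vector of term counts for one document.
val corpus = spark.createDataFrame(Seq(
  Tuple1(Vectors.dense(1.0, 0.0, 3.0, 0.0)),
  Tuple1(Vectors.dense(0.0, 2.0, 0.0, 1.0))
)).toDF("features")

val model = new LDA()
  .setK(2)
  .setMaxIter(20)
  .setOptimizer("online")
  .setSubsamplingRate(1.0) // with maxIter = 20, maxIter * subsamplingRate >= 1 holds
  .fit(corpus)
model.describeTopics(3).show(truncate = false)
}}}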
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/DCT.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/DCT.scala
index 6ff36b35ca..682787a830 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/DCT.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/DCT.scala
@@ -32,7 +32,8 @@ import org.apache.spark.sql.types.DataType
* It returns a real vector of the same length representing the DCT. The return vector is scaled
* such that the transform matrix is unitary (aka scaled DCT-II).
*
- * More information on [[https://en.wikipedia.org/wiki/Discrete_cosine_transform#DCT-II Wikipedia]].
+ * More information on <a href="https://en.wikipedia.org/wiki/Discrete_cosine_transform#DCT-II">
+ * DCT-II in Discrete cosine transform (Wikipedia)</a>.
*/
@Since("1.5.0")
class DCT @Since("1.5.0") (@Since("1.5.0") override val uid: String)
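A minimal sketch of the transformer (the input vector is invented for illustration):
{{{
import org.apache.spark.ml.feature.DCT
import org.apache.spark.ml.linalg.Vectors
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().master("local[*]").appName("dct-sketch").getOrCreate()
val df = spark.createDataFrame(Seq(
  Tuple1(Vectors.dense(0.0, 1.0, -2.0, 3.0))
)).toDF("features")

val dct = new DCT()
  .setInputCol("features")
  .setOutputCol("featuresDCT")
  .setInverse(false) // forward, unitary (scaled) DCT-II as described above
dct.transform(df).select("featuresDCT").show(truncate = false)
}}}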
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/MinHash.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/MinHash.scala
index d9d0f32254..f37233e1ab 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/MinHash.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/MinHash.scala
@@ -37,7 +37,8 @@ import org.apache.spark.sql.types.StructType
* where `k_i` is the i-th coefficient, and both `x` and `k_i` are from `Z_prime^*`
*
* Reference:
- * [[https://en.wikipedia.org/wiki/Perfect_hash_function Wikipedia on Perfect Hash Function]]
+ * <a href="https://en.wikipedia.org/wiki/Perfect_hash_function">
+ * Wikipedia on Perfect Hash Function</a>
*
* @param numEntries The number of entries of the hash functions.
* @param randCoefficients An array of random coefficients, each used by one hash function.
@@ -98,7 +99,7 @@ class MinHashModel private[ml] (
* as binary "1" values.
*
* References:
- * [[https://en.wikipedia.org/wiki/MinHash Wikipedia on MinHash]]
+ * <a href="https://en.wikipedia.org/wiki/MinHash">Wikipedia on MinHash</a>
*/
@Experimental
@Since("2.1.0")
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala
index ccfb0ce8f8..19978c97d2 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala
@@ -78,11 +78,11 @@ private[feature] trait MinMaxScalerParams extends Params with HasInputCol with H
* statistics, which is also known as min-max normalization or Rescaling. The rescaled value for
* feature E is calculated as:
*
- * <p><blockquote>
+ * <blockquote>
* $$
* Rescaled(e_i) = \frac{e_i - E_{min}}{E_{max} - E_{min}} * (max - min) + min
* $$
- * </blockquote></p>
+ * </blockquote>
*
* For the case $E_{max} == E_{min}$, $Rescaled(e_i) = 0.5 * (max + min)$.
*
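As a small sketch of the formula above: with the default output range [0, 1], a feature whose observed range is [1, 5] maps 1 to 0, 3 to 0.5 and 5 to 1. The toy data below is invented for illustration:
{{{
import org.apache.spark.ml.feature.MinMaxScaler
import org.apache.spark.ml.linalg.Vectors
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().master("local[*]").appName("minmax-sketch").getOrCreate()
val df = spark.createDataFrame(Seq(
  Tuple1(Vectors.dense(1.0, 0.0)),
  Tuple1(Vectors.dense(3.0, 10.0)),
  Tuple1(Vectors.dense(5.0, 20.0))
)).toDF("features")

val model = new MinMaxScaler()
  .setInputCol("features")
  .setOutputCol("scaled")
  .setMin(0.0)  // lower bound of the output range
  .setMax(1.0)  // upper bound of the output range
  .fit(df)      // learns E_min and E_max per feature
// First feature: 1.0 -> 0.0, 3.0 -> 0.5, 5.0 -> 1.0, matching Rescaled(e_i) above.
model.transform(df).show(truncate = false)
}}}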
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala
index 25fb6be5af..4be17da3e9 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala
@@ -30,10 +30,12 @@ import org.apache.spark.sql.types.DataType
/**
 * Perform feature expansion in a polynomial space. As described in the Wikipedia article on Polynomial Expansion,
- * which is available at [[http://en.wikipedia.org/wiki/Polynomial_expansion]], "In mathematics, an
- * expansion of a product of sums expresses it as a sum of products by using the fact that
- * multiplication distributes over addition". Take a 2-variable feature vector as an example:
- * `(x, y)`, if we want to expand it with degree 2, then we get `(x, x * x, y, x * y, y * y)`.
+ * which is available at
+ * <a href="http://en.wikipedia.org/wiki/Polynomial_expansion">Polynomial expansion (Wikipedia)</a>
+ * , "In mathematics, an expansion of a product of sums expresses it as a sum of products by using
+ * the fact that multiplication distributes over addition". Take a 2-variable feature vector
+ * as an example: `(x, y)`, if we want to expand it with degree 2, then we get
+ * `(x, x * x, y, x * y, y * y)`.
*/
@Since("1.4.0")
class PolynomialExpansion @Since("1.4.0") (@Since("1.4.0") override val uid: String)
@@ -76,11 +78,11 @@ class PolynomialExpansion @Since("1.4.0") (@Since("1.4.0") override val uid: Str
* (n + d choose d) (including 1 and first-order values). For example, let f([a, b, c], 3) be the
* function that expands [a, b, c] to their monomials of degree 3. We have the following recursion:
*
- * <p><blockquote>
+ * <blockquote>
* $$
* f([a, b, c], 3) &= f([a, b], 3) ++ f([a, b], 2) * c ++ f([a, b], 1) * c^2 ++ [c^3]
* $$
- * </blockquote></p>
+ * </blockquote>
*
* To handle sparsity, if c is zero, we can skip all monomials that contain it. We remember the
* current index and increment it properly for sparse input.
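A minimal sketch of the degree-2 example from the class comment above, using an invented input vector (x, y) = (2, 3):
{{{
import org.apache.spark.ml.feature.PolynomialExpansion
import org.apache.spark.ml.linalg.Vectors
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().master("local[*]").appName("poly-sketch").getOrCreate()
val df = spark.createDataFrame(Seq(
  Tuple1(Vectors.dense(2.0, 3.0))
)).toDF("features")

val poly = new PolynomialExpansion()
  .setInputCol("features")
  .setOutputCol("polyFeatures")
  .setDegree(2)
// Expands (x, y) to (x, x*x, y, x*y, y*y) = (2, 4, 3, 6, 9), as described above.
poly.transform(df).select("polyFeatures").show(truncate = false)
}}}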
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/RandomProjection.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/RandomProjection.scala
index 1b524c6710..2bff59a0da 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/RandomProjection.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/RandomProjection.scala
@@ -113,8 +113,8 @@ class RandomProjectionModel private[ml] (
*
* References:
*
- * 1. [[https://en.wikipedia.org/wiki/Locality-sensitive_hashing#Stable_distributions
- * Wikipedia on Stable Distributions]]
+ * 1. <a href="https://en.wikipedia.org/wiki/Locality-sensitive_hashing#Stable_distributions">
+ * Wikipedia on Stable Distributions</a>
*
* 2. Wang, Jingdong et al. "Hashing for similarity search: A survey." arXiv preprint
* arXiv:1408.2927 (2014).
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala
index d76d556280..8f125d8fd5 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala
@@ -79,8 +79,8 @@ private[feature] trait StandardScalerParams extends Params with HasInputCol with
* statistics on the samples in the training set.
*
* The "unit std" is computed using the
- * [[https://en.wikipedia.org/wiki/Standard_deviation#Corrected_sample_standard_deviation
- * corrected sample standard deviation]],
+ * <a href="https://en.wikipedia.org/wiki/Standard_deviation#Corrected_sample_standard_deviation">
+ * corrected sample standard deviation</a>,
* which is computed as the square root of the unbiased sample variance.
*/
@Since("1.2.0")
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala
index 0ced21365f..a55816249c 100755
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala
@@ -32,7 +32,7 @@ import org.apache.spark.sql.types.{ArrayType, StringType, StructType}
* @note null values from input array are preserved unless adding null to stopWords
* explicitly.
*
- * @see [[http://en.wikipedia.org/wiki/Stop_words]]
+ * @see <a href="http://en.wikipedia.org/wiki/Stop_words">Stop words (Wikipedia)</a>
*/
@Since("1.5.0")
class StopWordsRemover @Since("1.5.0") (@Since("1.5.0") override val uid: String)
@@ -132,7 +132,8 @@ object StopWordsRemover extends DefaultParamsReadable[StopWordsRemover] {
* Loads the default stop words for the given language.
* Supported languages: danish, dutch, english, finnish, french, german, hungarian,
* italian, norwegian, portuguese, russian, spanish, swedish, turkish
- * @see [[http://anoncvs.postgresql.org/cvsweb.cgi/pgsql/src/backend/snowball/stopwords/]]
+ * @see <a href="http://anoncvs.postgresql.org/cvsweb.cgi/pgsql/src/backend/snowball/stopwords/">
+ * here</a>
*/
@Since("2.0.0")
def loadDefaultStopWords(language: String): Array[String] = {
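A short sketch combining `loadDefaultStopWords` with the remover itself (the input sentence is invented for illustration):
{{{
import org.apache.spark.ml.feature.StopWordsRemover
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().master("local[*]").appName("stopwords-sketch").getOrCreate()
val english = StopWordsRemover.loadDefaultStopWords("english")
println(s"${english.length} default English stop words")

val df = spark.createDataFrame(Seq(
  (0, Seq("I", "saw", "the", "red", "balloon"))
)).toDF("id", "raw")

new StopWordsRemover()
  .setInputCol("raw")
  .setOutputCol("filtered")
  .setStopWords(english)
  .transform(df)
  .show(truncate = false)
}}}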
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/package.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/package.scala
index b94187ae78..5dd648aecc 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/package.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/package.scala
@@ -84,6 +84,7 @@ import org.apache.spark.sql.DataFrame
* input dataset, while MLlib's feature transformers operate lazily on individual columns,
* which is more efficient and flexible to handle large and complex datasets.
*
- * @see [[http://scikit-learn.org/stable/modules/preprocessing.html scikit-learn.preprocessing]]
+ * @see <a href="http://scikit-learn.org/stable/modules/preprocessing.html">
+ * scikit-learn.preprocessing</a>
*/
package object feature
diff --git a/mllib/src/main/scala/org/apache/spark/ml/optim/IterativelyReweightedLeastSquares.scala b/mllib/src/main/scala/org/apache/spark/ml/optim/IterativelyReweightedLeastSquares.scala
index 8a6b862cda..143bf539b0 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/optim/IterativelyReweightedLeastSquares.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/optim/IterativelyReweightedLeastSquares.scala
@@ -50,9 +50,10 @@ private[ml] class IterativelyReweightedLeastSquaresModel(
* @param maxIter maximum number of iterations.
* @param tol the convergence tolerance.
*
- * @see [[http://www.jstor.org/stable/2345503 P. J. Green, Iteratively Reweighted Least Squares
- * for Maximum Likelihood Estimation, and some Robust and Resistant Alternatives,
- * Journal of the Royal Statistical Society. Series B, 1984.]]
+ * @see <a href="http://www.jstor.org/stable/2345503">P. J. Green, Iteratively
+ * Reweighted Least Squares for Maximum Likelihood Estimation, and some Robust
+ * and Resistant Alternatives, Journal of the Royal Statistical Society.
+ * Series B, 1984.</a>
*/
private[ml] class IterativelyReweightedLeastSquares(
val initialModel: WeightedLeastSquaresModel,
diff --git a/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala b/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala
index fa4530927e..e3e03dfd43 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala
@@ -29,7 +29,7 @@ import org.apache.spark.ml.param._
private[ml] trait HasRegParam extends Params {
/**
- * Param for regularization parameter (>= 0).
+ * Param for regularization parameter (&gt;= 0).
* @group param
*/
final val regParam: DoubleParam = new DoubleParam(this, "regParam", "regularization parameter (>= 0)", ParamValidators.gtEq(0))
@@ -44,7 +44,7 @@ private[ml] trait HasRegParam extends Params {
private[ml] trait HasMaxIter extends Params {
/**
- * Param for maximum number of iterations (>= 0).
+ * Param for maximum number of iterations (&gt;= 0).
* @group param
*/
final val maxIter: IntParam = new IntParam(this, "maxIter", "maximum number of iterations (>= 0)", ParamValidators.gtEq(0))
@@ -238,7 +238,7 @@ private[ml] trait HasOutputCol extends Params {
private[ml] trait HasCheckpointInterval extends Params {
/**
- * Param for set checkpoint interval (>= 1) or disable checkpoint (-1). E.g. 10 means that the cache will get checkpointed every 10 iterations.
+ * Param for set checkpoint interval (&gt;= 1) or disable checkpoint (-1). E.g. 10 means that the cache will get checkpointed every 10 iterations.
* @group param
*/
final val checkpointInterval: IntParam = new IntParam(this, "checkpointInterval", "set checkpoint interval (>= 1) or disable checkpoint (-1). E.g. 10 means that the cache will get checkpointed every 10 iterations", (interval: Int) => interval == -1 || interval >= 1)
@@ -334,7 +334,7 @@ private[ml] trait HasElasticNetParam extends Params {
private[ml] trait HasTol extends Params {
/**
- * Param for the convergence tolerance for iterative algorithms (>= 0).
+ * Param for the convergence tolerance for iterative algorithms (&gt;= 0).
* @group param
*/
final val tol: DoubleParam = new DoubleParam(this, "tol", "the convergence tolerance for iterative algorithms (>= 0)", ParamValidators.gtEq(0))
@@ -349,7 +349,7 @@ private[ml] trait HasTol extends Params {
private[ml] trait HasStepSize extends Params {
/**
- * Param for Step size to be used for each iteration of optimization (> 0).
+ * Param for Step size to be used for each iteration of optimization (&gt; 0).
* @group param
*/
final val stepSize: DoubleParam = new DoubleParam(this, "stepSize", "Step size to be used for each iteration of optimization (> 0)", ParamValidators.gt(0))
@@ -396,7 +396,7 @@ private[ml] trait HasSolver extends Params {
private[ml] trait HasAggregationDepth extends Params {
/**
- * Param for suggested depth for treeAggregate (>= 2).
+ * Param for suggested depth for treeAggregate (&gt;= 2).
* @group expertParam
*/
final val aggregationDepth: IntParam = new IntParam(this, "aggregationDepth", "suggested depth for treeAggregate (>= 2)", ParamValidators.gtEq(2))
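For reference, the validator functions attached to these params are plain predicates and can be exercised directly; a tiny sketch (the value names are hypothetical):
{{{
import org.apache.spark.ml.param.ParamValidators

// gtEq(0) accepts values >= 0, matching the regParam/maxIter/tol constraints above.
val nonNegative: Double => Boolean = ParamValidators.gtEq(0.0)
println(nonNegative(0.0))   // true
println(nonNegative(-0.1))  // false

// checkpointInterval uses a custom predicate: -1 (disabled) or any value >= 1.
val validInterval: Int => Boolean = (interval: Int) => interval == -1 || interval >= 1
println(validInterval(-1))  // true
println(validInterval(0))   // false
}}}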
diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala
index 9d5ba99978..d6ad1ea6d1 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala
@@ -119,7 +119,8 @@ private[regression] trait AFTSurvivalRegressionParams extends Params
/**
* :: Experimental ::
* Fit a parametric survival regression model named accelerated failure time (AFT) model
- * ([[https://en.wikipedia.org/wiki/Accelerated_failure_time_model]])
+ * (see <a href="https://en.wikipedia.org/wiki/Accelerated_failure_time_model">
+ * Accelerated failure time model (Wikipedia)</a>)
* based on the Weibull distribution of the survival time.
*/
@Experimental
@@ -432,24 +433,24 @@ object AFTSurvivalRegressionModel extends MLReadable[AFTSurvivalRegressionModel]
* Given the values of the covariates $x^{'}$, for random lifetime $t_{i}$ of subjects i = 1,..,n,
* with possible right-censoring, the likelihood function under the AFT model is given as
*
- * <p><blockquote>
+ * <blockquote>
* $$
* L(\beta,\sigma)=\prod_{i=1}^n[\frac{1}{\sigma}f_{0}
* (\frac{\log{t_{i}}-x^{'}\beta}{\sigma})]^{\delta_{i}}S_{0}
* (\frac{\log{t_{i}}-x^{'}\beta}{\sigma})^{1-\delta_{i}}
* $$
- * </blockquote></p>
+ * </blockquote>
*
 * Where $\delta_{i}$ is the indicator that the event has occurred, i.e. whether the observation is uncensored.
* Using $\epsilon_{i}=\frac{\log{t_{i}}-x^{'}\beta}{\sigma}$, the log-likelihood function
* assumes the form
*
- * <p><blockquote>
+ * <blockquote>
* $$
* \iota(\beta,\sigma)=\sum_{i=1}^{n}[-\delta_{i}\log\sigma+
* \delta_{i}\log{f_{0}}(\epsilon_{i})+(1-\delta_{i})\log{S_{0}(\epsilon_{i})}]
* $$
- * </blockquote></p>
+ * </blockquote>
* Where $S_{0}(\epsilon_{i})$ is the baseline survivor function,
 * and $f_{0}(\epsilon_{i})$ is the corresponding density function.
*
@@ -458,34 +459,34 @@ object AFTSurvivalRegressionModel extends MLReadable[AFTSurvivalRegressionModel]
* to extreme value distribution for log of the lifetime,
* and the $S_{0}(\epsilon)$ function is
*
- * <p><blockquote>
+ * <blockquote>
* $$
* S_{0}(\epsilon_{i})=\exp(-e^{\epsilon_{i}})
* $$
- * </blockquote></p>
+ * </blockquote>
*
* and the $f_{0}(\epsilon_{i})$ function is
*
- * <p><blockquote>
+ * <blockquote>
* $$
* f_{0}(\epsilon_{i})=e^{\epsilon_{i}}\exp(-e^{\epsilon_{i}})
* $$
- * </blockquote></p>
+ * </blockquote>
*
* The log-likelihood function for Weibull distribution of lifetime is
*
- * <p><blockquote>
+ * <blockquote>
* $$
* \iota(\beta,\sigma)=
* -\sum_{i=1}^n[\delta_{i}\log\sigma-\delta_{i}\epsilon_{i}+e^{\epsilon_{i}}]
* $$
- * </blockquote></p>
+ * </blockquote>
*
 * Since minimizing the negative log-likelihood is equivalent to maximizing the a posteriori probability,
* the loss function we use to optimize is $-\iota(\beta,\sigma)$.
* The gradient functions for $\beta$ and $\log\sigma$ respectively are
*
- * <p><blockquote>
+ * <blockquote>
* $$
* \frac{\partial (-\iota)}{\partial \beta}=
* \sum_{1=1}^{n}[\delta_{i}-e^{\epsilon_{i}}]\frac{x_{i}}{\sigma} \\
@@ -493,7 +494,7 @@ object AFTSurvivalRegressionModel extends MLReadable[AFTSurvivalRegressionModel]
* \frac{\partial (-\iota)}{\partial (\log\sigma)}=
* \sum_{i=1}^{n}[\delta_{i}+(\delta_{i}-e^{\epsilon_{i}})\epsilon_{i}]
* $$
- * </blockquote></p>
+ * </blockquote>
*
* @param bcParameters The broadcasted value includes three part: The log of scale parameter,
* the intercept and regression coefficients corresponding to the features.
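A minimal fit/predict sketch of the AFT estimator whose likelihood is derived above; the rows are (label = lifetime t_i, censor = delta_i, features = x_i), all invented for illustration:
{{{
import org.apache.spark.ml.linalg.Vectors
import org.apache.spark.ml.regression.AFTSurvivalRegression
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().master("local[*]").appName("aft-sketch").getOrCreate()
val training = spark.createDataFrame(Seq(
  (1.218, 1.0, Vectors.dense(1.560, -0.605)),  // censor = 1.0: event observed (uncensored)
  (2.949, 0.0, Vectors.dense(0.346,  2.158)),  // censor = 0.0: right-censored
  (3.627, 0.0, Vectors.dense(1.380,  0.231)),
  (0.273, 1.0, Vectors.dense(0.520,  1.151))
)).toDF("label", "censor", "features")

val model = new AFTSurvivalRegression()
  .setQuantileProbabilities(Array(0.3, 0.6))
  .setQuantilesCol("quantiles")
  .fit(training)
println(s"coefficients=${model.coefficients} intercept=${model.intercept} scale=${model.scale}")
model.transform(training).show(truncate = false)
}}}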
diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala
index 1419da8747..894b6a2ca2 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala
@@ -38,8 +38,8 @@ import org.apache.spark.sql.functions._
/**
- * [[http://en.wikipedia.org/wiki/Decision_tree_learning Decision tree]] learning algorithm
- * for regression.
+ * <a href="http://en.wikipedia.org/wiki/Decision_tree_learning">Decision tree</a>
+ * learning algorithm for regression.
* It supports both continuous and categorical features.
*/
@Since("1.4.0")
diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala
index fa69d60836..ed2d05525d 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala
@@ -38,7 +38,7 @@ import org.apache.spark.sql.{DataFrame, Dataset}
import org.apache.spark.sql.functions._
/**
- * [[http://en.wikipedia.org/wiki/Gradient_boosting Gradient-Boosted Trees (GBTs)]]
+ * <a href="http://en.wikipedia.org/wiki/Gradient_boosting">Gradient-Boosted Trees (GBTs)</a>
* learning algorithm for regression.
* It supports both continuous and categorical features.
*
@@ -151,7 +151,7 @@ object GBTRegressor extends DefaultParamsReadable[GBTRegressor] {
}
/**
- * [[http://en.wikipedia.org/wiki/Gradient_boosting Gradient-Boosted Trees (GBTs)]]
+ * <a href="http://en.wikipedia.org/wiki/Gradient_boosting">Gradient-Boosted Trees (GBTs)</a>
* model for regression.
* It supports both continuous and categorical features.
* @param _trees Decision trees in the ensemble.
diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala
index f33dd0fd29..1201ecd5e4 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala
@@ -123,9 +123,11 @@ private[regression] trait GeneralizedLinearRegressionBase extends PredictorParam
/**
* :: Experimental ::
*
- * Fit a Generalized Linear Model ([[https://en.wikipedia.org/wiki/Generalized_linear_model]])
- * specified by giving a symbolic description of the linear predictor (link function) and
- * a description of the error distribution (family).
+ * Fit a Generalized Linear Model
+ * (see <a href="https://en.wikipedia.org/wiki/Generalized_linear_model">
+ * Generalized linear model (Wikipedia)</a>)
+ * specified by giving a symbolic description of the linear
+ * predictor (link function) and a description of the error distribution (family).
* It supports "gaussian", "binomial", "poisson" and "gamma" as family.
* Valid link functions for each family is listed below. The first link function of each family
* is the default one.
@@ -196,11 +198,11 @@ class GeneralizedLinearRegression @Since("2.0.0") (@Since("2.0.0") override val
/**
* Sets the regularization parameter for L2 regularization.
* The regularization term is
- * <p><blockquote>
+ * <blockquote>
* $$
* 0.5 * regParam * L2norm(coefficients)^2
* $$
- * </blockquote></p>
+ * </blockquote>
* Default is 0.0.
*
* @group setParam
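A minimal sketch of specifying the family, link and the L2 regularization term described above (toy data, invented for illustration):
{{{
import org.apache.spark.ml.linalg.Vectors
import org.apache.spark.ml.regression.GeneralizedLinearRegression
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().master("local[*]").appName("glr-sketch").getOrCreate()
val training = spark.createDataFrame(Seq(
  (1.0, Vectors.dense(0.0, 1.0)),
  (2.0, Vectors.dense(1.0, 2.0)),
  (3.0, Vectors.dense(2.0, 1.0)),
  (4.0, Vectors.dense(3.0, 3.0))
)).toDF("label", "features")

val model = new GeneralizedLinearRegression()
  .setFamily("gaussian")  // error distribution
  .setLink("identity")    // link function (the default for gaussian)
  .setRegParam(0.3)       // 0.5 * regParam * L2norm(coefficients)^2 as above
  .fit(training)
println(s"coefficients=${model.coefficients} intercept=${model.intercept}")
}}}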
diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
index 8ea5e1e6c4..eb4e38cc83 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
@@ -624,7 +624,8 @@ class LinearRegressionSummary private[regression] (
/**
* Returns the explained variance regression score.
* explainedVariance = 1 - variance(y - \hat{y}) / variance(y)
- * Reference: [[http://en.wikipedia.org/wiki/Explained_variation]]
+ * Reference: <a href="http://en.wikipedia.org/wiki/Explained_variation">
+ * Explained variation (Wikipedia)</a>
*
* @note This ignores instance weights (setting all to 1.0) from [[LinearRegression.weightCol]].
* This will change in later Spark versions.
@@ -664,7 +665,8 @@ class LinearRegressionSummary private[regression] (
/**
* Returns R^2^, the coefficient of determination.
- * Reference: [[http://en.wikipedia.org/wiki/Coefficient_of_determination]]
+ * Reference: <a href="http://en.wikipedia.org/wiki/Coefficient_of_determination">
+ * Coefficient of determination (Wikipedia)</a>
*
* @note This ignores instance weights (setting all to 1.0) from [[LinearRegression.weightCol]].
* This will change in later Spark versions.
@@ -805,11 +807,11 @@ class LinearRegressionSummary private[regression] (
* When training with intercept enabled,
* The objective function in the scaled space is given by
*
- * <p><blockquote>
+ * <blockquote>
* $$
* L = 1/2n ||\sum_i w_i(x_i - \bar{x_i}) / \hat{x_i} - (y - \bar{y}) / \hat{y}||^2,
* $$
- * </blockquote></p>
+ * </blockquote>
*
* where $\bar{x_i}$ is the mean of $x_i$, $\hat{x_i}$ is the standard deviation of $x_i$,
* $\bar{y}$ is the mean of label, and $\hat{y}$ is the standard deviation of label.
@@ -820,7 +822,7 @@ class LinearRegressionSummary private[regression] (
*
* This can be rewritten as
*
- * <p><blockquote>
+ * <blockquote>
* $$
* \begin{align}
* L &= 1/2n ||\sum_i (w_i/\hat{x_i})x_i - \sum_i (w_i/\hat{x_i})\bar{x_i} - y / \hat{y}
@@ -828,34 +830,34 @@ class LinearRegressionSummary private[regression] (
* &= 1/2n ||\sum_i w_i^\prime x_i - y / \hat{y} + offset||^2 = 1/2n diff^2
* \end{align}
* $$
- * </blockquote></p>
+ * </blockquote>
*
* where $w_i^\prime$ is the effective coefficients defined by $w_i/\hat{x_i}$, offset is
*
- * <p><blockquote>
+ * <blockquote>
* $$
* - \sum_i (w_i/\hat{x_i})\bar{x_i} + \bar{y} / \hat{y}.
* $$
- * </blockquote></p>
+ * </blockquote>
*
* and diff is
*
- * <p><blockquote>
+ * <blockquote>
* $$
* \sum_i w_i^\prime x_i - y / \hat{y} + offset
* $$
- * </blockquote></p>
+ * </blockquote>
*
* Note that the effective coefficients and offset don't depend on training dataset,
* so they can be precomputed.
*
* Now, the first derivative of the objective function in scaled space is
*
- * <p><blockquote>
+ * <blockquote>
* $$
* \frac{\partial L}{\partial w_i} = diff/N (x_i - \bar{x_i}) / \hat{x_i}
* $$
- * </blockquote></p>
+ * </blockquote>
*
* However, $(x_i - \bar{x_i})$ will densify the computation, so it's not
* an ideal formula when the training dataset is sparse format.
@@ -865,7 +867,7 @@ class LinearRegressionSummary private[regression] (
* objective function from all the samples is
*
*
- * <p><blockquote>
+ * <blockquote>
* $$
* \begin{align}
* \frac{\partial L}{\partial w_i} &=
@@ -874,14 +876,14 @@ class LinearRegressionSummary private[regression] (
* &= 1/N ((\sum_j diff_j x_{ij} / \hat{x_i}) + correction_i)
* \end{align}
* $$
- * </blockquote></p>
+ * </blockquote>
*
* where $correction_i = - diffSum \bar{x_i} / \hat{x_i}$
*
 * Simple algebra shows that diffSum is actually zero, so we don't even
* need to add the correction terms in the end. From the definition of diff,
*
- * <p><blockquote>
+ * <blockquote>
* $$
* \begin{align}
* diffSum &= \sum_j (\sum_i w_i(x_{ij} - \bar{x_i})
@@ -890,17 +892,17 @@ class LinearRegressionSummary private[regression] (
* &= 0
* \end{align}
* $$
- * </blockquote></p>
+ * </blockquote>
*
* As a result, the first derivative of the total objective function only depends on
* the training dataset, which can be easily computed in distributed fashion, and is
* sparse format friendly.
*
- * <p><blockquote>
+ * <blockquote>
* $$
* \frac{\partial L}{\partial w_i} = 1/N ((\sum_j diff_j x_{ij} / \hat{x_i})
* $$
- * </blockquote></p>
+ * </blockquote>
*
* @param bcCoefficients The broadcast coefficients corresponding to the features.
* @param labelStd The standard deviation value of the label.
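As a plain-Scala illustration of the precomputation derived above: the effective coefficients w_i' = w_i / \hat{x_i} and the offset depend only on the coefficients and the feature/label statistics, so they can be computed once before iterating over the data. All numbers below are made up:
{{{
// Hypothetical coefficients and statistics, purely to illustrate the algebra above.
val w     = Array(0.5, -1.2)   // coefficients w_i
val xMean = Array(2.0,  4.0)   // \bar{x_i}
val xStd  = Array(1.5,  0.5)   // \hat{x_i}
val yMean = 3.0                // \bar{y}
val yStd  = 2.0                // \hat{y}

// Effective coefficients w_i' = w_i / \hat{x_i}.
val wEff = w.zip(xStd).map { case (wi, si) => wi / si }
// offset = - sum_i (w_i / \hat{x_i}) * \bar{x_i} + \bar{y} / \hat{y}.
val offset = -wEff.zip(xMean).map { case (wi, mi) => wi * mi }.sum + yMean / yStd

// For one data point, diff = sum_i w_i' x_i - y / \hat{y} + offset.
val x = Array(1.0, 3.0)
val y = 2.5
val diff = wEff.zip(x).map { case (wi, xi) => wi * xi }.sum - y / yStd + offset
println(s"wEff=${wEff.mkString(",")} offset=$offset diff=$diff")
}}}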
diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala
index 0ad00aa6f9..d60f05eed5 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala
@@ -37,7 +37,8 @@ import org.apache.spark.sql.functions._
/**
- * [[http://en.wikipedia.org/wiki/Random_forest Random Forest]] learning algorithm for regression.
+ * <a href="http://en.wikipedia.org/wiki/Random_forest">Random Forest</a>
+ * learning algorithm for regression.
* It supports both continuous and categorical features.
*/
@Since("1.4.0")
@@ -132,7 +133,7 @@ object RandomForestRegressor extends DefaultParamsReadable[RandomForestRegressor
}
/**
- * [[http://en.wikipedia.org/wiki/Random_forest Random Forest]] model for regression.
+ * <a href="http://en.wikipedia.org/wiki/Random_forest">Random Forest</a> model for regression.
* It supports both continuous and categorical features.
*
* @param _trees Decision trees in the ensemble.
diff --git a/mllib/src/main/scala/org/apache/spark/ml/source/libsvm/LibSVMDataSource.scala b/mllib/src/main/scala/org/apache/spark/ml/source/libsvm/LibSVMDataSource.scala
index e137692703..e4de8483cf 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/source/libsvm/LibSVMDataSource.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/source/libsvm/LibSVMDataSource.scala
@@ -17,15 +17,12 @@
package org.apache.spark.ml.source.libsvm
-import org.apache.spark.ml.linalg.Vector
-import org.apache.spark.sql.{DataFrame, DataFrameReader}
-
/**
- * `libsvm` package implements Spark SQL data source API for loading LIBSVM data as [[DataFrame]].
- * The loaded [[DataFrame]] has two columns: `label` containing labels stored as doubles and
- * `features` containing feature vectors stored as [[Vector]]s.
+ * `libsvm` package implements Spark SQL data source API for loading LIBSVM data as `DataFrame`.
+ * The loaded `DataFrame` has two columns: `label` containing labels stored as doubles and
+ * `features` containing feature vectors stored as `Vector`s.
*
- * To use LIBSVM data source, you need to set "libsvm" as the format in [[DataFrameReader]] and
+ * To use LIBSVM data source, you need to set "libsvm" as the format in `DataFrameReader` and
* optionally specify options, for example:
* {{{
* // Scala
@@ -51,6 +48,6 @@ import org.apache.spark.sql.{DataFrame, DataFrameReader}
* @note This class is public for documentation purpose. Please don't use this class directly.
* Rather, use the data source API as illustrated above.
*
- * @see [[https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/ LIBSVM datasets]]
+ * @see <a href="https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/">LIBSVM datasets</a>
*/
class LibSVMDataSource private() {}
diff --git a/mllib/src/main/scala/org/apache/spark/ml/tree/impl/GradientBoostedTrees.scala b/mllib/src/main/scala/org/apache/spark/ml/tree/impl/GradientBoostedTrees.scala
index 0a0bc4c006..f3bace8181 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/tree/impl/GradientBoostedTrees.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/tree/impl/GradientBoostedTrees.scala
@@ -34,7 +34,7 @@ private[spark] object GradientBoostedTrees extends Logging {
/**
* Method to train a gradient boosting model
- * @param input Training dataset: RDD of [[LabeledPoint]].
+ * @param input Training dataset: RDD of `LabeledPoint`.
* @param seed Random seed.
* @return tuple of ensemble models and weights:
* (array of decision tree models, array of model weights)
@@ -59,12 +59,12 @@ private[spark] object GradientBoostedTrees extends Logging {
/**
* Method to validate a gradient boosting model
- * @param input Training dataset: RDD of [[LabeledPoint]].
+ * @param input Training dataset: RDD of `LabeledPoint`.
* @param validationInput Validation dataset.
* This dataset should be different from the training dataset,
* but it should follow the same distribution.
* E.g., these two datasets could be created from an original dataset
- * by using [[org.apache.spark.rdd.RDD.randomSplit()]]
+ * by using `org.apache.spark.rdd.RDD.randomSplit()`
* @param seed Random seed.
* @return tuple of ensemble models and weights:
* (array of decision tree models, array of model weights)
@@ -162,7 +162,7 @@ private[spark] object GradientBoostedTrees extends Logging {
* Method to calculate error of the base learner for the gradient boosting calculation.
* Note: This method is not used by the gradient boosting algorithm but is useful for debugging
* purposes.
- * @param data Training dataset: RDD of [[LabeledPoint]].
+ * @param data Training dataset: RDD of `LabeledPoint`.
* @param trees Boosted Decision Tree models
* @param treeWeights Learning rates at each boosting iteration.
* @param loss evaluation metric.
@@ -184,7 +184,7 @@ private[spark] object GradientBoostedTrees extends Logging {
/**
* Method to compute error or loss for every iteration of gradient boosting.
*
- * @param data RDD of [[LabeledPoint]]
+ * @param data RDD of `LabeledPoint`
* @param trees Boosted Decision Tree models
* @param treeWeights Learning rates at each boosting iteration.
* @param loss evaluation metric.
diff --git a/mllib/src/main/scala/org/apache/spark/ml/tree/impl/RandomForest.scala b/mllib/src/main/scala/org/apache/spark/ml/tree/impl/RandomForest.scala
index 8ae5ca3c84..a61ea374cb 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/tree/impl/RandomForest.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/tree/impl/RandomForest.scala
@@ -82,7 +82,7 @@ private[spark] object RandomForest extends Logging {
/**
* Train a random forest.
*
- * @param input Training data: RDD of [[LabeledPoint]]
+ * @param input Training data: RDD of `LabeledPoint`
* @return an unweighted set of trees
*/
def run(
diff --git a/mllib/src/main/scala/org/apache/spark/ml/tree/treeParams.scala b/mllib/src/main/scala/org/apache/spark/ml/tree/treeParams.scala
index 5a551533be..40510ad804 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/tree/treeParams.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/tree/treeParams.scala
@@ -342,9 +342,9 @@ private[ml] trait HasFeatureSubsetStrategy extends Params {
* - sqrt: recommended by Breiman manual for random forests
* - The defaults of sqrt (classification) and onethird (regression) match the R randomForest
* package.
- * @see [[http://www.stat.berkeley.edu/~breiman/randomforest2001.pdf Breiman (2001)]]
- * @see [[http://www.stat.berkeley.edu/~breiman/Using_random_forests_V3.1.pdf Breiman manual for
- * random forests]]
+ * @see <a href="http://www.stat.berkeley.edu/~breiman/randomforest2001.pdf">Breiman (2001)</a>
+ * @see <a href="http://www.stat.berkeley.edu/~breiman/Using_random_forests_V3.1.pdf">
+ * Breiman manual for random forests</a>
*
* @group param
*/
diff --git a/mllib/src/main/scala/org/apache/spark/ml/tuning/CrossValidator.scala b/mllib/src/main/scala/org/apache/spark/ml/tuning/CrossValidator.scala
index 6ea52ef7f0..85191d46fd 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/tuning/CrossValidator.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/tuning/CrossValidator.scala
@@ -40,7 +40,7 @@ import org.apache.spark.sql.types.StructType
*/
private[ml] trait CrossValidatorParams extends ValidatorParams {
/**
- * Param for number of folds for cross validation. Must be >= 2.
+ * Param for number of folds for cross validation. Must be &gt;= 2.
* Default: 3
*
* @group param
@@ -198,7 +198,7 @@ object CrossValidator extends MLReadable[CrossValidator] {
*
* @param bestModel The best model selected from k-fold cross validation.
* @param avgMetrics Average cross-validation metrics for each paramMap in
- * [[CrossValidator.estimatorParamMaps]], in the corresponding order.
+ * `CrossValidator.estimatorParamMaps`, in the corresponding order.
*/
@Since("1.2.0")
class CrossValidatorModel private[ml] (
diff --git a/mllib/src/main/scala/org/apache/spark/ml/util/ReadWrite.scala b/mllib/src/main/scala/org/apache/spark/ml/util/ReadWrite.scala
index e5fa5d53e3..5b7e5ec75c 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/util/ReadWrite.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/util/ReadWrite.scala
@@ -163,7 +163,7 @@ trait MLWritable {
/**
* :: DeveloperApi ::
*
- * Helper trait for making simple [[Params]] types writable. If a [[Params]] class stores
+ * Helper trait for making simple `Params` types writable. If a `Params` class stores
* all data as [[org.apache.spark.ml.param.Param]] values, then extending this trait will provide
* a default implementation of writing saved instances of the class.
* This only handles simple [[org.apache.spark.ml.param.Param]] types; e.g., it will not handle
@@ -231,7 +231,7 @@ trait MLReadable[T] {
/**
* :: DeveloperApi ::
*
- * Helper trait for making simple [[Params]] types readable. If a [[Params]] class stores
+ * Helper trait for making simple `Params` types readable. If a `Params` class stores
* all data as [[org.apache.spark.ml.param.Param]] values, then extending this trait will provide
* a default implementation of reading saved instances of the class.
* This only handles simple [[org.apache.spark.ml.param.Param]] types; e.g., it will not handle
@@ -360,7 +360,7 @@ private[ml] object DefaultParamsReader {
/**
* Get the JSON value of the [[org.apache.spark.ml.param.Param]] of the given name.
- * This can be useful for getting a Param value before an instance of [[Params]]
+ * This can be useful for getting a Param value before an instance of `Params`
* is available.
*/
def getParamValue(paramName: String): JValue = {
@@ -438,7 +438,7 @@ private[ml] object DefaultParamsReader {
}
/**
- * Load a [[Params]] instance from the given path, and return it.
+ * Load a `Params` instance from the given path, and return it.
* This assumes the instance implements [[MLReadable]].
*/
def loadParamsInstance[T](path: String, sc: SparkContext): T = {
@@ -454,7 +454,7 @@ private[ml] object DefaultParamsReader {
private[ml] object MetaAlgorithmReadWrite {
/**
* Examine the given estimator (which may be a compound estimator) and extract a mapping
- * from UIDs to corresponding [[Params]] instances.
+ * from UIDs to corresponding `Params` instances.
*/
def getUidMap(instance: Params): Map[String, Params] = {
val uidList = getUidMapImpl(instance)
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala
index 767d056861..fa46ba3ace 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala
@@ -302,10 +302,11 @@ object NaiveBayesModel extends Loader[NaiveBayesModel] {
/**
* Trains a Naive Bayes model given an RDD of `(label, features)` pairs.
*
- * This is the Multinomial NB ([[http://tinyurl.com/lsdw6p]]) which can handle all kinds of
- * discrete data. For example, by converting documents into TF-IDF vectors, it can be used for
- * document classification. By making every vector a 0-1 vector, it can also be used as
- * Bernoulli NB ([[http://tinyurl.com/p7c96j6]]). The input feature values must be nonnegative.
+ * This is the Multinomial NB (see <a href="http://tinyurl.com/lsdw6p">here</a>) which can
+ * handle all kinds of discrete data. For example, by converting documents into TF-IDF
+ * vectors, it can be used for document classification. By making every vector a 0-1 vector,
+ * it can also be used as Bernoulli NB (see <a href="http://tinyurl.com/p7c96j6">here</a>).
+ * The input feature values must be nonnegative.
*/
@Since("0.9.0")
class NaiveBayes private (
@@ -402,9 +403,9 @@ object NaiveBayes {
/**
* Trains a Naive Bayes model given an RDD of `(label, features)` pairs.
*
- * This is the default Multinomial NB ([[http://tinyurl.com/lsdw6p]]) which can handle all
- * kinds of discrete data. For example, by converting documents into TF-IDF vectors, it
- * can be used for document classification.
+ * This is the default Multinomial NB (see <a href="http://tinyurl.com/lsdw6p">here</a>)
+ * which can handle all kinds of discrete data. For example, by converting documents into
+ * TF-IDF vectors, it can be used for document classification.
*
* This version of the method uses a default smoothing parameter of 1.0.
*
@@ -419,9 +420,9 @@ object NaiveBayes {
/**
* Trains a Naive Bayes model given an RDD of `(label, features)` pairs.
*
- * This is the default Multinomial NB ([[http://tinyurl.com/lsdw6p]]) which can handle all
- * kinds of discrete data. For example, by converting documents into TF-IDF vectors, it
- * can be used for document classification.
+ * This is the default Multinomial NB (see <a href="http://tinyurl.com/lsdw6p">here</a>)
+ * which can handle all kinds of discrete data. For example, by converting documents
+ * into TF-IDF vectors, it can be used for document classification.
*
* @param input RDD of `(label, array of features)` pairs. Every vector should be a frequency
* vector or a count vector.
@@ -435,9 +436,10 @@ object NaiveBayes {
/**
* Trains a Naive Bayes model given an RDD of `(label, features)` pairs.
*
- * The model type can be set to either Multinomial NB ([[http://tinyurl.com/lsdw6p]])
- * or Bernoulli NB ([[http://tinyurl.com/p7c96j6]]). The Multinomial NB can handle
- * discrete count data and can be called by setting the model type to "multinomial".
+ * The model type can be set to either Multinomial NB (see <a href="http://tinyurl.com/lsdw6p">
+ * here</a>) or Bernoulli NB (see <a href="http://tinyurl.com/p7c96j6">here</a>).
+ * The Multinomial NB can handle discrete count data and can be called by setting the model
+ * type to "multinomial".
 * For example, it can be used with word counts or TF-IDF vectors of documents.
* The Bernoulli model fits presence or absence (0-1) counts. By making every vector a
* 0-1 vector and setting the model type to "bernoulli", the fits and predicts as
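A minimal sketch of the RDD-based `train` method documented above (toy data; the local SparkContext is created here only for illustration):
{{{
import org.apache.spark.mllib.classification.NaiveBayes
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.sql.SparkSession

val sc = SparkSession.builder().master("local[*]").appName("mllib-nb-sketch")
  .getOrCreate().sparkContext
val data = sc.parallelize(Seq(
  LabeledPoint(0.0, Vectors.dense(1.0, 0.0, 0.0)),
  LabeledPoint(1.0, Vectors.dense(0.0, 1.0, 1.0))
))

// lambda is the additive smoothing parameter; modelType is "multinomial" or "bernoulli".
val model = NaiveBayes.train(data, lambda = 1.0, modelType = "multinomial")
println(model.predict(Vectors.dense(1.0, 0.0, 1.0)))
}}}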
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeans.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeans.scala
index e6b89712e2..31f5141752 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeans.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeans.scala
@@ -43,13 +43,14 @@ import org.apache.spark.storage.StorageLevel
* @param k the desired number of leaf clusters (default: 4). The actual number could be smaller if
* there are no divisible leaf clusters.
* @param maxIterations the max number of k-means iterations to split clusters (default: 20)
- * @param minDivisibleClusterSize the minimum number of points (if >= 1.0) or the minimum proportion
- * of points (if < 1.0) of a divisible cluster (default: 1)
+ * @param minDivisibleClusterSize the minimum number of points (if &gt;= 1.0) or the minimum
+ * proportion of points (if &lt; 1.0) of a divisible cluster
+ * (default: 1)
* @param seed a random seed (default: hash value of the class name)
*
- * @see [[http://glaros.dtc.umn.edu/gkhome/fetch/papers/docclusterKDDTMW00.pdf
- * Steinbach, Karypis, and Kumar, A comparison of document clustering techniques,
- * KDD Workshop on Text Mining, 2000.]]
+ * @see <a href="http://glaros.dtc.umn.edu/gkhome/fetch/papers/docclusterKDDTMW00.pdf">
+ * Steinbach, Karypis, and Kumar, A comparison of document clustering techniques,
+ * KDD Workshop on Text Mining, 2000.</a>
*/
@Since("1.6.0")
class BisectingKMeans private (
@@ -100,8 +101,8 @@ class BisectingKMeans private (
def getMaxIterations: Int = this.maxIterations
/**
- * Sets the minimum number of points (if >= `1.0`) or the minimum proportion of points
- * (if < `1.0`) of a divisible cluster (default: 1).
+ * Sets the minimum number of points (if &gt;= `1.0`) or the minimum proportion of points
+ * (if &lt; `1.0`) of a divisible cluster (default: 1).
*/
@Since("1.6.0")
def setMinDivisibleClusterSize(minDivisibleClusterSize: Double): this.type = {
@@ -112,8 +113,8 @@ class BisectingKMeans private (
}
/**
- * Gets the minimum number of points (if >= `1.0`) or the minimum proportion of points
- * (if < `1.0`) of a divisible cluster.
+ * Gets the minimum number of points (if &gt;= `1.0`) or the minimum proportion of points
+ * (if &lt; `1.0`) of a divisible cluster.
*/
@Since("1.6.0")
def getMinDivisibleClusterSize: Double = minDivisibleClusterSize
@@ -218,7 +219,7 @@ class BisectingKMeans private (
}
/**
- * Java-friendly version of [[run()]].
+ * Java-friendly version of `run()`.
*/
def run(data: JavaRDD[Vector]): BisectingKMeansModel = run(data.rdd)
}
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeansModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeansModel.scala
index 8438015cce..6f1ab091b2 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeansModel.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeansModel.scala
@@ -71,7 +71,7 @@ class BisectingKMeansModel private[clustering] (
}
/**
- * Java-friendly version of [[predict()]].
+ * Java-friendly version of `predict()`.
*/
@Since("1.6.0")
def predict(points: JavaRDD[Vector]): JavaRDD[java.lang.Integer] =
@@ -95,7 +95,7 @@ class BisectingKMeansModel private[clustering] (
}
/**
- * Java-friendly version of [[computeCost()]].
+ * Java-friendly version of `computeCost()`.
*/
@Since("1.6.0")
def computeCost(data: JavaRDD[Vector]): Double = this.computeCost(data.rdd)
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixture.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixture.scala
index 56cdeea5f7..6873d4277a 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixture.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixture.scala
@@ -234,7 +234,7 @@ class GaussianMixture private (
}
/**
- * Java-friendly version of [[run()]]
+ * Java-friendly version of `run()`
*/
@Since("1.3.0")
def run(data: JavaRDD[Vector]): GaussianMixtureModel = run(data.rdd)
@@ -273,8 +273,8 @@ class GaussianMixture private (
private[clustering] object GaussianMixture {
/**
- * Heuristic to distribute the computation of the [[MultivariateGaussian]]s, approximately when
- * d > 25 except for when k is very small.
+ * Heuristic to distribute the computation of the `MultivariateGaussian`s, approximately when
+ * d &gt; 25 except for when k is very small.
* @param k Number of topics
* @param d Number of features
*/
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala
index c30cc3e239..afbe4f978b 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala
@@ -80,7 +80,7 @@ class GaussianMixtureModel @Since("1.3.0") (
}
/**
- * Java-friendly version of [[predict()]]
+ * Java-friendly version of `predict()`
*/
@Since("1.4.0")
def predict(points: JavaRDD[Vector]): JavaRDD[java.lang.Integer] =
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDA.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDA.scala
index 7c52abdeaa..16742bd284 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDA.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDA.scala
@@ -39,8 +39,8 @@ import org.apache.spark.util.Utils
* - Original LDA paper (journal version):
* Blei, Ng, and Jordan. "Latent Dirichlet Allocation." JMLR, 2003.
*
- * @see [[http://en.wikipedia.org/wiki/Latent_Dirichlet_allocation Latent Dirichlet allocation
- * (Wikipedia)]]
+ * @see <a href="http://en.wikipedia.org/wiki/Latent_Dirichlet_allocation">
+ * Latent Dirichlet allocation (Wikipedia)</a>
*/
@Since("1.3.0")
class LDA private (
@@ -113,20 +113,20 @@ class LDA private (
*
* If set to a singleton vector Vector(-1), then docConcentration is set automatically. If set to
* singleton vector Vector(t) where t != -1, then t is replicated to a vector of length k during
- * [[LDAOptimizer.initialize()]]. Otherwise, the [[docConcentration]] vector must be length k.
+ * `LDAOptimizer.initialize()`. Otherwise, the [[docConcentration]] vector must be length k.
* (default = Vector(-1) = automatic)
*
* Optimizer-specific parameter settings:
* - EM
* - Currently only supports symmetric distributions, so all values in the vector should be
* the same.
- * - Values should be > 1.0
+ * - Values should be &gt; 1.0
* - default = uniformly (50 / k) + 1, where 50/k is common in LDA libraries and +1 follows
* from Asuncion et al. (2009), who recommend a +1 adjustment for EM.
* - Online
- * - Values should be >= 0
+ * - Values should be &gt;= 0
* - default = uniformly (1.0 / k), following the implementation from
- * [[https://github.com/Blei-Lab/onlineldavb]].
+ * <a href="https://github.com/Blei-Lab/onlineldavb">here</a>.
*/
@Since("1.5.0")
def setDocConcentration(docConcentration: Vector): this.type = {
@@ -158,13 +158,13 @@ class LDA private (
def getAlpha: Double = getDocConcentration
/**
- * Alias for [[setDocConcentration()]]
+ * Alias for `setDocConcentration()`
*/
@Since("1.5.0")
def setAlpha(alpha: Vector): this.type = setDocConcentration(alpha)
/**
- * Alias for [[setDocConcentration()]]
+ * Alias for `setDocConcentration()`
*/
@Since("1.3.0")
def setAlpha(alpha: Double): this.type = setDocConcentration(alpha)
@@ -195,13 +195,13 @@ class LDA private (
*
* Optimizer-specific parameter settings:
* - EM
- * - Value should be > 1.0
+ * - Value should be &gt; 1.0
* - default = 0.1 + 1, where 0.1 gives a small amount of smoothing and +1 follows
* Asuncion et al. (2009), who recommend a +1 adjustment for EM.
* - Online
- * - Value should be >= 0
+ * - Value should be &gt;= 0
* - default = (1.0 / k), following the implementation from
- * [[https://github.com/Blei-Lab/onlineldavb]].
+ * <a href="https://github.com/Blei-Lab/onlineldavb">here</a>.
*/
@Since("1.3.0")
def setTopicConcentration(topicConcentration: Double): this.type = {
@@ -321,7 +321,7 @@ class LDA private (
* @param documents RDD of documents, which are term (word) count vectors paired with IDs.
* The term count vectors are "bags of words" with a fixed-size vocabulary
* (where the vocabulary size is the length of the vector).
- * Document IDs must be unique and >= 0.
+ * Document IDs must be unique and &gt;= 0.
* @return Inferred LDA model
*/
@Since("1.3.0")
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala
index b5b0e64a2a..017fbc6feb 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala
@@ -171,7 +171,7 @@ abstract class LDAModel private[clustering] extends Saveable {
* The term count vectors are "bags of words" with a fixed-size vocabulary
* (where the vocabulary size is the length of the vector).
* This must use the same vocabulary (ordering of term counts) as in training.
- * Document IDs must be unique and >= 0.
+ * Document IDs must be unique and &gt;= 0.
* @return Estimated topic distribution for each document.
* The returned RDD may be zipped with the given RDD, where each returned vector
* is a multinomial distribution over topics.
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala
index 7365ea1f20..9687fc8804 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala
@@ -563,7 +563,7 @@ private[clustering] object OnlineLDAOptimizer {
*
* An optimization (Lee, Seung: Algorithms for non-negative matrix factorization, NIPS 2001)
* avoids explicit computation of variational parameter `phi`.
- * @see [[http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.31.7566]]
+ * @see <a href="http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.31.7566">here</a>
*
* @return Returns a tuple of `gammad` - estimate of gamma, the topic distribution, `sstatsd` -
* statistics for updating lambda and `ids` - list of termCounts vector indices.
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/PowerIterationClustering.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/PowerIterationClustering.scala
index c760ddd6ad..4d3e265455 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/PowerIterationClustering.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/PowerIterationClustering.scala
@@ -36,7 +36,7 @@ import org.apache.spark.util.random.XORShiftRandom
* Model produced by [[PowerIterationClustering]].
*
* @param k number of clusters
- * @param assignments an RDD of clustering [[PowerIterationClustering#Assignment]]s
+ * @param assignments an RDD of clustering `PowerIterationClustering#Assignment`s
*/
@Since("1.3.0")
class PowerIterationClusteringModel @Since("1.3.0") (
@@ -103,9 +103,9 @@ object PowerIterationClusteringModel extends Loader[PowerIterationClusteringMode
/**
* Power Iteration Clustering (PIC), a scalable graph clustering algorithm developed by
- * [[http://www.icml2010.org/papers/387.pdf Lin and Cohen]]. From the abstract: PIC finds a very
- * low-dimensional embedding of a dataset using truncated power iteration on a normalized pair-wise
- * similarity matrix of the data.
+ * <a href="http://www.icml2010.org/papers/387.pdf">Lin and Cohen</a>. From the abstract: PIC finds
+ * a very low-dimensional embedding of a dataset using truncated power iteration on a normalized
+ * pair-wise similarity matrix of the data.
*
* @param k Number of clusters.
* @param maxIterations Maximum number of iterations of the PIC algorithm.
@@ -113,7 +113,8 @@ object PowerIterationClusteringModel extends Loader[PowerIterationClusteringMode
* as vertex properties, or "degree" to use normalized sum similarities.
* Default: random.
*
- * @see [[http://en.wikipedia.org/wiki/Spectral_clustering Spectral clustering (Wikipedia)]]
+ * @see <a href="http://en.wikipedia.org/wiki/Spectral_clustering">
+ * Spectral clustering (Wikipedia)</a>
*/
@Since("1.3.0")
class PowerIterationClustering private[clustering] (
@@ -210,7 +211,7 @@ class PowerIterationClustering private[clustering] (
}
/**
- * A Java-friendly version of [[PowerIterationClustering.run]].
+ * A Java-friendly version of `PowerIterationClustering.run`.
*/
@Since("1.3.0")
def run(similarities: JavaRDD[(java.lang.Long, java.lang.Long, java.lang.Double)])
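For context on the PIC API documented above, a hedged usage sketch assuming `similarities: RDD[(Long, Long, Double)]` of (srcId, dstId, similarity) triples already exists.

    import org.apache.spark.mllib.clustering.PowerIterationClustering

    val pic = new PowerIterationClustering()
      .setK(3)                         // number of clusters
      .setMaxIterations(20)
      .setInitializationMode("degree") // or "random" (the default)
    val model = pic.run(similarities)
    model.assignments.collect().foreach { a =>
      println(s"vertex ${a.id} -> cluster ${a.cluster}")
    }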
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/StreamingKMeans.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/StreamingKMeans.scala
index f20ab09bf0..85c37c438d 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/StreamingKMeans.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/StreamingKMeans.scala
@@ -39,14 +39,14 @@ import org.apache.spark.util.random.XORShiftRandom
* generalized to incorporate forgetfulness (i.e. decay).
* The update rule (for each cluster) is:
*
- * <p><blockquote>
+ * <blockquote>
* $$
* \begin{align}
* c_t+1 &= [(c_t * n_t * a) + (x_t * m_t)] / [n_t + m_t] \\
* n_t+t &= n_t * a + m_t
* \end{align}
* $$
- * </blockquote></p>
+ * </blockquote>
*
* Where c_t is the previously estimated centroid for that cluster,
* n_t is the number of points assigned to it thus far, x_t is the centroid
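The decayed update rule quoted above can be read as a small function; this sketch (not Spark's StreamingKMeans internals) applies it to a single cluster.

    // c_{t+1} = [(c_t * n_t * a) + (x_t * m_t)] / [n_t + m_t],  n_{t+1} = n_t * a + m_t
    def decayedUpdate(cT: Array[Double], nT: Double,
                      xT: Array[Double], mT: Double,
                      a: Double): (Array[Double], Double) = {
      val cNext = cT.zip(xT).map { case (c, x) => (c * nT * a + x * mT) / (nT + mT) }
      val nNext = nT * a + mT
      (cNext, nNext)
    }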
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RegressionMetrics.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RegressionMetrics.scala
index 8f777cc35b..ad99b00a31 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RegressionMetrics.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RegressionMetrics.scala
@@ -74,7 +74,8 @@ class RegressionMetrics @Since("2.0.0") (
/**
* Returns the variance explained by regression.
 * explainedVariance = $\sum_i (\hat{y_i} - \bar{y})^2 / n$
- * @see [[https://en.wikipedia.org/wiki/Fraction_of_variance_unexplained]]
+ * @see <a href="https://en.wikipedia.org/wiki/Fraction_of_variance_unexplained">
+ * Fraction of variance unexplained (Wikipedia)</a>
*/
@Since("1.2.0")
def explainedVariance: Double = {
@@ -110,10 +111,11 @@ class RegressionMetrics @Since("2.0.0") (
/**
* Returns R^2^, the unadjusted coefficient of determination.
- * @see [[http://en.wikipedia.org/wiki/Coefficient_of_determination]]
+ * @see <a href="http://en.wikipedia.org/wiki/Coefficient_of_determination">
+ * Coefficient of determination (Wikipedia)</a>
* In case of regression through the origin, the definition of R^2^ is to be modified.
- * @see J. G. Eisenhauer, Regression through the Origin. Teaching Statistics 25, 76-80 (2003)
- * [[https://online.stat.psu.edu/~ajw13/stat501/SpecialTopics/Reg_thru_origin.pdf]]
+ * @see <a href="https://online.stat.psu.edu/~ajw13/stat501/SpecialTopics/Reg_thru_origin.pdf">
+ * J. G. Eisenhauer, Regression through the Origin. Teaching Statistics 25, 76-80 (2003)</a>
*/
@Since("1.2.0")
def r2: Double = {
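A short usage sketch for the metrics documented above, assuming `predictionsAndLabels: RDD[(Double, Double)]` of (prediction, observation) pairs already exists.

    import org.apache.spark.mllib.evaluation.RegressionMetrics

    val metrics = new RegressionMetrics(predictionsAndLabels)
    println(s"explained variance = ${metrics.explainedVariance}")
    println(s"R^2 = ${metrics.r2}")
    println(s"RMSE = ${metrics.rootMeanSquaredError}")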
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/fpm/FPGrowth.scala b/mllib/src/main/scala/org/apache/spark/mllib/fpm/FPGrowth.scala
index 0f7fbe9556..b533860122 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/fpm/FPGrowth.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/fpm/FPGrowth.scala
@@ -147,18 +147,18 @@ object FPGrowthModel extends Loader[FPGrowthModel[_]] {
/**
* A parallel FP-growth algorithm to mine frequent itemsets. The algorithm is described in
- * [[http://dx.doi.org/10.1145/1454008.1454027 Li et al., PFP: Parallel FP-Growth for Query
- * Recommendation]]. PFP distributes computation in such a way that each worker executes an
+ * <a href="http://dx.doi.org/10.1145/1454008.1454027">Li et al., PFP: Parallel FP-Growth for Query
+ * Recommendation</a>. PFP distributes computation in such a way that each worker executes an
* independent group of mining tasks. The FP-Growth algorithm is described in
- * [[http://dx.doi.org/10.1145/335191.335372 Han et al., Mining frequent patterns without candidate
- * generation]].
+ * <a href="http://dx.doi.org/10.1145/335191.335372">Han et al., Mining frequent patterns without
+ * candidate generation</a>.
*
* @param minSupport the minimal support level of the frequent pattern; any pattern that appears
* more than (minSupport * size-of-the-dataset) times will be output
* @param numPartitions number of partitions used by parallel FP-growth
*
- * @see [[http://en.wikipedia.org/wiki/Association_rule_learning Association rule learning
- * (Wikipedia)]]
+ * @see <a href="http://en.wikipedia.org/wiki/Association_rule_learning">
+ * Association rule learning (Wikipedia)</a>
*
*/
@Since("1.3.0")
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/fpm/PrefixSpan.scala b/mllib/src/main/scala/org/apache/spark/mllib/fpm/PrefixSpan.scala
index 7382000791..a564167221 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/fpm/PrefixSpan.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/fpm/PrefixSpan.scala
@@ -44,7 +44,8 @@ import org.apache.spark.storage.StorageLevel
/**
* A parallel PrefixSpan algorithm to mine frequent sequential patterns.
* The PrefixSpan algorithm is described in J. Pei, et al., PrefixSpan: Mining Sequential Patterns
- * Efficiently by Prefix-Projected Pattern Growth ([[http://doi.org/10.1109/ICDE.2001.914830]]).
+ * Efficiently by Prefix-Projected Pattern Growth
+ * (see <a href="http://doi.org/10.1109/ICDE.2001.914830">here</a>).
*
* @param minSupport the minimal support level of the sequential pattern; any pattern that appears
* more than (minSupport * size-of-the-dataset) times will be output
@@ -55,8 +56,8 @@ import org.apache.spark.storage.StorageLevel
* processing. If a projected database exceeds this size, another
* iteration of distributed prefix growth is run.
*
- * @see [[https://en.wikipedia.org/wiki/Sequential_Pattern_Mining Sequential Pattern Mining
- * (Wikipedia)]]
+ * @see <a href="https://en.wikipedia.org/wiki/Sequential_Pattern_Mining">Sequential Pattern Mining
+ * (Wikipedia)</a>
*/
@Since("1.5.0")
class PrefixSpan private (
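Similarly for PrefixSpan, a minimal sketch assuming `sequences: RDD[Array[Array[String]]]` (each element is a sequence of itemsets) already exists.

    import org.apache.spark.mllib.fpm.PrefixSpan

    val prefixSpan = new PrefixSpan()
      .setMinSupport(0.5)
      .setMaxPatternLength(5)
    val model = prefixSpan.run(sequences)
    model.freqSequences.collect().foreach { freqSeq =>
      println(freqSeq.sequence.map(_.mkString("{", ",", "}")).mkString("<", " ", ">") +
        " : " + freqSeq.freq)
    }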
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala
index 03866753b5..9e75217410 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala
@@ -385,10 +385,10 @@ class BlockMatrix @Since("1.3.0") (
/**
* Adds the given block matrix `other` to `this` block matrix: `this + other`.
* The matrices must have the same size and matching `rowsPerBlock` and `colsPerBlock`
- * values. If one of the blocks that are being added are instances of [[SparseMatrix]],
- * the resulting sub matrix will also be a [[SparseMatrix]], even if it is being added
- * to a [[DenseMatrix]]. If two dense matrices are added, the output will also be a
- * [[DenseMatrix]].
+ * values. If one of the blocks that are being added are instances of `SparseMatrix`,
+ * the resulting sub matrix will also be a `SparseMatrix`, even if it is being added
+ * to a `DenseMatrix`. If two dense matrices are added, the output will also be a
+ * `DenseMatrix`.
*/
@Since("1.3.0")
def add(other: BlockMatrix): BlockMatrix =
@@ -397,10 +397,10 @@ class BlockMatrix @Since("1.3.0") (
/**
* Subtracts the given block matrix `other` from `this` block matrix: `this - other`.
* The matrices must have the same size and matching `rowsPerBlock` and `colsPerBlock`
- * values. If one of the blocks that are being subtracted are instances of [[SparseMatrix]],
- * the resulting sub matrix will also be a [[SparseMatrix]], even if it is being subtracted
- * from a [[DenseMatrix]]. If two dense matrices are subtracted, the output will also be a
- * [[DenseMatrix]].
+ * values. If one of the blocks that are being subtracted are instances of `SparseMatrix`,
+ * the resulting sub matrix will also be a `SparseMatrix`, even if it is being subtracted
+ * from a `DenseMatrix`. If two dense matrices are subtracted, the output will also be a
+ * `DenseMatrix`.
*/
@Since("2.0.0")
def subtract(other: BlockMatrix): BlockMatrix =
@@ -447,8 +447,8 @@ class BlockMatrix @Since("1.3.0") (
/**
* Left multiplies this [[BlockMatrix]] to `other`, another [[BlockMatrix]]. The `colsPerBlock`
* of this matrix must equal the `rowsPerBlock` of `other`. If `other` contains
- * [[SparseMatrix]], they will have to be converted to a [[DenseMatrix]]. The output
- * [[BlockMatrix]] will only consist of blocks of [[DenseMatrix]]. This may cause
+ * `SparseMatrix`, they will have to be converted to a `DenseMatrix`. The output
+ * [[BlockMatrix]] will only consist of blocks of `DenseMatrix`. This may cause
* some performance issues until support for multiplying two sparse matrices is added.
*
* @note The behavior of multiply has changed in 1.6.0. `multiply` used to throw an error when
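To make the sparse/dense behavior described above concrete, a hedged sketch assuming `entriesA` and `entriesB` are existing `RDD[MatrixEntry]`s describing matrices of the same dimensions.

    import org.apache.spark.mllib.linalg.distributed.{BlockMatrix, CoordinateMatrix}

    val a: BlockMatrix = new CoordinateMatrix(entriesA).toBlockMatrix(1024, 1024).cache()
    val b: BlockMatrix = new CoordinateMatrix(entriesB).toBlockMatrix(1024, 1024).cache()
    val sum = a.add(b)          // sparse blocks stay sparse unless added to dense blocks
    val product = a.multiply(b) // output blocks are always dense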
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrix.scala
index 008b03d1cc..d2c5b14a5b 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrix.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrix.scala
@@ -101,14 +101,14 @@ class CoordinateMatrix @Since("1.0.0") (
toIndexedRowMatrix().toRowMatrix()
}
- /** Converts to BlockMatrix. Creates blocks of [[SparseMatrix]] with size 1024 x 1024. */
+ /** Converts to BlockMatrix. Creates blocks of `SparseMatrix` with size 1024 x 1024. */
@Since("1.3.0")
def toBlockMatrix(): BlockMatrix = {
toBlockMatrix(1024, 1024)
}
/**
- * Converts to BlockMatrix. Creates blocks of [[SparseMatrix]].
+ * Converts to BlockMatrix. Creates blocks of `SparseMatrix`.
* @param rowsPerBlock The number of rows of each block. The blocks at the bottom edge may have
* a smaller value. Must be an integer value greater than 0.
* @param colsPerBlock The number of columns of each block. The blocks at the right edge may have
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala
index 809906a158..590e959daa 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala
@@ -90,14 +90,14 @@ class IndexedRowMatrix @Since("1.0.0") (
new RowMatrix(rows.map(_.vector), 0L, nCols)
}
- /** Converts to BlockMatrix. Creates blocks of [[SparseMatrix]] with size 1024 x 1024. */
+ /** Converts to BlockMatrix. Creates blocks of `SparseMatrix` with size 1024 x 1024. */
@Since("1.3.0")
def toBlockMatrix(): BlockMatrix = {
toBlockMatrix(1024, 1024)
}
/**
- * Converts to BlockMatrix. Creates blocks of [[SparseMatrix]].
+ * Converts to BlockMatrix. Creates blocks of `SparseMatrix`.
* @param rowsPerBlock The number of rows of each block. The blocks at the bottom edge may have
* a smaller value. Must be an integer value greater than 0.
* @param colsPerBlock The number of columns of each block. The blocks at the right edge may have
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala
index 4b120332ab..78a8810052 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala
@@ -531,7 +531,7 @@ class RowMatrix @Since("1.0.0") (
* decomposition (factorization) for the [[RowMatrix]] of a tall and skinny shape.
* Reference:
* Paul G. Constantine, David F. Gleich. "Tall and skinny QR factorizations in MapReduce
- * architectures" ([[http://dx.doi.org/10.1145/1996092.1996103]])
+ * architectures" (see <a href="http://dx.doi.org/10.1145/1996092.1996103">here</a>)
*
* @param computeQ whether to computeQ
* @return QRDecomposition(Q, R), Q = null if computeQ = false.
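A brief usage note for the QR decomposition above, assuming `mat` is an existing tall-and-skinny `RowMatrix`.

    val qrResult = mat.tallSkinnyQR(computeQ = true)
    val q = qrResult.Q // RowMatrix; null when computeQ = false
    val r = qrResult.R // local upper-triangular Matrix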
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/optimization/Gradient.scala b/mllib/src/main/scala/org/apache/spark/mllib/optimization/Gradient.scala
index c49e72646b..0efce3c76f 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/optimization/Gradient.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/optimization/Gradient.scala
@@ -67,14 +67,14 @@ abstract class Gradient extends Serializable {
* http://statweb.stanford.edu/~tibs/ElemStatLearn/ , Eq. (4.17) on page 119 gives the formula of
* multinomial logistic regression model. A simple calculation shows that
*
- * <p><blockquote>
+ * <blockquote>
* $$
* P(y=0|x, w) = 1 / (1 + \sum_i^{K-1} \exp(x w_i))\\
* P(y=1|x, w) = exp(x w_1) / (1 + \sum_i^{K-1} \exp(x w_i))\\
* ...\\
* P(y=K-1|x, w) = exp(x w_{K-1}) / (1 + \sum_i^{K-1} \exp(x w_i))\\
* $$
- * </blockquote></p>
+ * </blockquote>
*
* for K classes multiclass classification problem.
*
@@ -83,7 +83,7 @@ abstract class Gradient extends Serializable {
* will be (K-1) * N.
*
* As a result, the loss of objective function for a single instance of data can be written as
- * <p><blockquote>
+ * <blockquote>
* $$
* \begin{align}
* l(w, x) &= -log P(y|x, w) = -\alpha(y) log P(y=0|x, w) - (1-\alpha(y)) log P(y|x, w) \\
@@ -91,7 +91,7 @@ abstract class Gradient extends Serializable {
* &= log(1 + \sum_i^{K-1}\exp(margins_i)) - (1-\alpha(y)) margins_{y-1}
* \end{align}
* $$
- * </blockquote></p>
+ * </blockquote>
*
* where $\alpha(i) = 1$ if $i \ne 0$, and
* $\alpha(i) = 0$ if $i == 0$,
@@ -100,7 +100,7 @@ abstract class Gradient extends Serializable {
* For optimization, we have to calculate the first derivative of the loss function, and
* a simple calculation shows that
*
- * <p><blockquote>
+ * <blockquote>
* $$
* \begin{align}
* \frac{\partial l(w, x)}{\partial w_{ij}} &=
@@ -108,7 +108,7 @@ abstract class Gradient extends Serializable {
* &= multiplier_i * x_j
* \end{align}
* $$
- * </blockquote></p>
+ * </blockquote>
*
* where $\delta_{i, j} = 1$ if $i == j$,
* $\delta_{i, j} = 0$ if $i != j$, and
@@ -118,12 +118,12 @@ abstract class Gradient extends Serializable {
* If any of the margins is larger than 709.78, the numerical computation of the multiplier and
* the loss function will suffer from arithmetic overflow. This issue occurs when there are outliers
* in the data that are far away from the hyperplane, and it will cause training to fail once
- * infinity / infinity is introduced. Note that this is only a concern when max(margins) > 0.
+ * infinity / infinity is introduced. Note that this is only a concern when max(margins) &gt; 0.
*
- * Fortunately, when max(margins) = maxMargin > 0, the loss function and the multiplier can be
+ * Fortunately, when max(margins) = maxMargin &gt; 0, the loss function and the multiplier can be
* easily rewritten into the following equivalent numerically stable formula.
*
- * <p><blockquote>
+ * <blockquote>
* $$
* \begin{align}
* l(w, x) &= log(1 + \sum_i^{K-1}\exp(margins_i)) - (1-\alpha(y)) margins_{y-1} \\
@@ -132,7 +132,7 @@ abstract class Gradient extends Serializable {
* &= log(1 + sum) + maxMargin - (1-\alpha(y)) margins_{y-1}
* \end{align}
* $$
- * </blockquote></p>
+ * </blockquote>
* where sum = $\exp(-maxMargin) + \sum_i^{K-1}\exp(margins_i - maxMargin) - 1$.
*
@@ -141,7 +141,7 @@ abstract class Gradient extends Serializable {
*
* For multiplier, similar trick can be applied as the following,
*
- * <p><blockquote>
+ * <blockquote>
* $$
* \begin{align}
* multiplier
@@ -150,7 +150,7 @@ abstract class Gradient extends Serializable {
* &= \exp(margins_i - maxMargin) / (1 + sum) - (1-\alpha(y)\delta_{y, i+1})
* \end{align}
* $$
- * </blockquote></p>
+ * </blockquote>
*
* where each term in $\exp$ is also smaller than zero, so overflow is not a concern.
*
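The maxMargin rewriting above amounts to a shifted log-sum-exp; here is a standalone sketch of the loss term (illustrative only, not the Gradient code touched by this diff).

    // Computes log(1 + sum_i exp(margins_i)) without overflow, using the shift described above.
    def stableLogSumExpPlusOne(margins: Array[Double]): Double = {
      val maxMargin = math.max(0.0, margins.max)
      // sum = exp(-maxMargin) + sum_i exp(margins_i - maxMargin) - 1
      val sum = math.exp(-maxMargin) + margins.map(m => math.exp(m - maxMargin)).sum - 1.0
      math.log1p(sum) + maxMargin
    }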
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/optimization/GradientDescent.scala b/mllib/src/main/scala/org/apache/spark/mllib/optimization/GradientDescent.scala
index 123e0bb3e6..67da88e804 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/optimization/GradientDescent.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/optimization/GradientDescent.scala
@@ -88,10 +88,10 @@ class GradientDescent private[spark] (private var gradient: Gradient, private va
* convergenceTol is a condition which decides iteration termination.
* The end of iteration is decided based on the logic below.
*
- * - If the norm of the new solution vector is >1, the diff of solution vectors
+ * - If the norm of the new solution vector is &gt;1, the diff of solution vectors
* is compared to relative tolerance which means normalizing by the norm of
* the new solution vector.
- * - If the norm of the new solution vector is <=1, the diff of solution vectors
+ * - If the norm of the new solution vector is &lt;=1, the diff of solution vectors
* is compared to absolute tolerance which is not normalizing.
*
* Must be between 0.0 and 1.0 inclusively.
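The relative/absolute tolerance rule above could be written as follows; this is an illustrative sketch, not the private helper GradientDescent actually uses.

    import org.apache.spark.mllib.linalg.{Vector, Vectors}

    def isConverged(previous: Vector, current: Vector, convergenceTol: Double): Boolean = {
      val diffArray = previous.toArray.zip(current.toArray).map { case (p, c) => p - c }
      val solutionVecDiff = Vectors.norm(Vectors.dense(diffArray), 2.0)
      val currentNorm = Vectors.norm(current, 2.0)
      // Relative tolerance when the new solution has norm > 1, absolute tolerance otherwise.
      if (currentNorm > 1.0) solutionVecDiff < convergenceTol * currentNorm
      else solutionVecDiff < convergenceTol
    }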
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/optimization/LBFGS.scala b/mllib/src/main/scala/org/apache/spark/mllib/optimization/LBFGS.scala
index e49363c2c6..6232ff30a7 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/optimization/LBFGS.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/optimization/LBFGS.scala
@@ -31,7 +31,8 @@ import org.apache.spark.rdd.RDD
/**
* :: DeveloperApi ::
* Class used to solve an optimization problem using Limited-memory BFGS.
- * Reference: [[http://en.wikipedia.org/wiki/Limited-memory_BFGS]]
+ * Reference: <a href="http://en.wikipedia.org/wiki/Limited-memory_BFGS">
+ * Wikipedia on Limited-memory BFGS</a>
* @param gradient Gradient function to be used.
* @param updater Updater to be used to update weights after every iteration.
*/
@@ -48,8 +49,8 @@ class LBFGS(private var gradient: Gradient, private var updater: Updater)
* Set the number of corrections used in the LBFGS update. Default 10.
* Values of numCorrections less than 3 are not recommended; large values
* of numCorrections will result in excessive computing time.
- * 3 < numCorrections < 10 is recommended.
- * Restriction: numCorrections > 0
+ * 3 &lt; numCorrections &lt; 10 is recommended.
+ * Restriction: numCorrections &gt; 0
*/
def setNumCorrections(corrections: Int): this.type = {
require(corrections > 0,
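A small configuration sketch for the setter above, assuming a binary logistic problem; the gradient/updater choices and values are illustrative.

    import org.apache.spark.mllib.optimization.{LBFGS, LogisticGradient, SquaredL2Updater}

    val lbfgs = new LBFGS(new LogisticGradient(), new SquaredL2Updater())
      .setNumCorrections(10) // restriction: > 0; values between 3 and 10 are recommended
      .setConvergenceTol(1e-4)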
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/optimization/NNLS.scala b/mllib/src/main/scala/org/apache/spark/mllib/optimization/NNLS.scala
index 64d52bae00..b7c9fcfbfe 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/optimization/NNLS.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/optimization/NNLS.scala
@@ -54,7 +54,7 @@ private[spark] object NNLS {
*
* We solve the problem
 * min_x 1/2 x^T ata x - x^T atb
- * subject to x >= 0
+ * subject to x &gt;= 0
*
* The method used is similar to one described by Polyak (B. T. Polyak, The conjugate gradient
* method in extremal problems, Zh. Vychisl. Mat. Mat. Fiz. 9(4)(1969), pp. 94-112) for bound-
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/optimization/Updater.scala b/mllib/src/main/scala/org/apache/spark/mllib/optimization/Updater.scala
index 67d484575d..aa7dd1aaa6 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/optimization/Updater.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/optimization/Updater.scala
@@ -95,9 +95,9 @@ class SimpleUpdater extends Updater {
* The corresponding proximal operator for the L1 norm is the soft-thresholding
* function. That is, each weight component is shrunk towards 0 by shrinkageVal.
*
- * If w > shrinkageVal, set weight component to w-shrinkageVal.
- * If w < -shrinkageVal, set weight component to w+shrinkageVal.
- * If -shrinkageVal < w < shrinkageVal, set weight component to 0.
+ * If w &gt; shrinkageVal, set weight component to w-shrinkageVal.
+ * If w &lt; -shrinkageVal, set weight component to w+shrinkageVal.
+ * If -shrinkageVal &lt; w &lt; shrinkageVal, set weight component to 0.
*
* Equivalently, set weight component to signum(w) * max(0.0, abs(w) - shrinkageVal)
*/
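The shrinkage rule above collapses to a single expression; as a standalone illustration:

    // Soft-thresholding: signum(w) * max(0.0, abs(w) - shrinkageVal)
    def softThreshold(w: Double, shrinkageVal: Double): Double =
      math.signum(w) * math.max(0.0, math.abs(w) - shrinkageVal)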
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/package.scala b/mllib/src/main/scala/org/apache/spark/mllib/package.scala
index 9810b6f668..8323afcb6a 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/package.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/package.scala
@@ -32,7 +32,7 @@ package org.apache.spark
* to reach feature parity with the RDD-based APIs.
* And once we reach feature parity, this package will be deprecated.
*
- * @see [[https://issues.apache.org/jira/browse/SPARK-4591 SPARK-4591]] to track the progress of
- * feature parity
+ * @see <a href="https://issues.apache.org/jira/browse/SPARK-4591">SPARK-4591</a> to track
+ * the progress of feature parity
*/
package object mllib
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/rdd/RDDFunctions.scala b/mllib/src/main/scala/org/apache/spark/mllib/rdd/RDDFunctions.scala
index 005119616f..32e6ecf630 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/rdd/RDDFunctions.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/rdd/RDDFunctions.scala
@@ -48,7 +48,7 @@ class RDDFunctions[T: ClassTag](self: RDD[T]) extends Serializable {
}
/**
- * [[sliding(Int, Int)*]] with step = 1.
+ * `sliding(Int, Int)*` with step = 1.
*/
def sliding(windowSize: Int): RDD[Array[T]] = sliding(windowSize, 1)
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala
index 0039db7ecb..76b1bc13b4 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala
@@ -54,11 +54,12 @@ case class Rating @Since("0.8.0") (
*
* For implicit preference data, the algorithm used is based on
* "Collaborative Filtering for Implicit Feedback Datasets", available at
- * [[http://dx.doi.org/10.1109/ICDM.2008.22]], adapted for the blocked approach used here.
+ * <a href="http://dx.doi.org/10.1109/ICDM.2008.22">here</a>, adapted for the blocked approach
+ * used here.
*
* Essentially instead of finding the low-rank approximations to the rating matrix `R`,
* this finds the approximations for a preference matrix `P` where the elements of `P` are 1 if
- * r > 0 and 0 if r <= 0. The ratings then act as 'confidence' values related to strength of
+ * r &gt; 0 and 0 if r &lt;= 0. The ratings then act as 'confidence' values related to strength of
* indicated user
* preferences rather than explicit ratings given to items.
*/
@@ -282,7 +283,7 @@ class ALS private (
}
/**
- * Java-friendly version of [[ALS.run]].
+ * Java-friendly version of `ALS.run`.
*/
@Since("1.3.0")
def run(ratings: JavaRDD[Rating]): MatrixFactorizationModel = run(ratings.rdd)
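A hedged usage sketch of the implicit-preference mode described above, assuming `ratings: RDD[Rating]` of implicit feedback counts already exists; the hyperparameters are illustrative.

    import org.apache.spark.mllib.recommendation.{ALS, Rating}

    // trainImplicit(ratings, rank, iterations, lambda, alpha)
    val model = ALS.trainImplicit(ratings, 10, 10, 0.01, 1.0)
    val topFive = model.recommendProducts(42, 5) // top 5 products for user 42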
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModel.scala
index 24e4dcccc8..23045fa2b6 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModel.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModel.scala
@@ -146,7 +146,7 @@ class MatrixFactorizationModel @Since("0.8.0") (
}
/**
- * Java-friendly version of [[MatrixFactorizationModel.predict]].
+ * Java-friendly version of `MatrixFactorizationModel.predict`.
*/
@Since("1.2.0")
def predict(usersProducts: JavaPairRDD[JavaInteger, JavaInteger]): JavaRDD[Rating] = {
@@ -195,7 +195,7 @@ class MatrixFactorizationModel @Since("0.8.0") (
* - human-readable (JSON) model metadata to path/metadata/
* - Parquet formatted data to path/data/
*
- * The model may be loaded using [[Loader.load]].
+ * The model may be loaded using `Loader.load`.
*
* @param sc Spark context used to save model data.
* @param path Path specifying the directory in which to save this model.
@@ -320,7 +320,7 @@ object MatrixFactorizationModel extends Loader[MatrixFactorizationModel] {
/**
* Load a model from the given path.
*
- * The model should have been saved by [[Saveable.save]].
+ * The model should have been saved by `Saveable.save`.
*
* @param sc Spark context used for loading model files.
* @param path Path specifying the directory to which the model was saved.
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala
index 377326f873..36894d5234 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala
@@ -238,23 +238,22 @@ object IsotonicRegressionModel extends Loader[IsotonicRegressionModel] {
* Sequential PAV implementation based on:
* Tibshirani, Ryan J., Holger Hoefling, and Robert Tibshirani.
* "Nearly-isotonic regression." Technometrics 53.1 (2011): 54-61.
- * Available from [[http://www.stat.cmu.edu/~ryantibs/papers/neariso.pdf]]
+ * Available from <a href="http://www.stat.cmu.edu/~ryantibs/papers/neariso.pdf">here</a>
*
* Sequential PAV parallelization based on:
* Kearsley, Anthony J., Richard A. Tapia, and Michael W. Trosset.
* "An approach to parallelizing isotonic regression."
* Applied Mathematics and Parallel Computing. Physica-Verlag HD, 1996. 141-147.
- * Available from [[http://softlib.rice.edu/pub/CRPC-TRs/reports/CRPC-TR96640.pdf]]
+ * Available from <a href="http://softlib.rice.edu/pub/CRPC-TRs/reports/CRPC-TR96640.pdf">here</a>
*
- * @see [[http://en.wikipedia.org/wiki/Isotonic_regression Isotonic regression (Wikipedia)]]
+ * @see <a href="http://en.wikipedia.org/wiki/Isotonic_regression">Isotonic regression
+ * (Wikipedia)</a>
*/
@Since("1.3.0")
class IsotonicRegression private (private var isotonic: Boolean) extends Serializable {
/**
* Constructs IsotonicRegression instance with default parameter isotonic = true.
- *
- * @return New instance of IsotonicRegression.
*/
@Since("1.3.0")
def this() = this(true)
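A minimal usage sketch for the PAV-based API documented above, assuming `trainingTuples: RDD[(Double, Double, Double)]` of (label, feature, weight) triples already exists.

    import org.apache.spark.mllib.regression.IsotonicRegression

    val model = new IsotonicRegression().setIsotonic(true).run(trainingTuples)
    println(model.predict(0.5)) // piecewise-linear interpolation between boundaries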
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizer.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizer.scala
index 7a2a7a35a9..7dc0c459ec 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizer.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizer.scala
@@ -30,12 +30,15 @@ import org.apache.spark.mllib.linalg.{Vector, Vectors}
* the corresponding joint dataset.
*
* A numerically stable algorithm is implemented to compute the mean and variance of instances:
- * Reference: [[http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance variance-wiki]]
+ * Reference: <a href="http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance">
+ * variance-wiki</a>
* Zero elements (including explicit zero values) are skipped when calling add(),
* to have time complexity O(nnz) instead of O(n) for each column.
*
* For weighted instances, the unbiased estimation of variance is defined by the reliability
- * weights: [[https://en.wikipedia.org/wiki/Weighted_arithmetic_mean#Reliability_weights]].
+ * weights:
+ * see <a href="https://en.wikipedia.org/wiki/Weighted_arithmetic_mean#Reliability_weights">
+ * Reliability weights (Wikipedia)</a>.
*/
@Since("1.1.0")
@DeveloperApi
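A short usage sketch of the online summarizer documented above; note that explicit zeros are skipped by add(), as the comment says.

    import org.apache.spark.mllib.linalg.Vectors
    import org.apache.spark.mllib.stat.MultivariateOnlineSummarizer

    val summarizer = new MultivariateOnlineSummarizer()
    summarizer.add(Vectors.dense(1.0, 0.0, 3.0))
    summarizer.add(Vectors.dense(2.0, 1.0, 0.0))
    println(summarizer.mean)     // per-column mean
    println(summarizer.variance) // unbiased per-column variance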
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/Statistics.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/Statistics.scala
index 925fdf4d7e..7ba9b29296 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/stat/Statistics.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/stat/Statistics.scala
@@ -88,7 +88,7 @@ object Statistics {
def corr(x: RDD[Double], y: RDD[Double]): Double = Correlations.corr(x, y)
/**
- * Java-friendly version of [[corr()]]
+ * Java-friendly version of `corr()`
*/
@Since("1.4.1")
def corr(x: JavaRDD[java.lang.Double], y: JavaRDD[java.lang.Double]): Double =
@@ -112,7 +112,7 @@ object Statistics {
def corr(x: RDD[Double], y: RDD[Double], method: String): Double = Correlations.corr(x, y, method)
/**
- * Java-friendly version of [[corr()]]
+ * Java-friendly version of `corr()`
*/
@Since("1.4.1")
def corr(x: JavaRDD[java.lang.Double], y: JavaRDD[java.lang.Double], method: String): Double =
@@ -176,7 +176,7 @@ object Statistics {
ChiSqTest.chiSquaredFeatures(data)
}
- /** Java-friendly version of [[chiSqTest()]] */
+ /** Java-friendly version of `chiSqTest()` */
@Since("1.5.0")
def chiSqTest(data: JavaRDD[LabeledPoint]): Array[ChiSqTestResult] = chiSqTest(data.rdd)
@@ -186,7 +186,8 @@ object Statistics {
* distribution of the sample data and the theoretical distribution, we can provide a test for the
* null hypothesis that the sample data comes from that theoretical distribution.
* For more information on KS Test:
- * @see [[https://en.wikipedia.org/wiki/Kolmogorov%E2%80%93Smirnov_test]]
+ * @see <a href="https://en.wikipedia.org/wiki/Kolmogorov%E2%80%93Smirnov_test">
+ * Kolmogorov-Smirnov test (Wikipedia)</a>
*
* @param data an `RDD[Double]` containing the sample of data to test
* @param cdf a `Double => Double` function to calculate the theoretical CDF at a given value
@@ -217,7 +218,7 @@ object Statistics {
KolmogorovSmirnovTest.testOneSample(data, distName, params: _*)
}
- /** Java-friendly version of [[kolmogorovSmirnovTest()]] */
+ /** Java-friendly version of `kolmogorovSmirnovTest()` */
@Since("1.5.0")
@varargs
def kolmogorovSmirnovTest(
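A hedged usage sketch of the one-sample KS test described above, assuming `sample: RDD[Double]` already exists.

    import org.apache.spark.mllib.stat.Statistics

    // Test against a standard normal: distName = "norm", params = (mean, stddev).
    val ksResult = Statistics.kolmogorovSmirnovTest(sample, "norm", 0.0, 1.0)
    println(s"statistic = ${ksResult.statistic}, p-value = ${ksResult.pValue}")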
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/distribution/MultivariateGaussian.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/distribution/MultivariateGaussian.scala
index 39c3644450..4cf662e036 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/stat/distribution/MultivariateGaussian.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/stat/distribution/MultivariateGaussian.scala
@@ -28,7 +28,8 @@ import org.apache.spark.mllib.util.MLUtils
* This class provides basic functionality for a Multivariate Gaussian (Normal) Distribution. In
* the event that the covariance matrix is singular, the density will be computed in a
* reduced dimensional subspace under which the distribution is supported.
- * (see [[http://en.wikipedia.org/wiki/Multivariate_normal_distribution#Degenerate_case]])
+ * (see <a href="http://en.wikipedia.org/wiki/Multivariate_normal_distribution#Degenerate_case">
+ * Degenerate case in Multivariate normal distribution (Wikipedia)</a>)
*
* @param mu The mean vector of the distribution
* @param sigma The covariance matrix of the distribution
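A small usage sketch of the distribution class documented above, using a 2-D standard normal (purely illustrative, and non-singular, so the degenerate case does not apply).

    import org.apache.spark.mllib.linalg.{Matrices, Vectors}
    import org.apache.spark.mllib.stat.distribution.MultivariateGaussian

    val gaussian = new MultivariateGaussian(
      Vectors.dense(0.0, 0.0),                         // mu
      Matrices.dense(2, 2, Array(1.0, 0.0, 0.0, 1.0))) // sigma (identity)
    println(gaussian.pdf(Vectors.dense(0.5, -0.5)))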
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/GradientBoostedTrees.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/GradientBoostedTrees.scala
index ece1e41d98..cdeef16135 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/GradientBoostedTrees.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/GradientBoostedTrees.scala
@@ -29,7 +29,7 @@ import org.apache.spark.rdd.RDD
/**
* A class that implements
- * [[http://en.wikipedia.org/wiki/Gradient_boosting Stochastic Gradient Boosting]]
+ * <a href="http://en.wikipedia.org/wiki/Gradient_boosting">Stochastic Gradient Boosting</a>
* for regression and binary classification.
*
* The implementation is based upon:
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/RandomForest.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/RandomForest.scala
index 14f11ce51b..428af21406 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/RandomForest.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/RandomForest.scala
@@ -36,7 +36,7 @@ import org.apache.spark.util.Utils
/**
- * A class that implements a [[http://en.wikipedia.org/wiki/Random_forest Random Forest]]
+ * A class that implements a <a href="http://en.wikipedia.org/wiki/Random_forest">Random Forest</a>
* learning algorithm for classification and regression.
* It supports both continuous and categorical features.
*
@@ -46,9 +46,9 @@ import org.apache.spark.util.Utils
* - The defaults of sqrt (classification) and onethird (regression) match the R randomForest
* package.
*
- * @see [[http://www.stat.berkeley.edu/~breiman/randomforest2001.pdf Breiman (2001)]]
- * @see [[http://www.stat.berkeley.edu/~breiman/Using_random_forests_V3.1.pdf Breiman manual for
- * random forests]]
+ * @see <a href="http://www.stat.berkeley.edu/~breiman/randomforest2001.pdf">Breiman (2001)</a>
+ * @see <a href="http://www.stat.berkeley.edu/~breiman/Using_random_forests_V3.1.pdf">
+ * Breiman manual for random forests</a>
* @param strategy The configuration parameters for the random forest algorithm which specify
* the type of random forest (classification or regression), feature type
* (continuous, categorical), depth of the tree, quantile calculation strategy,
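A usage sketch of the RDD-based random forest documented above, assuming `trainingData: RDD[LabeledPoint]` already exists; the hyperparameter values are illustrative.

    import org.apache.spark.mllib.tree.RandomForest

    val model = RandomForest.trainClassifier(
      trainingData,
      2,               // numClasses
      Map[Int, Int](), // categoricalFeaturesInfo: all features continuous
      100,             // numTrees
      "sqrt",          // featureSubsetStrategy (default for classification)
      "gini",          // impurity
      5,               // maxDepth
      32)              // maxBins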
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Split.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Split.scala
index 5cef9d0631..be2704df34 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Split.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Split.scala
@@ -25,7 +25,7 @@ import org.apache.spark.mllib.tree.configuration.FeatureType.FeatureType
* Split applied to a feature
* @param feature feature index
* @param threshold Threshold for continuous feature.
- * Split left if feature <= threshold, else right.
+ * Split left if feature &lt;= threshold, else right.
* @param featureType type of feature -- categorical or continuous
* @param categories Split left if categorical feature value is in this set, else right.
*/
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala
index e96c2bc6ed..6bb3271aac 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala
@@ -213,7 +213,7 @@ object MLUtils extends Logging {
}
/**
- * Version of [[kFold()]] taking a Long seed.
+ * Version of `kFold()` taking a Long seed.
*/
@Since("2.0.0")
def kFold[T: ClassTag](rdd: RDD[T], numFolds: Int, seed: Long): Array[(RDD[T], RDD[T])] = {
@@ -262,7 +262,7 @@ object MLUtils extends Logging {
* @param dataset input dataset
* @param cols a list of vector columns to be converted. New vector columns will be ignored. If
* unspecified, all old vector columns will be converted except nested ones.
- * @return the input [[DataFrame]] with old vector columns converted to the new vector type
+ * @return the input `DataFrame` with old vector columns converted to the new vector type
*/
@Since("2.0.0")
@varargs
@@ -314,7 +314,7 @@ object MLUtils extends Logging {
* @param dataset input dataset
* @param cols a list of vector columns to be converted. Old vector columns will be ignored. If
* unspecified, all new vector columns will be converted except nested ones.
- * @return the input [[DataFrame]] with new vector columns converted to the old vector type
+ * @return the input `DataFrame` with new vector columns converted to the old vector type
*/
@Since("2.0.0")
@varargs
@@ -366,7 +366,7 @@ object MLUtils extends Logging {
* @param dataset input dataset
* @param cols a list of matrix columns to be converted. New matrix columns will be ignored. If
* unspecified, all old matrix columns will be converted except nested ones.
- * @return the input [[DataFrame]] with old matrix columns converted to the new matrix type
+ * @return the input `DataFrame` with old matrix columns converted to the new matrix type
*/
@Since("2.0.0")
@varargs
@@ -416,7 +416,7 @@ object MLUtils extends Logging {
* @param dataset input dataset
* @param cols a list of matrix columns to be converted. Old matrix columns will be ignored. If
* unspecified, all new matrix columns will be converted except nested ones.
- * @return the input [[DataFrame]] with new matrix columns converted to the old matrix type
+ * @return the input `DataFrame` with new matrix columns converted to the old matrix type
*/
@Since("2.0.0")
@varargs
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/modelSaveLoad.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/modelSaveLoad.scala
index c881c8ea50..da0eb04764 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/util/modelSaveLoad.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/util/modelSaveLoad.scala
@@ -72,7 +72,7 @@ trait Loader[M <: Saveable] {
/**
* Load a model from the given path.
*
- * The model should have been saved by [[Saveable.save]].
+ * The model should have been saved by `Saveable.save`.
*
* @param sc Spark context used for loading model files.
* @param path Path specifying the directory to which the model was saved.
diff --git a/pom.xml b/pom.xml
index 7c0b0b59dc..5c417d2b35 100644
--- a/pom.xml
+++ b/pom.xml
@@ -2495,6 +2495,18 @@
<name>tparam</name>
<placement>X</placement>
</tag>
+ <tag>
+ <name>constructor</name>
+ <placement>X</placement>
+ </tag>
+ <tag>
+ <name>todo</name>
+ <placement>X</placement>
+ </tag>
+ <tag>
+ <name>groupname</name>
+ <placement>X</placement>
+ </tag>
</tags>
</configuration>
</plugin>
diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
index 429a163d22..e3fbe0379f 100644
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@ -745,7 +745,10 @@ object Unidoc {
"-tag", """example:a:Example\:""",
"-tag", """note:a:Note\:""",
"-tag", "group:X",
- "-tag", "tparam:X"
+ "-tag", "tparam:X",
+ "-tag", "constructor:X",
+ "-tag", "todo:X",
+ "-tag", "groupname:X"
),
// Use GitHub repository for Scaladoc source links
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala
index 65f9142964..a821d2ca34 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala
@@ -343,7 +343,7 @@ trait Row extends Serializable {
}
/**
- * Returns a Map(name -> value) for the requested fieldNames
+ * Returns a Map(name -&gt; value) for the requested fieldNames
* For primitive types, if the value is null it returns the 'zero value' specific to that primitive
* type, i.e. 0 for Int; use isNullAt to ensure that the value is not null.
*
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/CentralMomentAgg.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/CentralMomentAgg.scala
index 302054708c..1a93f45903 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/CentralMomentAgg.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/CentralMomentAgg.scala
@@ -37,8 +37,8 @@ import org.apache.spark.sql.types._
* - Xiangrui Meng. "Simpler Online Updates for Arbitrary-Order Central Moments."
* 2015. http://arxiv.org/abs/1510.04923
*
- * @see [[https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
- * Algorithms for calculating variance (Wikipedia)]]
+ * @see <a href="https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance">
+ * Algorithms for calculating variance (Wikipedia)</a>
*
* @param child to compute central moments of.
*/
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/BinaryType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/BinaryType.scala
index a4a358a242..02c8318b4d 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/BinaryType.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/BinaryType.scala
@@ -27,7 +27,7 @@ import org.apache.spark.sql.catalyst.util.TypeUtils
/**
* The data type representing `Array[Byte]` values.
- * Please use the singleton [[DataTypes.BinaryType]].
+ * Please use the singleton `DataTypes.BinaryType`.
*/
@InterfaceStability.Stable
class BinaryType private() extends AtomicType {
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/BooleanType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/BooleanType.scala
index 059f89f9cd..cee78f4b4a 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/BooleanType.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/BooleanType.scala
@@ -25,7 +25,7 @@ import org.apache.spark.sql.catalyst.ScalaReflectionLock
/**
- * The data type representing `Boolean` values. Please use the singleton [[DataTypes.BooleanType]].
+ * The data type representing `Boolean` values. Please use the singleton `DataTypes.BooleanType`.
*
* @since 1.3.0
*/
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/ByteType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/ByteType.scala
index bc6251f024..b1dd5eda36 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/ByteType.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/ByteType.scala
@@ -24,7 +24,7 @@ import org.apache.spark.annotation.InterfaceStability
import org.apache.spark.sql.catalyst.ScalaReflectionLock
/**
- * The data type representing `Byte` values. Please use the singleton [[DataTypes.ByteType]].
+ * The data type representing `Byte` values. Please use the singleton `DataTypes.ByteType`.
*
* @since 1.3.0
*/
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/CalendarIntervalType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/CalendarIntervalType.scala
index 21f3497ba0..2342036a57 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/CalendarIntervalType.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/CalendarIntervalType.scala
@@ -23,7 +23,7 @@ import org.apache.spark.annotation.InterfaceStability
* The data type representing calendar time intervals. The calendar time interval is stored
* internally in two components: the number of months and the number of microseconds.
*
- * Please use the singleton [[DataTypes.CalendarIntervalType]].
+ * Please use the singleton `DataTypes.CalendarIntervalType`.
*
* @note Calendar intervals are not comparable.
*
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DateType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DateType.scala
index 8d0ecc051f..0c0574b845 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DateType.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DateType.scala
@@ -27,7 +27,7 @@ import org.apache.spark.sql.catalyst.ScalaReflectionLock
/**
* A date type, supporting "0001-01-01" through "9999-12-31".
*
- * Please use the singleton [[DataTypes.DateType]].
+ * Please use the singleton `DataTypes.DateType`.
*
* Internally, this is represented as the number of days from 1970-01-01.
*
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DecimalType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DecimalType.scala
index d7ca0cbeed..cecad3b7b4 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DecimalType.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DecimalType.scala
@@ -34,7 +34,7 @@ import org.apache.spark.sql.catalyst.expressions.Expression
*
* The default precision and scale is (10, 0).
*
- * Please use [[DataTypes.createDecimalType()]] to create a specific instance.
+ * Please use `DataTypes.createDecimalType()` to create a specific instance.
*
* @since 1.3.0
*/
@@ -92,7 +92,7 @@ case class DecimalType(precision: Int, scale: Int) extends FractionalType {
}
/**
- * The default size of a value of the DecimalType is 8 bytes (precision <= 18) or 16 bytes.
+ * The default size of a value of the DecimalType is 8 bytes (precision &lt;= 18) or 16 bytes.
*/
override def defaultSize: Int = if (precision <= Decimal.MAX_LONG_DIGITS) 8 else 16
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DoubleType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DoubleType.scala
index c21ac0e43e..400f7aed6a 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DoubleType.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DoubleType.scala
@@ -26,7 +26,7 @@ import org.apache.spark.sql.catalyst.ScalaReflectionLock
import org.apache.spark.util.Utils
/**
- * The data type representing `Double` values. Please use the singleton [[DataTypes.DoubleType]].
+ * The data type representing `Double` values. Please use the singleton `DataTypes.DoubleType`.
*
* @since 1.3.0
*/
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/FloatType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/FloatType.scala
index c5bf8883ba..b9812b236d 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/FloatType.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/FloatType.scala
@@ -26,7 +26,7 @@ import org.apache.spark.sql.catalyst.ScalaReflectionLock
import org.apache.spark.util.Utils
/**
- * The data type representing `Float` values. Please use the singleton [[DataTypes.FloatType]].
+ * The data type representing `Float` values. Please use the singleton `DataTypes.FloatType`.
*
* @since 1.3.0
*/
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/IntegerType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/IntegerType.scala
index 724e59c0bc..dca612ecbf 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/IntegerType.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/IntegerType.scala
@@ -25,7 +25,7 @@ import org.apache.spark.sql.catalyst.ScalaReflectionLock
/**
- * The data type representing `Int` values. Please use the singleton [[DataTypes.IntegerType]].
+ * The data type representing `Int` values. Please use the singleton `DataTypes.IntegerType`.
*
* @since 1.3.0
*/
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/LongType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/LongType.scala
index 42285a9d0a..396c335570 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/LongType.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/LongType.scala
@@ -24,7 +24,7 @@ import org.apache.spark.annotation.InterfaceStability
import org.apache.spark.sql.catalyst.ScalaReflectionLock
/**
- * The data type representing `Long` values. Please use the singleton [[DataTypes.LongType]].
+ * The data type representing `Long` values. Please use the singleton `DataTypes.LongType`.
*
* @since 1.3.0
*/
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/MapType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/MapType.scala
index 3a32aa43d1..fbf3a61786 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/MapType.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/MapType.scala
@@ -25,7 +25,7 @@ import org.apache.spark.annotation.InterfaceStability
/**
* The data type for Maps. Keys in a map are not allowed to have `null` values.
*
- * Please use [[DataTypes.createMapType()]] to create a specific instance.
+ * Please use `DataTypes.createMapType()` to create a specific instance.
*
* @param keyType The data type of map keys.
* @param valueType The data type of map values.
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/NullType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/NullType.scala
index bdf9a819d0..494225b47a 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/NullType.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/NullType.scala
@@ -21,7 +21,7 @@ import org.apache.spark.annotation.InterfaceStability
/**
- * The data type representing `NULL` values. Please use the singleton [[DataTypes.NullType]].
+ * The data type representing `NULL` values. Please use the singleton `DataTypes.NullType`.
*
* @since 1.3.0
*/
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/ShortType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/ShortType.scala
index 3fee299d57..1410d5ba0e 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/ShortType.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/ShortType.scala
@@ -24,7 +24,7 @@ import org.apache.spark.annotation.InterfaceStability
import org.apache.spark.sql.catalyst.ScalaReflectionLock
/**
- * The data type representing `Short` values. Please use the singleton [[DataTypes.ShortType]].
+ * The data type representing `Short` values. Please use the singleton `DataTypes.ShortType`.
*
* @since 1.3.0
*/
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StringType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StringType.scala
index 5d5a6f52a3..d1c0da3479 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StringType.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StringType.scala
@@ -25,7 +25,7 @@ import org.apache.spark.sql.catalyst.ScalaReflectionLock
import org.apache.spark.unsafe.types.UTF8String
/**
- * The data type representing `String` values. Please use the singleton [[DataTypes.StringType]].
+ * The data type representing `String` values. Please use the singleton `DataTypes.StringType`.
*
* @since 1.3.0
*/
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/TimestampType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/TimestampType.scala
index 4540d8358a..2875995420 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/TimestampType.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/TimestampType.scala
@@ -26,7 +26,7 @@ import org.apache.spark.sql.catalyst.ScalaReflectionLock
/**
* The data type representing `java.sql.Timestamp` values.
- * Please use the singleton [[DataTypes.TimestampType]].
+ * Please use the singleton `DataTypes.TimestampType`.
*
* @since 1.3.0
*/
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala
index a77937efd7..5be9a99369 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala
@@ -239,8 +239,8 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging {
}
/**
- * Loads a JSON file ([[http://jsonlines.org/ JSON Lines text format or newline-delimited JSON]])
- * and returns the result as a [[DataFrame]].
+ * Loads a JSON file (<a href="http://jsonlines.org/">JSON Lines text format or
+ * newline-delimited JSON</a>) and returns the result as a [[DataFrame]].
* See the documentation on the overloaded `json()` method with varargs for more details.
*
* @since 1.4.0
@@ -251,8 +251,8 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging {
}
/**
- * Loads a JSON file ([[http://jsonlines.org/ JSON Lines text format or newline-delimited JSON]])
- * and returns the result as a [[DataFrame]].
+ * Loads a JSON file (<a href="http://jsonlines.org/">JSON Lines text format or
+ * newline-delimited JSON</a>) and returns the result as a [[DataFrame]].
*
* This function goes through the input once to determine the input schema. If you know the
* schema in advance, use the version that specifies the schema to avoid the extra scan.
@@ -297,8 +297,9 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging {
def json(paths: String*): DataFrame = format("json").load(paths : _*)
/**
- * Loads a `JavaRDD[String]` storing JSON objects ([[http://jsonlines.org/ JSON Lines text format
- * or newline-delimited JSON]]) and returns the result as a [[DataFrame]].
+ * Loads a `JavaRDD[String]` storing JSON objects (<a href="http://jsonlines.org/">JSON
+ * Lines text format or newline-delimited JSON</a>) and returns the result as
+ * a [[DataFrame]].
*
* Unless the schema is specified using [[schema]] function, this function goes through the
* input once to determine the input schema.
@@ -309,8 +310,8 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging {
def json(jsonRDD: JavaRDD[String]): DataFrame = json(jsonRDD.rdd)
/**
- * Loads an `RDD[String]` storing JSON objects ([[http://jsonlines.org/ JSON Lines text format or
- * newline-delimited JSON]]) and returns the result as a [[DataFrame]].
+ * Loads an `RDD[String]` storing JSON objects (<a href="http://jsonlines.org/">JSON Lines
+ * text format or newline-delimited JSON</a>) and returns the result as a [[DataFrame]].
*
* Unless the schema is specified using [[schema]] function, this function goes through the
* input once to determine the input schema.
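For reference, the `json()` readers touched above expect JSON Lines input (one JSON object per line). A small, illustrative sketch of the schema-inferring and explicit-schema variants; `spark` and the path are assumed, not part of the patch:
{{{
import org.apache.spark.sql.types.{LongType, StringType, StructType}

// One pass over the data to infer the schema.
val inferred = spark.read.json("/path/to/people.jsonl")

// Supplying the schema up front avoids the extra scan.
val schema = new StructType().add("id", LongType).add("name", StringType)
val typed = spark.read.schema(schema).json("/path/to/people.jsonl")
}}}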
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala
index 6335fc4579..a9a861c463 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala
@@ -48,8 +48,8 @@ final class DataFrameStatFunctions private[sql](df: DataFrame) {
*
* This method implements a variation of the Greenwald-Khanna algorithm (with some speed
* optimizations).
- * The algorithm was first present in [[http://dx.doi.org/10.1145/375663.375670 Space-efficient
- * Online Computation of Quantile Summaries]] by Greenwald and Khanna.
+ * The algorithm was first presented in <a href="http://dx.doi.org/10.1145/375663.375670">

+ * Space-efficient Online Computation of Quantile Summaries</a> by Greenwald and Khanna.
*
* @param col the name of the numerical column
* @param probabilities a list of quantile probabilities
@@ -184,7 +184,8 @@ final class DataFrameStatFunctions private[sql](df: DataFrame) {
/**
* Finding frequent items for columns, possibly with false positives. Using the
* frequent element count algorithm described in
- * [[http://dx.doi.org/10.1145/762471.762473, proposed by Karp, Schenker, and Papadimitriou]].
+ * <a href="http://dx.doi.org/10.1145/762471.762473">here</a>, proposed by Karp,
+ * Schenker, and Papadimitriou.
* The `support` should be greater than 1e-4.
*
* This function is meant for exploratory data analysis, as we make no guarantee about the
@@ -230,7 +231,8 @@ final class DataFrameStatFunctions private[sql](df: DataFrame) {
/**
* Finding frequent items for columns, possibly with false positives. Using the
* frequent element count algorithm described in
- * [[http://dx.doi.org/10.1145/762471.762473, proposed by Karp, Schenker, and Papadimitriou]].
+ * <a href="http://dx.doi.org/10.1145/762471.762473">here</a>, proposed by Karp,
+ * Schenker, and Papadimitriou.
* Uses a `default` support of 1%.
*
* This function is meant for exploratory data analysis, as we make no guarantee about the
@@ -248,7 +250,8 @@ final class DataFrameStatFunctions private[sql](df: DataFrame) {
/**
* (Scala-specific) Finding frequent items for columns, possibly with false positives. Using the
* frequent element count algorithm described in
- * [[http://dx.doi.org/10.1145/762471.762473, proposed by Karp, Schenker, and Papadimitriou]].
+ * <a href="http://dx.doi.org/10.1145/762471.762473">here</a>, proposed by Karp, Schenker,
+ * and Papadimitriou.
*
* This function is meant for exploratory data analysis, as we make no guarantee about the
* backward compatibility of the schema of the resulting [[DataFrame]].
@@ -291,7 +294,8 @@ final class DataFrameStatFunctions private[sql](df: DataFrame) {
/**
* (Scala-specific) Finding frequent items for columns, possibly with false positives. Using the
* frequent element count algorithm described in
- * [[http://dx.doi.org/10.1145/762471.762473, proposed by Karp, Schenker, and Papadimitriou]].
+ * <a href="http://dx.doi.org/10.1145/762471.762473">here</a>, proposed by Karp, Schenker,
+ * and Papadimitriou.
* Uses a `default` support of 1%.
*
* This function is meant for exploratory data analysis, as we make no guarantee about the
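To make the two `DataFrameStatFunctions` entry points above concrete, a short sketch (illustrative only; `df` and its column names are assumed):
{{{
// Greenwald-Khanna based approximate quantiles of a numerical column.
val quartiles: Array[Double] =
  df.stat.approxQuantile("age", Array(0.25, 0.5, 0.75), 0.01)

// Karp/Schenker/Papadimitriou frequent-item sketch; support must exceed 1e-4.
val frequent = df.stat.freqItems(Seq("city"), 0.01)
}}}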
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala
index 15281f24fa..2d863422fb 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala
@@ -442,8 +442,8 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) {
}
/**
- * Saves the content of the [[DataFrame]] in JSON format ([[http://jsonlines.org/ JSON Lines text
- * format or newline-delimited JSON]]) at the specified path.
+ * Saves the content of the [[DataFrame]] in JSON format (<a href="http://jsonlines.org/">
+ * JSON Lines text format or newline-delimited JSON</a>) at the specified path.
* This is equivalent to:
* {{{
* format("json").save(path)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
index 2fae93651b..858fa4c760 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
@@ -172,7 +172,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
def experimental: ExperimentalMethods = sparkSession.experimental
/**
- * Returns a [[DataFrame]] with no rows or columns.
+ * Returns a `DataFrame` with no rows or columns.
*
* @group basic
* @since 1.3.0
@@ -254,7 +254,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
/**
* :: Experimental ::
* (Scala-specific) Implicit methods available in Scala for converting
- * common Scala objects into [[DataFrame]]s.
+ * common Scala objects into `DataFrame`s.
*
* {{{
* val sqlContext = new SQLContext(sc)
@@ -298,7 +298,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
}
/**
- * Convert a [[BaseRelation]] created for external data sources into a [[DataFrame]].
+ * Convert a [[BaseRelation]] created for external data sources into a `DataFrame`.
*
* @group dataframes
* @since 1.3.0
@@ -309,7 +309,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
/**
* :: DeveloperApi ::
- * Creates a [[DataFrame]] from an [[RDD]] containing [[Row]]s using the given schema.
+ * Creates a `DataFrame` from an [[RDD]] containing [[Row]]s using the given schema.
* It is important to make sure that the structure of every [[Row]] of the provided RDD matches
* the provided schema. Otherwise, there will be runtime exception.
* Example:
@@ -438,7 +438,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
/**
* :: DeveloperApi ::
- * Creates a [[DataFrame]] from a [[JavaRDD]] containing [[Row]]s using the given schema.
+ * Creates a `DataFrame` from a [[JavaRDD]] containing [[Row]]s using the given schema.
* It is important to make sure that the structure of every [[Row]] of the provided RDD matches
* the provided schema. Otherwise, there will be runtime exception.
*
@@ -453,7 +453,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
/**
* :: DeveloperApi ::
- * Creates a [[DataFrame]] from a [[java.util.List]] containing [[Row]]s using the given schema.
+ * Creates a `DataFrame` from a [[java.util.List]] containing [[Row]]s using the given schema.
* It is important to make sure that the structure of every [[Row]] of the provided List matches
* the provided schema. Otherwise, there will be runtime exception.
*
@@ -504,7 +504,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
/**
* Returns a [[DataFrameReader]] that can be used to read non-streaming data in as a
- * [[DataFrame]].
+ * `DataFrame`.
* {{{
* sqlContext.read.parquet("/path/to/file.parquet")
* sqlContext.read.schema(schema).json("/path/to/file.json")
@@ -518,7 +518,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
/**
* :: Experimental ::
- * Returns a [[DataStreamReader]] that can be used to read streaming data in as a [[DataFrame]].
+ * Returns a [[DataStreamReader]] that can be used to read streaming data in as a `DataFrame`.
* {{{
* sparkSession.readStream.parquet("/path/to/directory/of/parquet/files")
* sparkSession.readStream.schema(schema).json("/path/to/directory/of/json/files")
@@ -617,7 +617,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
}
/**
- * Registers the given [[DataFrame]] as a temporary table in the catalog. Temporary tables exist
+ * Registers the given `DataFrame` as a temporary table in the catalog. Temporary tables exist
* only during the lifetime of this instance of SQLContext.
*/
private[sql] def registerDataFrameAsTable(df: DataFrame, tableName: String): Unit = {
@@ -638,7 +638,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
/**
* :: Experimental ::
- * Creates a [[DataFrame]] with a single [[LongType]] column named `id`, containing elements
+ * Creates a `DataFrame` with a single [[LongType]] column named `id`, containing elements
* in a range from 0 to `end` (exclusive) with step value 1.
*
* @since 1.4.1
@@ -650,7 +650,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
/**
* :: Experimental ::
- * Creates a [[DataFrame]] with a single [[LongType]] column named `id`, containing elements
+ * Creates a `DataFrame` with a single [[LongType]] column named `id`, containing elements
* in a range from `start` to `end` (exclusive) with step value 1.
*
* @since 1.4.0
@@ -662,7 +662,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
/**
* :: Experimental ::
- * Creates a [[DataFrame]] with a single [[LongType]] column named `id`, containing elements
+ * Creates a `DataFrame` with a single [[LongType]] column named `id`, containing elements
* in a range from `start` to `end` (exclusive) with a step value.
*
* @since 2.0.0
@@ -676,7 +676,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
/**
* :: Experimental ::
- * Creates a [[DataFrame]] with a single [[LongType]] column named `id`, containing elements
+ * Creates a `DataFrame` with a single [[LongType]] column named `id`, containing elements
* in a range from `start` to `end` (exclusive) with a step value, with partition number
* specified.
*
@@ -690,7 +690,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
}
/**
- * Executes a SQL query using Spark, returning the result as a [[DataFrame]]. The dialect that is
+ * Executes a SQL query using Spark, returning the result as a `DataFrame`. The dialect that is
* used for SQL parsing can be configured with 'spark.sql.dialect'.
*
* @group basic
@@ -699,7 +699,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
def sql(sqlText: String): DataFrame = sparkSession.sql(sqlText)
/**
- * Returns the specified table as a [[DataFrame]].
+ * Returns the specified table as a `DataFrame`.
*
* @group ddl_ops
* @since 1.3.0
@@ -709,7 +709,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
}
/**
- * Returns a [[DataFrame]] containing names of existing tables in the current database.
+ * Returns a `DataFrame` containing names of existing tables in the current database.
* The returned DataFrame has two columns, tableName and isTemporary (a Boolean
* indicating if a table is a temporary one or not).
*
@@ -721,7 +721,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
}
/**
- * Returns a [[DataFrame]] containing names of existing tables in the given database.
+ * Returns a `DataFrame` containing names of existing tables in the given database.
* The returned DataFrame has two columns, tableName and isTemporary (a Boolean
* indicating if a table is a temporary one or not).
*
@@ -799,8 +799,8 @@ class SQLContext private[sql](val sparkSession: SparkSession)
}
/**
- * Loads a Parquet file, returning the result as a [[DataFrame]]. This function returns an empty
- * [[DataFrame]] if no paths are passed in.
+ * Loads a Parquet file, returning the result as a `DataFrame`. This function returns an empty
+ * `DataFrame` if no paths are passed in.
*
* @group specificdata
* @deprecated As of 1.4.0, replaced by `read().parquet()`.
@@ -816,7 +816,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
}
/**
- * Loads a JSON file (one object per line), returning the result as a [[DataFrame]].
+ * Loads a JSON file (one object per line), returning the result as a `DataFrame`.
* It goes through the entire dataset once to determine the schema.
*
* @group specificdata
@@ -829,7 +829,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
/**
* Loads a JSON file (one object per line) and applies the given schema,
- * returning the result as a [[DataFrame]].
+ * returning the result as a `DataFrame`.
*
* @group specificdata
* @deprecated As of 1.4.0, replaced by `read().json()`.
@@ -850,7 +850,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
/**
* Loads an RDD[String] storing JSON objects (one object per record), returning the result as a
- * [[DataFrame]].
+ * `DataFrame`.
* It goes through the entire dataset once to determine the schema.
*
* @group specificdata
@@ -861,7 +861,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
/**
* Loads an RDD[String] storing JSON objects (one object per record), returning the result as a
- * [[DataFrame]].
+ * `DataFrame`.
* It goes through the entire dataset once to determine the schema.
*
* @group specificdata
@@ -872,7 +872,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
/**
* Loads an RDD[String] storing JSON objects (one object per record) and applies the given schema,
- * returning the result as a [[DataFrame]].
+ * returning the result as a `DataFrame`.
*
* @group specificdata
* @deprecated As of 1.4.0, replaced by `read().json()`.
@@ -884,7 +884,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
/**
* Loads a JavaRDD<String> storing JSON objects (one object per record) and applies the given
- * schema, returning the result as a [[DataFrame]].
+ * schema, returning the result as a `DataFrame`.
*
* @group specificdata
* @deprecated As of 1.4.0, replaced by `read().json()`.
@@ -896,7 +896,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
/**
* Loads an RDD[String] storing JSON objects (one object per record) inferring the
- * schema, returning the result as a [[DataFrame]].
+ * schema, returning the result as a `DataFrame`.
*
* @group specificdata
* @deprecated As of 1.4.0, replaced by `read().json()`.
@@ -908,7 +908,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
/**
* Loads a JavaRDD[String] storing JSON objects (one object per record) inferring the
- * schema, returning the result as a [[DataFrame]].
+ * schema, returning the result as a `DataFrame`.
*
* @group specificdata
* @deprecated As of 1.4.0, replaced by `read().json()`.
@@ -995,7 +995,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
}
/**
- * Construct a [[DataFrame]] representing the database table accessible via JDBC URL
+ * Construct a `DataFrame` representing the database table accessible via JDBC URL
* url named table.
*
* @group specificdata
@@ -1007,7 +1007,7 @@ class SQLContext private[sql](val sparkSession: SparkSession)
}
/**
- * Construct a [[DataFrame]] representing the database table accessible via JDBC URL
+ * Construct a `DataFrame` representing the database table accessible via JDBC URL
* url named table. Partitions of the table will be retrieved in parallel based on the parameters
* passed to this function.
*
@@ -1031,10 +1031,10 @@ class SQLContext private[sql](val sparkSession: SparkSession)
}
/**
- * Construct a [[DataFrame]] representing the database table accessible via JDBC URL
+ * Construct a `DataFrame` representing the database table accessible via JDBC URL
* url named table. The theParts parameter gives a list of expressions
* suitable for inclusion in WHERE clauses; each one defines one partition
- * of the [[DataFrame]].
+ * of the `DataFrame`.
*
* @group specificdata
* @deprecated As of 1.4.0, replaced by `read().jdbc()`.
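Several of the `SQLContext` methods re-documented above fit together as below; a hedged sketch with made-up data, assuming `sqlContext` and a SparkContext `sc` are in scope:
{{{
import org.apache.spark.sql.Row
import org.apache.spark.sql.types.{LongType, StringType, StructType}

val schema = new StructType().add("id", LongType).add("name", StringType)
val rows = sc.parallelize(Seq(Row(1L, "a"), Row(2L, "b")))

// Every Row must match the schema, otherwise a runtime exception is thrown.
val people = sqlContext.createDataFrame(rows, schema)
people.createOrReplaceTempView("people")

val ids = sqlContext.range(0, 10)   // single LongType column named "id"
val result = sqlContext.sql("SELECT name FROM people WHERE id > 1")
}}}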
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/FrequentItems.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/FrequentItems.scala
index b9dbfcf773..cdb755edc7 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/FrequentItems.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/FrequentItems.scala
@@ -69,7 +69,8 @@ object FrequentItems extends Logging {
/**
* Finding frequent items for columns, possibly with false positives. Using the
* frequent element count algorithm described in
- * [[http://dx.doi.org/10.1145/762471.762473, proposed by Karp, Schenker, and Papadimitriou]].
+ * <a href="http://dx.doi.org/10.1145/762471.762473">here</a>, proposed by Karp, Schenker,
+ * and Papadimitriou.
* The `support` should be greater than 1e-4.
* For Internal use only.
*
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala
index c02b154987..2b2e706125 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala
@@ -41,8 +41,8 @@ object StatFunctions extends Logging {
*
* This method implements a variation of the Greenwald-Khanna algorithm (with some speed
* optimizations).
- * The algorithm was first present in [[http://dx.doi.org/10.1145/375663.375670 Space-efficient
- * Online Computation of Quantile Summaries]] by Greenwald and Khanna.
+ * The algorithm was first presented in <a href="http://dx.doi.org/10.1145/375663.375670">
+ * Space-efficient Online Computation of Quantile Summaries</a> by Greenwald and Khanna.
*
* @param df the dataframe
* @param cols numerical columns of the dataframe
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/expressions/Aggregator.scala b/sql/core/src/main/scala/org/apache/spark/sql/expressions/Aggregator.scala
index eea9841400..058c38c8cb 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/expressions/Aggregator.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/expressions/Aggregator.scala
@@ -25,7 +25,7 @@ import org.apache.spark.sql.execution.aggregate.TypedAggregateExpression
/**
* :: Experimental ::
- * A base class for user-defined aggregations, which can be used in [[Dataset]] operations to take
+ * A base class for user-defined aggregations, which can be used in `Dataset` operations to take
* all of the elements of a group and reduce them to a single value.
*
* For example, the following aggregator extracts an `int` from a specific class and adds them up:
@@ -80,19 +80,19 @@ abstract class Aggregator[-IN, BUF, OUT] extends Serializable {
def finish(reduction: BUF): OUT
/**
- * Specifies the [[Encoder]] for the intermediate value type.
+ * Specifies the `Encoder` for the intermediate value type.
* @since 2.0.0
*/
def bufferEncoder: Encoder[BUF]
/**
- * Specifies the [[Encoder]] for the final ouput value type.
+ * Specifies the `Encoder` for the final output value type.
* @since 2.0.0
*/
def outputEncoder: Encoder[OUT]
/**
- * Returns this `Aggregator` as a [[TypedColumn]] that can be used in [[Dataset]].
+ * Returns this `Aggregator` as a `TypedColumn` that can be used in `Dataset`
* operations.
* @since 1.6.0
*/
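The `Aggregator` contract documented above amounts to four functions plus two encoders; a minimal sum-style sketch, not part of the patch, with hypothetical class and field names:
{{{
import org.apache.spark.sql.{Encoder, Encoders}
import org.apache.spark.sql.expressions.Aggregator

case class Record(i: Int)

object IntSum extends Aggregator[Record, Long, Long] {
  def zero: Long = 0L                              // identity for merge
  def reduce(b: Long, a: Record): Long = b + a.i   // fold one input into the buffer
  def merge(b1: Long, b2: Long): Long = b1 + b2    // combine partial sums
  def finish(reduction: Long): Long = reduction    // final output value
  def bufferEncoder: Encoder[Long] = Encoders.scalaLong
  def outputEncoder: Encoder[Long] = Encoders.scalaLong
}

// Used as a typed column on a Dataset[Record]:
// ds.select(IntSum.toColumn)
}}}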
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/expressions/UserDefinedFunction.scala b/sql/core/src/main/scala/org/apache/spark/sql/expressions/UserDefinedFunction.scala
index 36dd5f78ac..b13fe70160 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/expressions/UserDefinedFunction.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/expressions/UserDefinedFunction.scala
@@ -24,7 +24,7 @@ import org.apache.spark.sql.functions
import org.apache.spark.sql.types.DataType
/**
- * A user-defined function. To create one, use the `udf` functions in [[functions]].
+ * A user-defined function. To create one, use the `udf` functions in `functions`.
*
* As an example:
* {{{
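A tiny usage sketch for the `udf` helpers referenced above (illustrative only; `df` is assumed to have a string column named "name"):
{{{
import org.apache.spark.sql.functions.{col, udf}

// Wrap a plain Scala function as a column expression.
val toUpper = udf((s: String) => if (s == null) null else s.toUpperCase)
val shouted = df.select(toUpper(col("name")).as("name_upper"))
}}}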
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/expressions/Window.scala b/sql/core/src/main/scala/org/apache/spark/sql/expressions/Window.scala
index 327bc379d4..f3cf3052ea 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/expressions/Window.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/expressions/Window.scala
@@ -117,8 +117,8 @@ object Window {
* "current row", while "-1" means the row before the current row, and "5" means the fifth row
* after the current row.
*
- * We recommend users use [[Window.unboundedPreceding]], [[Window.unboundedFollowing]],
- * and [[Window.currentRow]] to specify special boundary values, rather than using integral
+ * We recommend users use `Window.unboundedPreceding`, `Window.unboundedFollowing`,
+ * and `Window.currentRow` to specify special boundary values, rather than using integral
* values directly.
*
* A row based boundary is based on the position of the row within the partition.
@@ -148,9 +148,9 @@ object Window {
* }}}
*
* @param start boundary start, inclusive. The frame is unbounded if this is
- * the minimum long value ([[Window.unboundedPreceding]]).
+ * the minimum long value (`Window.unboundedPreceding`).
* @param end boundary end, inclusive. The frame is unbounded if this is the
- * maximum long value ([[Window.unboundedFollowing]]).
+ * maximum long value (`Window.unboundedFollowing`).
* @since 2.1.0
*/
// Note: when updating the doc for this method, also update WindowSpec.rowsBetween.
@@ -166,8 +166,8 @@ object Window {
* while "-1" means one off before the current row, and "5" means the five off after the
* current row.
*
- * We recommend users use [[Window.unboundedPreceding]], [[Window.unboundedFollowing]],
- * and [[Window.currentRow]] to specify special boundary values, rather than using integral
+ * We recommend users use `Window.unboundedPreceding`, `Window.unboundedFollowing`,
+ * and `Window.currentRow` to specify special boundary values, rather than using integral
* values directly.
*
* A range based boundary is based on the actual value of the ORDER BY
@@ -200,9 +200,9 @@ object Window {
* }}}
*
* @param start boundary start, inclusive. The frame is unbounded if this is
- * the minimum long value ([[Window.unboundedPreceding]]).
+ * the minimum long value (`Window.unboundedPreceding`).
* @param end boundary end, inclusive. The frame is unbounded if this is the
- * maximum long value ([[Window.unboundedFollowing]]).
+ * maximum long value (`Window.unboundedFollowing`).
* @since 2.1.0
*/
// Note: when updating the doc for this method, also update WindowSpec.rangeBetween.
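The row-based frame described above, expressed with the new boundary constants; a hedged sketch over an assumed DataFrame with "category", "id" and "value" columns:
{{{
import org.apache.spark.sql.expressions.Window
import org.apache.spark.sql.functions.sum

// Running total from the start of each partition up to the current row.
val w = Window.partitionBy("category").orderBy("id")
  .rowsBetween(Window.unboundedPreceding, Window.currentRow)

val withTotal = df.withColumn("running_total", sum("value").over(w))
}}}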
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/expressions/WindowSpec.scala b/sql/core/src/main/scala/org/apache/spark/sql/expressions/WindowSpec.scala
index 4a8ce695bd..de7d7a1772 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/expressions/WindowSpec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/expressions/WindowSpec.scala
@@ -85,8 +85,8 @@ class WindowSpec private[sql](
* "current row", while "-1" means the row before the current row, and "5" means the fifth row
* after the current row.
*
- * We recommend users use [[Window.unboundedPreceding]], [[Window.unboundedFollowing]],
- * and [[Window.currentRow]] to specify special boundary values, rather than using integral
+ * We recommend users use `Window.unboundedPreceding`, `Window.unboundedFollowing`,
+ * and `Window.currentRow` to specify special boundary values, rather than using integral
* values directly.
*
* A row based boundary is based on the position of the row within the partition.
@@ -116,9 +116,9 @@ class WindowSpec private[sql](
* }}}
*
* @param start boundary start, inclusive. The frame is unbounded if this is
- * the minimum long value ([[Window.unboundedPreceding]]).
+ * the minimum long value (`Window.unboundedPreceding`).
* @param end boundary end, inclusive. The frame is unbounded if this is the
- * maximum long value ([[Window.unboundedFollowing]]).
+ * maximum long value (`Window.unboundedFollowing`).
* @since 1.4.0
*/
// Note: when updating the doc for this method, also update Window.rowsBetween.
@@ -133,8 +133,8 @@ class WindowSpec private[sql](
* while "-1" means one off before the current row, and "5" means the five off after the
* current row.
*
- * We recommend users use [[Window.unboundedPreceding]], [[Window.unboundedFollowing]],
- * and [[Window.currentRow]] to specify special boundary values, rather than using integral
+ * We recommend users use `Window.unboundedPreceding`, `Window.unboundedFollowing`,
+ * and `Window.currentRow` to specify special boundary values, rather than using integral
* values directly.
*
* A range based boundary is based on the actual value of the ORDER BY
@@ -167,9 +167,9 @@ class WindowSpec private[sql](
* }}}
*
* @param start boundary start, inclusive. The frame is unbounded if this is
- * the minimum long value ([[Window.unboundedPreceding]]).
+ * the minimum long value (`Window.unboundedPreceding`).
* @param end boundary end, inclusive. The frame is unbounded if this is the
- * maximum long value ([[Window.unboundedFollowing]]).
+ * maximum long value (`Window.unboundedFollowing`).
* @since 1.4.0
*/
// Note: when updating the doc for this method, also update Window.rangeBetween.
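For contrast with the row-based frame shown earlier, the range-based variant documented above keys the frame off the ORDER BY value itself; a short sketch under the same assumptions (numeric "value" column, `df` in scope):
{{{
import org.apache.spark.sql.expressions.Window
import org.apache.spark.sql.functions.avg

// Rows whose "value" lies within [current value - 5, current value] are in frame.
val spec = Window.partitionBy("category").orderBy("value")
  .rangeBetween(-5L, Window.currentRow)

val smoothed = df.withColumn("trailing_avg", avg("value").over(spec))
}}}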
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/expressions/scalalang/typed.scala b/sql/core/src/main/scala/org/apache/spark/sql/expressions/scalalang/typed.scala
index aa71cb9e3b..650ffd4586 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/expressions/scalalang/typed.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/expressions/scalalang/typed.scala
@@ -23,7 +23,7 @@ import org.apache.spark.sql.execution.aggregate._
/**
* :: Experimental ::
- * Type-safe functions available for [[Dataset]] operations in Scala.
+ * Type-safe functions available for `Dataset` operations in Scala.
*
* Java users should use [[org.apache.spark.sql.expressions.javalang.typed]].
*
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/expressions/udaf.scala b/sql/core/src/main/scala/org/apache/spark/sql/expressions/udaf.scala
index bc9788d81f..4976b875fa 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/expressions/udaf.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/expressions/udaf.scala
@@ -32,9 +32,9 @@ import org.apache.spark.sql.types._
abstract class UserDefinedAggregateFunction extends Serializable {
/**
- * A [[StructType]] represents data types of input arguments of this aggregate function.
+ * A `StructType` represents data types of input arguments of this aggregate function.
* For example, if a [[UserDefinedAggregateFunction]] expects two input arguments
- * with type of [[DoubleType]] and [[LongType]], the returned [[StructType]] will look like
+ * with type of `DoubleType` and `LongType`, the returned `StructType` will look like
*
* ```
* new StructType()
@@ -42,7 +42,7 @@ abstract class UserDefinedAggregateFunction extends Serializable {
* .add("longInput", LongType)
* ```
*
- * The name of a field of this [[StructType]] is only used to identify the corresponding
+ * The name of a field of this `StructType` is only used to identify the corresponding
* input argument. Users can choose names to identify the input arguments.
*
* @since 1.5.0
@@ -50,10 +50,10 @@ abstract class UserDefinedAggregateFunction extends Serializable {
def inputSchema: StructType
/**
- * A [[StructType]] represents data types of values in the aggregation buffer.
+ * A `StructType` represents data types of values in the aggregation buffer.
* For example, if a [[UserDefinedAggregateFunction]]'s buffer has two values
- * (i.e. two intermediate values) with type of [[DoubleType]] and [[LongType]],
- * the returned [[StructType]] will look like
+ * (i.e. two intermediate values) with type of `DoubleType` and `LongType`,
+ * the returned `StructType` will look like
*
* ```
* new StructType()
@@ -61,7 +61,7 @@ abstract class UserDefinedAggregateFunction extends Serializable {
* .add("longInput", LongType)
* ```
*
- * The name of a field of this [[StructType]] is only used to identify the corresponding
+ * The name of a field of this `StructType` is only used to identify the corresponding
* buffer value. Users can choose names to identify the input arguments.
*
* @since 1.5.0
@@ -69,7 +69,7 @@ abstract class UserDefinedAggregateFunction extends Serializable {
def bufferSchema: StructType
/**
- * The [[DataType]] of the returned value of this [[UserDefinedAggregateFunction]].
+ * The `DataType` of the returned value of this [[UserDefinedAggregateFunction]].
*
* @since 1.5.0
*/
@@ -121,7 +121,7 @@ abstract class UserDefinedAggregateFunction extends Serializable {
def evaluate(buffer: Row): Any
/**
- * Creates a [[Column]] for this UDAF using given [[Column]]s as input arguments.
+ * Creates a `Column` for this UDAF using given `Column`s as input arguments.
*
* @since 1.5.0
*/
@@ -136,8 +136,8 @@ abstract class UserDefinedAggregateFunction extends Serializable {
}
/**
- * Creates a [[Column]] for this UDAF using the distinct values of the given
- * [[Column]]s as input arguments.
+ * Creates a `Column` for this UDAF using the distinct values of the given
+ * `Column`s as input arguments.
*
* @since 1.5.0
*/
@@ -153,7 +153,7 @@ abstract class UserDefinedAggregateFunction extends Serializable {
}
/**
- * A [[Row]] representing a mutable aggregation buffer.
+ * A `Row` representing a mutable aggregation buffer.
*
* This is not meant to be extended outside of Spark.
*
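Putting the pieces of the `UserDefinedAggregateFunction` contract above together, a minimal long-sum sketch (class and column names are illustrative, not from the patch):
{{{
import org.apache.spark.sql.Row
import org.apache.spark.sql.expressions.{MutableAggregationBuffer, UserDefinedAggregateFunction}
import org.apache.spark.sql.types._

class LongSum extends UserDefinedAggregateFunction {
  def inputSchema: StructType = new StructType().add("value", LongType)
  def bufferSchema: StructType = new StructType().add("total", LongType)
  def dataType: DataType = LongType
  def deterministic: Boolean = true

  def initialize(buffer: MutableAggregationBuffer): Unit = buffer(0) = 0L

  def update(buffer: MutableAggregationBuffer, input: Row): Unit =
    if (!input.isNullAt(0)) buffer(0) = buffer.getLong(0) + input.getLong(0)

  def merge(buffer1: MutableAggregationBuffer, buffer2: Row): Unit =
    buffer1(0) = buffer1.getLong(0) + buffer2.getLong(0)

  def evaluate(buffer: Row): Any = buffer.getLong(0)
}

// usage: df.agg(new LongSum()(col("value")))
}}}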
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala
index 7c64e28d24..83857c322a 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala
@@ -40,7 +40,7 @@ case class JdbcType(databaseTypeDefinition : String, jdbcNullType : Int)
* SQL dialect of a certain database or jdbc driver.
* Lots of databases define types that aren't explicitly supported
* by the JDBC spec. Some JDBC drivers also report inaccurate
- * information---for instance, BIT(n>1) being reported as a BIT type is quite
+ * information---for instance, BIT(n&gt;1) being reported as a BIT type is quite
* common, even though BIT in JDBC is meant for single-bit values. Also, there
* does not appear to be a standard name for an unbounded string or binary
* type; we use BLOB and CLOB by default but override with database-specific
@@ -134,7 +134,7 @@ abstract class JdbcDialect extends Serializable {
/**
* :: DeveloperApi ::
- * Registry of dialects that apply to every new jdbc [[org.apache.spark.sql.DataFrame]].
+ * Registry of dialects that apply to every new jdbc `org.apache.spark.sql.DataFrame`.
*
* If multiple matching dialects are registered then all matching ones will be
* tried in reverse order. A user-added dialect will thus be applied first,
@@ -148,7 +148,7 @@ abstract class JdbcDialect extends Serializable {
object JdbcDialects {
/**
- * Register a dialect for use on all new matching jdbc [[org.apache.spark.sql.DataFrame]].
+ * Register a dialect for use on all new matching jdbc `org.apache.spark.sql.DataFrame`.
* Reading an existing dialect will cause a move-to-front.
*
* @param dialect The new dialect.
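The dialect registry above can be extended from user code; a hedged sketch with a made-up JDBC URL prefix:
{{{
import org.apache.spark.sql.jdbc.{JdbcDialect, JdbcDialects}

// Hypothetical dialect for a database whose URLs start with "jdbc:mydb:".
object MyDialect extends JdbcDialect {
  override def canHandle(url: String): Boolean = url.startsWith("jdbc:mydb:")
}

// Registered dialects are tried in reverse registration order, so this one
// takes precedence over the built-in dialects for matching URLs.
JdbcDialects.registerDialect(MyDialect)
}}}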
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala
index 40b482e4c0..c50733534e 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala
@@ -27,8 +27,8 @@ import org.apache.spark.sql.execution.streaming.StreamingRelation
import org.apache.spark.sql.types.StructType
/**
- * Interface used to load a streaming [[Dataset]] from external storage systems (e.g. file systems,
- * key-value stores, etc). Use [[SparkSession.readStream]] to access this.
+ * Interface used to load a streaming `Dataset` from external storage systems (e.g. file systems,
+ * key-value stores, etc). Use `SparkSession.readStream` to access this.
*
* @since 2.0.0
*/
@@ -109,7 +109,7 @@ final class DataStreamReader private[sql](sparkSession: SparkSession) extends Lo
/**
- * Loads input data stream in as a [[DataFrame]], for data streams that don't require a path
+ * Loads input data stream in as a `DataFrame`, for data streams that don't require a path
* (e.g. external key-value stores).
*
* @since 2.0.0
@@ -125,7 +125,7 @@ final class DataStreamReader private[sql](sparkSession: SparkSession) extends Lo
}
/**
- * Loads input in as a [[DataFrame]], for data streams that read from some path.
+ * Loads input in as a `DataFrame`, for data streams that read from some path.
*
* @since 2.0.0
*/
@@ -134,8 +134,8 @@ final class DataStreamReader private[sql](sparkSession: SparkSession) extends Lo
}
/**
- * Loads a JSON file stream ([[http://jsonlines.org/ JSON Lines text format or newline-delimited
- * JSON]]) and returns the result as a [[DataFrame]].
+ * Loads a JSON file stream (<a href="http://jsonlines.org/">JSON Lines text format or
+ * newline-delimited JSON</a>) and returns the result as a `DataFrame`.
*
* This function goes through the input once to determine the input schema. If you know the
* schema in advance, use the version that specifies the schema to avoid the extra scan.
@@ -181,7 +181,7 @@ final class DataStreamReader private[sql](sparkSession: SparkSession) extends Lo
def json(path: String): DataFrame = format("json").load(path)
/**
- * Loads a CSV file stream and returns the result as a [[DataFrame]].
+ * Loads a CSV file stream and returns the result as a `DataFrame`.
*
* This function will go through the input once to determine the input schema if `inferSchema`
* is enabled. To avoid going through the entire data once, disable `inferSchema` option or
@@ -243,7 +243,7 @@ final class DataStreamReader private[sql](sparkSession: SparkSession) extends Lo
def csv(path: String): DataFrame = format("csv").load(path)
/**
- * Loads a Parquet file stream, returning the result as a [[DataFrame]].
+ * Loads a Parquet file stream, returning the result as a `DataFrame`.
*
* You can set the following Parquet-specific option(s) for reading Parquet files:
* <ul>
@@ -262,7 +262,7 @@ final class DataStreamReader private[sql](sparkSession: SparkSession) extends Lo
}
/**
- * Loads text files and returns a [[DataFrame]] whose schema starts with a string column named
+ * Loads text files and returns a `DataFrame` whose schema starts with a string column named
* "value", and followed by partitioned columns if there are any.
*
* Each line in the text files is a new row in the resulting DataFrame. For example:
@@ -285,7 +285,7 @@ final class DataStreamReader private[sql](sparkSession: SparkSession) extends Lo
def text(path: String): DataFrame = format("text").load(path)
/**
- * Loads text file(s) and returns a [[Dataset]] of String. The underlying schema of the Dataset
+ * Loads text file(s) and returns a `Dataset` of String. The underlying schema of the Dataset
* contains a single string column named "value".
*
* If the directory structure of the text files contains partitioning information, those are
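As with the batch reader, the streaming readers above are driven off the session's `readStream`; a small sketch with an assumed `spark` session and input directory, supplying the schema so no inference pass is needed:
{{{
import org.apache.spark.sql.types.{LongType, StringType, StructType}

val schema = new StructType().add("id", LongType).add("event", StringType)

// JSON Lines files appearing in this directory become streaming input rows.
val events = spark.readStream
  .schema(schema)
  .json("/data/incoming/events")
}}}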
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala
index daed1dcb77..b3c600ae53 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala
@@ -26,8 +26,8 @@ import org.apache.spark.sql.execution.streaming.{ForeachSink, MemoryPlan, Memory
/**
* :: Experimental ::
- * Interface used to write a streaming [[Dataset]] to external storage systems (e.g. file systems,
- * key-value stores, etc). Use [[Dataset.writeStream]] to access this.
+ * Interface used to write a streaming `Dataset` to external storage systems (e.g. file systems,
+ * key-value stores, etc). Use `Dataset.writeStream` to access this.
*
* @since 2.0.0
*/
@@ -273,8 +273,8 @@ final class DataStreamWriter[T] private[sql](ds: Dataset[T]) {
/**
* Starts the execution of the streaming query, which will continually send results to the given
- * [[ForeachWriter]] as as new data arrives. The [[ForeachWriter]] can be used to send the data
- * generated by the [[DataFrame]]/[[Dataset]] to an external system.
+ * `ForeachWriter` as new data arrives. The `ForeachWriter` can be used to send the data
+ * generated by the `DataFrame`/`Dataset` to an external system.
*
* Scala example:
* {{{
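A compact sketch of the `foreach` sink described above, with a deliberately trivial `ForeachWriter` (the sink logic is illustrative only; `df` is an assumed streaming DataFrame):
{{{
import org.apache.spark.sql.{ForeachWriter, Row}

val query = df.writeStream
  .foreach(new ForeachWriter[Row] {
    def open(partitionId: Long, version: Long): Boolean = true   // accept every partition/epoch
    def process(record: Row): Unit = println(record)             // send the row downstream
    def close(errorOrNull: Throwable): Unit = ()                 // release any resources
  })
  .start()
}}}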
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQuery.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQuery.scala
index 0a85414451..374313f2ca 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQuery.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQuery.scala
@@ -31,7 +31,7 @@ trait StreamingQuery {
/**
* Returns the name of the query. This name is unique across all active queries. This can be
- * set in the [[org.apache.spark.sql.DataStreamWriter DataStreamWriter]] as
+ * set in the `org.apache.spark.sql.streaming.DataStreamWriter` as
* `dataframe.writeStream.queryName("query").start()`.
* @since 2.0.0
*/
@@ -45,7 +45,7 @@ trait StreamingQuery {
def id: Long
/**
- * Returns the [[SparkSession]] associated with `this`.
+ * Returns the `SparkSession` associated with `this`.
* @since 2.0.0
*/
def sparkSession: SparkSession
@@ -90,10 +90,11 @@ trait StreamingQuery {
* immediately (if the query was terminated by `stop()`), or throw the exception
* immediately (if the query has terminated with exception).
*
- * @throws StreamingQueryException, if `this` query has terminated with an exception.
+ * @throws StreamingQueryException if the query has terminated with an exception.
*
* @since 2.0.0
*/
+ @throws[StreamingQueryException]
def awaitTermination(): Unit
/**
@@ -106,10 +107,11 @@ trait StreamingQuery {
* `true` immediately (if the query was terminated by `stop()`), or throw the exception
* immediately (if the query has terminated with exception).
*
- * @throws StreamingQueryException, if `this` query has terminated with an exception
+ * @throws StreamingQueryException if the query has terminated with an exception
*
* @since 2.0.0
*/
+ @throws[StreamingQueryException]
def awaitTermination(timeoutMs: Long): Boolean
/**
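With the `@throws` annotation added above, callers typically wrap the blocking call; a minimal sketch assuming a running `query`:
{{{
import org.apache.spark.sql.streaming.StreamingQueryException

try {
  query.awaitTermination()          // blocks until stop() or failure
} catch {
  case e: StreamingQueryException =>
    // the query terminated with an error; handle or log it here
}
}}}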
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryManager.scala
index bba7bc753e..53968a82d8 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryManager.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryManager.scala
@@ -31,7 +31,7 @@ import org.apache.spark.util.{Clock, SystemClock, Utils}
/**
* :: Experimental ::
- * A class to manage all the [[StreamingQuery]] active on a [[SparkSession]].
+ * A class to manage all the [[StreamingQuery]] active on a `SparkSession`.
*
* @since 2.0.0
*/
@@ -81,10 +81,11 @@ class StreamingQueryManager private[sql] (sparkSession: SparkSession) {
* users need to stop all of them after any of them terminates with exception, and then check the
* `query.exception()` for each query.
*
- * @throws StreamingQueryException, if any query has terminated with an exception
+ * @throws StreamingQueryException if any query has terminated with an exception
*
* @since 2.0.0
*/
+ @throws[StreamingQueryException]
def awaitAnyTermination(): Unit = {
awaitTerminationLock.synchronized {
while (lastTerminatedQuery == null) {
@@ -113,10 +114,11 @@ class StreamingQueryManager private[sql] (sparkSession: SparkSession) {
* users need to stop all of them after any of them terminates with exception, and then check the
* `query.exception()` for each query.
*
- * @throws StreamingQueryException, if any query has terminated with an exception
+ * @throws StreamingQueryException if any query has terminated with an exception
*
* @since 2.0.0
*/
+ @throws[StreamingQueryException]
def awaitAnyTermination(timeoutMs: Long): Boolean = {
val startTime = System.currentTimeMillis
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/util/QueryExecutionListener.scala b/sql/core/src/main/scala/org/apache/spark/sql/util/QueryExecutionListener.scala
index 4504582187..26ad0eadd9 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/util/QueryExecutionListener.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/util/QueryExecutionListener.scala
@@ -68,7 +68,7 @@ trait QueryExecutionListener {
/**
* :: Experimental ::
*
- * Manager for [[QueryExecutionListener]]. See [[org.apache.spark.sql.SQLContext.listenerManager]].
+ * Manager for [[QueryExecutionListener]]. See `org.apache.spark.sql.SQLContext.listenerManager`.
*/
@Experimental
@InterfaceStability.Evolving
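For completeness, a hedged sketch of registering a `QueryExecutionListener` through the listener manager mentioned above (the printed messages are illustrative; `spark` is assumed):
{{{
import org.apache.spark.sql.execution.QueryExecution
import org.apache.spark.sql.util.QueryExecutionListener

spark.listenerManager.register(new QueryExecutionListener {
  def onSuccess(funcName: String, qe: QueryExecution, durationNs: Long): Unit =
    println(s"$funcName finished in ${durationNs / 1e6} ms")

  def onFailure(funcName: String, qe: QueryExecution, exception: Exception): Unit =
    println(s"$funcName failed: ${exception.getMessage}")
})
}}}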
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala
index e333fc7feb..a2d64da001 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala
@@ -57,9 +57,9 @@ import org.apache.spark.util.SerializableJobConf
* @param partition a map from the partition key to the partition value (optional). If the partition
* value is optional, dynamic partition insert will be performed.
* As an example, `INSERT INTO tbl PARTITION (a=1, b=2) AS ...` would have
- * Map('a' -> Some('1'), 'b' -> Some('2')),
+ * Map('a' -&gt; Some('1'), 'b' -&gt; Some('2')),
* and `INSERT INTO tbl PARTITION (a=1, b) AS ...`
- * would have Map('a' -> Some('1'), 'b' -> None).
+ * would have Map('a' -&gt; Some('1'), 'b' -&gt; None).
* @param child the logical plan representing data to write to.
* @param overwrite overwrite existing table or partitions.
* @param ifNotExists If true, only write if the table or partition does not exist.
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala
index 42c92ed5ca..0a7631f782 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala
@@ -42,8 +42,8 @@ import org.apache.spark.sql.types.StructType
import org.apache.spark.util.SerializableConfiguration
/**
- * [[FileFormat]] for reading ORC files. If this is moved or renamed, please update
- * [[DataSource]]'s backwardCompatibilityMap.
+ * `FileFormat` for reading ORC files. If this is moved or renamed, please update
+ * `DataSource`'s backwardCompatibilityMap.
*/
class OrcFileFormat extends FileFormat with DataSourceRegister with Serializable {
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileOperator.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileOperator.scala
index f5db73b715..3f1f86c278 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileOperator.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileOperator.scala
@@ -38,7 +38,7 @@ private[orc] object OrcFileOperator extends Logging {
* 1. Retrieving file metadata (schema and compression codecs, etc.)
* 2. Read the actual file content (in this case, the given path should point to the target file)
*
- * @note As recorded by SPARK-8501, ORC writes an empty schema (<code>struct&lt;&gt;</code) to an
+ * @note As recorded by SPARK-8501, ORC writes an empty schema (<code>struct&lt;&gt;</code>) to an
* ORC file if the file contains zero rows. This is OK for Hive since the schema of the
* table is managed by metastore. But this becomes a problem when reading ORC files
* directly from HDFS via Spark SQL, because we have to discover the schema from raw ORC