18 files changed, 67 insertions, 62 deletions
diff --git a/mllib/src/main/scala/org/apache/spark/ml/Predictor.scala b/mllib/src/main/scala/org/apache/spark/ml/Predictor.scala
index 215f9d86f1..08b0cb9b8f 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/Predictor.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/Predictor.scala
@@ -40,7 +40,7 @@ private[ml] trait PredictorParams extends Params
    * @param schema input schema
    * @param fitting whether this is in fitting
    * @param featuresDataType  SQL DataType for FeaturesType.
-   *                          E.g., [[VectorUDT]] for vector features.
+   *                          E.g., `VectorUDT` for vector features.
    * @return output schema
    */
   protected def validateAndTransformSchema(
@@ -72,7 +72,7 @@ private[ml] trait PredictorParams extends Params
  * in `fit()`.
  *
  * @tparam FeaturesType  Type of features.
- *                       E.g., [[VectorUDT]] for vector features.
+ *                       E.g., `VectorUDT` for vector features.
  * @tparam Learner  Specialization of this class.  If you subclass this type, use this type
  *                  parameter to specify the concrete type.
  * @tparam M  Specialization of [[PredictionModel]].  If you subclass this type, use this type
@@ -122,7 +122,7 @@ abstract class Predictor[
 
   /**
    * Train a model using the given dataset and parameters.
-   * Developers can implement this instead of [[fit()]] to avoid dealing with schema validation
+   * Developers can implement this instead of `fit()` to avoid dealing with schema validation
    * and copying parameters into the model.
    *
    * @param dataset  Training dataset
@@ -133,7 +133,7 @@ abstract class Predictor[
   /**
    * Returns the SQL DataType corresponding to the FeaturesType type parameter.
    *
-   * This is used by [[validateAndTransformSchema()]].
+   * This is used by `validateAndTransformSchema()`.
    * This workaround is needed since SQL has different APIs for Scala and Java.
    *
    * The default value is VectorUDT, but it may be overridden if FeaturesType is not Vector.
@@ -160,7 +160,7 @@ abstract class Predictor[
  * Abstraction for a model for prediction tasks (regression and classification).
  *
  * @tparam FeaturesType  Type of features.
- *                       E.g., [[VectorUDT]] for vector features.
+ *                       E.g., `VectorUDT` for vector features.
  * @tparam M  Specialization of [[PredictionModel]].  If you subclass this type, use this type
  *            parameter to specify the concrete type for the corresponding model.
  */
@@ -181,7 +181,7 @@ abstract class PredictionModel[FeaturesType, M <: PredictionModel[FeaturesType,
   /**
    * Returns the SQL DataType corresponding to the FeaturesType type parameter.
    *
-   * This is used by [[validateAndTransformSchema()]].
+   * This is used by `validateAndTransformSchema()`.
    * This workaround is needed since SQL has different APIs for Scala and Java.
    *
    * The default value is VectorUDT, but it may be overridden if FeaturesType is not Vector.
@@ -197,7 +197,7 @@ abstract class PredictionModel[FeaturesType, M <: PredictionModel[FeaturesType,
    * the predictions as a new column [[predictionCol]].
    *
    * @param dataset input dataset
-   * @return transformed dataset with [[predictionCol]] of type [[Double]]
+   * @return transformed dataset with [[predictionCol]] of type `Double`
    */
   override def transform(dataset: Dataset[_]): DataFrame = {
     transformSchema(dataset.schema, logging = true)
@@ -219,7 +219,7 @@ abstract class PredictionModel[FeaturesType, M <: PredictionModel[FeaturesType,
 
   /**
    * Predict label for the given features.
-   * This internal method is used to implement [[transform()]] and output [[predictionCol]].
+   * This internal method is used to implement `transform()` and output [[predictionCol]].
    */
   protected def predict(features: FeaturesType): Double
 }
diff --git a/mllib/src/main/scala/org/apache/spark/ml/attribute/attributes.scala b/mllib/src/main/scala/org/apache/spark/ml/attribute/attributes.scala
index 7fbfee75e9..1cd2b1ad84 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/attribute/attributes.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/attribute/attributes.scala
@@ -126,7 +126,7 @@ private[attribute] trait AttributeFactory {
   private[attribute] def fromMetadata(metadata: Metadata): Attribute
 
   /**
-   * Creates an [[Attribute]] from a [[StructField]] instance, optionally preserving name.
+   * Creates an [[Attribute]] from a `StructField` instance, optionally preserving name.
    */
   private[ml] def decodeStructField(field: StructField, preserveName: Boolean): Attribute = {
     require(field.dataType.isInstanceOf[NumericType])
@@ -145,7 +145,7 @@ private[attribute] trait AttributeFactory {
   }
 
   /**
-   * Creates an [[Attribute]] from a [[StructField]] instance.
+   * Creates an [[Attribute]] from a `StructField` instance.
    */
   def fromStructField(field: StructField): Attribute = decodeStructField(field, false)
 }
diff --git a/mllib/src/main/scala/org/apache/spark/ml/attribute/package.scala b/mllib/src/main/scala/org/apache/spark/ml/attribute/package.scala
index f6964054db..25ce0282b1 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/attribute/package.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/attribute/package.scala
@@ -23,7 +23,7 @@ import org.apache.spark.sql.DataFrame
 /**
  * ==ML attributes==
  *
- * The ML pipeline API uses [[DataFrame]]s as ML datasets.
+ * The ML pipeline API uses `DataFrame`s as ML datasets.
  * Each dataset consists of typed columns, e.g., string, double, vector, etc.
  * However, knowing only the column type may not be sufficient to handle the data properly.
  * For instance, a double column with values 0.0, 1.0, 2.0, ... may represent some label indices,
diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/Classifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/Classifier.scala
index 8fd6b70d4d..d8608d885d 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/Classifier.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/Classifier.scala
@@ -71,7 +71,7 @@ abstract class Classifier[
    * and put it in an RDD with strong types.
    *
    * @param dataset  DataFrame with columns for labels ([[org.apache.spark.sql.types.NumericType]])
-   *                 and features ([[Vector]]).
+   *                 and features (`Vector`).
    * @param numClasses  Number of classes label can take.  Labels must be integers in the range
    *                    [0, numClasses).
    * @throws SparkException  if any label is not an integer >= 0
@@ -94,7 +94,7 @@ abstract class Classifier[
    * by finding the maximum label value.
    *
    * Label validation (ensuring all labels are integers >= 0) needs to be handled elsewhere,
-   * such as in [[extractLabeledPoints()]].
+   * such as in `extractLabeledPoints()`.
    *
    * @param dataset  Dataset which contains a column [[labelCol]]
    * @param maxNumClasses  Maximum number of classes allowed when inferred from data.  If numClasses
@@ -150,7 +150,7 @@ abstract class ClassificationModel[FeaturesType, M <: ClassificationModel[Featur
   /**
    * Transforms dataset by reading from [[featuresCol]], and appending new columns as specified by
    * parameters:
-   *  - predicted labels as [[predictionCol]] of type [[Double]]
+   *  - predicted labels as [[predictionCol]] of type `Double`
    *  - raw predictions (confidences) as [[rawPredictionCol]] of type `Vector`.
    *
    * @param dataset input dataset
@@ -192,10 +192,10 @@ abstract class ClassificationModel[FeaturesType, M <: ClassificationModel[Featur
 
   /**
    * Predict label for the given features.
-   * This internal method is used to implement [[transform()]] and output [[predictionCol]].
+   * This internal method is used to implement `transform()` and output [[predictionCol]].
    *
    * This default implementation for classification predicts the index of the maximum value
-   * from [[predictRaw()]].
+   * from `predictRaw()`.
    */
   override protected def predict(features: FeaturesType): Double = {
     raw2prediction(predictRaw(features))
@@ -205,7 +205,7 @@ abstract class ClassificationModel[FeaturesType, M <: ClassificationModel[Featur
    * Raw prediction for each possible label.
    * The meaning of a "raw" prediction may vary between algorithms, but it intuitively gives
    * a measure of confidence in each possible label (where larger = more confident).
-   * This internal method is used to implement [[transform()]] and output [[rawPredictionCol]].
+   * This internal method is used to implement `transform()` and output [[rawPredictionCol]].
    *
    * @return  vector where element i is the raw prediction for label i.
    *          This raw prediction may be any real number, where a larger value indicates greater
diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifier.scala
index 93cc1e6f09..95c1337ed5 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifier.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifier.scala
@@ -225,7 +225,7 @@ class MultilayerPerceptronClassifier @Since("1.5.0") (
 
   /**
    * Train a model using the given dataset and parameters.
-   * Developers can implement this instead of [[fit()]] to avoid dealing with schema validation
+   * Developers can implement this instead of `fit()` to avoid dealing with schema validation
    * and copying parameters into the model.
    *
    * @param dataset Training dataset
@@ -321,7 +321,7 @@ class MultilayerPerceptronClassificationModel private[ml] (
 
   /**
    * Predict label for the given features.
-   * This internal method is used to implement [[transform()]] and output [[predictionCol]].
+   * This internal method is used to implement `transform()` and output [[predictionCol]].
    */
   override protected def predict(features: Vector): Double = {
     LabelConverter.decodeLabel(mlpModel.predict(features))
diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/ProbabilisticClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/ProbabilisticClassifier.scala
index e89da6ff8b..ef08134809 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/ProbabilisticClassifier.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/ProbabilisticClassifier.scala
@@ -93,7 +93,7 @@ abstract class ProbabilisticClassificationModel[
   /**
    * Transforms dataset by reading from [[featuresCol]], and appending new columns as specified by
    * parameters:
-   *  - predicted labels as [[predictionCol]] of type [[Double]]
+   *  - predicted labels as [[predictionCol]] of type `Double`
    *  - raw predictions (confidences) as [[rawPredictionCol]] of type `Vector`
    *  - probability of each class as [[probabilityCol]] of type `Vector`.
    *
@@ -158,13 +158,15 @@ abstract class ProbabilisticClassificationModel[
    * doing the computation in-place.
    * These predictions are also called class conditional probabilities.
    *
-   * This internal method is used to implement [[transform()]] and output [[probabilityCol]].
+   * This internal method is used to implement `transform()` and output [[probabilityCol]].
    *
    * @return Estimated class conditional probabilities (modified input vector)
    */
   protected def raw2probabilityInPlace(rawPrediction: Vector): Vector
 
-  /** Non-in-place version of [[raw2probabilityInPlace()]] */
+  /**
+   * Non-in-place version of `raw2probabilityInPlace()`
+   */
   protected def raw2probability(rawPrediction: Vector): Vector = {
     val probs = rawPrediction.copy
     raw2probabilityInPlace(probs)
@@ -182,7 +184,7 @@ abstract class ProbabilisticClassificationModel[
    * Predict the probability of each class given the features.
    * These predictions are also called class conditional probabilities.
    *
-   * This internal method is used to implement [[transform()]] and output [[probabilityCol]].
+   * This internal method is used to implement `transform()` and output [[probabilityCol]].
    *
    * @return Estimated class conditional probabilities
    */
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala
index 19978c97d2..f648deced5 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala
@@ -84,7 +84,7 @@ private[feature] trait MinMaxScalerParams extends Params with HasInputCol with H
  *    $$
  * </blockquote>
  *
- * For the case $E_{max} == E_{min}$, $Rescaled(e_i) = 0.5 * (max + min)$.
+ * For the case \(E_{max} == E_{min}\), \(Rescaled(e_i) = 0.5 * (max + min)\).
  *
  * @note Since zero values will probably be transformed to non-zero values, output of the
  * transformer will be DenseVector even for sparse input.
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/package.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/package.scala
index 5dd648aecc..d75a6dc937 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/package.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/package.scala
@@ -25,13 +25,13 @@ import org.apache.spark.sql.DataFrame
  *
  * The `ml.feature` package provides common feature transformers that help convert raw data or
  * features into more suitable forms for model fitting.
- * Most feature transformers are implemented as [[Transformer]]s, which transform one [[DataFrame]]
+ * Most feature transformers are implemented as [[Transformer]]s, which transform one `DataFrame`
  * into another, e.g., [[HashingTF]].
  * Some feature transformers are implemented as [[Estimator]]s, because the transformation requires
  * some aggregated information of the dataset, e.g., document frequencies in [[IDF]].
- * For those feature transformers, calling [[Estimator!.fit]] is required to obtain the model first,
+ * For those feature transformers, calling `Estimator.fit` is required to obtain the model first,
  * e.g., [[IDFModel]], in order to apply transformation.
- * The transformation is usually done by appending new columns to the input [[DataFrame]], so all
+ * The transformation is usually done by appending new columns to the input `DataFrame`, so all
  * input columns are carried over.
  *
  * We try to make each transformer minimal, so it becomes flexible to assemble feature
diff --git a/mllib/src/main/scala/org/apache/spark/ml/param/params.scala b/mllib/src/main/scala/org/apache/spark/ml/param/params.scala
index ab0620ca75..12ad800206 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/param/params.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/param/params.scala
@@ -446,7 +446,7 @@ class StringArrayParam(parent: Params, name: String, doc: String, isValid: Array
   def this(parent: Params, name: String, doc: String) =
     this(parent, name, doc, ParamValidators.alwaysTrue)
 
-  /** Creates a param pair with a [[java.util.List]] of values (for Java and Python). */
+  /** Creates a param pair with a `java.util.List` of values (for Java and Python). */
   def w(value: java.util.List[String]): ParamPair[Array[String]] = w(value.asScala.toArray)
 
   override def jsonEncode(value: Array[String]): String = {
@@ -471,7 +471,7 @@ class DoubleArrayParam(parent: Params, name: String, doc: String, isValid: Array
   def this(parent: Params, name: String, doc: String) =
     this(parent, name, doc, ParamValidators.alwaysTrue)
 
-  /** Creates a param pair with a [[java.util.List]] of values (for Java and Python). */
+  /** Creates a param pair with a `java.util.List` of values (for Java and Python). */
   def w(value: java.util.List[java.lang.Double]): ParamPair[Array[Double]] =
     w(value.asScala.map(_.asInstanceOf[Double]).toArray)
 
@@ -501,7 +501,7 @@ class IntArrayParam(parent: Params, name: String, doc: String, isValid: Array[In
   def this(parent: Params, name: String, doc: String) =
     this(parent, name, doc, ParamValidators.alwaysTrue)
 
-  /** Creates a param pair with a [[java.util.List]] of values (for Java and Python). */
+  /** Creates a param pair with a `java.util.List` of values (for Java and Python). */
   def w(value: java.util.List[java.lang.Integer]): ParamPair[Array[Int]] =
     w(value.asScala.map(_.asInstanceOf[Int]).toArray)
 
@@ -652,7 +652,9 @@ trait Params extends Identifiable with Serializable {
       throw new NoSuchElementException(s"Failed to find a default value for ${param.name}"))
   }
 
-  /** An alias for [[getOrDefault()]]. */
+  /**
+   * An alias for `getOrDefault()`.
+   */
   protected final def $[T](param: Param[T]): T = getOrDefault(param)
 
   /**
@@ -749,14 +751,14 @@ trait Params extends Identifiable with Serializable {
    * Copies param values from this instance to another instance for params shared by them.
    *
    * This handles default Params and explicitly set Params separately.
-   * Default Params are copied from and to [[defaultParamMap]], and explicitly set Params are
-   * copied from and to [[paramMap]].
+   * Default Params are copied from and to `defaultParamMap`, and explicitly set Params are
+   * copied from and to `paramMap`.
    * Warning: This implicitly assumes that this [[Params]] instance and the target instance
    *          share the same set of default Params.
    *
    * @param to the target instance, which should work with the same set of default Params as this
    *           source instance
-   * @param extra extra params to be copied to the target's [[paramMap]]
+   * @param extra extra params to be copied to the target's `paramMap`
    * @return the target instance with param values copied
    */
   protected def copyValues[T <: Params](to: T, extra: ParamMap = ParamMap.empty): T = {
@@ -822,7 +824,7 @@ final class ParamMap private[ml] (private val map: mutable.Map[Param[Any], Any])
     this
   }
 
-  /** Put param pairs with a [[java.util.List]] of values for Python. */
+  /** Put param pairs with a `java.util.List` of values for Python. */
   private[ml] def put(paramPairs: JList[ParamPair[_]]): this.type = {
     put(paramPairs.asScala: _*)
   }
diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/Regressor.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/Regressor.scala
index be356575ca..c0a1683d3c 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/Regressor.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/Regressor.scala
@@ -40,7 +40,7 @@ private[spark] abstract class Regressor[
 /**
  * :: DeveloperApi ::
  *
- * Model produced by a [[Regressor]].
+ * Model produced by a `Regressor`.
  *
  * @tparam FeaturesType  Type of input features.  E.g., [[org.apache.spark.mllib.linalg.Vector]]
  * @tparam M  Concrete Model type.
diff --git a/mllib/src/main/scala/org/apache/spark/ml/util/ReadWrite.scala b/mllib/src/main/scala/org/apache/spark/ml/util/ReadWrite.scala
index c0e3801499..09bddcdb81 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/util/ReadWrite.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/util/ReadWrite.scala
@@ -37,7 +37,7 @@ import org.apache.spark.sql.{SparkSession, SQLContext}
 import org.apache.spark.util.Utils
 
 /**
- * Trait for [[MLWriter]] and [[MLReader]].
+ * Trait for `MLWriter` and `MLReader`.
  */
 private[util] sealed trait BaseReadWrite {
   private var optionSparkSession: Option[SparkSession] = None
@@ -112,7 +112,7 @@ abstract class MLWriter extends BaseReadWrite with Logging {
   }
 
   /**
-   * [[save()]] handles overwriting and then calls this method.  Subclasses should override this
+   * `save()` handles overwriting and then calls this method.  Subclasses should override this
    * method to implement the actual saving of the instance.
    */
   @Since("1.6.0")
@@ -135,13 +135,13 @@ abstract class MLWriter extends BaseReadWrite with Logging {
 }
 
 /**
- * Trait for classes that provide [[MLWriter]].
+ * Trait for classes that provide `MLWriter`.
  */
 @Since("1.6.0")
 trait MLWritable {
 
   /**
-   * Returns an [[MLWriter]] instance for this ML instance.
+   * Returns an `MLWriter` instance for this ML instance.
    */
   @Since("1.6.0")
   def write: MLWriter
@@ -193,7 +193,7 @@ abstract class MLReader[T] extends BaseReadWrite {
 }
 
 /**
- * Trait for objects that provide [[MLReader]].
+ * Trait for objects that provide `MLReader`.
  *
  * @tparam T ML instance type
  */
@@ -201,7 +201,7 @@ abstract class MLReader[T] extends BaseReadWrite {
 trait MLReadable[T] {
 
   /**
-   * Returns an [[MLReader]] instance for this class.
+   * Returns an `MLReader` instance for this class.
    */
   @Since("1.6.0")
   def read: MLReader[T]
@@ -235,7 +235,7 @@ trait DefaultParamsReadable[T] extends MLReadable[T] {
 }
 
 /**
- * Default [[MLWriter]] implementation for transformers and estimators that contain basic
+ * Default `MLWriter` implementation for transformers and estimators that contain basic
  * (json4s-serializable) params and no data. This will not handle more complex params or types with
  * data (e.g., models with coefficients).
  *
@@ -309,7 +309,7 @@ private[ml] object DefaultParamsWriter {
 }
 
 /**
- * Default [[MLReader]] implementation for transformers and estimators that contain basic
+ * Default `MLReader` implementation for transformers and estimators that contain basic
  * (json4s-serializable) params and no data. This will not handle more complex params or types with
  * data (e.g., models with coefficients).
  *
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDA.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDA.scala
index 4cb9200030..6c5f529fb8 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDA.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDA.scala
@@ -91,7 +91,7 @@ class LDA private (
    * distributions over topics ("theta").
    *
    * This method assumes the Dirichlet distribution is symmetric and can be described by a single
-   * [[Double]] parameter. It should fail if docConcentration is asymmetric.
+   * `Double` parameter. It should fail if docConcentration is asymmetric.
    */
   @Since("1.3.0")
   def getDocConcentration: Double = {
@@ -113,7 +113,7 @@ class LDA private (
    *
    * If set to a singleton vector Vector(-1), then docConcentration is set automatically. If set to
    * singleton vector Vector(t) where t != -1, then t is replicated to a vector of length k during
-   * `LDAOptimizer.initialize()`. Otherwise, the [[docConcentration]] vector must be length k.
+   * `LDAOptimizer.initialize()`. Otherwise, the `docConcentration` vector must be length k.
    * (default = Vector(-1) = automatic)
    *
    * Optimizer-specific parameter settings:
@@ -137,7 +137,7 @@ class LDA private (
   }
 
   /**
-   * Replicates a [[Double]] docConcentration to create a symmetric prior.
+   * Replicates a `Double` docConcentration to create a symmetric prior.
    */
   @Since("1.3.0")
   def setDocConcentration(docConcentration: Double): this.type = {
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/fpm/AssociationRules.scala b/mllib/src/main/scala/org/apache/spark/mllib/fpm/AssociationRules.scala
index 85a90fa959..acb83ac31a 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/fpm/AssociationRules.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/fpm/AssociationRules.scala
@@ -54,7 +54,7 @@ class AssociationRules private[fpm] (
   }
 
   /**
-   * Computes the association rules with confidence above [[minConfidence]].
+   * Computes the association rules with confidence above `minConfidence`.
    * @param freqItemsets frequent itemset model obtained from [[FPGrowth]]
    * @return a `Set[Rule[Item]]` containing the association rules.
    *
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/fpm/FPGrowth.scala b/mllib/src/main/scala/org/apache/spark/mllib/fpm/FPGrowth.scala
index 635da00b69..f6b1143272 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/fpm/FPGrowth.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/fpm/FPGrowth.scala
@@ -52,7 +52,7 @@ class FPGrowthModel[Item: ClassTag] @Since("1.3.0") (
     @Since("1.3.0") val freqItemsets: RDD[FreqItemset[Item]])
   extends Saveable with Serializable {
   /**
-   * Generates association rules for the [[Item]]s in [[freqItemsets]].
+   * Generates association rules for the `Item`s in [[freqItemsets]].
    * @param confidence minimal confidence of the rules produced
    */
   @Since("1.5.0")
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala
index 63ea9d3264..723addc715 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala
@@ -273,7 +273,7 @@ class VectorUDT extends UserDefinedType[Vector] {
 /**
  * Factory methods for [[org.apache.spark.mllib.linalg.Vector]].
  * We don't use the name `Vector` because Scala imports
- * [[scala.collection.immutable.Vector]] by default.
+ * `scala.collection.immutable.Vector` by default.
  */
 @Since("1.0.0")
 object Vectors {
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/optimization/Gradient.scala b/mllib/src/main/scala/org/apache/spark/mllib/optimization/Gradient.scala
index 0efce3c76f..88c73241fb 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/optimization/Gradient.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/optimization/Gradient.scala
@@ -78,7 +78,7 @@ abstract class Gradient extends Serializable {
  *
  * for K classes multiclass classification problem.
  *
- * The model weights $w = (w_1, w_2, ..., w_{K-1})^T$ becomes a matrix which has dimension of
+ * The model weights \(w = (w_1, w_2, ..., w_{K-1})^T\) becomes a matrix which has dimension of
  * (K-1) * (N+1) if the intercepts are added. If the intercepts are not added, the dimension
  * will be (K-1) * N.
  *
@@ -93,9 +93,9 @@ abstract class Gradient extends Serializable {
  *    $$
  * </blockquote>
  *
- * where $\alpha(i) = 1$ if $i \ne 0$, and
- *       $\alpha(i) = 0$ if $i == 0$,
- *       $margins_i = x w_i$.
+ * where $\alpha(i) = 1$ if \(i \ne 0\), and
+ *       $\alpha(i) = 0$ if \(i == 0\),
+ *       \(margins_i = x w_i\).
  *
  * For optimization, we have to calculate the first derivative of the loss function, and
  * a simple calculation shows that
@@ -110,18 +110,19 @@ abstract class Gradient extends Serializable {
  *    $$
  * </blockquote>
  *
- * where $\delta_{i, j} = 1$ if $i == j$,
- *       $\delta_{i, j} = 0$ if $i != j$, and
+ * where $\delta_{i, j} = 1$ if \(i == j\),
+ *       $\delta_{i, j} = 0$ if \(i != j\), and
  *       multiplier =
  *         $\exp(margins_i) / (1 + \sum_k^{K-1} \exp(margins_i)) - (1-\alpha(y)\delta_{y, i+1})$
  *
  * If any of margins is larger than 709.78, the numerical computation of multiplier and loss
  * function will be suffered from arithmetic overflow. This issue occurs when there are outliers
  * in data which are far away from hyperplane, and this will cause the failing of training once
- * infinity / infinity is introduced. Note that this is only a concern when max(margins) &gt; 0.
+ * infinity / infinity is introduced. Note that this is only a concern when max(margins)
+ * {@literal >} 0.
  *
- * Fortunately, when max(margins) = maxMargin &gt; 0, the loss function and the multiplier can be
- * easily rewritten into the following equivalent numerically stable formula.
+ * Fortunately, when max(margins) = maxMargin {@literal >} 0, the loss function and the multiplier
+ * can be easily rewritten into the following equivalent numerically stable formula.
  *
  * <blockquote>
  *    $$
@@ -133,7 +134,7 @@ abstract class Gradient extends Serializable {
  *    \end{align}
  *    $$
  * </blockquote>
-
+ *
  * where sum = $\exp(-maxMargin) + \sum_i^{K-1}\exp(margins_i - maxMargin) - 1$.
  *
  * Note that each term, $(margins_i - maxMargin)$ in $\exp$ is smaller than zero; as a result,
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/test/StreamingTest.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/test/StreamingTest.scala
index d680237bf6..551ea35795 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/stat/test/StreamingTest.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/stat/test/StreamingTest.scala
@@ -54,7 +54,7 @@ case class BinarySample @Since("1.6.0") (
  * cumulative processing, using all batches seen so far.
  *
  * Different tests may be used for assessing statistical significance depending on assumptions
- * satisfied by data. For more details, see [[StreamingTestMethod]]. The `testMethod` specifies
+ * satisfied by data. For more details, see `StreamingTestMethod`. The `testMethod` specifies
  * which test will be used.
  *
  * Use a builder pattern to construct a streaming test in an application, for example:
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/treeEnsembleModels.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/treeEnsembleModels.scala
index 299950785e..fc1d4125a5 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/treeEnsembleModels.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/treeEnsembleModels.scala
@@ -341,7 +341,7 @@ private[tree] sealed class TreeEnsembleModel(
   def predict(features: RDD[Vector]): RDD[Double] = features.map(x => predict(x))
 
   /**
-   * Java-friendly version of [[org.apache.spark.mllib.tree.model.TreeEnsembleModel#predict]].
+   * Java-friendly version of `org.apache.spark.mllib.tree.model.TreeEnsembleModel.predict`.
    */
   def predict(features: JavaRDD[Vector]): JavaRDD[java.lang.Double] = {
     predict(features.rdd).toJavaRDD().asInstanceOf[JavaRDD[java.lang.Double]]