[DOC][MINOR] ml.feature Scala and Python API sync

## What changes were proposed in this pull request?

I reviewed Scala and Python APIs for ml.feature and corrected discrepancies.

## How was this patch tested?

Built docs locally, ran style checks.

Author: Bryan Cutler <cutlerb@gmail.com>

Closes #13159 from BryanCutler/ml.feature-api-sync.
author: Bryan Cutler <cutlerb@gmail.com> 2016-05-19 04:48:36 +0200
committer: Nick Pentreath <nickp@za.ibm.com> 2016-05-19 04:48:36 +0200
commit: b1bc5ebdd52ed12aea3fdc7b8f2fa2d00ea09c6b (patch)
tree: cb5de53388d4a136fafd7eec3fc877f472c7098a /mllib
parent: 4987f39ac7a694e1c8b8b82246eb4fbd863201c4 (diff)
download: spark-b1bc5ebdd52ed12aea3fdc7b8f2fa2d00ea09c6b.tar.gz
spark-b1bc5ebdd52ed12aea3fdc7b8f2fa2d00ea09c6b.tar.bz2
spark-b1bc5ebdd52ed12aea3fdc7b8f2fa2d00ea09c6b.zip
4 files changed, 10 insertions, 6 deletions
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala
index f85f4c65af..08beda6d75 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala
@@ -38,12 +38,12 @@ import org.apache.spark.sql.types.StructType
 private[feature] trait IDFBase extends Params with HasInputCol with HasOutputCol {
 
   /**
-   * The minimum of documents in which a term should appear.
+   * The minimum number of documents in which a term should appear.
    * Default: 0
    * @group param
    */
   final val minDocFreq = new IntParam(
-    this, "minDocFreq", "minimum of documents in which a term should appear for filtering")
+    this, "minDocFreq", "minimum number of documents in which a term should appear for filtering")
 
   setDefault(minDocFreq -> 0)
 
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala
index 141d3b924b..dbbaa5aa46 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala
@@ -53,7 +53,8 @@ private[feature] trait PCAParams extends Params with HasInputCol with HasOutputC
 
 /**
  * :: Experimental ::
- * PCA trains a model to project vectors to a low-dimensional space using PCA.
+ * PCA trains a model to project vectors to a lower dimensional space of the top [[PCA!.k]]
+ * principal components.
  */
 @Experimental
 class PCA (override val uid: String) extends Estimator[PCAModel] with PCAParams
@@ -106,7 +107,7 @@ object PCA extends DefaultParamsReadable[PCA] {
 
 /**
  * :: Experimental ::
- * Model fitted by [[PCA]].
+ * Model fitted by [[PCA]]. Transforms vectors to a lower dimensional space.
  *
  * @param pc A principal components Matrix. Each column is one principal component.
  * @param explainedVariance A vector of proportions of variance explained by
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala
index c0feaa01fc..2916b6d9df 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala
@@ -194,7 +194,9 @@ object RFormula extends DefaultParamsReadable[RFormula] {
 
 /**
  * :: Experimental ::
- * A fitted RFormula. Fitting is required to determine the factor levels of formula terms.
+ * Model fitted by [[RFormula]]. Fitting is required to determine the factor levels of
+ * formula terms.
+ *
  * @param resolvedFormula the fitted R formula.
  * @param pipelineModel the fitted feature model, including factor to index mappings.
  */
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala
index 2bc9d225ac..d814528ec4 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala
@@ -240,7 +240,8 @@ object VectorIndexer extends DefaultParamsReadable[VectorIndexer] {
 
 /**
  * :: Experimental ::
- * Transform categorical features to use 0-based indices instead of their original values.
+ * Model fitted by [[VectorIndexer]]. Transform categorical features to use 0-based indices
+ * instead of their original values.
  *  - Categorical features are mapped to indices.
  *  - Continuous features (columns) are left unchanged.
  * This also appends metadata to the output column, marking features as Numeric (continuous),
author	Bryan Cutler <cutlerb@gmail.com>	2016-05-19 04:48:36 +0200
committer	Nick Pentreath <nickp@za.ibm.com>	2016-05-19 04:48:36 +0200
commit	b1bc5ebdd52ed12aea3fdc7b8f2fa2d00ea09c6b (patch)
tree	cb5de53388d4a136fafd7eec3fc877f472c7098a /mllib
parent	4987f39ac7a694e1c8b8b82246eb4fbd863201c4 (diff)
download	spark-b1bc5ebdd52ed12aea3fdc7b8f2fa2d00ea09c6b.tar.gz spark-b1bc5ebdd52ed12aea3fdc7b8f2fa2d00ea09c6b.tar.bz2 spark-b1bc5ebdd52ed12aea3fdc7b8f2fa2d00ea09c6b.zip