diff options
author | Xiangrui Meng <meng@databricks.com> | 2016-06-21 08:31:15 -0700 |
---|---|---|
committer | Xiangrui Meng <meng@databricks.com> | 2016-06-21 08:31:15 -0700 |
commit | 4f83ca1059a3b580fca3f006974ff5ac4d5212a1 (patch) | |
tree | 0d1c12a4c1f67574acdad034bc909e17117fa10b /mllib | |
parent | f3a768b7b96f00f33d2fe4e6c0bf4acf373ad4f4 (diff) | |
download | spark-4f83ca1059a3b580fca3f006974ff5ac4d5212a1.tar.gz spark-4f83ca1059a3b580fca3f006974ff5ac4d5212a1.tar.bz2 spark-4f83ca1059a3b580fca3f006974ff5ac4d5212a1.zip |
[SPARK-15177][.1][R] make SparkR model params and default values consistent with MLlib
## What changes were proposed in this pull request?
This PR is a subset of #13023 by yanboliang to make SparkR model param names and default values consistent with MLlib. I tried to avoid other changes from #13023 to keep this PR minimal. I will send a follow-up PR to improve the documentation.
Main changes:
* `spark.glm`: epsilon -> tol, maxit -> maxIter
* `spark.kmeans`: default k -> 2, default maxIter -> 20, default initMode -> "k-means||"
* `spark.naiveBayes`: laplace -> smoothing, default 1.0
## How was this patch tested?
Existing unit tests.
Author: Xiangrui Meng <meng@databricks.com>
Closes #13801 from mengxr/SPARK-15177.1.
Diffstat (limited to 'mllib')
-rw-r--r-- | mllib/src/main/scala/org/apache/spark/ml/r/GeneralizedLinearRegressionWrapper.scala | 8 | ||||
-rw-r--r-- | mllib/src/main/scala/org/apache/spark/ml/r/NaiveBayesWrapper.scala | 4 |
2 files changed, 6 insertions, 6 deletions
diff --git a/mllib/src/main/scala/org/apache/spark/ml/r/GeneralizedLinearRegressionWrapper.scala b/mllib/src/main/scala/org/apache/spark/ml/r/GeneralizedLinearRegressionWrapper.scala
index 9618a3423e..5642abc645 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/r/GeneralizedLinearRegressionWrapper.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/r/GeneralizedLinearRegressionWrapper.scala
@@ -67,8 +67,8 @@ private[r] object GeneralizedLinearRegressionWrapper
       data: DataFrame,
       family: String,
       link: String,
-      epsilon: Double,
-      maxit: Int): GeneralizedLinearRegressionWrapper = {
+      tol: Double,
+      maxIter: Int): GeneralizedLinearRegressionWrapper = {
     val rFormula = new RFormula()
       .setFormula(formula)
     val rFormulaModel = rFormula.fit(data)
@@ -82,8 +82,8 @@ private[r] object GeneralizedLinearRegressionWrapper
       .setFamily(family)
       .setLink(link)
       .setFitIntercept(rFormula.hasIntercept)
-      .setTol(epsilon)
-      .setMaxIter(maxit)
+      .setTol(tol)
+      .setMaxIter(maxIter)
     val pipeline = new Pipeline()
       .setStages(Array(rFormulaModel, glr))
       .fit(data)
diff --git a/mllib/src/main/scala/org/apache/spark/ml/r/NaiveBayesWrapper.scala b/mllib/src/main/scala/org/apache/spark/ml/r/NaiveBayesWrapper.scala
index 28925c79da..1dac246b03 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/r/NaiveBayesWrapper.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/r/NaiveBayesWrapper.scala
@@ -56,7 +56,7 @@ private[r] object NaiveBayesWrapper extends MLReadable[NaiveBayesWrapper] {
   val PREDICTED_LABEL_INDEX_COL = "pred_label_idx"
   val PREDICTED_LABEL_COL = "prediction"

-  def fit(formula: String, data: DataFrame, laplace: Double): NaiveBayesWrapper = {
+  def fit(formula: String, data: DataFrame, smoothing: Double): NaiveBayesWrapper = {
     val rFormula = new RFormula()
       .setFormula(formula)
       .fit(data)
@@ -70,7 +70,7 @@ private[r] object NaiveBayesWrapper extends MLReadable[NaiveBayesWrapper] {
     val features = featureAttrs.map(_.name.get)
     // assemble and fit the pipeline
     val naiveBayes = new NaiveBayes()
-      .setSmoothing(laplace)
+      .setSmoothing(smoothing)
       .setModelType("bernoulli")
       .setPredictionCol(PREDICTED_LABEL_INDEX_COL)
     val idxToStr = new IndexToString()