aboutsummaryrefslogtreecommitdiff
path: root/mllib
diff options
context:
space:
mode:
authorXiangrui Meng <meng@databricks.com>2016-06-21 08:31:15 -0700
committerXiangrui Meng <meng@databricks.com>2016-06-21 08:31:15 -0700
commit4f83ca1059a3b580fca3f006974ff5ac4d5212a1 (patch)
tree0d1c12a4c1f67574acdad034bc909e17117fa10b /mllib
parentf3a768b7b96f00f33d2fe4e6c0bf4acf373ad4f4 (diff)
downloadspark-4f83ca1059a3b580fca3f006974ff5ac4d5212a1.tar.gz
spark-4f83ca1059a3b580fca3f006974ff5ac4d5212a1.tar.bz2
spark-4f83ca1059a3b580fca3f006974ff5ac4d5212a1.zip
[SPARK-15177][.1][R] make SparkR model params and default values consistent with MLlib
## What changes were proposed in this pull request?

This PR is a subset of #13023 by yanboliang to make SparkR model param names and default values consistent with MLlib. I tried to avoid other changes from #13023 to keep this PR minimal. I will send a follow-up PR to improve the documentation.

Main changes:
* `spark.glm`: epsilon -> tol, maxit -> maxIter
* `spark.kmeans`: default k -> 2, default maxIter -> 20, default initMode -> "k-means||"
* `spark.naiveBayes`: laplace -> smoothing, default 1.0

## How was this patch tested?

Existing unit tests.

Author: Xiangrui Meng <meng@databricks.com>

Closes #13801 from mengxr/SPARK-15177.1.
Diffstat (limited to 'mllib')
-rw-r--r--mllib/src/main/scala/org/apache/spark/ml/r/GeneralizedLinearRegressionWrapper.scala | 8
-rw-r--r--mllib/src/main/scala/org/apache/spark/ml/r/NaiveBayesWrapper.scala | 4
2 files changed, 6 insertions, 6 deletions
diff --git a/mllib/src/main/scala/org/apache/spark/ml/r/GeneralizedLinearRegressionWrapper.scala b/mllib/src/main/scala/org/apache/spark/ml/r/GeneralizedLinearRegressionWrapper.scala
index 9618a3423e..5642abc645 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/r/GeneralizedLinearRegressionWrapper.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/r/GeneralizedLinearRegressionWrapper.scala
@@ -67,8 +67,8 @@ private[r] object GeneralizedLinearRegressionWrapper
data: DataFrame,
family: String,
link: String,
- epsilon: Double,
- maxit: Int): GeneralizedLinearRegressionWrapper = {
+ tol: Double,
+ maxIter: Int): GeneralizedLinearRegressionWrapper = {
val rFormula = new RFormula()
.setFormula(formula)
val rFormulaModel = rFormula.fit(data)
@@ -82,8 +82,8 @@ private[r] object GeneralizedLinearRegressionWrapper
.setFamily(family)
.setLink(link)
.setFitIntercept(rFormula.hasIntercept)
- .setTol(epsilon)
- .setMaxIter(maxit)
+ .setTol(tol)
+ .setMaxIter(maxIter)
val pipeline = new Pipeline()
.setStages(Array(rFormulaModel, glr))
.fit(data)
diff --git a/mllib/src/main/scala/org/apache/spark/ml/r/NaiveBayesWrapper.scala b/mllib/src/main/scala/org/apache/spark/ml/r/NaiveBayesWrapper.scala
index 28925c79da..1dac246b03 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/r/NaiveBayesWrapper.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/r/NaiveBayesWrapper.scala
@@ -56,7 +56,7 @@ private[r] object NaiveBayesWrapper extends MLReadable[NaiveBayesWrapper] {
val PREDICTED_LABEL_INDEX_COL = "pred_label_idx"
val PREDICTED_LABEL_COL = "prediction"
- def fit(formula: String, data: DataFrame, laplace: Double): NaiveBayesWrapper = {
+ def fit(formula: String, data: DataFrame, smoothing: Double): NaiveBayesWrapper = {
val rFormula = new RFormula()
.setFormula(formula)
.fit(data)
@@ -70,7 +70,7 @@ private[r] object NaiveBayesWrapper extends MLReadable[NaiveBayesWrapper] {
val features = featureAttrs.map(_.name.get)
// assemble and fit the pipeline
val naiveBayes = new NaiveBayes()
- .setSmoothing(laplace)
+ .setSmoothing(smoothing)
.setModelType("bernoulli")
.setPredictionCol(PREDICTED_LABEL_INDEX_COL)
val idxToStr = new IndexToString()