aboutsummaryrefslogtreecommitdiff
path: root/mllib
diff options
context:
space:
mode:
authorZheng RuiFeng <ruifengz@foxmail.com>2016-11-16 02:46:27 -0800
committerYanbo Liang <ybliang8@gmail.com>2016-11-16 02:46:27 -0800
commitc68f1a38af67957ee28889667193da8f64bb4342 (patch)
tree0525803eac2d7c870762d8c862103699ef2c5259 /mllib
parent241e04bc03efb1379622c0c84299e617512973ac (diff)
downloadspark-c68f1a38af67957ee28889667193da8f64bb4342.tar.gz
spark-c68f1a38af67957ee28889667193da8f64bb4342.tar.bz2
spark-c68f1a38af67957ee28889667193da8f64bb4342.zip
[SPARK-18434][ML] Add missing ParamValidations for ML algos
## What changes were proposed in this pull request?
Add missing ParamValidations for ML algos

## How was this patch tested?
Existing tests

Author: Zheng RuiFeng <ruifengz@foxmail.com>

Closes #15881 from zhengruifeng/arg_checking.
Diffstat (limited to 'mllib')
-rw-r--r-- mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala | 3
-rw-r--r-- mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala | 3
-rw-r--r-- mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala | 13
-rw-r--r-- mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala | 3
-rw-r--r-- mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala | 6
-rw-r--r-- mllib/src/main/scala/org/apache/spark/ml/tree/treeParams.scala | 4
6 files changed, 22 insertions, 10 deletions
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala
index 6386dd8a10..46a0730f5d 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala
@@ -44,7 +44,8 @@ private[feature] trait IDFBase extends Params with HasInputCol with HasOutputCol
* @group param
*/
final val minDocFreq = new IntParam(
- this, "minDocFreq", "minimum number of documents in which a term should appear for filtering")
+ this, "minDocFreq", "minimum number of documents in which a term should appear for filtering" +
+ " (>= 0)", ParamValidators.gtEq(0))
setDefault(minDocFreq -> 0)
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala
index 6b913480fd..444006fe1e 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala
@@ -44,7 +44,8 @@ private[feature] trait PCAParams extends Params with HasInputCol with HasOutputC
* The number of principal components.
* @group param
*/
- final val k: IntParam = new IntParam(this, "k", "the number of principal components")
+ final val k: IntParam = new IntParam(this, "k", "the number of principal components (> 0)",
+ ParamValidators.gt(0))
/** @group getParam */
def getK: Int = $(k)
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala
index d53f3df514..3ed08c983d 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala
@@ -43,7 +43,8 @@ private[feature] trait Word2VecBase extends Params
* @group param
*/
final val vectorSize = new IntParam(
- this, "vectorSize", "the dimension of codes after transforming from words")
+ this, "vectorSize", "the dimension of codes after transforming from words (> 0)",
+ ParamValidators.gt(0))
setDefault(vectorSize -> 100)
/** @group getParam */
@@ -55,7 +56,8 @@ private[feature] trait Word2VecBase extends Params
* @group expertParam
*/
final val windowSize = new IntParam(
- this, "windowSize", "the window size (context words from [-window, window])")
+ this, "windowSize", "the window size (context words from [-window, window]) (> 0)",
+ ParamValidators.gt(0))
setDefault(windowSize -> 5)
/** @group expertGetParam */
@@ -67,7 +69,8 @@ private[feature] trait Word2VecBase extends Params
* @group param
*/
final val numPartitions = new IntParam(
- this, "numPartitions", "number of partitions for sentences of words")
+ this, "numPartitions", "number of partitions for sentences of words (> 0)",
+ ParamValidators.gt(0))
setDefault(numPartitions -> 1)
/** @group getParam */
@@ -80,7 +83,7 @@ private[feature] trait Word2VecBase extends Params
* @group param
*/
final val minCount = new IntParam(this, "minCount", "the minimum number of times a token must " +
- "appear to be included in the word2vec model's vocabulary")
+ "appear to be included in the word2vec model's vocabulary (>= 0)", ParamValidators.gtEq(0))
setDefault(minCount -> 5)
/** @group getParam */
@@ -95,7 +98,7 @@ private[feature] trait Word2VecBase extends Params
*/
final val maxSentenceLength = new IntParam(this, "maxSentenceLength", "Maximum length " +
"(in words) of each sentence in the input data. Any sentence longer than this threshold will " +
- "be divided into chunks up to the size.")
+ "be divided into chunks up to the size (> 0)", ParamValidators.gt(0))
setDefault(maxSentenceLength -> 1000)
/** @group getParam */
diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala
index cd7b4f2a9c..4d274f3a5b 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala
@@ -61,7 +61,8 @@ private[regression] trait IsotonicRegressionBase extends Params with HasFeatures
* @group param
*/
final val featureIndex: IntParam = new IntParam(this, "featureIndex",
- "The index of the feature if featuresCol is a vector column, no effect otherwise.")
+ "The index of the feature if featuresCol is a vector column, no effect otherwise (>= 0)",
+ ParamValidators.gtEq(0))
/** @group getParam */
final def getFeatureIndex: Int = $(featureIndex)
diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
index 9639b07496..71c542adf6 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
@@ -171,7 +171,11 @@ class LinearRegression @Since("1.3.0") (@Since("1.3.0") override val uid: String
* @group setParam
*/
@Since("1.6.0")
- def setSolver(value: String): this.type = set(solver, value)
+ def setSolver(value: String): this.type = {
+ require(Set("auto", "l-bfgs", "normal").contains(value),
+ s"Solver $value was not supported. Supported options: auto, l-bfgs, normal")
+ set(solver, value)
+ }
setDefault(solver -> "auto")
/**
diff --git a/mllib/src/main/scala/org/apache/spark/ml/tree/treeParams.scala b/mllib/src/main/scala/org/apache/spark/ml/tree/treeParams.scala
index 57c7e44e97..5a551533be 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/tree/treeParams.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/tree/treeParams.scala
@@ -73,11 +73,13 @@ private[ml] trait DecisionTreeParams extends PredictorParams
/**
* Minimum information gain for a split to be considered at a tree node.
+ * Should be >= 0.0.
* (default = 0.0)
* @group param
*/
final val minInfoGain: DoubleParam = new DoubleParam(this, "minInfoGain",
- "Minimum information gain for a split to be considered at a tree node.")
+ "Minimum information gain for a split to be considered at a tree node.",
+ ParamValidators.gtEq(0.0))
/**
* Maximum memory in MB allocated to histogram aggregation. If too small, then 1 node will be