aboutsummaryrefslogtreecommitdiff
path: root/mllib/src/main
diff options
context:
space:
mode:
author: Joseph K. Bradley <joseph@databricks.com> 2015-08-12 10:48:52 -0700
committer: Xiangrui Meng <meng@databricks.com> 2015-08-12 10:48:52 -0700
commit: 70fe558867ccb4bcff6ec673438b03608bb02252 (patch)
tree: f4f02935c3e5964ca7b00f068aab2ed6c2276bf8 /mllib/src/main
parent: 57ec27dd7784ce15a2ece8a6c8ac7bd5fd25aea2 (diff)
download: spark-70fe558867ccb4bcff6ec673438b03608bb02252.tar.gz
spark-70fe558867ccb4bcff6ec673438b03608bb02252.tar.bz2
spark-70fe558867ccb4bcff6ec673438b03608bb02252.zip
[SPARK-9847] [ML] Modified copyValues to distinguish between default, explicit param values
From JIRA: Currently, Params.copyValues copies default parameter values to the paramMap of the target instance, rather than the defaultParamMap. It should copy to the defaultParamMap because explicitly setting a parameter can change the semantics. This issue arose in SPARK-9789, where 2 params "threshold" and "thresholds" for LogisticRegression can have mutually exclusive values. If thresholds is set, then fit() will copy the default value of threshold as well, easily resulting in inconsistent settings for the 2 params.

CC: mengxr

Author: Joseph K. Bradley <joseph@databricks.com>

Closes #8115 from jkbradley/copyvalues-fix.
Diffstat (limited to 'mllib/src/main')
-rw-r--r-- mllib/src/main/scala/org/apache/spark/ml/param/params.scala | 19
1 files changed, 16 insertions, 3 deletions
diff --git a/mllib/src/main/scala/org/apache/spark/ml/param/params.scala b/mllib/src/main/scala/org/apache/spark/ml/param/params.scala
index d68f5ff005..91c0a56313 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/param/params.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/param/params.scala
@@ -559,13 +559,26 @@ trait Params extends Identifiable with Serializable {
/**
* Copies param values from this instance to another instance for params shared by them.
- * @param to the target instance
- * @param extra extra params to be copied
+ *
+ * This handles default Params and explicitly set Params separately.
+ * Default Params are copied from and to [[defaultParamMap]], and explicitly set Params are
+ * copied from and to [[paramMap]].
+ * Warning: This implicitly assumes that this [[Params]] instance and the target instance
+ * share the same set of default Params.
+ *
+ * @param to the target instance, which should work with the same set of default Params as this
+ * source instance
+ * @param extra extra params to be copied to the target's [[paramMap]]
* @return the target instance with param values copied
*/
protected def copyValues[T <: Params](to: T, extra: ParamMap = ParamMap.empty): T = {
- val map = extractParamMap(extra)
+ val map = paramMap ++ extra
params.foreach { param =>
+ // copy default Params
+ if (defaultParamMap.contains(param) && to.hasParam(param.name)) {
+ to.defaultParamMap.put(to.getParam(param.name), defaultParamMap(param))
+ }
+ // copy explicitly set Params
if (map.contains(param) && to.hasParam(param.name)) {
to.set(param.name, map(param))
}