about summary refs log tree commit diff
path: root/mllib
diff options
context:
space:
mode:
Diffstat (limited to 'mllib')
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala        |  4
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/clustering/LDA.scala                           |  5
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/r/GeneralizedLinearRegressionWrapper.scala     |  6
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala                       |  9
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala   | 38
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/tree/treeParams.scala                          | 41
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/clustering/LDA.scala                        |  4
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala                |  4
8 files changed, 69 insertions(+), 42 deletions(-)
diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
index 7b56bce41c..965ce3d6f2 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
@@ -17,6 +17,8 @@
package org.apache.spark.ml.classification
+import java.util.Locale
+
import scala.collection.mutable
import breeze.linalg.{DenseVector => BDV}
@@ -654,7 +656,7 @@ object LogisticRegression extends DefaultParamsReadable[LogisticRegression] {
override def load(path: String): LogisticRegression = super.load(path)
private[classification] val supportedFamilyNames =
- Array("auto", "binomial", "multinomial").map(_.toLowerCase)
+ Array("auto", "binomial", "multinomial").map(_.toLowerCase(Locale.ROOT))
}
/**
diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/LDA.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/LDA.scala
index 55720e2d61..2f50dc7c85 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/clustering/LDA.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/LDA.scala
@@ -17,6 +17,8 @@
package org.apache.spark.ml.clustering
+import java.util.Locale
+
import org.apache.hadoop.fs.Path
import org.json4s.DefaultFormats
import org.json4s.JsonAST.JObject
@@ -173,7 +175,8 @@ private[clustering] trait LDAParams extends Params with HasFeaturesCol with HasM
@Since("1.6.0")
final val optimizer = new Param[String](this, "optimizer", "Optimizer or inference" +
" algorithm used to estimate the LDA model. Supported: " + supportedOptimizers.mkString(", "),
- (o: String) => ParamValidators.inArray(supportedOptimizers).apply(o.toLowerCase))
+ (o: String) =>
+ ParamValidators.inArray(supportedOptimizers).apply(o.toLowerCase(Locale.ROOT)))
/** @group getParam */
@Since("1.6.0")
diff --git a/mllib/src/main/scala/org/apache/spark/ml/r/GeneralizedLinearRegressionWrapper.scala b/mllib/src/main/scala/org/apache/spark/ml/r/GeneralizedLinearRegressionWrapper.scala
index c49416b240..4bd4aa7113 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/r/GeneralizedLinearRegressionWrapper.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/r/GeneralizedLinearRegressionWrapper.scala
@@ -17,6 +17,8 @@
package org.apache.spark.ml.r
+import java.util.Locale
+
import org.apache.hadoop.fs.Path
import org.json4s._
import org.json4s.JsonDSL._
@@ -91,7 +93,7 @@ private[r] object GeneralizedLinearRegressionWrapper
.setRegParam(regParam)
.setFeaturesCol(rFormula.getFeaturesCol)
// set variancePower and linkPower if family is tweedie; otherwise, set link function
- if (family.toLowerCase == "tweedie") {
+ if (family.toLowerCase(Locale.ROOT) == "tweedie") {
glr.setVariancePower(variancePower).setLinkPower(linkPower)
} else {
glr.setLink(link)
@@ -151,7 +153,7 @@ private[r] object GeneralizedLinearRegressionWrapper
val rDeviance: Double = summary.deviance
val rResidualDegreeOfFreedomNull: Long = summary.residualDegreeOfFreedomNull
val rResidualDegreeOfFreedom: Long = summary.residualDegreeOfFreedom
- val rAic: Double = if (family.toLowerCase == "tweedie" &&
+ val rAic: Double = if (family.toLowerCase(Locale.ROOT) == "tweedie" &&
!Array(0.0, 1.0, 2.0).exists(x => math.abs(x - variancePower) < 1e-8)) {
0.0
} else {
diff --git a/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala b/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala
index 60dd736705..a20ef72446 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala
@@ -19,6 +19,7 @@ package org.apache.spark.ml.recommendation
import java.{util => ju}
import java.io.IOException
+import java.util.Locale
import scala.collection.mutable
import scala.reflect.ClassTag
@@ -40,8 +41,7 @@ import org.apache.spark.ml.util._
import org.apache.spark.mllib.linalg.CholeskyDecomposition
import org.apache.spark.mllib.optimization.NNLS
import org.apache.spark.rdd.RDD
-import org.apache.spark.sql.{DataFrame, Dataset, Row}
-import org.apache.spark.sql.catalyst.encoders.RowEncoder
+import org.apache.spark.sql.{DataFrame, Dataset}
import org.apache.spark.sql.functions._
import org.apache.spark.sql.types._
import org.apache.spark.storage.StorageLevel
@@ -118,10 +118,11 @@ private[recommendation] trait ALSModelParams extends Params with HasPredictionCo
"useful in cross-validation or production scenarios, for handling user/item ids the model " +
"has not seen in the training data. Supported values: " +
s"${ALSModel.supportedColdStartStrategies.mkString(",")}.",
- (s: String) => ALSModel.supportedColdStartStrategies.contains(s.toLowerCase))
+ (s: String) =>
+ ALSModel.supportedColdStartStrategies.contains(s.toLowerCase(Locale.ROOT)))
/** @group expertGetParam */
- def getColdStartStrategy: String = $(coldStartStrategy).toLowerCase
+ def getColdStartStrategy: String = $(coldStartStrategy).toLowerCase(Locale.ROOT)
}
/**
diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala
index 3be8b533ee..33137b0c0f 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala
@@ -17,6 +17,8 @@
package org.apache.spark.ml.regression
+import java.util.Locale
+
import breeze.stats.{distributions => dist}
import org.apache.hadoop.fs.Path
@@ -57,7 +59,7 @@ private[regression] trait GeneralizedLinearRegressionBase extends PredictorParam
final val family: Param[String] = new Param(this, "family",
"The name of family which is a description of the error distribution to be used in the " +
s"model. Supported options: ${supportedFamilyNames.mkString(", ")}.",
- (value: String) => supportedFamilyNames.contains(value.toLowerCase))
+ (value: String) => supportedFamilyNames.contains(value.toLowerCase(Locale.ROOT)))
/** @group getParam */
@Since("2.0.0")
@@ -99,7 +101,7 @@ private[regression] trait GeneralizedLinearRegressionBase extends PredictorParam
final val link: Param[String] = new Param(this, "link", "The name of link function " +
"which provides the relationship between the linear predictor and the mean of the " +
s"distribution function. Supported options: ${supportedLinkNames.mkString(", ")}",
- (value: String) => supportedLinkNames.contains(value.toLowerCase))
+ (value: String) => supportedLinkNames.contains(value.toLowerCase(Locale.ROOT)))
/** @group getParam */
@Since("2.0.0")
@@ -148,7 +150,7 @@ private[regression] trait GeneralizedLinearRegressionBase extends PredictorParam
schema: StructType,
fitting: Boolean,
featuresDataType: DataType): StructType = {
- if ($(family).toLowerCase == "tweedie") {
+ if ($(family).toLowerCase(Locale.ROOT) == "tweedie") {
if (isSet(link)) {
logWarning("When family is tweedie, use param linkPower to specify link function. " +
"Setting param link will take no effect.")
@@ -460,13 +462,15 @@ object GeneralizedLinearRegression extends DefaultParamsReadable[GeneralizedLine
*/
def apply(params: GeneralizedLinearRegressionBase): FamilyAndLink = {
val familyObj = Family.fromParams(params)
- val linkObj = if ((params.getFamily.toLowerCase != "tweedie" &&
- params.isSet(params.link)) || (params.getFamily.toLowerCase == "tweedie" &&
- params.isSet(params.linkPower))) {
- Link.fromParams(params)
- } else {
- familyObj.defaultLink
- }
+ val linkObj =
+ if ((params.getFamily.toLowerCase(Locale.ROOT) != "tweedie" &&
+ params.isSet(params.link)) ||
+ (params.getFamily.toLowerCase(Locale.ROOT) == "tweedie" &&
+ params.isSet(params.linkPower))) {
+ Link.fromParams(params)
+ } else {
+ familyObj.defaultLink
+ }
new FamilyAndLink(familyObj, linkObj)
}
}
@@ -519,7 +523,7 @@ object GeneralizedLinearRegression extends DefaultParamsReadable[GeneralizedLine
* @param params the parameter map containing family name and variance power
*/
def fromParams(params: GeneralizedLinearRegressionBase): Family = {
- params.getFamily.toLowerCase match {
+ params.getFamily.toLowerCase(Locale.ROOT) match {
case Gaussian.name => Gaussian
case Binomial.name => Binomial
case Poisson.name => Poisson
@@ -795,7 +799,7 @@ object GeneralizedLinearRegression extends DefaultParamsReadable[GeneralizedLine
* @param params the parameter map containing family, link and linkPower
*/
def fromParams(params: GeneralizedLinearRegressionBase): Link = {
- if (params.getFamily.toLowerCase == "tweedie") {
+ if (params.getFamily.toLowerCase(Locale.ROOT) == "tweedie") {
params.getLinkPower match {
case 0.0 => Log
case 1.0 => Identity
@@ -804,7 +808,7 @@ object GeneralizedLinearRegression extends DefaultParamsReadable[GeneralizedLine
case others => new Power(others)
}
} else {
- params.getLink.toLowerCase match {
+ params.getLink.toLowerCase(Locale.ROOT) match {
case Identity.name => Identity
case Logit.name => Logit
case Log.name => Log
@@ -1253,8 +1257,8 @@ class GeneralizedLinearRegressionSummary private[regression] (
*/
@Since("2.0.0")
lazy val dispersion: Double = if (
- model.getFamily.toLowerCase == Binomial.name ||
- model.getFamily.toLowerCase == Poisson.name) {
+ model.getFamily.toLowerCase(Locale.ROOT) == Binomial.name ||
+ model.getFamily.toLowerCase(Locale.ROOT) == Poisson.name) {
1.0
} else {
val rss = pearsonResiduals.agg(sum(pow(col("pearsonResiduals"), 2.0))).first().getDouble(0)
@@ -1357,8 +1361,8 @@ class GeneralizedLinearRegressionTrainingSummary private[regression] (
@Since("2.0.0")
lazy val pValues: Array[Double] = {
if (isNormalSolver) {
- if (model.getFamily.toLowerCase == Binomial.name ||
- model.getFamily.toLowerCase == Poisson.name) {
+ if (model.getFamily.toLowerCase(Locale.ROOT) == Binomial.name ||
+ model.getFamily.toLowerCase(Locale.ROOT) == Poisson.name) {
tValues.map { x => 2.0 * (1.0 - dist.Gaussian(0.0, 1.0).cdf(math.abs(x))) }
} else {
tValues.map { x =>
diff --git a/mllib/src/main/scala/org/apache/spark/ml/tree/treeParams.scala b/mllib/src/main/scala/org/apache/spark/ml/tree/treeParams.scala
index 5eb707dfe7..cd1950bd76 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/tree/treeParams.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/tree/treeParams.scala
@@ -17,6 +17,8 @@
package org.apache.spark.ml.tree
+import java.util.Locale
+
import scala.util.Try
import org.apache.spark.ml.PredictorParams
@@ -218,7 +220,8 @@ private[ml] trait TreeClassifierParams extends Params {
final val impurity: Param[String] = new Param[String](this, "impurity", "Criterion used for" +
" information gain calculation (case-insensitive). Supported options:" +
s" ${TreeClassifierParams.supportedImpurities.mkString(", ")}",
- (value: String) => TreeClassifierParams.supportedImpurities.contains(value.toLowerCase))
+ (value: String) =>
+ TreeClassifierParams.supportedImpurities.contains(value.toLowerCase(Locale.ROOT)))
setDefault(impurity -> "gini")
@@ -230,7 +233,7 @@ private[ml] trait TreeClassifierParams extends Params {
def setImpurity(value: String): this.type = set(impurity, value)
/** @group getParam */
- final def getImpurity: String = $(impurity).toLowerCase
+ final def getImpurity: String = $(impurity).toLowerCase(Locale.ROOT)
/** Convert new impurity to old impurity. */
private[ml] def getOldImpurity: OldImpurity = {
@@ -247,7 +250,8 @@ private[ml] trait TreeClassifierParams extends Params {
private[ml] object TreeClassifierParams {
// These options should be lowercase.
- final val supportedImpurities: Array[String] = Array("entropy", "gini").map(_.toLowerCase)
+ final val supportedImpurities: Array[String] =
+ Array("entropy", "gini").map(_.toLowerCase(Locale.ROOT))
}
private[ml] trait DecisionTreeClassifierParams
@@ -267,7 +271,8 @@ private[ml] trait TreeRegressorParams extends Params {
final val impurity: Param[String] = new Param[String](this, "impurity", "Criterion used for" +
" information gain calculation (case-insensitive). Supported options:" +
s" ${TreeRegressorParams.supportedImpurities.mkString(", ")}",
- (value: String) => TreeRegressorParams.supportedImpurities.contains(value.toLowerCase))
+ (value: String) =>
+ TreeRegressorParams.supportedImpurities.contains(value.toLowerCase(Locale.ROOT)))
setDefault(impurity -> "variance")
@@ -279,7 +284,7 @@ private[ml] trait TreeRegressorParams extends Params {
def setImpurity(value: String): this.type = set(impurity, value)
/** @group getParam */
- final def getImpurity: String = $(impurity).toLowerCase
+ final def getImpurity: String = $(impurity).toLowerCase(Locale.ROOT)
/** Convert new impurity to old impurity. */
private[ml] def getOldImpurity: OldImpurity = {
@@ -295,7 +300,8 @@ private[ml] trait TreeRegressorParams extends Params {
private[ml] object TreeRegressorParams {
// These options should be lowercase.
- final val supportedImpurities: Array[String] = Array("variance").map(_.toLowerCase)
+ final val supportedImpurities: Array[String] =
+ Array("variance").map(_.toLowerCase(Locale.ROOT))
}
private[ml] trait DecisionTreeRegressorParams extends DecisionTreeParams
@@ -417,7 +423,8 @@ private[ml] trait RandomForestParams extends TreeEnsembleParams {
s" Supported options: ${RandomForestParams.supportedFeatureSubsetStrategies.mkString(", ")}" +
s", (0.0-1.0], [1-n].",
(value: String) =>
- RandomForestParams.supportedFeatureSubsetStrategies.contains(value.toLowerCase)
+ RandomForestParams.supportedFeatureSubsetStrategies.contains(
+ value.toLowerCase(Locale.ROOT))
|| Try(value.toInt).filter(_ > 0).isSuccess
|| Try(value.toDouble).filter(_ > 0).filter(_ <= 1.0).isSuccess)
@@ -431,13 +438,13 @@ private[ml] trait RandomForestParams extends TreeEnsembleParams {
def setFeatureSubsetStrategy(value: String): this.type = set(featureSubsetStrategy, value)
/** @group getParam */
- final def getFeatureSubsetStrategy: String = $(featureSubsetStrategy).toLowerCase
+ final def getFeatureSubsetStrategy: String = $(featureSubsetStrategy).toLowerCase(Locale.ROOT)
}
private[spark] object RandomForestParams {
// These options should be lowercase.
final val supportedFeatureSubsetStrategies: Array[String] =
- Array("auto", "all", "onethird", "sqrt", "log2").map(_.toLowerCase)
+ Array("auto", "all", "onethird", "sqrt", "log2").map(_.toLowerCase(Locale.ROOT))
}
private[ml] trait RandomForestClassifierParams
@@ -509,7 +516,8 @@ private[ml] trait GBTParams extends TreeEnsembleParams with HasMaxIter {
private[ml] object GBTClassifierParams {
// The losses below should be lowercase.
/** Accessor for supported loss settings: logistic */
- final val supportedLossTypes: Array[String] = Array("logistic").map(_.toLowerCase)
+ final val supportedLossTypes: Array[String] =
+ Array("logistic").map(_.toLowerCase(Locale.ROOT))
}
private[ml] trait GBTClassifierParams extends GBTParams with TreeClassifierParams {
@@ -523,12 +531,13 @@ private[ml] trait GBTClassifierParams extends GBTParams with TreeClassifierParam
val lossType: Param[String] = new Param[String](this, "lossType", "Loss function which GBT" +
" tries to minimize (case-insensitive). Supported options:" +
s" ${GBTClassifierParams.supportedLossTypes.mkString(", ")}",
- (value: String) => GBTClassifierParams.supportedLossTypes.contains(value.toLowerCase))
+ (value: String) =>
+ GBTClassifierParams.supportedLossTypes.contains(value.toLowerCase(Locale.ROOT)))
setDefault(lossType -> "logistic")
/** @group getParam */
- def getLossType: String = $(lossType).toLowerCase
+ def getLossType: String = $(lossType).toLowerCase(Locale.ROOT)
/** (private[ml]) Convert new loss to old loss. */
override private[ml] def getOldLossType: OldClassificationLoss = {
@@ -544,7 +553,8 @@ private[ml] trait GBTClassifierParams extends GBTParams with TreeClassifierParam
private[ml] object GBTRegressorParams {
// The losses below should be lowercase.
/** Accessor for supported loss settings: squared (L2), absolute (L1) */
- final val supportedLossTypes: Array[String] = Array("squared", "absolute").map(_.toLowerCase)
+ final val supportedLossTypes: Array[String] =
+ Array("squared", "absolute").map(_.toLowerCase(Locale.ROOT))
}
private[ml] trait GBTRegressorParams extends GBTParams with TreeRegressorParams {
@@ -558,12 +568,13 @@ private[ml] trait GBTRegressorParams extends GBTParams with TreeRegressorParams
val lossType: Param[String] = new Param[String](this, "lossType", "Loss function which GBT" +
" tries to minimize (case-insensitive). Supported options:" +
s" ${GBTRegressorParams.supportedLossTypes.mkString(", ")}",
- (value: String) => GBTRegressorParams.supportedLossTypes.contains(value.toLowerCase))
+ (value: String) =>
+ GBTRegressorParams.supportedLossTypes.contains(value.toLowerCase(Locale.ROOT)))
setDefault(lossType -> "squared")
/** @group getParam */
- def getLossType: String = $(lossType).toLowerCase
+ def getLossType: String = $(lossType).toLowerCase(Locale.ROOT)
/** (private[ml]) Convert new loss to old loss. */
override private[ml] def getOldLossType: OldLoss = {
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDA.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDA.scala
index 6c5f529fb8..4aa647236b 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDA.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDA.scala
@@ -17,6 +17,8 @@
package org.apache.spark.mllib.clustering
+import java.util.Locale
+
import breeze.linalg.{DenseVector => BDV}
import org.apache.spark.annotation.{DeveloperApi, Since}
@@ -306,7 +308,7 @@ class LDA private (
@Since("1.4.0")
def setOptimizer(optimizerName: String): this.type = {
this.ldaOptimizer =
- optimizerName.toLowerCase match {
+ optimizerName.toLowerCase(Locale.ROOT) match {
case "em" => new EMLDAOptimizer
case "online" => new OnlineLDAOptimizer
case other =>
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala
index 98a3021461..4c7746869d 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala
@@ -17,6 +17,8 @@
package org.apache.spark.mllib.tree.impurity
+import java.util.Locale
+
import org.apache.spark.annotation.{DeveloperApi, Since}
/**
@@ -184,7 +186,7 @@ private[spark] object ImpurityCalculator {
* the given stats.
*/
def getCalculator(impurity: String, stats: Array[Double]): ImpurityCalculator = {
- impurity.toLowerCase match {
+ impurity.toLowerCase(Locale.ROOT) match {
case "gini" => new GiniCalculator(stats)
case "entropy" => new EntropyCalculator(stats)
case "variance" => new VarianceCalculator(stats)