about | summary | refs | log | tree | commit | diff
path: root/mllib/src
diff options
context:
space:
mode:
author: Yanbo Liang <ybliang8@gmail.com> 2015-04-01 17:19:36 -0700
committer: Xiangrui Meng <meng@databricks.com> 2015-04-01 17:19:36 -0700
commit: 86b43993517104e6d5ad0785704ceec6db8acc20 (patch)
tree: b46aacdbae1be099d5377f6d825d9cc6ff23d9a3 /mllib/src
parent: 2fa3b47dbf38aae58514473932c69bbd35de4e4c (diff)
download: spark-86b43993517104e6d5ad0785704ceec6db8acc20.tar.gz
spark-86b43993517104e6d5ad0785704ceec6db8acc20.tar.bz2
spark-86b43993517104e6d5ad0785704ceec6db8acc20.zip
[SPARK-6580] [MLLIB] Optimize LogisticRegressionModel.predictPoint
https://issues.apache.org/jira/browse/SPARK-6580

Author: Yanbo Liang <ybliang8@gmail.com>

Closes #5249 from yanboliang/spark-6580 and squashes the following commits:

6f47f21 [Yanbo Liang] address comments
4e0bd0f [Yanbo Liang] fix typos
04e2e2a [Yanbo Liang] trigger jenkins
cad5bcd [Yanbo Liang] Optimize LogisticRegressionModel.predictPoint
Diffstat (limited to 'mllib/src')
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala 55
1 files changed, 26 insertions, 29 deletions
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala
index e7c3599ff6..057b628c6a 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala
@@ -62,6 +62,15 @@ class LogisticRegressionModel (
s" but was given weights of length ${weights.size}")
}
+ private val dataWithBiasSize: Int = weights.size / (numClasses - 1)
+
+ private val weightsArray: Array[Double] = weights match {
+ case dv: DenseVector => dv.values
+ case _ =>
+ throw new IllegalArgumentException(
+ s"weights only supports dense vector but got type ${weights.getClass}.")
+ }
+
/**
* Constructs a [[LogisticRegressionModel]] with weights and intercept for binary classification.
*/
@@ -74,6 +83,7 @@ class LogisticRegressionModel (
* Sets the threshold that separates positive predictions from negative predictions
* in Binary Logistic Regression. An example with prediction score greater than or equal to
* this threshold is identified as a positive, and negative otherwise. The default value is 0.5.
+ * It is only used for binary classification.
*/
@Experimental
def setThreshold(threshold: Double): this.type = {
@@ -84,6 +94,7 @@ class LogisticRegressionModel (
/**
* :: Experimental ::
* Returns the threshold (if any) used for converting raw prediction scores into 0/1 predictions.
+ * It is only used for binary classification.
*/
@Experimental
def getThreshold: Option[Double] = threshold
@@ -91,6 +102,7 @@ class LogisticRegressionModel (
/**
* :: Experimental ::
* Clears the threshold so that `predict` will output raw prediction scores.
+ * It is only used for binary classification.
*/
@Experimental
def clearThreshold(): this.type = {
@@ -106,7 +118,6 @@ class LogisticRegressionModel (
// If dataMatrix and weightMatrix have the same dimension, it's binary logistic regression.
if (numClasses == 2) {
- require(numFeatures == weightMatrix.size)
val margin = dot(weightMatrix, dataMatrix) + intercept
val score = 1.0 / (1.0 + math.exp(-margin))
threshold match {
@@ -114,30 +125,9 @@ class LogisticRegressionModel (
case None => score
}
} else {
- val dataWithBiasSize = weightMatrix.size / (numClasses - 1)
-
- val weightsArray = weightMatrix match {
- case dv: DenseVector => dv.values
- case _ =>
- throw new IllegalArgumentException(
- s"weights only supports dense vector but got type ${weightMatrix.getClass}.")
- }
-
- val margins = (0 until numClasses - 1).map { i =>
- var margin = 0.0
- dataMatrix.foreachActive { (index, value) =>
- if (value != 0.0) margin += value * weightsArray((i * dataWithBiasSize) + index)
- }
- // Intercept is required to be added into margin.
- if (dataMatrix.size + 1 == dataWithBiasSize) {
- margin += weightsArray((i * dataWithBiasSize) + dataMatrix.size)
- }
- margin
- }
-
/**
- * Find the one with maximum margins. If the maxMargin is negative, then the prediction
- * result will be the first class.
+ * Compute and find the one with maximum margins. If the maxMargin is negative, then the
+ * prediction result will be the first class.
*
* PS, if you want to compute the probabilities for each outcome instead of the outcome
* with maximum probability, remember to subtract the maxMargin from margins if maxMargin
@@ -145,13 +135,20 @@ class LogisticRegressionModel (
*/
var bestClass = 0
var maxMargin = 0.0
- var i = 0
- while(i < margins.size) {
- if (margins(i) > maxMargin) {
- maxMargin = margins(i)
+ val withBias = dataMatrix.size + 1 == dataWithBiasSize
+ (0 until numClasses - 1).foreach { i =>
+ var margin = 0.0
+ dataMatrix.foreachActive { (index, value) =>
+ if (value != 0.0) margin += value * weightsArray((i * dataWithBiasSize) + index)
+ }
+ // Intercept is required to be added into margin.
+ if (withBias) {
+ margin += weightsArray((i * dataWithBiasSize) + dataMatrix.size)
+ }
+ if (margin > maxMargin) {
+ maxMargin = margin
bestClass = i + 1
}
- i += 1
}
bestClass.toDouble
}