-rw-r--r--  dev/deps/spark-deps-hadoop-2.2                                                    | 5
-rw-r--r--  dev/deps/spark-deps-hadoop-2.3                                                    | 5
-rw-r--r--  dev/deps/spark-deps-hadoop-2.4                                                    | 5
-rw-r--r--  dev/deps/spark-deps-hadoop-2.6                                                    | 5
-rw-r--r--  dev/deps/spark-deps-hadoop-2.7                                                    | 5
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala  | 5
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala   | 6
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala        | 5
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala             | 8
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala         | 5
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/optimization/LBFGS.scala              | 5
-rw-r--r--  mllib/src/test/java/org/apache/spark/ml/feature/JavaPCASuite.java                 | 6
-rw-r--r--  mllib/src/test/scala/org/apache/spark/mllib/clustering/LDASuite.scala             | 4
-rw-r--r--  mllib/src/test/scala/org/apache/spark/mllib/feature/PCASuite.scala                | 9
-rw-r--r--  pom.xml                                                                           | 2
-rw-r--r--  python/pyspark/ml/classification.py                                               | 2
16 files changed, 40 insertions(+), 42 deletions(-)
diff --git a/dev/deps/spark-deps-hadoop-2.2 b/dev/deps/spark-deps-hadoop-2.2
index feb3474cf1..5d536b7c24 100644
--- a/dev/deps/spark-deps-hadoop-2.2
+++ b/dev/deps/spark-deps-hadoop-2.2
@@ -12,8 +12,8 @@ avro-1.7.7.jar
avro-ipc-1.7.7.jar
avro-mapred-1.7.7-hadoop2.jar
bonecp-0.8.0.RELEASE.jar
-breeze-macros_2.11-0.11.2.jar
-breeze_2.11-0.11.2.jar
+breeze-macros_2.11-0.12.jar
+breeze_2.11-0.12.jar
calcite-avatica-1.2.0-incubating.jar
calcite-core-1.2.0-incubating.jar
calcite-linq4j-1.2.0-incubating.jar
@@ -147,6 +147,7 @@ scala-parser-combinators_2.11-1.0.4.jar
scala-reflect-2.11.8.jar
scala-xml_2.11-1.0.2.jar
scalap-2.11.8.jar
+shapeless_2.11-2.0.0.jar
slf4j-api-1.7.16.jar
slf4j-log4j12-1.7.16.jar
snappy-0.2.jar
diff --git a/dev/deps/spark-deps-hadoop-2.3 b/dev/deps/spark-deps-hadoop-2.3
index 3e960358f7..d16f42a97d 100644
--- a/dev/deps/spark-deps-hadoop-2.3
+++ b/dev/deps/spark-deps-hadoop-2.3
@@ -15,8 +15,8 @@ avro-mapred-1.7.7-hadoop2.jar
base64-2.3.8.jar
bcprov-jdk15on-1.51.jar
bonecp-0.8.0.RELEASE.jar
-breeze-macros_2.11-0.11.2.jar
-breeze_2.11-0.11.2.jar
+breeze-macros_2.11-0.12.jar
+breeze_2.11-0.12.jar
calcite-avatica-1.2.0-incubating.jar
calcite-core-1.2.0-incubating.jar
calcite-linq4j-1.2.0-incubating.jar
@@ -154,6 +154,7 @@ scala-parser-combinators_2.11-1.0.4.jar
scala-reflect-2.11.8.jar
scala-xml_2.11-1.0.2.jar
scalap-2.11.8.jar
+shapeless_2.11-2.0.0.jar
slf4j-api-1.7.16.jar
slf4j-log4j12-1.7.16.jar
snappy-0.2.jar
diff --git a/dev/deps/spark-deps-hadoop-2.4 b/dev/deps/spark-deps-hadoop-2.4
index 3fc14a6fbf..2e261cb9a5 100644
--- a/dev/deps/spark-deps-hadoop-2.4
+++ b/dev/deps/spark-deps-hadoop-2.4
@@ -15,8 +15,8 @@ avro-mapred-1.7.7-hadoop2.jar
base64-2.3.8.jar
bcprov-jdk15on-1.51.jar
bonecp-0.8.0.RELEASE.jar
-breeze-macros_2.11-0.11.2.jar
-breeze_2.11-0.11.2.jar
+breeze-macros_2.11-0.12.jar
+breeze_2.11-0.12.jar
calcite-avatica-1.2.0-incubating.jar
calcite-core-1.2.0-incubating.jar
calcite-linq4j-1.2.0-incubating.jar
@@ -154,6 +154,7 @@ scala-parser-combinators_2.11-1.0.4.jar
scala-reflect-2.11.8.jar
scala-xml_2.11-1.0.2.jar
scalap-2.11.8.jar
+shapeless_2.11-2.0.0.jar
slf4j-api-1.7.16.jar
slf4j-log4j12-1.7.16.jar
snappy-0.2.jar
diff --git a/dev/deps/spark-deps-hadoop-2.6 b/dev/deps/spark-deps-hadoop-2.6
index 909fbb8852..67f38f4c22 100644
--- a/dev/deps/spark-deps-hadoop-2.6
+++ b/dev/deps/spark-deps-hadoop-2.6
@@ -19,8 +19,8 @@ avro-mapred-1.7.7-hadoop2.jar
base64-2.3.8.jar
bcprov-jdk15on-1.51.jar
bonecp-0.8.0.RELEASE.jar
-breeze-macros_2.11-0.11.2.jar
-breeze_2.11-0.11.2.jar
+breeze-macros_2.11-0.12.jar
+breeze_2.11-0.12.jar
calcite-avatica-1.2.0-incubating.jar
calcite-core-1.2.0-incubating.jar
calcite-linq4j-1.2.0-incubating.jar
@@ -162,6 +162,7 @@ scala-parser-combinators_2.11-1.0.4.jar
scala-reflect-2.11.8.jar
scala-xml_2.11-1.0.2.jar
scalap-2.11.8.jar
+shapeless_2.11-2.0.0.jar
slf4j-api-1.7.16.jar
slf4j-log4j12-1.7.16.jar
snappy-0.2.jar
diff --git a/dev/deps/spark-deps-hadoop-2.7 b/dev/deps/spark-deps-hadoop-2.7
index b986a313a0..07583963d9 100644
--- a/dev/deps/spark-deps-hadoop-2.7
+++ b/dev/deps/spark-deps-hadoop-2.7
@@ -19,8 +19,8 @@ avro-mapred-1.7.7-hadoop2.jar
base64-2.3.8.jar
bcprov-jdk15on-1.51.jar
bonecp-0.8.0.RELEASE.jar
-breeze-macros_2.11-0.11.2.jar
-breeze_2.11-0.11.2.jar
+breeze-macros_2.11-0.12.jar
+breeze_2.11-0.12.jar
calcite-avatica-1.2.0-incubating.jar
calcite-core-1.2.0-incubating.jar
calcite-linq4j-1.2.0-incubating.jar
@@ -163,6 +163,7 @@ scala-parser-combinators_2.11-1.0.4.jar
scala-reflect-2.11.8.jar
scala-xml_2.11-1.0.2.jar
scalap-2.11.8.jar
+shapeless_2.11-2.0.0.jar
slf4j-api-1.7.16.jar
slf4j-log4j12-1.7.16.jar
snappy-0.2.jar
diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
index 91eee0e69d..7694773c81 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
@@ -424,11 +424,6 @@ class LogisticRegression @Since("1.2.0") (
throw new SparkException(msg)
}
- if (!state.actuallyConverged) {
- logWarning("LogisticRegression training finished but the result " +
- s"is not converged because: ${state.convergedReason.get.reason}")
- }
-
/*
The coefficients are trained in the scaled space; we're converting them back to
the original space.
diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala
index 700a92cc26..2b9912657f 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala
@@ -244,12 +244,6 @@ class AFTSurvivalRegression @Since("1.6.0") (@Since("1.6.0") override val uid: S
val msg = s"${optimizer.getClass.getName} failed."
throw new SparkException(msg)
}
-
- if (!state.actuallyConverged) {
- logWarning("AFTSurvivalRegression training finished but the result " +
- s"is not converged because: ${state.convergedReason.get.reason}")
- }
-
state.x.toArray.clone()
}
diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
index 0a155e1844..a0ff7f07aa 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
@@ -325,11 +325,6 @@ class LinearRegression @Since("1.3.0") (@Since("1.3.0") override val uid: String
throw new SparkException(msg)
}
- if (!state.actuallyConverged) {
- logWarning("LinearRegression training finished but the result " +
- s"is not converged because: ${state.convergedReason.get.reason}")
- }
-
/*
The coefficients are trained in the scaled space; we're converting them back to
the original space.
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala
index 9ebba1de0d..90d8a558f1 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala
@@ -784,7 +784,13 @@ class DistributedLDAModel private[clustering] (
@Since("1.5.0")
def topTopicsPerDocument(k: Int): RDD[(Long, Array[Int], Array[Double])] = {
graph.vertices.filter(LDA.isDocumentVertex).map { case (docID, topicCounts) =>
- val topIndices = argtopk(topicCounts, k)
+ // TODO: Remove work-around for the breeze bug.
+ // https://github.com/scalanlp/breeze/issues/561
+ val topIndices = if (k == topicCounts.length) {
+ Seq.range(0, k)
+ } else {
+ argtopk(topicCounts, k)
+ }
val sumCounts = sum(topicCounts)
val weights = if (sumCounts != 0) {
topicCounts(topIndices) / sumCounts
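
The added branch works around breeze issue #561 (linked in the TODO above), where argtopk misbehaves when k equals the vector length. A minimal sketch of the same guard, using a hypothetical safeArgtopk helper:

    import breeze.linalg.{DenseVector, argtopk}

    // Hypothetical helper mirroring the guard above: when k covers the
    // whole vector, return all indices directly instead of calling
    // argtopk (breeze issue #561).
    def safeArgtopk(v: DenseVector[Double], k: Int): Seq[Int] =
      if (k == v.length) Seq.range(0, k) else argtopk(v, k)

    // safeArgtopk(DenseVector(0.1, 0.7, 0.2), 2) == Seq(1, 2)  (top-2 indices)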
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala
index 2436efba32..e2c6aca553 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala
@@ -508,8 +508,9 @@ final class OnlineLDAOptimizer extends LDAOptimizer {
val weight = rho()
val N = gammat.rows.toDouble
val alpha = this.alpha.asBreeze.toDenseVector
- val logphat: BDM[Double] = sum(LDAUtils.dirichletExpectation(gammat)(::, breeze.linalg.*)) / N
- val gradf = N * (-LDAUtils.dirichletExpectation(alpha) + logphat.toDenseVector)
+ val logphat: BDV[Double] =
+ sum(LDAUtils.dirichletExpectation(gammat)(::, breeze.linalg.*)).t / N
+ val gradf = N * (-LDAUtils.dirichletExpectation(alpha) + logphat)
val c = N * trigamma(sum(alpha))
val q = -N * trigamma(alpha)
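
The BDM-to-BDV change reflects a breeze API difference: in 0.12, summing a matrix over a broadcast axis yields a transposed row vector rather than a 1-row matrix, so .t recovers an ordinary dense vector. A small sketch of the assumed behavior:

    import breeze.linalg._

    // Column sums via broadcasting: sum(m(::, *)) applies sum to each
    // column; in breeze 0.12 the result is a Transpose[DenseVector],
    // and .t turns it back into a plain DenseVector.
    val m = DenseMatrix((1.0, 2.0), (3.0, 4.0))
    val colSums: DenseVector[Double] = sum(m(::, *)).t
    // colSums == DenseVector(4.0, 6.0)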
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/optimization/LBFGS.scala b/mllib/src/main/scala/org/apache/spark/mllib/optimization/LBFGS.scala
index fd09f35277..e49363c2c6 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/optimization/LBFGS.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/optimization/LBFGS.scala
@@ -213,11 +213,6 @@ object LBFGS extends Logging {
}
lossHistory += state.value
- if (!state.actuallyConverged) {
- logWarning("LBFGS training finished but the result " +
- s"is not converged because: ${state.convergedReason.get.reason}")
- }
-
val weights = Vectors.fromBreeze(state.x)
val lossHistoryArray = lossHistory.result()
diff --git a/mllib/src/test/java/org/apache/spark/ml/feature/JavaPCASuite.java b/mllib/src/test/java/org/apache/spark/ml/feature/JavaPCASuite.java
index ac479c0841..8c0338e284 100644
--- a/mllib/src/test/java/org/apache/spark/ml/feature/JavaPCASuite.java
+++ b/mllib/src/test/java/org/apache/spark/ml/feature/JavaPCASuite.java
@@ -107,7 +107,11 @@ public class JavaPCASuite extends SharedSparkSession {
.fit(df);
List<Row> result = pca.transform(df).select("pca_features", "expected").toJavaRDD().collect();
for (Row r : result) {
- Assert.assertEquals(r.get(1), r.get(0));
+ Vector calculatedVector = (Vector) r.get(0);
+ Vector expectedVector = (Vector) r.get(1);
+ for (int i = 0; i < calculatedVector.size(); i++) {
+ Assert.assertEquals(calculatedVector.apply(i), expectedVector.apply(i), 1.0e-8);
+ }
}
}
}
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/clustering/LDASuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/clustering/LDASuite.scala
index eb050158d4..211e2bc026 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/clustering/LDASuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/clustering/LDASuite.scala
@@ -118,8 +118,8 @@ class LDASuite extends SparkFunSuite with MLlibTestSparkContext {
assert(weights.length == 2)
val bdvTopicDist = topicDistribution.asBreeze
val top2Indices = argtopk(bdvTopicDist, 2)
- assert(top2Indices.toArray === indices)
- assert(bdvTopicDist(top2Indices).toArray === weights)
+ assert(top2Indices.toSet === indices.toSet)
+ assert(bdvTopicDist(top2Indices).toArray.toSet === weights.toSet)
}
// Check: log probabilities
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/feature/PCASuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/feature/PCASuite.scala
index a8d82932d3..2f90afdcee 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/feature/PCASuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/feature/PCASuite.scala
@@ -18,9 +18,10 @@
package org.apache.spark.mllib.feature
import org.apache.spark.SparkFunSuite
-import org.apache.spark.mllib.linalg.Vectors
+import org.apache.spark.mllib.linalg.{Vector, Vectors}
import org.apache.spark.mllib.linalg.distributed.RowMatrix
import org.apache.spark.mllib.util.MLlibTestSparkContext
+import org.apache.spark.mllib.util.TestingUtils._
class PCASuite extends SparkFunSuite with MLlibTestSparkContext {
@@ -42,7 +43,9 @@ class PCASuite extends SparkFunSuite with MLlibTestSparkContext {
val pca_transform = pca.transform(dataRDD).collect()
val mat_multiply = mat.multiply(pc).rows.collect()
- assert(pca_transform.toSet === mat_multiply.toSet)
- assert(pca.explainedVariance === explainedVariance)
+ pca_transform.zip(mat_multiply).foreach { case (calculated, expected) =>
+ assert(calculated ~== expected relTol 1e-8)
+ }
+ assert(pca.explainedVariance ~== explainedVariance relTol 1e-8)
}
}
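
The ~== assertions come from Spark's test-only TestingUtils helpers, imported above; they compare vectors element-wise within a relative tolerance, which is robust to small numerical differences across breeze versions. A minimal usage sketch (runs on the mllib test classpath):

    import org.apache.spark.mllib.linalg.Vectors
    import org.apache.spark.mllib.util.TestingUtils._

    // Passes: every element agrees within a relative tolerance of 1e-8.
    val calculated = Vectors.dense(1.0 + 1e-10, 2.0)
    val expected = Vectors.dense(1.0, 2.0)
    assert(calculated ~== expected relTol 1e-8)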
diff --git a/pom.xml b/pom.xml
index 4c8671a570..d064cb57dd 100644
--- a/pom.xml
+++ b/pom.xml
@@ -657,7 +657,7 @@
<dependency>
<groupId>org.scalanlp</groupId>
<artifactId>breeze_${scala.binary.version}</artifactId>
- <version>0.11.2</version>
+ <version>0.12</version>
<exclusions>
<!-- This is included as a compile-scoped dependency by jtransforms, which is
a dependency of breeze. -->
diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py
index 3c4af90aca..613bc8cb3e 100644
--- a/python/pyspark/ml/classification.py
+++ b/python/pyspark/ml/classification.py
@@ -1299,7 +1299,7 @@ class OneVsRest(Estimator, OneVsRestParams, MLReadable, MLWritable):
>>> [x.coefficients for x in model.models]
[DenseVector([3.3925, 1.8785]), DenseVector([-4.3016, -6.3163]), DenseVector([-4.5855, 6.1785])]
>>> [x.intercept for x in model.models]
- [-3.6474708290602034, 2.5507881951814495, -1.1016513228162115]
+ [-3.64747..., 2.55078..., -1.10165...]
>>> test0 = sc.parallelize([Row(features=Vectors.dense(-1.0, 0.0))]).toDF()
>>> model.transform(test0).head().prediction
1.0