aboutsummaryrefslogtreecommitdiff
path: root/mllib
diff options
context:
space:
mode:
Diffstat (limited to 'mllib')
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala | 2
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala | 13
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/util/MFDataGenerator.scala | 3
3 files changed, 13 insertions, 5 deletions
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
index 088ec6a0c0..93cf16e6f0 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
@@ -357,7 +357,7 @@ private[python] class PythonMLLibAPI extends Serializable {
val kMeansAlg = new KMeans()
.setK(k)
.setMaxIterations(maxIterations)
- .setRuns(runs)
+ .internalSetRuns(runs)
.setInitializationMode(initializationMode)
.setInitializationSteps(initializationSteps)
.setEpsilon(epsilon)
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala
index 901164a391..67de62bc2e 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala
@@ -119,9 +119,18 @@ class KMeans private (
@Since("0.8.0")
@deprecated("Support for runs is deprecated. This param will have no effect in 2.0.0.", "1.6.0")
def setRuns(runs: Int): this.type = {
+ internalSetRuns(runs)
+ }
+
+ // Internal version of setRuns for the Python API; remove it at the same time as setRuns.
+ // It exists so that internal callers avoid deprecation warnings in our build.
+ private[mllib] def internalSetRuns(runs: Int): this.type = {
if (runs <= 0) {
throw new IllegalArgumentException("Number of runs must be positive")
}
+ if (runs != 1) {
+ logWarning("Setting number of runs is deprecated and will have no effect in 2.0.0")
+ }
this.runs = runs
this
}
@@ -502,7 +511,7 @@ object KMeans {
seed: Long): KMeansModel = {
new KMeans().setK(k)
.setMaxIterations(maxIterations)
- .setRuns(runs)
+ .internalSetRuns(runs)
.setInitializationMode(initializationMode)
.setSeed(seed)
.run(data)
@@ -528,7 +537,7 @@ object KMeans {
initializationMode: String): KMeansModel = {
new KMeans().setK(k)
.setMaxIterations(maxIterations)
- .setRuns(runs)
+ .internalSetRuns(runs)
.setInitializationMode(initializationMode)
.run(data)
}
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/MFDataGenerator.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/MFDataGenerator.scala
index 8af6750da4..898a09e516 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/util/MFDataGenerator.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/util/MFDataGenerator.scala
@@ -105,8 +105,7 @@ object MFDataGenerator {
// optionally generate testing data
if (test) {
- val testSampSize = math.min(
- math.round(sampSize * testSampFact), math.round(mn - sampSize)).toInt
+ val testSampSize = math.min(math.round(sampSize * testSampFact).toInt, mn - sampSize)
val testOmega = shuffled.slice(sampSize, sampSize + testSampSize)
val testOrdered = testOmega.sortWith(_ < _).toArray
val testData: RDD[(Int, Int, Double)] = sc.parallelize(testOrdered)