[SPARK-13201][SPARK-13200] Deprecation warning cleanups: KMeans & MFDataGenerator

KMeans: Make a private non-deprecated version of setRuns API so that we can call it from the PythonAPI without deprecation warnings in our own build. Also use it internally when being called from train. Add a logWarning for non-1 values MFDataGenerator: Apparently we are calling round on an integer which now in Scala 2.11 results in a warning (it didn't make any sense before either). Figure out if this is a mistake we can just remove or if we got the types wrong somewhere. I put these two together since they are both deprecation fixes in MLlib and pretty small, but I can split them up if we would prefer it that way. Author: Holden Karau <holden@us.ibm.com> Closes #11112 from holdenk/SPARK-13201-non-deprecated-setRuns-SPARK-mathround-integer.
author: Holden Karau <holden@us.ibm.com> 2016-02-09 08:47:28 +0000
committer: Sean Owen <sowen@cloudera.com> 2016-02-09 08:47:28 +0000
commit: ce83fe9756582e73ada21c3741d15aa9bbf385ed (patch)
tree: f218e7da47b4b2fb95307e50f4eac00f02a2caa4
parent: 159198eff67ee9ead08fba60a585494ea1575147 (diff)
download: spark-ce83fe9756582e73ada21c3741d15aa9bbf385ed.tar.gz
spark-ce83fe9756582e73ada21c3741d15aa9bbf385ed.tar.bz2
spark-ce83fe9756582e73ada21c3741d15aa9bbf385ed.zip
3 files changed, 13 insertions, 5 deletions
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
index 088ec6a0c0..93cf16e6f0 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
@@ -357,7 +357,7 @@ private[python] class PythonMLLibAPI extends Serializable {
     val kMeansAlg = new KMeans()
       .setK(k)
       .setMaxIterations(maxIterations)
-      .setRuns(runs)
+      .internalSetRuns(runs)
       .setInitializationMode(initializationMode)
       .setInitializationSteps(initializationSteps)
       .setEpsilon(epsilon)
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala
index 901164a391..67de62bc2e 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala
@@ -119,9 +119,18 @@ class KMeans private (
   @Since("0.8.0")
   @deprecated("Support for runs is deprecated. This param will have no effect in 2.0.0.", "1.6.0")
   def setRuns(runs: Int): this.type = {
+    internalSetRuns(runs)
+  }
+
+  // Internal version of setRuns for Python API, this should be removed at the same time as setRuns
+  // this is done to avoid deprecation warnings in our build.
+  private[mllib] def internalSetRuns(runs: Int): this.type = {
     if (runs <= 0) {
       throw new IllegalArgumentException("Number of runs must be positive")
     }
+    if (runs != 1) {
+      logWarning("Setting number of runs is deprecated and will have no effect in 2.0.0")
+    }
     this.runs = runs
     this
   }
@@ -502,7 +511,7 @@ object KMeans {
       seed: Long): KMeansModel = {
     new KMeans().setK(k)
       .setMaxIterations(maxIterations)
-      .setRuns(runs)
+      .internalSetRuns(runs)
       .setInitializationMode(initializationMode)
       .setSeed(seed)
       .run(data)
@@ -528,7 +537,7 @@ object KMeans {
       initializationMode: String): KMeansModel = {
     new KMeans().setK(k)
       .setMaxIterations(maxIterations)
-      .setRuns(runs)
+      .internalSetRuns(runs)
       .setInitializationMode(initializationMode)
       .run(data)
   }
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/MFDataGenerator.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/MFDataGenerator.scala
index 8af6750da4..898a09e516 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/util/MFDataGenerator.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/util/MFDataGenerator.scala
@@ -105,8 +105,7 @@ object MFDataGenerator {
 
     // optionally generate testing data
     if (test) {
-      val testSampSize = math.min(
-        math.round(sampSize * testSampFact), math.round(mn - sampSize)).toInt
+      val testSampSize = math.min(math.round(sampSize * testSampFact).toInt, mn - sampSize)
       val testOmega = shuffled.slice(sampSize, sampSize + testSampSize)
       val testOrdered = testOmega.sortWith(_ < _).toArray
       val testData: RDD[(Int, Int, Double)] = sc.parallelize(testOrdered)
author	Holden Karau <holden@us.ibm.com>	2016-02-09 08:47:28 +0000
committer	Sean Owen <sowen@cloudera.com>	2016-02-09 08:47:28 +0000
commit	ce83fe9756582e73ada21c3741d15aa9bbf385ed (patch)
tree	f218e7da47b4b2fb95307e50f4eac00f02a2caa4
parent	159198eff67ee9ead08fba60a585494ea1575147 (diff)
download	spark-ce83fe9756582e73ada21c3741d15aa9bbf385ed.tar.gz spark-ce83fe9756582e73ada21c3741d15aa9bbf385ed.tar.bz2 spark-ce83fe9756582e73ada21c3741d15aa9bbf385ed.zip