aboutsummaryrefslogtreecommitdiff
path: root/mllib
diff options
context:
space:
mode:
author Yanbo Liang <ybliang8@gmail.com> 2015-05-05 22:57:13 -0700
committer Xiangrui Meng <meng@databricks.com> 2015-05-05 22:57:13 -0700
commit 7b1457839bdac124a07fd6292f6263f0ded48880 (patch)
tree 5aee41b648e9b329c7e54c63d9913f894ce33924 /mllib
parent ba2b56614d7ab1bd7409b49e9d85c248d8faa48b (diff)
download spark-7b1457839bdac124a07fd6292f6263f0ded48880.tar.gz
spark-7b1457839bdac124a07fd6292f6263f0ded48880.tar.bz2
spark-7b1457839bdac124a07fd6292f6263f0ded48880.zip
[SPARK-6267] [MLLIB] Python API for IsotonicRegression
https://issues.apache.org/jira/browse/SPARK-6267 Author: Yanbo Liang <ybliang8@gmail.com> Author: Xiangrui Meng <meng@databricks.com> Closes #5890 from yanboliang/spark-6267 and squashes the following commits: f20541d [Yanbo Liang] Merge pull request #3 from mengxr/SPARK-6267 7f202f9 [Xiangrui Meng] use Vector to have the best Python 2&3 compatibility 4bccfee [Yanbo Liang] fix doctest ec09412 [Yanbo Liang] fix typos 8214bbb [Yanbo Liang] fix code style 5c8ebe5 [Yanbo Liang] Python API for IsotonicRegression
Diffstat (limited to 'mllib')
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala 18
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala 19
2 files changed, 35 insertions, 2 deletions
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
index b086cec083..426306d78c 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
@@ -283,6 +283,24 @@ private[python] class PythonMLLibAPI extends Serializable {
}
/**
+ * Java stub for Python mllib IsotonicRegression.run(). Returns [boundaryVector, predictionVector].
+ */
+ def trainIsotonicRegressionModel(
+ data: JavaRDD[Vector],
+ isotonic: Boolean): JList[Object] = {
+ val isotonicRegressionAlg = new IsotonicRegression().setIsotonic(isotonic)
+ val input = data.rdd.map { x =>
+ (x(0), x(1), x(2)) // the first three elements of each vector form the input triple
+ }.persist(StorageLevel.MEMORY_AND_DISK)
+ try {
+ val model = isotonicRegressionAlg.run(input)
+ List[AnyRef](model.boundaryVector, model.predictionVector).asJava
+ } finally {
+ input.unpersist(blocking = false) // release the RDD persisted above, not the caller's `data`
+ }
+ }
+
+ /**
* Java stub for Python mllib KMeans.run()
*/
def trainKMeansModel(
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala
index 1d7617046b..be2a00c2df 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala
@@ -21,18 +21,20 @@ import java.io.Serializable
import java.lang.{Double => JDouble}
import java.util.Arrays.binarySearch
+import scala.collection.JavaConverters._
import scala.collection.mutable.ArrayBuffer
import org.json4s._
import org.json4s.JsonDSL._
import org.json4s.jackson.JsonMethods._
+import org.apache.spark.SparkContext
import org.apache.spark.annotation.Experimental
import org.apache.spark.api.java.{JavaDoubleRDD, JavaRDD}
+import org.apache.spark.mllib.linalg.{Vector, Vectors}
import org.apache.spark.mllib.util.{Loader, Saveable}
import org.apache.spark.rdd.RDD
-import org.apache.spark.SparkContext
-import org.apache.spark.sql.{DataFrame, SQLContext}
+import org.apache.spark.sql.SQLContext
/**
* :: Experimental ::
@@ -57,6 +59,13 @@ class IsotonicRegressionModel (
assertOrdered(boundaries)
assertOrdered(predictions)(predictionOrd)
+ /** A Java-friendly constructor: converts both Iterables to arrays and delegates to the array-based constructor. */
+ def this(boundaries: java.lang.Iterable[Double],
+ predictions: java.lang.Iterable[Double],
+ isotonic: java.lang.Boolean) = {
+ this(boundaries.asScala.toArray, predictions.asScala.toArray, isotonic)
+ }
+
/** Asserts the input array is monotone with the given ordering. */
private def assertOrdered(xs: Array[Double])(implicit ord: Ordering[Double]): Unit = {
var i = 1
@@ -132,6 +141,12 @@ class IsotonicRegressionModel (
}
}
+ /** Returns the boundaries wrapped in a dense Vector; a convenience accessor for the Python API. */
+ private[mllib] def boundaryVector: Vector = Vectors.dense(boundaries)
+
+ /** Returns the predictions wrapped in a dense Vector; a convenience accessor for the Python API. */
+ private[mllib] def predictionVector: Vector = Vectors.dense(predictions)
+
override def save(sc: SparkContext, path: String): Unit = {
IsotonicRegressionModel.SaveLoadV1_0.save(sc, path, boundaries, predictions, isotonic)
}