Diffstat (limited to 'mllib')
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala  33
1 file changed, 33 insertions(+), 0 deletions(-)
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
index e628059c4a..c58a64001d 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
@@ -503,6 +503,39 @@ private[python] class PythonMLLibAPI extends Serializable {
   }
 
   /**
+   * Java stub for Python mllib LDA.run()
+   */
+  def trainLDAModel(
+      data: JavaRDD[java.util.List[Any]],
+      k: Int,
+      maxIterations: Int,
+      docConcentration: Double,
+      topicConcentration: Double,
+      seed: java.lang.Long,
+      checkpointInterval: Int,
+      optimizer: String): LDAModel = {
+    val algo = new LDA()
+      .setK(k)
+      .setMaxIterations(maxIterations)
+      .setDocConcentration(docConcentration)
+      .setTopicConcentration(topicConcentration)
+      .setCheckpointInterval(checkpointInterval)
+      .setOptimizer(optimizer)
+
+    if (seed != null) algo.setSeed(seed)
+
+    val documents = data.rdd.map(_.asScala.toArray).map { r =>
+      r(0) match {
+        case i: java.lang.Integer => (i.toLong, r(1).asInstanceOf[Vector])
+        case i: java.lang.Long => (i.toLong, r(1).asInstanceOf[Vector])
+        case _ => throw new IllegalArgumentException("document ID must be an Integer or a Long.")
+      }
+    }
+    algo.run(documents)
+  }
+
+
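For context (not part of the patch), the sketch below shows the input shape the new stub expects: each RDD element is a two-item java.util.List holding a document ID (an Integer or a Long, matching the two cases above) followed by a term-count Vector. The SparkContext `sc` and all values are assumptions for illustration, and since PythonMLLibAPI is private[python], the direct call compiles only from inside that package.

  // Minimal sketch, assuming a live SparkContext `sc`.
  import java.util.Arrays
  import org.apache.spark.mllib.linalg.Vectors

  // Each element mirrors what PySpark sends over: [document ID, term counts].
  val docs = sc.parallelize(Seq(
    Arrays.asList[Any](0L, Vectors.dense(1.0, 2.0, 6.0)),
    Arrays.asList[Any](1L, Vectors.dense(1.0, 3.0, 0.0))
  )).toJavaRDD()

  val model = new PythonMLLibAPI().trainLDAModel(
    docs,
    k = 2,
    maxIterations = 10,
    docConcentration = -1.0,   // -1.0 keeps MLlib's default Dirichlet prior
    topicConcentration = -1.0,
    seed = null,               // null leaves the seed unset, as in the stub
    checkpointInterval = 10,
    optimizer = "em")

Matching on both Integer and Long lets the stub accept document IDs in whichever boxed type deserialization happens to produce.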
+  /**
    * Java stub for Python mllib FPGrowth.train(). This stub returns a handle
    * to the Java object instead of the content of the Java object. Extra care
    * needs to be taken in the Python code to ensure it gets freed on exit; see