author    Xiangrui Meng <meng@databricks.com>        2015-08-14 10:25:11 -0700
committer Joseph K. Bradley <joseph@databricks.com>  2015-08-14 10:25:11 -0700
commit    a0e1abbd010b9e73d472ce12ff1d987678005d32 (patch)
tree      c439c25caad95561001eacb76b08950051332ab4 /mllib
parent    c8677d73666850b37ff937520e538650632ce304 (diff)
[SPARK-9661] [MLLIB] minor clean-up of SPARK-9661

Some minor clean-ups after SPARK-9661. See my inline comments. MechCoder jkbradley

Author: Xiangrui Meng <meng@databricks.com>

Closes #8190 from mengxr/SPARK-9661-fix.
Diffstat (limited to 'mllib')
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala   |  5
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/stat/Statistics.scala       |  6
-rw-r--r--  mllib/src/test/java/org/apache/spark/mllib/clustering/JavaLDASuite.java | 40
-rw-r--r--  mllib/src/test/scala/org/apache/spark/mllib/clustering/LDASuite.scala   |  2

4 files changed, 28 insertions(+), 25 deletions(-)
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala
index f31949f13a..82f05e4a18 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala
@@ -674,10 +674,9 @@ class DistributedLDAModel private[clustering] (
}
/** Java-friendly version of [[topTopicsPerDocument]] */
- def javaTopTopicsPerDocument(
- k: Int): JavaRDD[(java.lang.Long, Array[Int], Array[java.lang.Double])] = {
+ def javaTopTopicsPerDocument(k: Int): JavaRDD[(java.lang.Long, Array[Int], Array[Double])] = {
val topics = topTopicsPerDocument(k)
- topics.asInstanceOf[RDD[(java.lang.Long, Array[Int], Array[java.lang.Double])]].toJavaRDD()
+ topics.asInstanceOf[RDD[(java.lang.Long, Array[Int], Array[Double])]].toJavaRDD()
}
// TODO:
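The hunk above drops a needless element-level boxing: the document ID must stay boxed (java.lang.Long) because Java generics cannot hold primitives, but Array[Double] already erases to a primitive double[] on the JVM, so converting it to Array[java.lang.Double] only forced Java callers through a boxed array. A minimal Scala sketch of the wrapper pattern, with a hypothetical javaTopTopics standing in for DistributedLDAModel.javaTopTopicsPerDocument:

import org.apache.spark.api.java.JavaRDD
import org.apache.spark.rdd.RDD

object JavaFriendlyWrapperSketch {
  // Boxes only what Java generics require (the Long key); the primitive
  // arrays pass through untouched. The asInstanceOf cast is safe because
  // scala.Long and java.lang.Long share one boxed representation at runtime.
  def javaTopTopics(
      topics: RDD[(Long, Array[Int], Array[Double])])
    : JavaRDD[(java.lang.Long, Array[Int], Array[Double])] = {
    topics.asInstanceOf[RDD[(java.lang.Long, Array[Int], Array[Double])]].toJavaRDD()
  }
}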
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/Statistics.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/Statistics.scala
index 24fe48cb8f..ef8d786070 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/stat/Statistics.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/stat/Statistics.scala
@@ -221,9 +221,7 @@ object Statistics {
def kolmogorovSmirnovTest(
data: JavaDoubleRDD,
distName: String,
- params: java.lang.Double*): KolmogorovSmirnovTestResult = {
- val javaParams = params.asInstanceOf[Seq[Double]]
- KolmogorovSmirnovTest.testOneSample(data.rdd.asInstanceOf[RDD[Double]],
- distName, javaParams: _*)
+ params: Double*): KolmogorovSmirnovTestResult = {
+ kolmogorovSmirnovTest(data.rdd.asInstanceOf[RDD[Double]], distName, params: _*)
}
}
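The rewrite above replaces a hand-rolled cast-and-call with a straight delegation to the Scala overload, so the two entry points cannot drift apart. A minimal sketch of the pattern, with ksTest as a hypothetical stand-in for Statistics.kolmogorovSmirnovTest:

import org.apache.spark.api.java.JavaDoubleRDD
import org.apache.spark.rdd.RDD

object KSTestDelegationSketch {
  // Stand-in for the Scala overload; the real method runs a one-sample
  // Kolmogorov-Smirnov test and returns a test result.
  def ksTest(data: RDD[Double], distName: String, params: Double*): Unit = ()

  // Java-friendly overload: JavaDoubleRDD wraps an RDD of boxed doubles,
  // and the cast works because boxed and primitive doubles coincide at
  // runtime inside the RDD, exactly as in the hunk above.
  def ksTest(data: JavaDoubleRDD, distName: String, params: Double*): Unit =
    ksTest(data.rdd.asInstanceOf[RDD[Double]], distName, params: _*)
}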
diff --git a/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaLDASuite.java b/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaLDASuite.java
index 427be9430d..6e91cde2ea 100644
--- a/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaLDASuite.java
+++ b/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaLDASuite.java
@@ -22,12 +22,14 @@ import java.util.ArrayList;
import java.util.Arrays;
import scala.Tuple2;
+import scala.Tuple3;
import org.junit.After;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertArrayEquals;
import org.junit.Before;
import org.junit.Test;
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.JavaPairRDD;
@@ -44,9 +46,9 @@ public class JavaLDASuite implements Serializable {
public void setUp() {
sc = new JavaSparkContext("local", "JavaLDA");
ArrayList<Tuple2<Long, Vector>> tinyCorpus = new ArrayList<Tuple2<Long, Vector>>();
- for (int i = 0; i < LDASuite$.MODULE$.tinyCorpus().length; i++) {
- tinyCorpus.add(new Tuple2<Long, Vector>((Long)LDASuite$.MODULE$.tinyCorpus()[i]._1(),
- LDASuite$.MODULE$.tinyCorpus()[i]._2()));
+ for (int i = 0; i < LDASuite.tinyCorpus().length; i++) {
+ tinyCorpus.add(new Tuple2<Long, Vector>((Long)LDASuite.tinyCorpus()[i]._1(),
+ LDASuite.tinyCorpus()[i]._2()));
}
JavaRDD<Tuple2<Long, Vector>> tmpCorpus = sc.parallelize(tinyCorpus, 2);
corpus = JavaPairRDD.fromJavaRDD(tmpCorpus);
@@ -60,7 +62,7 @@ public class JavaLDASuite implements Serializable {
@Test
public void localLDAModel() {
- Matrix topics = LDASuite$.MODULE$.tinyTopics();
+ Matrix topics = LDASuite.tinyTopics();
double[] topicConcentration = new double[topics.numRows()];
Arrays.fill(topicConcentration, 1.0D / topics.numRows());
LocalLDAModel model = new LocalLDAModel(topics, Vectors.dense(topicConcentration), 1D, 100D);
@@ -110,8 +112,8 @@ public class JavaLDASuite implements Serializable {
assertEquals(roundedLocalTopicSummary.length, k);
// Check: log probabilities
- assert(model.logLikelihood() < 0.0);
- assert(model.logPrior() < 0.0);
+ assertTrue(model.logLikelihood() < 0.0);
+ assertTrue(model.logPrior() < 0.0);
// Check: topic distributions
JavaPairRDD<Long, Vector> topicDistributions = model.javaTopicDistributions();
@@ -126,8 +128,12 @@ public class JavaLDASuite implements Serializable {
assertEquals(topicDistributions.count(), nonEmptyCorpus.count());
// Check: javaTopTopicsPerDocuments
- JavaRDD<scala.Tuple3<java.lang.Long, int[], java.lang.Double[]>> topTopics =
- model.javaTopTopicsPerDocument(3);
+ Tuple3<Long, int[], double[]> topTopics = model.javaTopTopicsPerDocument(3).first();
+ Long docId = topTopics._1(); // confirm doc ID type
+ int[] topicIndices = topTopics._2();
+ double[] topicWeights = topTopics._3();
+ assertEquals(3, topicIndices.length);
+ assertEquals(3, topicWeights.length);
}
@Test
@@ -177,18 +183,18 @@ public class JavaLDASuite implements Serializable {
// check: logLikelihood.
ArrayList<Tuple2<Long, Vector>> docsSingleWord = new ArrayList<Tuple2<Long, Vector>>();
- docsSingleWord.add(new Tuple2<Long, Vector>(Long.valueOf(0), Vectors.dense(1.0, 0.0, 0.0)));
+ docsSingleWord.add(new Tuple2<Long, Vector>(0L, Vectors.dense(1.0, 0.0, 0.0)));
JavaPairRDD<Long, Vector> single = JavaPairRDD.fromJavaRDD(sc.parallelize(docsSingleWord));
double logLikelihood = toyModel.logLikelihood(single);
}
- private static int tinyK = LDASuite$.MODULE$.tinyK();
- private static int tinyVocabSize = LDASuite$.MODULE$.tinyVocabSize();
- private static Matrix tinyTopics = LDASuite$.MODULE$.tinyTopics();
+ private static int tinyK = LDASuite.tinyK();
+ private static int tinyVocabSize = LDASuite.tinyVocabSize();
+ private static Matrix tinyTopics = LDASuite.tinyTopics();
private static Tuple2<int[], double[]>[] tinyTopicDescription =
- LDASuite$.MODULE$.tinyTopicDescription();
+ LDASuite.tinyTopicDescription();
private JavaPairRDD<Long, Vector> corpus;
- private LocalLDAModel toyModel = LDASuite$.MODULE$.toyModel();
- private ArrayList<Tuple2<Long, Vector>> toyData = LDASuite$.MODULE$.javaToyData();
+ private LocalLDAModel toyModel = LDASuite.toyModel();
+ private ArrayList<Tuple2<Long, Vector>> toyData = LDASuite.javaToyData();
}
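The LDASuite$.MODULE$ cleanups above lean on the static forwarders the Scala compiler generates: for an object with no conflicting companion class, each method also appears as a static method on a class of the same name, so Java tests can write LDASuite.tinyK() instead of reaching through the singleton instance. (The assert-to-assertTrue switch is a real fix as well: Java's assert statement is silently skipped unless the JVM runs with -ea, whereas JUnit's assertTrue always evaluates.) A minimal sketch of the forwarder mechanics, using a hypothetical TinySuite object:

object TinySuite {
  // Compiles to a singleton class TinySuite$ with a MODULE$ instance, plus
  // a static forwarder tinyK() on the mirror class TinySuite.
  def tinyK(): Int = 3
}

// Both Java call forms below resolve to the same singleton method:
//   TinySuite$.MODULE$.tinyK();  // explicit singleton access
//   TinySuite.tinyK();           // generated static forwarder, cleaner in Java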
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/clustering/LDASuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/clustering/LDASuite.scala
index 926185e90b..99e28499fd 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/clustering/LDASuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/clustering/LDASuite.scala
@@ -581,7 +581,7 @@ private[clustering] object LDASuite {
def javaToyData: JArrayList[(java.lang.Long, Vector)] = {
val javaData = new JArrayList[(java.lang.Long, Vector)]
var i = 0
- while (i < toyData.size) {
+ while (i < toyData.length) {
javaData.add((toyData(i)._1, toyData(i)._2))
i += 1
}
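One note on this final hunk: on an Array, length is a direct read of the JVM array's length field, while size is only reachable through the implicit ArrayOps conversion, which can allocate a wrapper object on each call in the Scala versions of this era, so length is the better choice inside a hot while loop. A minimal sketch of the loop shape, with a hypothetical toyData array:

object ArrayLengthSketch {
  def countDocs(toyData: Array[(Long, Double)]): Int = {
    var i = 0
    while (i < toyData.length) {  // direct field read; no ArrayOps wrapper
      i += 1
    }
    i
  }
}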