From 1487c9af20a333ead55955acf4c0aa323bea0d07 Mon Sep 17 00:00:00 2001 From: Sean Owen Date: Sun, 19 Feb 2017 09:42:50 -0800 Subject: [SPARK-19534][TESTS] Convert Java tests to use lambdas, Java 8 features ## What changes were proposed in this pull request? Convert tests to use Java 8 lambdas, and modest related fixes to surrounding code. ## How was this patch tested? Jenkins tests Author: Sean Owen Closes #16964 from srowen/SPARK-19534. --- .../org/apache/spark/ml/feature/JavaPCASuite.java | 35 ++++++------------- .../mllib/classification/JavaNaiveBayesSuite.java | 10 ++---- .../mllib/clustering/JavaBisectingKMeansSuite.java | 4 +-- .../spark/mllib/clustering/JavaLDASuite.java | 40 ++++++++++------------ .../spark/mllib/fpm/JavaAssociationRulesSuite.java | 6 ++-- .../regression/JavaLinearRegressionSuite.java | 11 ++---- .../spark/mllib/tree/JavaDecisionTreeSuite.java | 15 +++----- 7 files changed, 42 insertions(+), 79 deletions(-) (limited to 'mllib') diff --git a/mllib/src/test/java/org/apache/spark/ml/feature/JavaPCASuite.java b/mllib/src/test/java/org/apache/spark/ml/feature/JavaPCASuite.java index 8c0338e284..683ceffeae 100644 --- a/mllib/src/test/java/org/apache/spark/ml/feature/JavaPCASuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/feature/JavaPCASuite.java @@ -21,16 +21,14 @@ import java.io.Serializable; import java.util.Arrays; import java.util.List; -import scala.Tuple2; - import org.junit.Assert; import org.junit.Test; import org.apache.spark.SharedSparkSession; import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.api.java.function.Function; import org.apache.spark.ml.linalg.Vector; import org.apache.spark.ml.linalg.Vectors; +import org.apache.spark.mllib.linalg.DenseVector; import org.apache.spark.mllib.linalg.Matrix; import org.apache.spark.mllib.linalg.distributed.RowMatrix; import org.apache.spark.sql.Dataset; @@ -69,35 +67,22 @@ public class JavaPCASuite extends SharedSparkSession { JavaRDD dataRDD = jsc.parallelize(points, 2); RowMatrix mat = new RowMatrix(dataRDD.map( - new Function() { - public org.apache.spark.mllib.linalg.Vector call(Vector vector) { - return new org.apache.spark.mllib.linalg.DenseVector(vector.toArray()); - } - } + (Vector vector) -> (org.apache.spark.mllib.linalg.Vector) new DenseVector(vector.toArray()) ).rdd()); Matrix pc = mat.computePrincipalComponents(3); mat.multiply(pc).rows().toJavaRDD(); - JavaRDD expected = mat.multiply(pc).rows().toJavaRDD().map( - new Function() { - public Vector call(org.apache.spark.mllib.linalg.Vector vector) { - return vector.asML(); - } - } - ); + JavaRDD expected = mat.multiply(pc).rows().toJavaRDD() + .map(org.apache.spark.mllib.linalg.Vector::asML); - JavaRDD featuresExpected = dataRDD.zip(expected).map( - new Function, VectorPair>() { - public VectorPair call(Tuple2 pair) { - VectorPair featuresExpected = new VectorPair(); - featuresExpected.setFeatures(pair._1()); - featuresExpected.setExpected(pair._2()); - return featuresExpected; - } - } - ); + JavaRDD featuresExpected = dataRDD.zip(expected).map(pair -> { + VectorPair featuresExpected1 = new VectorPair(); + featuresExpected1.setFeatures(pair._1()); + featuresExpected1.setExpected(pair._2()); + return featuresExpected1; + }); Dataset df = spark.createDataFrame(featuresExpected, VectorPair.class); PCAModel pca = new PCA() diff --git a/mllib/src/test/java/org/apache/spark/mllib/classification/JavaNaiveBayesSuite.java b/mllib/src/test/java/org/apache/spark/mllib/classification/JavaNaiveBayesSuite.java index 6ded42e928..65db3d014f 100644 --- a/mllib/src/test/java/org/apache/spark/mllib/classification/JavaNaiveBayesSuite.java +++ b/mllib/src/test/java/org/apache/spark/mllib/classification/JavaNaiveBayesSuite.java @@ -25,7 +25,6 @@ import org.junit.Test; import org.apache.spark.SharedSparkSession; import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.api.java.function.Function; import org.apache.spark.mllib.linalg.Vector; import org.apache.spark.mllib.linalg.Vectors; import org.apache.spark.mllib.regression.LabeledPoint; @@ -42,7 +41,7 @@ public class JavaNaiveBayesSuite extends SharedSparkSession { new LabeledPoint(2, Vectors.dense(0.0, 0.0, 2.0)) ); - private int validatePrediction(List points, NaiveBayesModel model) { + private static int validatePrediction(List points, NaiveBayesModel model) { int correct = 0; for (LabeledPoint p : points) { if (model.predict(p.features()) == p.label()) { @@ -80,12 +79,7 @@ public class JavaNaiveBayesSuite extends SharedSparkSession { public void testPredictJavaRDD() { JavaRDD examples = jsc.parallelize(POINTS, 2).cache(); NaiveBayesModel model = NaiveBayes.train(examples.rdd()); - JavaRDD vectors = examples.map(new Function() { - @Override - public Vector call(LabeledPoint v) throws Exception { - return v.features(); - } - }); + JavaRDD vectors = examples.map(LabeledPoint::features); JavaRDD predictions = model.predict(vectors); // Should be able to get the first prediction. predictions.first(); diff --git a/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaBisectingKMeansSuite.java b/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaBisectingKMeansSuite.java index 3d62b273d2..b4196c6ecd 100644 --- a/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaBisectingKMeansSuite.java +++ b/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaBisectingKMeansSuite.java @@ -17,7 +17,7 @@ package org.apache.spark.mllib.clustering; -import com.google.common.collect.Lists; +import java.util.Arrays; import org.junit.Assert; import org.junit.Test; @@ -31,7 +31,7 @@ public class JavaBisectingKMeansSuite extends SharedSparkSession { @Test public void twoDimensionalData() { - JavaRDD points = jsc.parallelize(Lists.newArrayList( + JavaRDD points = jsc.parallelize(Arrays.asList( Vectors.dense(4, -1), Vectors.dense(4, 1), Vectors.sparse(2, new int[]{0}, new double[]{1.0}) diff --git a/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaLDASuite.java b/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaLDASuite.java index 08d6713ab2..38ee2507f2 100644 --- a/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaLDASuite.java +++ b/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaLDASuite.java @@ -20,6 +20,7 @@ package org.apache.spark.mllib.clustering; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; +import java.util.List; import scala.Tuple2; import scala.Tuple3; @@ -30,7 +31,6 @@ import static org.junit.Assert.*; import org.apache.spark.SharedSparkSession; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.api.java.function.Function; import org.apache.spark.mllib.linalg.Matrix; import org.apache.spark.mllib.linalg.Vector; import org.apache.spark.mllib.linalg.Vectors; @@ -39,7 +39,7 @@ public class JavaLDASuite extends SharedSparkSession { @Override public void setUp() throws IOException { super.setUp(); - ArrayList> tinyCorpus = new ArrayList<>(); + List> tinyCorpus = new ArrayList<>(); for (int i = 0; i < LDASuite.tinyCorpus().length; i++) { tinyCorpus.add(new Tuple2<>((Long) LDASuite.tinyCorpus()[i]._1(), LDASuite.tinyCorpus()[i]._2())); @@ -53,7 +53,7 @@ public class JavaLDASuite extends SharedSparkSession { Matrix topics = LDASuite.tinyTopics(); double[] topicConcentration = new double[topics.numRows()]; Arrays.fill(topicConcentration, 1.0D / topics.numRows()); - LocalLDAModel model = new LocalLDAModel(topics, Vectors.dense(topicConcentration), 1D, 100D); + LocalLDAModel model = new LocalLDAModel(topics, Vectors.dense(topicConcentration), 1.0, 100.0); // Check: basic parameters assertEquals(model.k(), tinyK); @@ -87,17 +87,17 @@ public class JavaLDASuite extends SharedSparkSession { // Check: basic parameters LocalLDAModel localModel = model.toLocal(); - assertEquals(model.k(), k); - assertEquals(localModel.k(), k); - assertEquals(model.vocabSize(), tinyVocabSize); - assertEquals(localModel.vocabSize(), tinyVocabSize); - assertEquals(model.topicsMatrix(), localModel.topicsMatrix()); + assertEquals(k, model.k()); + assertEquals(k, localModel.k()); + assertEquals(tinyVocabSize, model.vocabSize()); + assertEquals(tinyVocabSize, localModel.vocabSize()); + assertEquals(localModel.topicsMatrix(), model.topicsMatrix()); // Check: topic summaries Tuple2[] roundedTopicSummary = model.describeTopics(); - assertEquals(roundedTopicSummary.length, k); + assertEquals(k, roundedTopicSummary.length); Tuple2[] roundedLocalTopicSummary = localModel.describeTopics(); - assertEquals(roundedLocalTopicSummary.length, k); + assertEquals(k, roundedLocalTopicSummary.length); // Check: log probabilities assertTrue(model.logLikelihood() < 0.0); @@ -107,12 +107,8 @@ public class JavaLDASuite extends SharedSparkSession { JavaPairRDD topicDistributions = model.javaTopicDistributions(); // SPARK-5562. since the topicDistribution returns the distribution of the non empty docs // over topics. Compare it against nonEmptyCorpus instead of corpus - JavaPairRDD nonEmptyCorpus = corpus.filter( - new Function, Boolean>() { - public Boolean call(Tuple2 tuple2) { - return Vectors.norm(tuple2._2(), 1.0) != 0.0; - } - }); + JavaPairRDD nonEmptyCorpus = + corpus.filter(tuple2 -> Vectors.norm(tuple2._2(), 1.0) != 0.0); assertEquals(topicDistributions.count(), nonEmptyCorpus.count()); // Check: javaTopTopicsPerDocuments @@ -155,14 +151,14 @@ public class JavaLDASuite extends SharedSparkSession { LDAModel model = lda.run(corpus); // Check: basic parameters - assertEquals(model.k(), k); - assertEquals(model.vocabSize(), tinyVocabSize); + assertEquals(k, model.k()); + assertEquals(tinyVocabSize, model.vocabSize()); // Check: topic summaries Tuple2[] roundedTopicSummary = model.describeTopics(); - assertEquals(roundedTopicSummary.length, k); + assertEquals(k, roundedTopicSummary.length); Tuple2[] roundedLocalTopicSummary = model.describeTopics(); - assertEquals(roundedLocalTopicSummary.length, k); + assertEquals(k, roundedLocalTopicSummary.length); } @Test @@ -177,7 +173,7 @@ public class JavaLDASuite extends SharedSparkSession { double logPerplexity = toyModel.logPerplexity(pairedDocs); // check: logLikelihood. - ArrayList> docsSingleWord = new ArrayList<>(); + List> docsSingleWord = new ArrayList<>(); docsSingleWord.add(new Tuple2<>(0L, Vectors.dense(1.0, 0.0, 0.0))); JavaPairRDD single = JavaPairRDD.fromJavaRDD(jsc.parallelize(docsSingleWord)); double logLikelihood = toyModel.logLikelihood(single); @@ -190,6 +186,6 @@ public class JavaLDASuite extends SharedSparkSession { LDASuite.tinyTopicDescription(); private JavaPairRDD corpus; private LocalLDAModel toyModel = LDASuite.toyModel(); - private ArrayList> toyData = LDASuite.javaToyData(); + private List> toyData = LDASuite.javaToyData(); } diff --git a/mllib/src/test/java/org/apache/spark/mllib/fpm/JavaAssociationRulesSuite.java b/mllib/src/test/java/org/apache/spark/mllib/fpm/JavaAssociationRulesSuite.java index 3451e07737..15de566c88 100644 --- a/mllib/src/test/java/org/apache/spark/mllib/fpm/JavaAssociationRulesSuite.java +++ b/mllib/src/test/java/org/apache/spark/mllib/fpm/JavaAssociationRulesSuite.java @@ -31,9 +31,9 @@ public class JavaAssociationRulesSuite extends SharedSparkSession { @SuppressWarnings("unchecked") JavaRDD> freqItemsets = jsc.parallelize(Arrays.asList( - new FreqItemset(new String[]{"a"}, 15L), - new FreqItemset(new String[]{"b"}, 35L), - new FreqItemset(new String[]{"a", "b"}, 12L) + new FreqItemset<>(new String[]{"a"}, 15L), + new FreqItemset<>(new String[]{"b"}, 35L), + new FreqItemset<>(new String[]{"a", "b"}, 12L) )); JavaRDD> results = (new AssociationRules()).run(freqItemsets); diff --git a/mllib/src/test/java/org/apache/spark/mllib/regression/JavaLinearRegressionSuite.java b/mllib/src/test/java/org/apache/spark/mllib/regression/JavaLinearRegressionSuite.java index a46b1321b3..86c723aa00 100644 --- a/mllib/src/test/java/org/apache/spark/mllib/regression/JavaLinearRegressionSuite.java +++ b/mllib/src/test/java/org/apache/spark/mllib/regression/JavaLinearRegressionSuite.java @@ -24,13 +24,13 @@ import org.junit.Test; import org.apache.spark.SharedSparkSession; import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.api.java.function.Function; import org.apache.spark.mllib.linalg.Vector; import org.apache.spark.mllib.util.LinearDataGenerator; public class JavaLinearRegressionSuite extends SharedSparkSession { - int validatePrediction(List validationData, LinearRegressionModel model) { + private static int validatePrediction( + List validationData, LinearRegressionModel model) { int numAccurate = 0; for (LabeledPoint point : validationData) { Double prediction = model.predict(point.features()); @@ -87,12 +87,7 @@ public class JavaLinearRegressionSuite extends SharedSparkSession { LinearDataGenerator.generateLinearInputAsList(A, weights, nPoints, 42, 0.1), 2).cache(); LinearRegressionWithSGD linSGDImpl = new LinearRegressionWithSGD(); LinearRegressionModel model = linSGDImpl.run(testRDD.rdd()); - JavaRDD vectors = testRDD.map(new Function() { - @Override - public Vector call(LabeledPoint v) throws Exception { - return v.features(); - } - }); + JavaRDD vectors = testRDD.map(LabeledPoint::features); JavaRDD predictions = model.predict(vectors); // Should be able to get the first prediction. predictions.first(); diff --git a/mllib/src/test/java/org/apache/spark/mllib/tree/JavaDecisionTreeSuite.java b/mllib/src/test/java/org/apache/spark/mllib/tree/JavaDecisionTreeSuite.java index 1dcbbcaa02..0f71deb9ea 100644 --- a/mllib/src/test/java/org/apache/spark/mllib/tree/JavaDecisionTreeSuite.java +++ b/mllib/src/test/java/org/apache/spark/mllib/tree/JavaDecisionTreeSuite.java @@ -25,8 +25,6 @@ import org.junit.Test; import org.apache.spark.SharedSparkSession; import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.api.java.function.Function; -import org.apache.spark.mllib.linalg.Vector; import org.apache.spark.mllib.regression.LabeledPoint; import org.apache.spark.mllib.tree.configuration.Algo; import org.apache.spark.mllib.tree.configuration.Strategy; @@ -35,7 +33,7 @@ import org.apache.spark.mllib.tree.model.DecisionTreeModel; public class JavaDecisionTreeSuite extends SharedSparkSession { - int validatePrediction(List validationData, DecisionTreeModel model) { + private static int validatePrediction(List validationData, DecisionTreeModel model) { int numCorrect = 0; for (LabeledPoint point : validationData) { Double prediction = model.predict(point.features()); @@ -63,7 +61,7 @@ public class JavaDecisionTreeSuite extends SharedSparkSession { DecisionTreeModel model = learner.run(rdd.rdd()); int numCorrect = validatePrediction(arr, model); - Assert.assertTrue(numCorrect == rdd.count()); + Assert.assertEquals(numCorrect, rdd.count()); } @Test @@ -82,15 +80,10 @@ public class JavaDecisionTreeSuite extends SharedSparkSession { DecisionTreeModel model = DecisionTree$.MODULE$.train(rdd.rdd(), strategy); // java compatibility test - JavaRDD predictions = model.predict(rdd.map(new Function() { - @Override - public Vector call(LabeledPoint v1) { - return v1.features(); - } - })); + JavaRDD predictions = model.predict(rdd.map(LabeledPoint::features)); int numCorrect = validatePrediction(arr, model); - Assert.assertTrue(numCorrect == rdd.count()); + Assert.assertEquals(numCorrect, rdd.count()); } } -- cgit v1.2.3