author     Feynman Liang <fliang@databricks.com>    2015-08-27 10:44:44 +0100
committer  Sean Owen <sowen@cloudera.com>           2015-08-27 10:44:44 +0100
commit     1650f6f56ed4b7f1a7f645c9e8d5ac533464bd78 (patch)
tree       d7d206bdb016cec0ad233c6c0e694c190d3c7cfb /mllib
parent     9625d13d575c97bbff264f6a94838aae72c9202d (diff)
[SPARK-10254] [ML] Removes Guava dependencies in spark.ml.feature JavaTests
* Replaces `com.google.common` dependencies with `java.util.Arrays`
* Small clean up in `JavaNormalizerSuite`

Author: Feynman Liang <fliang@databricks.com>

Closes #8445 from feynmanliang/SPARK-10254.
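For reference, a minimal standalone sketch of the substitution this patch applies throughout (illustrative only, not part of the patch): Guava's `Lists.newArrayList(...)` and the JDK's `Arrays.asList(...)` both build a `List` from varargs or from an existing array (e.g. the `String[]` returned by `split`), so the Guava import can be dropped. The one behavioral difference is that `Arrays.asList` returns a fixed-size list backed by the array, which is harmless for the read-only test data in these suites.

    import java.util.Arrays;
    import java.util.List;

    public class ArraysAsListSketch {
      public static void main(String[] args) {
        // Before (Guava): List<Double> splits = Lists.newArrayList(-0.5, 0.0, 0.5);
        // After (JDK):    Arrays.asList wraps the varargs array in a fixed-size List.
        List<Double> splits = Arrays.asList(-0.5, 0.0, 0.5);

        // Also works on an existing array, as in the Word2Vec suite below.
        List<String> tokens = Arrays.asList("Hi I heard about Spark".split(" "));

        System.out.println(splits);  // [-0.5, 0.0, 0.5]
        System.out.println(tokens);  // [Hi, I, heard, about, Spark]
        // splits.add(1.0);          // would throw UnsupportedOperationException
      }
    }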
Diffstat (limited to 'mllib')
-rw-r--r--  mllib/src/test/java/org/apache/spark/ml/feature/JavaBucketizerSuite.java           |  5
-rw-r--r--  mllib/src/test/java/org/apache/spark/ml/feature/JavaDCTSuite.java                  |  5
-rw-r--r--  mllib/src/test/java/org/apache/spark/ml/feature/JavaHashingTFSuite.java            |  5
-rw-r--r--  mllib/src/test/java/org/apache/spark/ml/feature/JavaNormalizerSuite.java           | 11
-rw-r--r--  mllib/src/test/java/org/apache/spark/ml/feature/JavaPCASuite.java                  |  4
-rw-r--r--  mllib/src/test/java/org/apache/spark/ml/feature/JavaPolynomialExpansionSuite.java  |  5
-rw-r--r--  mllib/src/test/java/org/apache/spark/ml/feature/JavaStandardScalerSuite.java       |  4
-rw-r--r--  mllib/src/test/java/org/apache/spark/ml/feature/JavaTokenizerSuite.java            |  6
-rw-r--r--  mllib/src/test/java/org/apache/spark/ml/feature/JavaVectorIndexerSuite.java        |  5
-rw-r--r--  mllib/src/test/java/org/apache/spark/ml/feature/JavaVectorSlicerSuite.java         |  4
-rw-r--r--  mllib/src/test/java/org/apache/spark/ml/feature/JavaWord2VecSuite.java             | 11
11 files changed, 35 insertions(+), 30 deletions(-)
diff --git a/mllib/src/test/java/org/apache/spark/ml/feature/JavaBucketizerSuite.java b/mllib/src/test/java/org/apache/spark/ml/feature/JavaBucketizerSuite.java
index d5bd230a95..47d68de599 100644
--- a/mllib/src/test/java/org/apache/spark/ml/feature/JavaBucketizerSuite.java
+++ b/mllib/src/test/java/org/apache/spark/ml/feature/JavaBucketizerSuite.java
@@ -17,7 +17,8 @@
package org.apache.spark.ml.feature;
-import com.google.common.collect.Lists;
+import java.util.Arrays;
+
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
@@ -54,7 +55,7 @@ public class JavaBucketizerSuite {
public void bucketizerTest() {
double[] splits = {-0.5, 0.0, 0.5};
- JavaRDD<Row> data = jsc.parallelize(Lists.newArrayList(
+ JavaRDD<Row> data = jsc.parallelize(Arrays.asList(
RowFactory.create(-0.5),
RowFactory.create(-0.3),
RowFactory.create(0.0),
diff --git a/mllib/src/test/java/org/apache/spark/ml/feature/JavaDCTSuite.java b/mllib/src/test/java/org/apache/spark/ml/feature/JavaDCTSuite.java
index 845eed61c4..0f6ec64d97 100644
--- a/mllib/src/test/java/org/apache/spark/ml/feature/JavaDCTSuite.java
+++ b/mllib/src/test/java/org/apache/spark/ml/feature/JavaDCTSuite.java
@@ -17,7 +17,8 @@
package org.apache.spark.ml.feature;
-import com.google.common.collect.Lists;
+import java.util.Arrays;
+
import edu.emory.mathcs.jtransforms.dct.DoubleDCT_1D;
import org.junit.After;
import org.junit.Assert;
@@ -56,7 +57,7 @@ public class JavaDCTSuite {
@Test
public void javaCompatibilityTest() {
double[] input = new double[] {1D, 2D, 3D, 4D};
- JavaRDD<Row> data = jsc.parallelize(Lists.newArrayList(
+ JavaRDD<Row> data = jsc.parallelize(Arrays.asList(
RowFactory.create(Vectors.dense(input))
));
DataFrame dataset = jsql.createDataFrame(data, new StructType(new StructField[]{
diff --git a/mllib/src/test/java/org/apache/spark/ml/feature/JavaHashingTFSuite.java b/mllib/src/test/java/org/apache/spark/ml/feature/JavaHashingTFSuite.java
index 599e9cfd23..03dd5369bd 100644
--- a/mllib/src/test/java/org/apache/spark/ml/feature/JavaHashingTFSuite.java
+++ b/mllib/src/test/java/org/apache/spark/ml/feature/JavaHashingTFSuite.java
@@ -17,7 +17,8 @@
package org.apache.spark.ml.feature;
-import com.google.common.collect.Lists;
+import java.util.Arrays;
+
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
@@ -54,7 +55,7 @@ public class JavaHashingTFSuite {
@Test
public void hashingTF() {
- JavaRDD<Row> jrdd = jsc.parallelize(Lists.newArrayList(
+ JavaRDD<Row> jrdd = jsc.parallelize(Arrays.asList(
RowFactory.create(0.0, "Hi I heard about Spark"),
RowFactory.create(0.0, "I wish Java could use case classes"),
RowFactory.create(1.0, "Logistic regression models are neat")
diff --git a/mllib/src/test/java/org/apache/spark/ml/feature/JavaNormalizerSuite.java b/mllib/src/test/java/org/apache/spark/ml/feature/JavaNormalizerSuite.java
index d82f3b7e8c..e17d549c50 100644
--- a/mllib/src/test/java/org/apache/spark/ml/feature/JavaNormalizerSuite.java
+++ b/mllib/src/test/java/org/apache/spark/ml/feature/JavaNormalizerSuite.java
@@ -17,15 +17,15 @@
package org.apache.spark.ml.feature;
-import java.util.List;
+import java.util.Arrays;
-import com.google.common.collect.Lists;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.mllib.linalg.Vectors;
+import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.sql.DataFrame;
import org.apache.spark.sql.SQLContext;
@@ -48,13 +48,12 @@ public class JavaNormalizerSuite {
@Test
public void normalizer() {
// The tests are to check Java compatibility.
- List<VectorIndexerSuite.FeatureData> points = Lists.newArrayList(
+ JavaRDD<VectorIndexerSuite.FeatureData> points = jsc.parallelize(Arrays.asList(
new VectorIndexerSuite.FeatureData(Vectors.dense(0.0, -2.0)),
new VectorIndexerSuite.FeatureData(Vectors.dense(1.0, 3.0)),
new VectorIndexerSuite.FeatureData(Vectors.dense(1.0, 4.0))
- );
- DataFrame dataFrame = jsql.createDataFrame(jsc.parallelize(points, 2),
- VectorIndexerSuite.FeatureData.class);
+ ));
+ DataFrame dataFrame = jsql.createDataFrame(points, VectorIndexerSuite.FeatureData.class);
Normalizer normalizer = new Normalizer()
.setInputCol("features")
.setOutputCol("normFeatures");
diff --git a/mllib/src/test/java/org/apache/spark/ml/feature/JavaPCASuite.java b/mllib/src/test/java/org/apache/spark/ml/feature/JavaPCASuite.java
index 5cf43fec6f..e8f329f9cf 100644
--- a/mllib/src/test/java/org/apache/spark/ml/feature/JavaPCASuite.java
+++ b/mllib/src/test/java/org/apache/spark/ml/feature/JavaPCASuite.java
@@ -18,11 +18,11 @@
package org.apache.spark.ml.feature;
import java.io.Serializable;
+import java.util.Arrays;
import java.util.List;
import scala.Tuple2;
-import com.google.common.collect.Lists;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
@@ -78,7 +78,7 @@ public class JavaPCASuite implements Serializable {
@Test
public void testPCA() {
- List<Vector> points = Lists.newArrayList(
+ List<Vector> points = Arrays.asList(
Vectors.sparse(5, new int[]{1, 3}, new double[]{1.0, 7.0}),
Vectors.dense(2.0, 0.0, 3.0, 4.0, 5.0),
Vectors.dense(4.0, 0.0, 0.0, 6.0, 7.0)
diff --git a/mllib/src/test/java/org/apache/spark/ml/feature/JavaPolynomialExpansionSuite.java b/mllib/src/test/java/org/apache/spark/ml/feature/JavaPolynomialExpansionSuite.java
index 5e8211c2c5..834fedbb59 100644
--- a/mllib/src/test/java/org/apache/spark/ml/feature/JavaPolynomialExpansionSuite.java
+++ b/mllib/src/test/java/org/apache/spark/ml/feature/JavaPolynomialExpansionSuite.java
@@ -17,7 +17,8 @@
package org.apache.spark.ml.feature;
-import com.google.common.collect.Lists;
+import java.util.Arrays;
+
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
@@ -59,7 +60,7 @@ public class JavaPolynomialExpansionSuite {
.setOutputCol("polyFeatures")
.setDegree(3);
- JavaRDD<Row> data = jsc.parallelize(Lists.newArrayList(
+ JavaRDD<Row> data = jsc.parallelize(Arrays.asList(
RowFactory.create(
Vectors.dense(-2.0, 2.3),
Vectors.dense(-2.0, 4.0, -8.0, 2.3, -4.6, 9.2, 5.29, -10.58, 12.17)
diff --git a/mllib/src/test/java/org/apache/spark/ml/feature/JavaStandardScalerSuite.java b/mllib/src/test/java/org/apache/spark/ml/feature/JavaStandardScalerSuite.java
index 74eb2733f0..ed74363f59 100644
--- a/mllib/src/test/java/org/apache/spark/ml/feature/JavaStandardScalerSuite.java
+++ b/mllib/src/test/java/org/apache/spark/ml/feature/JavaStandardScalerSuite.java
@@ -17,9 +17,9 @@
package org.apache.spark.ml.feature;
+import java.util.Arrays;
import java.util.List;
-import com.google.common.collect.Lists;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
@@ -48,7 +48,7 @@ public class JavaStandardScalerSuite {
@Test
public void standardScaler() {
// The tests are to check Java compatibility.
- List<VectorIndexerSuite.FeatureData> points = Lists.newArrayList(
+ List<VectorIndexerSuite.FeatureData> points = Arrays.asList(
new VectorIndexerSuite.FeatureData(Vectors.dense(0.0, -2.0)),
new VectorIndexerSuite.FeatureData(Vectors.dense(1.0, 3.0)),
new VectorIndexerSuite.FeatureData(Vectors.dense(1.0, 4.0))
diff --git a/mllib/src/test/java/org/apache/spark/ml/feature/JavaTokenizerSuite.java b/mllib/src/test/java/org/apache/spark/ml/feature/JavaTokenizerSuite.java
index 3806f65002..02309ce632 100644
--- a/mllib/src/test/java/org/apache/spark/ml/feature/JavaTokenizerSuite.java
+++ b/mllib/src/test/java/org/apache/spark/ml/feature/JavaTokenizerSuite.java
@@ -17,7 +17,8 @@
package org.apache.spark.ml.feature;
-import com.google.common.collect.Lists;
+import java.util.Arrays;
+
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
@@ -54,7 +55,8 @@ public class JavaTokenizerSuite {
.setGaps(true)
.setMinTokenLength(3);
- JavaRDD<TokenizerTestData> rdd = jsc.parallelize(Lists.newArrayList(
+
+ JavaRDD<TokenizerTestData> rdd = jsc.parallelize(Arrays.asList(
new TokenizerTestData("Test of tok.", new String[] {"Test", "tok."}),
new TokenizerTestData("Te,st. punct", new String[] {"Te,st.", "punct"})
));
diff --git a/mllib/src/test/java/org/apache/spark/ml/feature/JavaVectorIndexerSuite.java b/mllib/src/test/java/org/apache/spark/ml/feature/JavaVectorIndexerSuite.java
index c7ae5468b9..bfcca62fa1 100644
--- a/mllib/src/test/java/org/apache/spark/ml/feature/JavaVectorIndexerSuite.java
+++ b/mllib/src/test/java/org/apache/spark/ml/feature/JavaVectorIndexerSuite.java
@@ -18,6 +18,7 @@
package org.apache.spark.ml.feature;
import java.io.Serializable;
+import java.util.Arrays;
import java.util.List;
import java.util.Map;
@@ -26,8 +27,6 @@ import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
-import com.google.common.collect.Lists;
-
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.ml.feature.VectorIndexerSuite.FeatureData;
import org.apache.spark.mllib.linalg.Vectors;
@@ -52,7 +51,7 @@ public class JavaVectorIndexerSuite implements Serializable {
@Test
public void vectorIndexerAPI() {
// The tests are to check Java compatibility.
- List<FeatureData> points = Lists.newArrayList(
+ List<FeatureData> points = Arrays.asList(
new FeatureData(Vectors.dense(0.0, -2.0)),
new FeatureData(Vectors.dense(1.0, 3.0)),
new FeatureData(Vectors.dense(1.0, 4.0))
diff --git a/mllib/src/test/java/org/apache/spark/ml/feature/JavaVectorSlicerSuite.java b/mllib/src/test/java/org/apache/spark/ml/feature/JavaVectorSlicerSuite.java
index 56988b9fb2..f953361427 100644
--- a/mllib/src/test/java/org/apache/spark/ml/feature/JavaVectorSlicerSuite.java
+++ b/mllib/src/test/java/org/apache/spark/ml/feature/JavaVectorSlicerSuite.java
@@ -17,7 +17,7 @@
package org.apache.spark.ml.feature;
-import com.google.common.collect.Lists;
+import java.util.Arrays;
import org.junit.After;
import org.junit.Assert;
@@ -63,7 +63,7 @@ public class JavaVectorSlicerSuite {
};
AttributeGroup group = new AttributeGroup("userFeatures", attrs);
- JavaRDD<Row> jrdd = jsc.parallelize(Lists.newArrayList(
+ JavaRDD<Row> jrdd = jsc.parallelize(Arrays.asList(
RowFactory.create(Vectors.sparse(3, new int[]{0, 1}, new double[]{-2.0, 2.3})),
RowFactory.create(Vectors.dense(-2.0, 2.3, 0.0))
));
diff --git a/mllib/src/test/java/org/apache/spark/ml/feature/JavaWord2VecSuite.java b/mllib/src/test/java/org/apache/spark/ml/feature/JavaWord2VecSuite.java
index 39c70157f8..70f5ad9432 100644
--- a/mllib/src/test/java/org/apache/spark/ml/feature/JavaWord2VecSuite.java
+++ b/mllib/src/test/java/org/apache/spark/ml/feature/JavaWord2VecSuite.java
@@ -17,7 +17,8 @@
package org.apache.spark.ml.feature;
-import com.google.common.collect.Lists;
+import java.util.Arrays;
+
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
@@ -50,10 +51,10 @@ public class JavaWord2VecSuite {
@Test
public void testJavaWord2Vec() {
- JavaRDD<Row> jrdd = jsc.parallelize(Lists.newArrayList(
- RowFactory.create(Lists.newArrayList("Hi I heard about Spark".split(" "))),
- RowFactory.create(Lists.newArrayList("I wish Java could use case classes".split(" "))),
- RowFactory.create(Lists.newArrayList("Logistic regression models are neat".split(" ")))
+ JavaRDD<Row> jrdd = jsc.parallelize(Arrays.asList(
+ RowFactory.create(Arrays.asList("Hi I heard about Spark".split(" "))),
+ RowFactory.create(Arrays.asList("I wish Java could use case classes".split(" "))),
+ RowFactory.create(Arrays.asList("Logistic regression models are neat".split(" ")))
));
StructType schema = new StructType(new StructField[]{
new StructField("text", new ArrayType(DataTypes.StringType, true), false, Metadata.empty())