From 561390dbc454fb733c6663982f29e5d5a3aaeab9 Mon Sep 17 00:00:00 2001
From: Sean Owen
Date: Wed, 19 Aug 2015 09:41:09 +0100
Subject: [SPARK-10070] [DOCS] Remove Guava dependencies in user guides

`Lists.newArrayList` -> `Arrays.asList`

CC jkbradley feynmanliang

Anybody into replacing usages of `Lists.newArrayList` in the examples / source code too? This method isn't useful in Java 7 and beyond.

Author: Sean Owen

Closes #8272 from srowen/SPARK-10070.

(cherry picked from commit f141efeafb42b14b5fcfd9aa8c5275162042349f)
Signed-off-by: Sean Owen
---
 docs/ml-features.md | 52 ++++++++++++++++++++++++++--------------------------
 docs/ml-guide.md    | 21 ++++++++++++---------
 2 files changed, 38 insertions(+), 35 deletions(-)

diff --git a/docs/ml-features.md b/docs/ml-features.md
index 28a61933f8..d82c85ee75 100644
--- a/docs/ml-features.md
+++ b/docs/ml-features.md
@@ -55,7 +55,7 @@ rescaledData.select("features", "label").take(3).foreach(println)
 {% highlight java %}
-import com.google.common.collect.Lists;
+import java.util.Arrays;
 
 import org.apache.spark.api.java.JavaRDD;
 import org.apache.spark.ml.feature.HashingTF;
@@ -70,7 +70,7 @@ import org.apache.spark.sql.types.Metadata;
 import org.apache.spark.sql.types.StructField;
 import org.apache.spark.sql.types.StructType;
 
-JavaRDD<Row> jrdd = jsc.parallelize(Lists.newArrayList(
+JavaRDD<Row> jrdd = jsc.parallelize(Arrays.asList(
   RowFactory.create(0, "Hi I heard about Spark"),
   RowFactory.create(0, "I wish Java could use case classes"),
   RowFactory.create(1, "Logistic regression models are neat")
@@ -153,7 +153,7 @@ result.select("result").take(3).foreach(println)
 {% highlight java %}
-import com.google.common.collect.Lists;
+import java.util.Arrays;
 
 import org.apache.spark.api.java.JavaRDD;
 import org.apache.spark.api.java.JavaSparkContext;
@@ -167,10 +167,10 @@ JavaSparkContext jsc = ...
 SQLContext sqlContext = ...
 
 // Input data: Each row is a bag of words from a sentence or document.
-JavaRDD<Row> jrdd = jsc.parallelize(Lists.newArrayList(
-  RowFactory.create(Lists.newArrayList("Hi I heard about Spark".split(" "))),
-  RowFactory.create(Lists.newArrayList("I wish Java could use case classes".split(" "))),
-  RowFactory.create(Lists.newArrayList("Logistic regression models are neat".split(" ")))
+JavaRDD<Row> jrdd = jsc.parallelize(Arrays.asList(
+  RowFactory.create(Arrays.asList("Hi I heard about Spark".split(" "))),
+  RowFactory.create(Arrays.asList("I wish Java could use case classes".split(" "))),
+  RowFactory.create(Arrays.asList("Logistic regression models are neat".split(" ")))
 ));
 StructType schema = new StructType(new StructField[]{
   new StructField("text", new ArrayType(DataTypes.StringType, true), false, Metadata.empty())
@@ -248,7 +248,7 @@ regexTokenized.select("words", "label").take(3).foreach(println)
 {% highlight java %}
-import com.google.common.collect.Lists;
+import java.util.Arrays;
 
 import org.apache.spark.api.java.JavaRDD;
 import org.apache.spark.ml.feature.RegexTokenizer;
@@ -262,7 +262,7 @@ import org.apache.spark.sql.types.Metadata;
 import org.apache.spark.sql.types.StructField;
 import org.apache.spark.sql.types.StructType;
 
-JavaRDD<Row> jrdd = jsc.parallelize(Lists.newArrayList(
+JavaRDD<Row> jrdd = jsc.parallelize(Arrays.asList(
   RowFactory.create(0, "Hi I heard about Spark"),
   RowFactory.create(1, "I wish Java could use case classes"),
   RowFactory.create(2, "Logistic,regression,models,are,neat")
@@ -341,7 +341,7 @@ ngramDataFrame.take(3).map(_.getAs[Stream[String]]("ngrams").toList).foreach(pri
 [`NGram`](api/java/org/apache/spark/ml/feature/NGram.html) takes an input column name, an output column name, and an optional length parameter n (n=2 by default).
 
 {% highlight java %}
-import com.google.common.collect.Lists;
+import java.util.Arrays;
 
 import org.apache.spark.api.java.JavaRDD;
 import org.apache.spark.ml.feature.NGram;
@@ -354,10 +354,10 @@ import org.apache.spark.sql.types.Metadata;
 import org.apache.spark.sql.types.StructField;
 import org.apache.spark.sql.types.StructType;
 
-JavaRDD<Row> jrdd = jsc.parallelize(Lists.newArrayList(
-  RowFactory.create(0D, Lists.newArrayList("Hi", "I", "heard", "about", "Spark")),
-  RowFactory.create(1D, Lists.newArrayList("I", "wish", "Java", "could", "use", "case", "classes")),
-  RowFactory.create(2D, Lists.newArrayList("Logistic", "regression", "models", "are", "neat"))
+JavaRDD<Row> jrdd = jsc.parallelize(Arrays.asList(
+  RowFactory.create(0.0, Arrays.asList("Hi", "I", "heard", "about", "Spark")),
+  RowFactory.create(1.0, Arrays.asList("I", "wish", "Java", "could", "use", "case", "classes")),
+  RowFactory.create(2.0, Arrays.asList("Logistic", "regression", "models", "are", "neat"))
 ));
 StructType schema = new StructType(new StructField[]{
   new StructField("label", DataTypes.DoubleType, false, Metadata.empty()),
@@ -427,7 +427,7 @@ binarizedFeatures.collect().foreach(println)
 {% highlight java %}
-import com.google.common.collect.Lists;
+import java.util.Arrays;
 
 import org.apache.spark.api.java.JavaRDD;
 import org.apache.spark.ml.feature.Binarizer;
@@ -439,7 +439,7 @@ import org.apache.spark.sql.types.Metadata;
 import org.apache.spark.sql.types.StructField;
 import org.apache.spark.sql.types.StructType;
 
-JavaRDD<Row> jrdd = jsc.parallelize(Lists.newArrayList(
+JavaRDD<Row> jrdd = jsc.parallelize(Arrays.asList(
   RowFactory.create(0, 0.1),
   RowFactory.create(1, 0.8),
   RowFactory.create(2, 0.2)
@@ -511,7 +511,7 @@ result.show()
 See the [Java API documentation](api/java/org/apache/spark/ml/feature/PCA.html) for API details.
 
 {% highlight java %}
-import com.google.common.collect.Lists;
+import java.util.Arrays;
 
 import org.apache.spark.api.java.JavaRDD;
 import org.apache.spark.api.java.JavaSparkContext;
@@ -529,7 +529,7 @@ import org.apache.spark.sql.types.StructType;
 JavaSparkContext jsc = ...
 SQLContext jsql = ...
-JavaRDD<Row> data = jsc.parallelize(Lists.newArrayList(
+JavaRDD<Row> data = jsc.parallelize(Arrays.asList(
   RowFactory.create(Vectors.sparse(5, new int[]{1, 3}, new double[]{1.0, 7.0})),
   RowFactory.create(Vectors.dense(2.0, 0.0, 3.0, 4.0, 5.0)),
   RowFactory.create(Vectors.dense(4.0, 0.0, 0.0, 6.0, 7.0))
@@ -593,7 +593,7 @@ polyDF.select("polyFeatures").take(3).foreach(println)
 {% highlight java %}
-import com.google.common.collect.Lists;
+import java.util.Arrays;
 
 import org.apache.spark.api.java.JavaRDD;
 import org.apache.spark.api.java.JavaSparkContext;
@@ -614,7 +614,7 @@ PolynomialExpansion polyExpansion = new PolynomialExpansion()
   .setInputCol("features")
   .setOutputCol("polyFeatures")
   .setDegree(3);
-JavaRDD<Row> data = jsc.parallelize(Lists.newArrayList(
+JavaRDD<Row> data = jsc.parallelize(Arrays.asList(
   RowFactory.create(Vectors.dense(-2.0, 2.3)),
   RowFactory.create(Vectors.dense(0.0, 0.0)),
   RowFactory.create(Vectors.dense(0.6, -1.1))
@@ -869,7 +869,7 @@ encoded.select("id", "categoryVec").foreach(println)
 {% highlight java %}
-import com.google.common.collect.Lists;
+import java.util.Arrays;
 
 import org.apache.spark.api.java.JavaRDD;
 import org.apache.spark.ml.feature.OneHotEncoder;
@@ -883,7 +883,7 @@ import org.apache.spark.sql.types.Metadata;
 import org.apache.spark.sql.types.StructField;
 import org.apache.spark.sql.types.StructType;
 
-JavaRDD<Row> jrdd = jsc.parallelize(Lists.newArrayList(
+JavaRDD<Row> jrdd = jsc.parallelize(Arrays.asList(
   RowFactory.create(0, "a"),
   RowFactory.create(1, "b"),
   RowFactory.create(2, "c"),
@@ -1206,7 +1206,7 @@ val bucketedData = bucketizer.transform(dataFrame)
 {% highlight java %}
-import com.google.common.collect.Lists;
+import java.util.Arrays;
 
 import org.apache.spark.sql.DataFrame;
 import org.apache.spark.sql.Row;
@@ -1218,7 +1218,7 @@ import org.apache.spark.sql.types.StructType;
 double[] splits = {Double.NEGATIVE_INFINITY, -0.5, 0.0, 0.5, Double.POSITIVE_INFINITY};
 
-JavaRDD<Row> data = jsc.parallelize(Lists.newArrayList(
+JavaRDD<Row> data = jsc.parallelize(Arrays.asList(
   RowFactory.create(-0.5),
   RowFactory.create(-0.3),
   RowFactory.create(0.0),
@@ -1307,7 +1307,7 @@ transformer.transform(dataFrame).show()
 {% highlight java %}
-import com.google.common.collect.Lists;
+import java.util.Arrays;
 
 import org.apache.spark.api.java.JavaRDD;
 import org.apache.spark.ml.feature.ElementwiseProduct;
@@ -1323,7 +1323,7 @@ import org.apache.spark.sql.types.StructField;
 import org.apache.spark.sql.types.StructType;
 
 // Create some vector data; also works for sparse vectors
-JavaRDD<Row> jrdd = jsc.parallelize(Lists.newArrayList(
+JavaRDD<Row> jrdd = jsc.parallelize(Arrays.asList(
   RowFactory.create("a", Vectors.dense(1.0, 2.0, 3.0)),
   RowFactory.create("b", Vectors.dense(4.0, 5.0, 6.0))
 ));
diff --git a/docs/ml-guide.md b/docs/ml-guide.md
index a03ab4356a..4fe0ea78bb 100644
--- a/docs/ml-guide.md
+++ b/docs/ml-guide.md
@@ -274,8 +274,9 @@ sc.stop()
 {% highlight java %}
+import java.util.Arrays;
 import java.util.List;
-import com.google.common.collect.Lists;
+
 import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.JavaSparkContext;
 import org.apache.spark.ml.classification.LogisticRegressionModel;
@@ -294,7 +295,7 @@ SQLContext jsql = new SQLContext(jsc);
 // Prepare training data.
 // We use LabeledPoint, which is a JavaBean. Spark SQL can convert RDDs of JavaBeans
 // into DataFrames, where it uses the bean metadata to infer the schema.
-List<LabeledPoint> localTraining = Lists.newArrayList(
+List<LabeledPoint> localTraining = Arrays.asList(
   new LabeledPoint(1.0, Vectors.dense(0.0, 1.1, 0.1)),
   new LabeledPoint(0.0, Vectors.dense(2.0, 1.0, -1.0)),
   new LabeledPoint(0.0, Vectors.dense(2.0, 1.3, 1.0)),
@@ -335,7 +336,7 @@ LogisticRegressionModel model2 = lr.fit(training, paramMapCombined);
 System.out.println("Model 2 was fit using parameters: " + model2.parent().extractParamMap());
 
 // Prepare test documents.
-List<LabeledPoint> localTest = Lists.newArrayList(
+List<LabeledPoint> localTest = Arrays.asList(
   new LabeledPoint(1.0, Vectors.dense(-1.0, 1.5, 1.3)),
   new LabeledPoint(0.0, Vectors.dense(3.0, 2.0, -0.1)),
   new LabeledPoint(1.0, Vectors.dense(0.0, 2.2, -1.5)));
@@ -496,8 +497,9 @@ sc.stop()
 {% highlight java %}
+import java.util.Arrays;
 import java.util.List;
-import com.google.common.collect.Lists;
+
 import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.JavaSparkContext;
 import org.apache.spark.ml.Pipeline;
@@ -546,7 +548,7 @@ JavaSparkContext jsc = new JavaSparkContext(conf);
 SQLContext jsql = new SQLContext(jsc);
 
 // Prepare training documents, which are labeled.
-List<LabeledDocument> localTraining = Lists.newArrayList(
+List<LabeledDocument> localTraining = Arrays.asList(
   new LabeledDocument(0L, "a b c d e spark", 1.0),
   new LabeledDocument(1L, "b d", 0.0),
   new LabeledDocument(2L, "spark f g h", 1.0),
@@ -571,7 +573,7 @@ Pipeline pipeline = new Pipeline()
 PipelineModel model = pipeline.fit(training);
 
 // Prepare test documents, which are unlabeled.
-List<Document> localTest = Lists.newArrayList(
+List<Document> localTest = Arrays.asList(
   new Document(4L, "spark i j k"),
   new Document(5L, "l m n"),
   new Document(6L, "mapreduce spark"),
@@ -747,8 +749,9 @@ sc.stop()
 {% highlight java %}
+import java.util.Arrays;
 import java.util.List;
-import com.google.common.collect.Lists;
+
 import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.JavaSparkContext;
 import org.apache.spark.ml.Pipeline;
@@ -800,7 +803,7 @@ JavaSparkContext jsc = new JavaSparkContext(conf);
 SQLContext jsql = new SQLContext(jsc);
 
 // Prepare training documents, which are labeled.
-List<LabeledDocument> localTraining = Lists.newArrayList(
+List<LabeledDocument> localTraining = Arrays.asList(
   new LabeledDocument(0L, "a b c d e spark", 1.0),
   new LabeledDocument(1L, "b d", 0.0),
   new LabeledDocument(2L, "spark f g h", 1.0),
@@ -849,7 +852,7 @@ crossval.setNumFolds(2); // Use 3+ in practice
 CrossValidatorModel cvModel = crossval.fit(training);
 
 // Prepare test documents, which are unlabeled.
-List<Document> localTest = Lists.newArrayList(
+List<Document> localTest = Arrays.asList(
   new Document(4L, "spark i j k"),
   new Document(5L, "l m n"),
   new Document(6L, "mapreduce spark"),
--
cgit v1.2.3
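
The snippet below is a minimal, self-contained sketch of the substitution this patch applies throughout the guides; it is not part of the patch itself, and the class name and example values are illustrative only. `Arrays.asList` returns a fixed-size list backed by the supplied values, which is all these documentation examples need; code that must grow the list afterwards would still copy it into a `new ArrayList<>(...)`.

{% highlight java %}
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class ArraysAsListSketch {
  public static void main(String[] args) {
    // Before (Guava): List<String> words = Lists.newArrayList("a", "b", "c");
    // After (plain JDK, available since Java 1.2, no extra dependency):
    List<String> fixed = Arrays.asList("a", "b", "c");   // fixed-size view

    // If a growable list is actually needed, copy the fixed-size view:
    List<String> growable = new ArrayList<>(fixed);
    growable.add("d");

    System.out.println(fixed);     // [a, b, c]
    System.out.println(growable);  // [a, b, c, d]
  }
}
{% endhighlight %}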