diff options
author | Sean Owen <sowen@cloudera.com> | 2015-08-19 09:41:09 +0100 |
---|---|---|
committer | Sean Owen <sowen@cloudera.com> | 2015-08-19 09:41:09 +0100 |
commit | f141efeafb42b14b5fcfd9aa8c5275162042349f (patch) | |
tree | a99653aabf1fba1de0fd8817ce142c36f0c1a4d3 /docs/ml-features.md | |
parent | b23c4d3ffc36e47c057360c611d8ab1a13877699 (diff) | |
download | spark-f141efeafb42b14b5fcfd9aa8c5275162042349f.tar.gz spark-f141efeafb42b14b5fcfd9aa8c5275162042349f.tar.bz2 spark-f141efeafb42b14b5fcfd9aa8c5275162042349f.zip |
[SPARK-10070] [DOCS] Remove Guava dependencies in user guides
`Lists.newArrayList` -> `Arrays.asList`
CC jkbradley feynmanliang
Anybody into replacing usages of `Lists.newArrayList` in the examples / source code too? this method isn't useful in Java 7 and beyond.
Author: Sean Owen <sowen@cloudera.com>
Closes #8272 from srowen/SPARK-10070.
Diffstat (limited to 'docs/ml-features.md')
-rw-r--r-- | docs/ml-features.md | 52 |
1 file changed, 26 insertions, 26 deletions
diff --git a/docs/ml-features.md b/docs/ml-features.md index 28a61933f8..d82c85ee75 100644 --- a/docs/ml-features.md +++ b/docs/ml-features.md @@ -55,7 +55,7 @@ rescaledData.select("features", "label").take(3).foreach(println) <div data-lang="java" markdown="1"> {% highlight java %} -import com.google.common.collect.Lists; +import java.util.Arrays; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.ml.feature.HashingTF; @@ -70,7 +70,7 @@ import org.apache.spark.sql.types.Metadata; import org.apache.spark.sql.types.StructField; import org.apache.spark.sql.types.StructType; -JavaRDD<Row> jrdd = jsc.parallelize(Lists.newArrayList( +JavaRDD<Row> jrdd = jsc.parallelize(Arrays.asList( RowFactory.create(0, "Hi I heard about Spark"), RowFactory.create(0, "I wish Java could use case classes"), RowFactory.create(1, "Logistic regression models are neat") @@ -153,7 +153,7 @@ result.select("result").take(3).foreach(println) <div data-lang="java" markdown="1"> {% highlight java %} -import com.google.common.collect.Lists; +import java.util.Arrays; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; @@ -167,10 +167,10 @@ JavaSparkContext jsc = ... SQLContext sqlContext = ... // Input data: Each row is a bag of words from a sentence or document. 
-JavaRDD<Row> jrdd = jsc.parallelize(Lists.newArrayList( - RowFactory.create(Lists.newArrayList("Hi I heard about Spark".split(" "))), - RowFactory.create(Lists.newArrayList("I wish Java could use case classes".split(" "))), - RowFactory.create(Lists.newArrayList("Logistic regression models are neat".split(" "))) +JavaRDD<Row> jrdd = jsc.parallelize(Arrays.asList( + RowFactory.create(Arrays.asList("Hi I heard about Spark".split(" "))), + RowFactory.create(Arrays.asList("I wish Java could use case classes".split(" "))), + RowFactory.create(Arrays.asList("Logistic regression models are neat".split(" "))) )); StructType schema = new StructType(new StructField[]{ new StructField("text", new ArrayType(DataTypes.StringType, true), false, Metadata.empty()) @@ -248,7 +248,7 @@ regexTokenized.select("words", "label").take(3).foreach(println) <div data-lang="java" markdown="1"> {% highlight java %} -import com.google.common.collect.Lists; +import java.util.Arrays; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.ml.feature.RegexTokenizer; @@ -262,7 +262,7 @@ import org.apache.spark.sql.types.Metadata; import org.apache.spark.sql.types.StructField; import org.apache.spark.sql.types.StructType; -JavaRDD<Row> jrdd = jsc.parallelize(Lists.newArrayList( +JavaRDD<Row> jrdd = jsc.parallelize(Arrays.asList( RowFactory.create(0, "Hi I heard about Spark"), RowFactory.create(1, "I wish Java could use case classes"), RowFactory.create(2, "Logistic,regression,models,are,neat") @@ -341,7 +341,7 @@ ngramDataFrame.take(3).map(_.getAs[Stream[String]]("ngrams").toList).foreach(pri [`NGram`](api/java/org/apache/spark/ml/feature/NGram.html) takes an input column name, an output column name, and an optional length parameter n (n=2 by default). 
{% highlight java %} -import com.google.common.collect.Lists; +import java.util.Arrays; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.ml.feature.NGram; @@ -354,10 +354,10 @@ import org.apache.spark.sql.types.Metadata; import org.apache.spark.sql.types.StructField; import org.apache.spark.sql.types.StructType; -JavaRDD<Row> jrdd = jsc.parallelize(Lists.newArrayList( - RowFactory.create(0D, Lists.newArrayList("Hi", "I", "heard", "about", "Spark")), - RowFactory.create(1D, Lists.newArrayList("I", "wish", "Java", "could", "use", "case", "classes")), - RowFactory.create(2D, Lists.newArrayList("Logistic", "regression", "models", "are", "neat")) +JavaRDD<Row> jrdd = jsc.parallelize(Arrays.asList( + RowFactory.create(0.0, Arrays.asList("Hi", "I", "heard", "about", "Spark")), + RowFactory.create(1.0, Arrays.asList("I", "wish", "Java", "could", "use", "case", "classes")), + RowFactory.create(2.0, Arrays.asList("Logistic", "regression", "models", "are", "neat")) )); StructType schema = new StructType(new StructField[]{ new StructField("label", DataTypes.DoubleType, false, Metadata.empty()), @@ -427,7 +427,7 @@ binarizedFeatures.collect().foreach(println) <div data-lang="java" markdown="1"> {% highlight java %} -import com.google.common.collect.Lists; +import java.util.Arrays; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.ml.feature.Binarizer; @@ -439,7 +439,7 @@ import org.apache.spark.sql.types.Metadata; import org.apache.spark.sql.types.StructField; import org.apache.spark.sql.types.StructType; -JavaRDD<Row> jrdd = jsc.parallelize(Lists.newArrayList( +JavaRDD<Row> jrdd = jsc.parallelize(Arrays.asList( RowFactory.create(0, 0.1), RowFactory.create(1, 0.8), RowFactory.create(2, 0.2) @@ -511,7 +511,7 @@ result.show() <div data-lang="java" markdown="1"> See the [Java API documentation](api/java/org/apache/spark/ml/feature/PCA.html) for API details. 
{% highlight java %} -import com.google.common.collect.Lists; +import java.util.Arrays; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; @@ -529,7 +529,7 @@ import org.apache.spark.sql.types.StructType; JavaSparkContext jsc = ... SQLContext jsql = ... -JavaRDD<Row> data = jsc.parallelize(Lists.newArrayList( +JavaRDD<Row> data = jsc.parallelize(Arrays.asList( RowFactory.create(Vectors.sparse(5, new int[]{1, 3}, new double[]{1.0, 7.0})), RowFactory.create(Vectors.dense(2.0, 0.0, 3.0, 4.0, 5.0)), RowFactory.create(Vectors.dense(4.0, 0.0, 0.0, 6.0, 7.0)) @@ -593,7 +593,7 @@ polyDF.select("polyFeatures").take(3).foreach(println) <div data-lang="java" markdown="1"> {% highlight java %} -import com.google.common.collect.Lists; +import java.util.Arrays; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; @@ -614,7 +614,7 @@ PolynomialExpansion polyExpansion = new PolynomialExpansion() .setInputCol("features") .setOutputCol("polyFeatures") .setDegree(3); -JavaRDD<Row> data = jsc.parallelize(Lists.newArrayList( +JavaRDD<Row> data = jsc.parallelize(Arrays.asList( RowFactory.create(Vectors.dense(-2.0, 2.3)), RowFactory.create(Vectors.dense(0.0, 0.0)), RowFactory.create(Vectors.dense(0.6, -1.1)) @@ -869,7 +869,7 @@ encoded.select("id", "categoryVec").foreach(println) <div data-lang="java" markdown="1"> {% highlight java %} -import com.google.common.collect.Lists; +import java.util.Arrays; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.ml.feature.OneHotEncoder; @@ -883,7 +883,7 @@ import org.apache.spark.sql.types.Metadata; import org.apache.spark.sql.types.StructField; import org.apache.spark.sql.types.StructType; -JavaRDD<Row> jrdd = jsc.parallelize(Lists.newArrayList( +JavaRDD<Row> jrdd = jsc.parallelize(Arrays.asList( RowFactory.create(0, "a"), RowFactory.create(1, "b"), RowFactory.create(2, "c"), @@ -1206,7 +1206,7 @@ val bucketedData = bucketizer.transform(dataFrame) 
<div data-lang="java"> {% highlight java %} -import com.google.common.collect.Lists; +import java.util.Arrays; import org.apache.spark.sql.DataFrame; import org.apache.spark.sql.Row; @@ -1218,7 +1218,7 @@ import org.apache.spark.sql.types.StructType; double[] splits = {Double.NEGATIVE_INFINITY, -0.5, 0.0, 0.5, Double.POSITIVE_INFINITY}; -JavaRDD<Row> data = jsc.parallelize(Lists.newArrayList( +JavaRDD<Row> data = jsc.parallelize(Arrays.asList( RowFactory.create(-0.5), RowFactory.create(-0.3), RowFactory.create(0.0), @@ -1307,7 +1307,7 @@ transformer.transform(dataFrame).show() <div data-lang="java" markdown="1"> {% highlight java %} -import com.google.common.collect.Lists; +import java.util.Arrays; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.ml.feature.ElementwiseProduct; @@ -1323,7 +1323,7 @@ import org.apache.spark.sql.types.StructField; import org.apache.spark.sql.types.StructType; // Create some vector data; also works for sparse vectors -JavaRDD<Row> jrdd = jsc.parallelize(Lists.newArrayList( +JavaRDD<Row> jrdd = jsc.parallelize(Arrays.asList( RowFactory.create("a", Vectors.dense(1.0, 2.0, 3.0)), RowFactory.create("b", Vectors.dense(4.0, 5.0, 6.0)) )); |