author     Sean Owen <sowen@cloudera.com>    2015-08-19 09:41:09 +0100
committer  Sean Owen <sowen@cloudera.com>    2015-08-19 09:41:09 +0100
commit     f141efeafb42b14b5fcfd9aa8c5275162042349f (patch)
tree       a99653aabf1fba1de0fd8817ce142c36f0c1a4d3 /docs/ml-features.md
parent     b23c4d3ffc36e47c057360c611d8ab1a13877699 (diff)
[SPARK-10070] [DOCS] Remove Guava dependencies in user guides
`Lists.newArrayList` -> `Arrays.asList`

CC jkbradley feynmanliang

Anybody interested in replacing usages of `Lists.newArrayList` in the examples / source code too? This method isn't useful in Java 7 and beyond.

Author: Sean Owen <sowen@cloudera.com>

Closes #8272 from srowen/SPARK-10070.
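A minimal sketch of the substitution this commit applies throughout the guide's Java snippets; the `words` variable is illustrative and does not appear in the docs:

{% highlight java %}
import java.util.Arrays;
import java.util.List;

// Before (pulls in the Guava dependency):
//   import com.google.common.collect.Lists;
//   List<String> words = Lists.newArrayList("Hi", "I", "heard", "about", "Spark");

// After (plain JDK; Arrays.asList has been available since Java 1.2):
List<String> words = Arrays.asList("Hi", "I", "heard", "about", "Spark");
{% endhighlight %}

Note that `Arrays.asList` returns a fixed-size list backed by the array, while Guava's `Lists.newArrayList` returns a mutable `ArrayList`; the snippets below only pass the list to `parallelize`, so the fixed-size variant is sufficient.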
Diffstat (limited to 'docs/ml-features.md')
-rw-r--r--  docs/ml-features.md | 52
1 file changed, 26 insertions(+), 26 deletions(-)
diff --git a/docs/ml-features.md b/docs/ml-features.md
index 28a61933f8..d82c85ee75 100644
--- a/docs/ml-features.md
+++ b/docs/ml-features.md
@@ -55,7 +55,7 @@ rescaledData.select("features", "label").take(3).foreach(println)
<div data-lang="java" markdown="1">
{% highlight java %}
-import com.google.common.collect.Lists;
+import java.util.Arrays;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.ml.feature.HashingTF;
@@ -70,7 +70,7 @@ import org.apache.spark.sql.types.Metadata;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;
-JavaRDD<Row> jrdd = jsc.parallelize(Lists.newArrayList(
+JavaRDD<Row> jrdd = jsc.parallelize(Arrays.asList(
RowFactory.create(0, "Hi I heard about Spark"),
RowFactory.create(0, "I wish Java could use case classes"),
RowFactory.create(1, "Logistic regression models are neat")
@@ -153,7 +153,7 @@ result.select("result").take(3).foreach(println)
<div data-lang="java" markdown="1">
{% highlight java %}
-import com.google.common.collect.Lists;
+import java.util.Arrays;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
@@ -167,10 +167,10 @@ JavaSparkContext jsc = ...
SQLContext sqlContext = ...
// Input data: Each row is a bag of words from a sentence or document.
-JavaRDD<Row> jrdd = jsc.parallelize(Lists.newArrayList(
- RowFactory.create(Lists.newArrayList("Hi I heard about Spark".split(" "))),
- RowFactory.create(Lists.newArrayList("I wish Java could use case classes".split(" "))),
- RowFactory.create(Lists.newArrayList("Logistic regression models are neat".split(" ")))
+JavaRDD<Row> jrdd = jsc.parallelize(Arrays.asList(
+ RowFactory.create(Arrays.asList("Hi I heard about Spark".split(" "))),
+ RowFactory.create(Arrays.asList("I wish Java could use case classes".split(" "))),
+ RowFactory.create(Arrays.asList("Logistic regression models are neat".split(" ")))
));
StructType schema = new StructType(new StructField[]{
new StructField("text", new ArrayType(DataTypes.StringType, true), false, Metadata.empty())
@@ -248,7 +248,7 @@ regexTokenized.select("words", "label").take(3).foreach(println)
<div data-lang="java" markdown="1">
{% highlight java %}
-import com.google.common.collect.Lists;
+import java.util.Arrays;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.ml.feature.RegexTokenizer;
@@ -262,7 +262,7 @@ import org.apache.spark.sql.types.Metadata;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;
-JavaRDD<Row> jrdd = jsc.parallelize(Lists.newArrayList(
+JavaRDD<Row> jrdd = jsc.parallelize(Arrays.asList(
RowFactory.create(0, "Hi I heard about Spark"),
RowFactory.create(1, "I wish Java could use case classes"),
RowFactory.create(2, "Logistic,regression,models,are,neat")
@@ -341,7 +341,7 @@ ngramDataFrame.take(3).map(_.getAs[Stream[String]]("ngrams").toList).foreach(pri
[`NGram`](api/java/org/apache/spark/ml/feature/NGram.html) takes an input column name, an output column name, and an optional length parameter n (n=2 by default).
{% highlight java %}
-import com.google.common.collect.Lists;
+import java.util.Arrays;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.ml.feature.NGram;
@@ -354,10 +354,10 @@ import org.apache.spark.sql.types.Metadata;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;
-JavaRDD<Row> jrdd = jsc.parallelize(Lists.newArrayList(
- RowFactory.create(0D, Lists.newArrayList("Hi", "I", "heard", "about", "Spark")),
- RowFactory.create(1D, Lists.newArrayList("I", "wish", "Java", "could", "use", "case", "classes")),
- RowFactory.create(2D, Lists.newArrayList("Logistic", "regression", "models", "are", "neat"))
+JavaRDD<Row> jrdd = jsc.parallelize(Arrays.asList(
+ RowFactory.create(0.0, Arrays.asList("Hi", "I", "heard", "about", "Spark")),
+ RowFactory.create(1.0, Arrays.asList("I", "wish", "Java", "could", "use", "case", "classes")),
+ RowFactory.create(2.0, Arrays.asList("Logistic", "regression", "models", "are", "neat"))
));
StructType schema = new StructType(new StructField[]{
new StructField("label", DataTypes.DoubleType, false, Metadata.empty()),
@@ -427,7 +427,7 @@ binarizedFeatures.collect().foreach(println)
<div data-lang="java" markdown="1">
{% highlight java %}
-import com.google.common.collect.Lists;
+import java.util.Arrays;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.ml.feature.Binarizer;
@@ -439,7 +439,7 @@ import org.apache.spark.sql.types.Metadata;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;
-JavaRDD<Row> jrdd = jsc.parallelize(Lists.newArrayList(
+JavaRDD<Row> jrdd = jsc.parallelize(Arrays.asList(
RowFactory.create(0, 0.1),
RowFactory.create(1, 0.8),
RowFactory.create(2, 0.2)
@@ -511,7 +511,7 @@ result.show()
<div data-lang="java" markdown="1">
See the [Java API documentation](api/java/org/apache/spark/ml/feature/PCA.html) for API details.
{% highlight java %}
-import com.google.common.collect.Lists;
+import java.util.Arrays;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
@@ -529,7 +529,7 @@ import org.apache.spark.sql.types.StructType;
JavaSparkContext jsc = ...
SQLContext jsql = ...
-JavaRDD<Row> data = jsc.parallelize(Lists.newArrayList(
+JavaRDD<Row> data = jsc.parallelize(Arrays.asList(
RowFactory.create(Vectors.sparse(5, new int[]{1, 3}, new double[]{1.0, 7.0})),
RowFactory.create(Vectors.dense(2.0, 0.0, 3.0, 4.0, 5.0)),
RowFactory.create(Vectors.dense(4.0, 0.0, 0.0, 6.0, 7.0))
@@ -593,7 +593,7 @@ polyDF.select("polyFeatures").take(3).foreach(println)
<div data-lang="java" markdown="1">
{% highlight java %}
-import com.google.common.collect.Lists;
+import java.util.Arrays;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
@@ -614,7 +614,7 @@ PolynomialExpansion polyExpansion = new PolynomialExpansion()
.setInputCol("features")
.setOutputCol("polyFeatures")
.setDegree(3);
-JavaRDD<Row> data = jsc.parallelize(Lists.newArrayList(
+JavaRDD<Row> data = jsc.parallelize(Arrays.asList(
RowFactory.create(Vectors.dense(-2.0, 2.3)),
RowFactory.create(Vectors.dense(0.0, 0.0)),
RowFactory.create(Vectors.dense(0.6, -1.1))
@@ -869,7 +869,7 @@ encoded.select("id", "categoryVec").foreach(println)
<div data-lang="java" markdown="1">
{% highlight java %}
-import com.google.common.collect.Lists;
+import java.util.Arrays;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.ml.feature.OneHotEncoder;
@@ -883,7 +883,7 @@ import org.apache.spark.sql.types.Metadata;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;
-JavaRDD<Row> jrdd = jsc.parallelize(Lists.newArrayList(
+JavaRDD<Row> jrdd = jsc.parallelize(Arrays.asList(
RowFactory.create(0, "a"),
RowFactory.create(1, "b"),
RowFactory.create(2, "c"),
@@ -1206,7 +1206,7 @@ val bucketedData = bucketizer.transform(dataFrame)
<div data-lang="java">
{% highlight java %}
-import com.google.common.collect.Lists;
+import java.util.Arrays;
import org.apache.spark.sql.DataFrame;
import org.apache.spark.sql.Row;
@@ -1218,7 +1218,7 @@ import org.apache.spark.sql.types.StructType;
double[] splits = {Double.NEGATIVE_INFINITY, -0.5, 0.0, 0.5, Double.POSITIVE_INFINITY};
-JavaRDD<Row> data = jsc.parallelize(Lists.newArrayList(
+JavaRDD<Row> data = jsc.parallelize(Arrays.asList(
RowFactory.create(-0.5),
RowFactory.create(-0.3),
RowFactory.create(0.0),
@@ -1307,7 +1307,7 @@ transformer.transform(dataFrame).show()
<div data-lang="java" markdown="1">
{% highlight java %}
-import com.google.common.collect.Lists;
+import java.util.Arrays;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.ml.feature.ElementwiseProduct;
@@ -1323,7 +1323,7 @@ import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;
// Create some vector data; also works for sparse vectors
-JavaRDD<Row> jrdd = jsc.parallelize(Lists.newArrayList(
+JavaRDD<Row> jrdd = jsc.parallelize(Arrays.asList(
RowFactory.create("a", Vectors.dense(1.0, 2.0, 3.0)),
RowFactory.create("b", Vectors.dense(4.0, 5.0, 6.0))
));
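For reference, a self-contained sketch of the post-change idiom shared by all of the hunks above, written against the Spark 1.x Java API; the class name, schema, and `show()` call are illustrative additions, not taken from the guide:

{% highlight java %}
import java.util.Arrays;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.DataFrame;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.SQLContext;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.Metadata;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;

public class ArraysAsListExample {
  public static void main(String[] args) {
    SparkConf conf = new SparkConf().setAppName("ArraysAsListExample");
    JavaSparkContext jsc = new JavaSparkContext(conf);
    SQLContext sqlContext = new SQLContext(jsc);

    // Arrays.asList stands in for Guava's Lists.newArrayList:
    // same shape, no extra dependency.
    JavaRDD<Row> jrdd = jsc.parallelize(Arrays.asList(
      RowFactory.create(0, "Hi I heard about Spark"),
      RowFactory.create(1, "I wish Java could use case classes")
    ));
    StructType schema = new StructType(new StructField[]{
      new StructField("label", DataTypes.IntegerType, false, Metadata.empty()),
      new StructField("sentence", DataTypes.StringType, false, Metadata.empty())
    });
    DataFrame sentenceData = sqlContext.createDataFrame(jrdd, schema);
    sentenceData.show();

    jsc.stop();
  }
}
{% endhighlight %}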