author     Sean Owen <sowen@cloudera.com>    2015-08-19 09:41:09 +0100
committer  Sean Owen <sowen@cloudera.com>    2015-08-19 09:41:09 +0100
commit     f141efeafb42b14b5fcfd9aa8c5275162042349f (patch)
tree       a99653aabf1fba1de0fd8817ce142c36f0c1a4d3 /docs
parent     b23c4d3ffc36e47c057360c611d8ab1a13877699 (diff)
[SPARK-10070] [DOCS] Remove Guava dependencies in user guides
`Lists.newArrayList` -> `Arrays.asList`

CC jkbradley feynmanliang Anybody into replacing usages of `Lists.newArrayList` in the examples / source code too? this method isn't useful in Java 7 and beyond.

Author: Sean Owen <sowen@cloudera.com>

Closes #8272 from srowen/SPARK-10070.
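For illustration only (not part of the patch), a minimal sketch of the replacement pattern applied throughout the guides; the class name below is hypothetical and the snippet is not taken from the diff itself:

{% highlight java %}
import java.util.Arrays;
import java.util.List;

// Hypothetical, self-contained sketch of the pattern used in the updated examples.
// Guava's Lists.newArrayList(...) is replaced by the JDK's Arrays.asList(...),
// which returns a fixed-size List backed by the given elements -- sufficient for
// the read-only sample data the guides pass to jsc.parallelize(...).
public class ArraysAsListSketch {
  public static void main(String[] args) {
    // Before (Guava): List<String> sentences = Lists.newArrayList("Hi I heard about Spark", ...);
    // After (plain JDK):
    List<String> sentences = Arrays.asList(
      "Hi I heard about Spark",
      "I wish Java could use case classes",
      "Logistic regression models are neat");
    System.out.println(sentences.size()); // 3
  }
}
{% endhighlight %}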
Diffstat (limited to 'docs')
-rw-r--r--  docs/ml-features.md  52
-rw-r--r--  docs/ml-guide.md     21
2 files changed, 38 insertions(+), 35 deletions(-)
diff --git a/docs/ml-features.md b/docs/ml-features.md
index 28a61933f8..d82c85ee75 100644
--- a/docs/ml-features.md
+++ b/docs/ml-features.md
@@ -55,7 +55,7 @@ rescaledData.select("features", "label").take(3).foreach(println)
<div data-lang="java" markdown="1">
{% highlight java %}
-import com.google.common.collect.Lists;
+import java.util.Arrays;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.ml.feature.HashingTF;
@@ -70,7 +70,7 @@ import org.apache.spark.sql.types.Metadata;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;
-JavaRDD<Row> jrdd = jsc.parallelize(Lists.newArrayList(
+JavaRDD<Row> jrdd = jsc.parallelize(Arrays.asList(
RowFactory.create(0, "Hi I heard about Spark"),
RowFactory.create(0, "I wish Java could use case classes"),
RowFactory.create(1, "Logistic regression models are neat")
@@ -153,7 +153,7 @@ result.select("result").take(3).foreach(println)
<div data-lang="java" markdown="1">
{% highlight java %}
-import com.google.common.collect.Lists;
+import java.util.Arrays;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
@@ -167,10 +167,10 @@ JavaSparkContext jsc = ...
SQLContext sqlContext = ...
// Input data: Each row is a bag of words from a sentence or document.
-JavaRDD<Row> jrdd = jsc.parallelize(Lists.newArrayList(
- RowFactory.create(Lists.newArrayList("Hi I heard about Spark".split(" "))),
- RowFactory.create(Lists.newArrayList("I wish Java could use case classes".split(" "))),
- RowFactory.create(Lists.newArrayList("Logistic regression models are neat".split(" ")))
+JavaRDD<Row> jrdd = jsc.parallelize(Arrays.asList(
+ RowFactory.create(Arrays.asList("Hi I heard about Spark".split(" "))),
+ RowFactory.create(Arrays.asList("I wish Java could use case classes".split(" "))),
+ RowFactory.create(Arrays.asList("Logistic regression models are neat".split(" ")))
));
StructType schema = new StructType(new StructField[]{
new StructField("text", new ArrayType(DataTypes.StringType, true), false, Metadata.empty())
@@ -248,7 +248,7 @@ regexTokenized.select("words", "label").take(3).foreach(println)
<div data-lang="java" markdown="1">
{% highlight java %}
-import com.google.common.collect.Lists;
+import java.util.Arrays;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.ml.feature.RegexTokenizer;
@@ -262,7 +262,7 @@ import org.apache.spark.sql.types.Metadata;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;
-JavaRDD<Row> jrdd = jsc.parallelize(Lists.newArrayList(
+JavaRDD<Row> jrdd = jsc.parallelize(Arrays.asList(
RowFactory.create(0, "Hi I heard about Spark"),
RowFactory.create(1, "I wish Java could use case classes"),
RowFactory.create(2, "Logistic,regression,models,are,neat")
@@ -341,7 +341,7 @@ ngramDataFrame.take(3).map(_.getAs[Stream[String]]("ngrams").toList).foreach(pri
[`NGram`](api/java/org/apache/spark/ml/feature/NGram.html) takes an input column name, an output column name, and an optional length parameter n (n=2 by default).
{% highlight java %}
-import com.google.common.collect.Lists;
+import java.util.Arrays;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.ml.feature.NGram;
@@ -354,10 +354,10 @@ import org.apache.spark.sql.types.Metadata;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;
-JavaRDD<Row> jrdd = jsc.parallelize(Lists.newArrayList(
- RowFactory.create(0D, Lists.newArrayList("Hi", "I", "heard", "about", "Spark")),
- RowFactory.create(1D, Lists.newArrayList("I", "wish", "Java", "could", "use", "case", "classes")),
- RowFactory.create(2D, Lists.newArrayList("Logistic", "regression", "models", "are", "neat"))
+JavaRDD<Row> jrdd = jsc.parallelize(Arrays.asList(
+ RowFactory.create(0.0, Arrays.asList("Hi", "I", "heard", "about", "Spark")),
+ RowFactory.create(1.0, Arrays.asList("I", "wish", "Java", "could", "use", "case", "classes")),
+ RowFactory.create(2.0, Arrays.asList("Logistic", "regression", "models", "are", "neat"))
));
StructType schema = new StructType(new StructField[]{
new StructField("label", DataTypes.DoubleType, false, Metadata.empty()),
@@ -427,7 +427,7 @@ binarizedFeatures.collect().foreach(println)
<div data-lang="java" markdown="1">
{% highlight java %}
-import com.google.common.collect.Lists;
+import java.util.Arrays;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.ml.feature.Binarizer;
@@ -439,7 +439,7 @@ import org.apache.spark.sql.types.Metadata;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;
-JavaRDD<Row> jrdd = jsc.parallelize(Lists.newArrayList(
+JavaRDD<Row> jrdd = jsc.parallelize(Arrays.asList(
RowFactory.create(0, 0.1),
RowFactory.create(1, 0.8),
RowFactory.create(2, 0.2)
@@ -511,7 +511,7 @@ result.show()
<div data-lang="java" markdown="1">
See the [Java API documentation](api/java/org/apache/spark/ml/feature/PCA.html) for API details.
{% highlight java %}
-import com.google.common.collect.Lists;
+import java.util.Arrays;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
@@ -529,7 +529,7 @@ import org.apache.spark.sql.types.StructType;
JavaSparkContext jsc = ...
SQLContext jsql = ...
-JavaRDD<Row> data = jsc.parallelize(Lists.newArrayList(
+JavaRDD<Row> data = jsc.parallelize(Arrays.asList(
RowFactory.create(Vectors.sparse(5, new int[]{1, 3}, new double[]{1.0, 7.0})),
RowFactory.create(Vectors.dense(2.0, 0.0, 3.0, 4.0, 5.0)),
RowFactory.create(Vectors.dense(4.0, 0.0, 0.0, 6.0, 7.0))
@@ -593,7 +593,7 @@ polyDF.select("polyFeatures").take(3).foreach(println)
<div data-lang="java" markdown="1">
{% highlight java %}
-import com.google.common.collect.Lists;
+import java.util.Arrays;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
@@ -614,7 +614,7 @@ PolynomialExpansion polyExpansion = new PolynomialExpansion()
.setInputCol("features")
.setOutputCol("polyFeatures")
.setDegree(3);
-JavaRDD<Row> data = jsc.parallelize(Lists.newArrayList(
+JavaRDD<Row> data = jsc.parallelize(Arrays.asList(
RowFactory.create(Vectors.dense(-2.0, 2.3)),
RowFactory.create(Vectors.dense(0.0, 0.0)),
RowFactory.create(Vectors.dense(0.6, -1.1))
@@ -869,7 +869,7 @@ encoded.select("id", "categoryVec").foreach(println)
<div data-lang="java" markdown="1">
{% highlight java %}
-import com.google.common.collect.Lists;
+import java.util.Arrays;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.ml.feature.OneHotEncoder;
@@ -883,7 +883,7 @@ import org.apache.spark.sql.types.Metadata;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;
-JavaRDD<Row> jrdd = jsc.parallelize(Lists.newArrayList(
+JavaRDD<Row> jrdd = jsc.parallelize(Arrays.asList(
RowFactory.create(0, "a"),
RowFactory.create(1, "b"),
RowFactory.create(2, "c"),
@@ -1206,7 +1206,7 @@ val bucketedData = bucketizer.transform(dataFrame)
<div data-lang="java">
{% highlight java %}
-import com.google.common.collect.Lists;
+import java.util.Arrays;
import org.apache.spark.sql.DataFrame;
import org.apache.spark.sql.Row;
@@ -1218,7 +1218,7 @@ import org.apache.spark.sql.types.StructType;
double[] splits = {Double.NEGATIVE_INFINITY, -0.5, 0.0, 0.5, Double.POSITIVE_INFINITY};
-JavaRDD<Row> data = jsc.parallelize(Lists.newArrayList(
+JavaRDD<Row> data = jsc.parallelize(Arrays.asList(
RowFactory.create(-0.5),
RowFactory.create(-0.3),
RowFactory.create(0.0),
@@ -1307,7 +1307,7 @@ transformer.transform(dataFrame).show()
<div data-lang="java" markdown="1">
{% highlight java %}
-import com.google.common.collect.Lists;
+import java.util.Arrays;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.ml.feature.ElementwiseProduct;
@@ -1323,7 +1323,7 @@ import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;
// Create some vector data; also works for sparse vectors
-JavaRDD<Row> jrdd = jsc.parallelize(Lists.newArrayList(
+JavaRDD<Row> jrdd = jsc.parallelize(Arrays.asList(
RowFactory.create("a", Vectors.dense(1.0, 2.0, 3.0)),
RowFactory.create("b", Vectors.dense(4.0, 5.0, 6.0))
));
diff --git a/docs/ml-guide.md b/docs/ml-guide.md
index a03ab4356a..4fe0ea78bb 100644
--- a/docs/ml-guide.md
+++ b/docs/ml-guide.md
@@ -274,8 +274,9 @@ sc.stop()
<div data-lang="java">
{% highlight java %}
+import java.util.Arrays;
import java.util.List;
-import com.google.common.collect.Lists;
+
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.ml.classification.LogisticRegressionModel;
@@ -294,7 +295,7 @@ SQLContext jsql = new SQLContext(jsc);
// Prepare training data.
// We use LabeledPoint, which is a JavaBean. Spark SQL can convert RDDs of JavaBeans
// into DataFrames, where it uses the bean metadata to infer the schema.
-List<LabeledPoint> localTraining = Lists.newArrayList(
+List<LabeledPoint> localTraining = Arrays.asList(
new LabeledPoint(1.0, Vectors.dense(0.0, 1.1, 0.1)),
new LabeledPoint(0.0, Vectors.dense(2.0, 1.0, -1.0)),
new LabeledPoint(0.0, Vectors.dense(2.0, 1.3, 1.0)),
@@ -335,7 +336,7 @@ LogisticRegressionModel model2 = lr.fit(training, paramMapCombined);
System.out.println("Model 2 was fit using parameters: " + model2.parent().extractParamMap());
// Prepare test documents.
-List<LabeledPoint> localTest = Lists.newArrayList(
+List<LabeledPoint> localTest = Arrays.asList(
new LabeledPoint(1.0, Vectors.dense(-1.0, 1.5, 1.3)),
new LabeledPoint(0.0, Vectors.dense(3.0, 2.0, -0.1)),
new LabeledPoint(1.0, Vectors.dense(0.0, 2.2, -1.5)));
@@ -496,8 +497,9 @@ sc.stop()
<div data-lang="java">
{% highlight java %}
+import java.util.Arrays;
import java.util.List;
-import com.google.common.collect.Lists;
+
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.ml.Pipeline;
@@ -546,7 +548,7 @@ JavaSparkContext jsc = new JavaSparkContext(conf);
SQLContext jsql = new SQLContext(jsc);
// Prepare training documents, which are labeled.
-List<LabeledDocument> localTraining = Lists.newArrayList(
+List<LabeledDocument> localTraining = Arrays.asList(
new LabeledDocument(0L, "a b c d e spark", 1.0),
new LabeledDocument(1L, "b d", 0.0),
new LabeledDocument(2L, "spark f g h", 1.0),
@@ -571,7 +573,7 @@ Pipeline pipeline = new Pipeline()
PipelineModel model = pipeline.fit(training);
// Prepare test documents, which are unlabeled.
-List<Document> localTest = Lists.newArrayList(
+List<Document> localTest = Arrays.asList(
new Document(4L, "spark i j k"),
new Document(5L, "l m n"),
new Document(6L, "mapreduce spark"),
@@ -747,8 +749,9 @@ sc.stop()
<div data-lang="java">
{% highlight java %}
+import java.util.Arrays;
import java.util.List;
-import com.google.common.collect.Lists;
+
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.ml.Pipeline;
@@ -800,7 +803,7 @@ JavaSparkContext jsc = new JavaSparkContext(conf);
SQLContext jsql = new SQLContext(jsc);
// Prepare training documents, which are labeled.
-List<LabeledDocument> localTraining = Lists.newArrayList(
+List<LabeledDocument> localTraining = Arrays.asList(
new LabeledDocument(0L, "a b c d e spark", 1.0),
new LabeledDocument(1L, "b d", 0.0),
new LabeledDocument(2L, "spark f g h", 1.0),
@@ -849,7 +852,7 @@ crossval.setNumFolds(2); // Use 3+ in practice
CrossValidatorModel cvModel = crossval.fit(training);
// Prepare test documents, which are unlabeled.
-List<Document> localTest = Lists.newArrayList(
+List<Document> localTest = Arrays.asList(
new Document(4L, "spark i j k"),
new Document(5L, "l m n"),
new Document(6L, "mapreduce spark"),