Revert "[SPARK-13019][DOCS] Replace example code in mllib-statistics.md using include_example"

This reverts commit 1af8de200c4d3357bcb09e7bbc6deece00e885f2.
author: Xiangrui Meng <meng@databricks.com> 2016-03-21 17:42:30 -0700
committer: Xiangrui Meng <meng@databricks.com> 2016-03-21 17:42:30 -0700
commit: 43ef1e52bfe359f0f051a607a8dc77cc3b269508 (patch)
tree: 8b03ce50a036b684c8cb5fe0c92dc2dfa350ab90 /examples/src/main/java
parent: 3f49e0766f3a369a44e14632de68c657773b7a27 (diff)
download: spark-43ef1e52bfe359f0f051a607a8dc77cc3b269508.tar.gz
spark-43ef1e52bfe359f0f051a607a8dc77cc3b269508.tar.bz2
spark-43ef1e52bfe359f0f051a607a8dc77cc3b269508.zip
6 files changed, 0 insertions, 387 deletions
diff --git a/examples/src/main/java/org/apache/spark/examples/mllib/JavaCorrelationsExample.java b/examples/src/main/java/org/apache/spark/examples/mllib/JavaCorrelationsExample.java
deleted file mode 100644
index fd19b43504..0000000000
--- a/examples/src/main/java/org/apache/spark/examples/mllib/JavaCorrelationsExample.java
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.examples.mllib;
-
-import org.apache.spark.SparkConf;
-import org.apache.spark.api.java.JavaSparkContext;
-// $example on$
-import java.util.Arrays;
-
-import org.apache.spark.api.java.JavaDoubleRDD;
-import org.apache.spark.api.java.JavaRDD;
-import org.apache.spark.mllib.linalg.Matrix;
-import org.apache.spark.mllib.linalg.Vector;
-import org.apache.spark.mllib.linalg.Vectors;
-import org.apache.spark.mllib.stat.Statistics;
-// $example off$
-
-public class JavaCorrelationsExample {
-  public static void main(String[] args) {
-
-    SparkConf conf = new SparkConf().setAppName("JavaCorrelationsExample");
-    JavaSparkContext jsc = new JavaSparkContext(conf);
-
-    // $example on$
-    JavaDoubleRDD seriesX = jsc.parallelizeDoubles(
-      Arrays.asList(1.0, 2.0, 3.0, 3.0, 5.0));  // a series
-
-    // must have the same number of partitions and cardinality as seriesX
-    JavaDoubleRDD seriesY = jsc.parallelizeDoubles(
-      Arrays.asList(11.0, 22.0, 33.0, 33.0, 555.0));
-
-    // compute the correlation using Pearson's method. Enter "spearman" for Spearman's method.
-    // If a method is not specified, Pearson's method will be used by default.
-    Double correlation = Statistics.corr(seriesX.srdd(), seriesY.srdd(), "pearson");
-    System.out.println("Correlation is: " + correlation);
-
-    // note that each Vector is a row and not a column
-    JavaRDD<Vector> data = jsc.parallelize(
-      Arrays.asList(
-        Vectors.dense(1.0, 10.0, 100.0),
-        Vectors.dense(2.0, 20.0, 200.0),
-        Vectors.dense(5.0, 33.0, 366.0)
-      )
-    );
-
-    // calculate the correlation matrix using Pearson's method. Use "spearman" for Spearman's method.
-    // If a method is not specified, Pearson's method will be used by default.
-    Matrix correlMatrix = Statistics.corr(data.rdd(), "pearson");
-    System.out.println(correlMatrix.toString());
-    // $example off$
-
-    jsc.stop();
-  }
-}
-
diff --git a/examples/src/main/java/org/apache/spark/examples/mllib/JavaHypothesisTestingExample.java b/examples/src/main/java/org/apache/spark/examples/mllib/JavaHypothesisTestingExample.java
deleted file mode 100644
index b48b95ff1d..0000000000
--- a/examples/src/main/java/org/apache/spark/examples/mllib/JavaHypothesisTestingExample.java
+++ /dev/null
@@ -1,84 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.examples.mllib;
-
-import org.apache.spark.SparkConf;
-import org.apache.spark.api.java.JavaSparkContext;
-
-// $example on$
-import java.util.Arrays;
-
-import org.apache.spark.api.java.JavaRDD;
-import org.apache.spark.mllib.linalg.Matrices;
-import org.apache.spark.mllib.linalg.Matrix;
-import org.apache.spark.mllib.linalg.Vector;
-import org.apache.spark.mllib.linalg.Vectors;
-import org.apache.spark.mllib.regression.LabeledPoint;
-import org.apache.spark.mllib.stat.Statistics;
-import org.apache.spark.mllib.stat.test.ChiSqTestResult;
-// $example off$
-
-public class JavaHypothesisTestingExample {
-  public static void main(String[] args) {
-
-    SparkConf conf = new SparkConf().setAppName("JavaHypothesisTestingExample");
-    JavaSparkContext jsc = new JavaSparkContext(conf);
-
-    // $example on$
-    // a vector composed of the frequencies of events
-    Vector vec = Vectors.dense(0.1, 0.15, 0.2, 0.3, 0.25);
-
-    // compute the goodness of fit. If a second vector to test against is not supplied
-    // as a parameter, the test runs against a uniform distribution.
-    ChiSqTestResult goodnessOfFitTestResult = Statistics.chiSqTest(vec);
-    // summary of the test including the p-value, degrees of freedom, test statistic,
-    // the method used, and the null hypothesis.
-    System.out.println(goodnessOfFitTestResult + "\n");
-
-    // Create a contingency matrix ((1.0, 2.0), (3.0, 4.0), (5.0, 6.0))
-    Matrix mat = Matrices.dense(3, 2, new double[]{1.0, 3.0, 5.0, 2.0, 4.0, 6.0});
-
-    // conduct Pearson's independence test on the input contingency matrix
-    ChiSqTestResult independenceTestResult = Statistics.chiSqTest(mat);
-    // summary of the test including the p-value, degrees of freedom...
-    System.out.println(independenceTestResult + "\n");
-
-    // an RDD of labeled points
-    JavaRDD<LabeledPoint> obs = jsc.parallelize(
-      Arrays.asList(
-        new LabeledPoint(1.0, Vectors.dense(1.0, 0.0, 3.0)),
-        new LabeledPoint(1.0, Vectors.dense(1.0, 2.0, 0.0)),
-        new LabeledPoint(-1.0, Vectors.dense(-1.0, 0.0, -0.5))
-      )
-    );
-
-    // The contingency table is constructed from the raw (feature, label) pairs and used to conduct
-    // the independence test. Returns an array containing the ChiSquaredTestResult for every feature
-    // against the label.
-    ChiSqTestResult[] featureTestResults = Statistics.chiSqTest(obs.rdd());
-    int i = 1;
-    for (ChiSqTestResult result : featureTestResults) {
-      System.out.println("Column " + i + ":");
-      System.out.println(result + "\n");  // summary of the test
-      i++;
-    }
-    // $example off$
-
-    jsc.stop();
-  }
-}
diff --git a/examples/src/main/java/org/apache/spark/examples/mllib/JavaHypothesisTestingKolmogorovSmirnovTestExample.java b/examples/src/main/java/org/apache/spark/examples/mllib/JavaHypothesisTestingKolmogorovSmirnovTestExample.java
deleted file mode 100644
index fe611c9ae6..0000000000
--- a/examples/src/main/java/org/apache/spark/examples/mllib/JavaHypothesisTestingKolmogorovSmirnovTestExample.java
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.examples.mllib;
-
-import org.apache.spark.SparkConf;
-import org.apache.spark.api.java.JavaSparkContext;
-// $example on$
-import java.util.Arrays;
-
-import org.apache.spark.api.java.JavaDoubleRDD;
-import org.apache.spark.mllib.stat.Statistics;
-import org.apache.spark.mllib.stat.test.KolmogorovSmirnovTestResult;
-// $example off$
-
-public class JavaHypothesisTestingKolmogorovSmirnovTestExample {
-  public static void main(String[] args) {
-
-    SparkConf conf =
-      new SparkConf().setAppName("JavaHypothesisTestingKolmogorovSmirnovTestExample");
-    JavaSparkContext jsc = new JavaSparkContext(conf);
-
-    // $example on$
-    JavaDoubleRDD data = jsc.parallelizeDoubles(Arrays.asList(0.1, 0.15, 0.2, 0.3, 0.25));
-    KolmogorovSmirnovTestResult testResult =
-      Statistics.kolmogorovSmirnovTest(data, "norm", 0.0, 1.0);
-    // summary of the test including the p-value, test statistic, and null hypothesis
-    // if our p-value indicates significance, we can reject the null hypothesis
-    System.out.println(testResult);
-    // $example off$
-
-    jsc.stop();
-  }
-}
-
diff --git a/examples/src/main/java/org/apache/spark/examples/mllib/JavaKernelDensityEstimationExample.java b/examples/src/main/java/org/apache/spark/examples/mllib/JavaKernelDensityEstimationExample.java
deleted file mode 100644
index 41de0d90ec..0000000000
--- a/examples/src/main/java/org/apache/spark/examples/mllib/JavaKernelDensityEstimationExample.java
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.examples.mllib;
-
-import org.apache.spark.SparkConf;
-import org.apache.spark.api.java.JavaSparkContext;
-// $example on$
-import java.util.Arrays;
-
-import org.apache.spark.api.java.JavaRDD;
-import org.apache.spark.mllib.stat.KernelDensity;
-// $example off$
-
-public class JavaKernelDensityEstimationExample {
-  public static void main(String[] args) {
-
-    SparkConf conf = new SparkConf().setAppName("JavaKernelDensityEstimationExample");
-    JavaSparkContext jsc = new JavaSparkContext(conf);
-
-    // $example on$
-    // an RDD of sample data
-    JavaRDD<Double> data = jsc.parallelize(
-      Arrays.asList(1.0, 1.0, 1.0, 2.0, 3.0, 4.0, 5.0, 5.0, 6.0, 7.0, 8.0, 9.0, 9.0));
-
-    // Construct the density estimator with the sample data
-    // and a standard deviation for the Gaussian kernels
-    KernelDensity kd = new KernelDensity().setSample(data).setBandwidth(3.0);
-
-    // Find density estimates for the given values
-    double[] densities = kd.estimate(new double[]{-1.0, 2.0, 5.0});
-
-    System.out.println(Arrays.toString(densities));
-    // $example off$
-
-    jsc.stop();
-  }
-}
-
diff --git a/examples/src/main/java/org/apache/spark/examples/mllib/JavaStratifiedSamplingExample.java b/examples/src/main/java/org/apache/spark/examples/mllib/JavaStratifiedSamplingExample.java
deleted file mode 100644
index f5a451019b..0000000000
--- a/examples/src/main/java/org/apache/spark/examples/mllib/JavaStratifiedSamplingExample.java
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.examples.mllib;
-
-import com.google.common.collect.ImmutableMap;
-import org.apache.spark.SparkConf;
-import org.apache.spark.api.java.JavaSparkContext;
-
-// $example on$
-import java.util.*;
-
-import scala.Tuple2;
-
-import org.apache.spark.api.java.JavaPairRDD;
-import org.apache.spark.api.java.function.VoidFunction;
-// $example off$
-
-public class JavaStratifiedSamplingExample {
-  public static void main(String[] args) {
-
-    SparkConf conf = new SparkConf().setAppName("JavaStratifiedSamplingExample");
-    JavaSparkContext jsc = new JavaSparkContext(conf);
-
-    // $example on$
-    List<Tuple2<Integer, Character>> list = new ArrayList<Tuple2<Integer, Character>>(
-      Arrays.<Tuple2<Integer, Character>>asList(
-        new Tuple2(1, 'a'),
-        new Tuple2(1, 'b'),
-        new Tuple2(2, 'c'),
-        new Tuple2(2, 'd'),
-        new Tuple2(2, 'e'),
-        new Tuple2(3, 'f')
-      )
-    );
-
-    JavaPairRDD<Integer, Character> data = jsc.parallelizePairs(list);
-
-    // specify the exact fraction desired from each key Map<K, Object>
-    ImmutableMap<Integer, Object> fractions =
-      ImmutableMap.of(1, (Object)0.1, 2, (Object) 0.6, 3, (Object) 0.3);
-
-    // Get an approximate sample from each stratum
-    JavaPairRDD<Integer, Character> approxSample = data.sampleByKey(false, fractions);
-    // Get an exact sample from each stratum
-    JavaPairRDD<Integer, Character> exactSample = data.sampleByKeyExact(false, fractions);
-    // $example off$
-
-    System.out.println("approxSample size is " + approxSample.collect().size());
-    for (Tuple2<Integer, Character> t : approxSample.collect()) {
-      System.out.println(t._1() + " " + t._2());
-    }
-
-    System.out.println("exactSample size is " + exactSample.collect().size());
-    for (Tuple2<Integer, Character> t : exactSample.collect()) {
-      System.out.println(t._1() + " " + t._2());
-    }
-
-    jsc.stop();
-  }
-}
diff --git a/examples/src/main/java/org/apache/spark/examples/mllib/JavaSummaryStatisticsExample.java b/examples/src/main/java/org/apache/spark/examples/mllib/JavaSummaryStatisticsExample.java
deleted file mode 100644
index 278706bc8f..0000000000
--- a/examples/src/main/java/org/apache/spark/examples/mllib/JavaSummaryStatisticsExample.java
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.examples.mllib;
-
-import org.apache.spark.SparkConf;
-import org.apache.spark.api.java.JavaSparkContext;
-// $example on$
-import java.util.Arrays;
-
-import org.apache.spark.api.java.JavaRDD;
-import org.apache.spark.mllib.linalg.Vector;
-import org.apache.spark.mllib.linalg.Vectors;
-import org.apache.spark.mllib.stat.MultivariateStatisticalSummary;
-import org.apache.spark.mllib.stat.Statistics;
-// $example off$
-
-public class JavaSummaryStatisticsExample {
-  public static void main(String[] args) {
-
-    SparkConf conf = new SparkConf().setAppName("JavaSummaryStatisticsExample");
-    JavaSparkContext jsc = new JavaSparkContext(conf);
-
-    // $example on$
-    JavaRDD<Vector> mat = jsc.parallelize(
-      Arrays.asList(
-        Vectors.dense(1.0, 10.0, 100.0),
-        Vectors.dense(2.0, 20.0, 200.0),
-        Vectors.dense(3.0, 30.0, 300.0)
-      )
-    ); // an RDD of Vectors
-
-    // Compute column summary statistics.
-    MultivariateStatisticalSummary summary = Statistics.colStats(mat.rdd());
-    System.out.println(summary.mean());  // a dense vector containing the mean value for each column
-    System.out.println(summary.variance());  // column-wise variance
-    System.out.println(summary.numNonzeros());  // number of nonzeros in each column
-    // $example off$
-
-    jsc.stop();
-  }
-}
author	Xiangrui Meng <meng@databricks.com>	2016-03-21 17:42:30 -0700
committer	Xiangrui Meng <meng@databricks.com>	2016-03-21 17:42:30 -0700
commit	43ef1e52bfe359f0f051a607a8dc77cc3b269508 (patch)
tree	8b03ce50a036b684c8cb5fe0c92dc2dfa350ab90 /examples/src/main/java
parent	3f49e0766f3a369a44e14632de68c657773b7a27 (diff)
download	spark-43ef1e52bfe359f0f051a607a8dc77cc3b269508.tar.gz spark-43ef1e52bfe359f0f051a607a8dc77cc3b269508.tar.bz2 spark-43ef1e52bfe359f0f051a607a8dc77cc3b269508.zip