diff options
Diffstat (limited to 'examples/src/main/java')
6 files changed, 0 insertions, 387 deletions
diff --git a/examples/src/main/java/org/apache/spark/examples/mllib/JavaCorrelationsExample.java b/examples/src/main/java/org/apache/spark/examples/mllib/JavaCorrelationsExample.java deleted file mode 100644 index fd19b43504..0000000000 --- a/examples/src/main/java/org/apache/spark/examples/mllib/JavaCorrelationsExample.java +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.examples.mllib; - -import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaSparkContext; -// $example on$ -import java.util.Arrays; - -import org.apache.spark.api.java.JavaDoubleRDD; -import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.mllib.linalg.Matrix; -import org.apache.spark.mllib.linalg.Vector; -import org.apache.spark.mllib.linalg.Vectors; -import org.apache.spark.mllib.stat.Statistics; -// $example off$ - -public class JavaCorrelationsExample { - public static void main(String[] args) { - - SparkConf conf = new SparkConf().setAppName("JavaCorrelationsExample"); - JavaSparkContext jsc = new JavaSparkContext(conf); - - // $example on$ - JavaDoubleRDD seriesX = jsc.parallelizeDoubles( - Arrays.asList(1.0, 2.0, 3.0, 3.0, 5.0)); // a series - - // must have the same number of partitions and cardinality as seriesX - JavaDoubleRDD seriesY = jsc.parallelizeDoubles( - Arrays.asList(11.0, 22.0, 33.0, 33.0, 555.0)); - - // compute the correlation using Pearson's method. Enter "spearman" for Spearman's method. - // If a method is not specified, Pearson's method will be used by default. - Double correlation = Statistics.corr(seriesX.srdd(), seriesY.srdd(), "pearson"); - System.out.println("Correlation is: " + correlation); - - // note that each Vector is a row and not a column - JavaRDD<Vector> data = jsc.parallelize( - Arrays.asList( - Vectors.dense(1.0, 10.0, 100.0), - Vectors.dense(2.0, 20.0, 200.0), - Vectors.dense(5.0, 33.0, 366.0) - ) - ); - - // calculate the correlation matrix using Pearson's method. Use "spearman" for Spearman's method. - // If a method is not specified, Pearson's method will be used by default. - Matrix correlMatrix = Statistics.corr(data.rdd(), "pearson"); - System.out.println(correlMatrix.toString()); - // $example off$ - - jsc.stop(); - } -} - diff --git a/examples/src/main/java/org/apache/spark/examples/mllib/JavaHypothesisTestingExample.java b/examples/src/main/java/org/apache/spark/examples/mllib/JavaHypothesisTestingExample.java deleted file mode 100644 index b48b95ff1d..0000000000 --- a/examples/src/main/java/org/apache/spark/examples/mllib/JavaHypothesisTestingExample.java +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.examples.mllib; - -import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaSparkContext; - -// $example on$ -import java.util.Arrays; - -import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.mllib.linalg.Matrices; -import org.apache.spark.mllib.linalg.Matrix; -import org.apache.spark.mllib.linalg.Vector; -import org.apache.spark.mllib.linalg.Vectors; -import org.apache.spark.mllib.regression.LabeledPoint; -import org.apache.spark.mllib.stat.Statistics; -import org.apache.spark.mllib.stat.test.ChiSqTestResult; -// $example off$ - -public class JavaHypothesisTestingExample { - public static void main(String[] args) { - - SparkConf conf = new SparkConf().setAppName("JavaHypothesisTestingExample"); - JavaSparkContext jsc = new JavaSparkContext(conf); - - // $example on$ - // a vector composed of the frequencies of events - Vector vec = Vectors.dense(0.1, 0.15, 0.2, 0.3, 0.25); - - // compute the goodness of fit. If a second vector to test against is not supplied - // as a parameter, the test runs against a uniform distribution. - ChiSqTestResult goodnessOfFitTestResult = Statistics.chiSqTest(vec); - // summary of the test including the p-value, degrees of freedom, test statistic, - // the method used, and the null hypothesis. - System.out.println(goodnessOfFitTestResult + "\n"); - - // Create a contingency matrix ((1.0, 2.0), (3.0, 4.0), (5.0, 6.0)) - Matrix mat = Matrices.dense(3, 2, new double[]{1.0, 3.0, 5.0, 2.0, 4.0, 6.0}); - - // conduct Pearson's independence test on the input contingency matrix - ChiSqTestResult independenceTestResult = Statistics.chiSqTest(mat); - // summary of the test including the p-value, degrees of freedom... - System.out.println(independenceTestResult + "\n"); - - // an RDD of labeled points - JavaRDD<LabeledPoint> obs = jsc.parallelize( - Arrays.asList( - new LabeledPoint(1.0, Vectors.dense(1.0, 0.0, 3.0)), - new LabeledPoint(1.0, Vectors.dense(1.0, 2.0, 0.0)), - new LabeledPoint(-1.0, Vectors.dense(-1.0, 0.0, -0.5)) - ) - ); - - // The contingency table is constructed from the raw (feature, label) pairs and used to conduct - // the independence test. Returns an array containing the ChiSquaredTestResult for every feature - // against the label. - ChiSqTestResult[] featureTestResults = Statistics.chiSqTest(obs.rdd()); - int i = 1; - for (ChiSqTestResult result : featureTestResults) { - System.out.println("Column " + i + ":"); - System.out.println(result + "\n"); // summary of the test - i++; - } - // $example off$ - - jsc.stop(); - } -} diff --git a/examples/src/main/java/org/apache/spark/examples/mllib/JavaHypothesisTestingKolmogorovSmirnovTestExample.java b/examples/src/main/java/org/apache/spark/examples/mllib/JavaHypothesisTestingKolmogorovSmirnovTestExample.java deleted file mode 100644 index fe611c9ae6..0000000000 --- a/examples/src/main/java/org/apache/spark/examples/mllib/JavaHypothesisTestingKolmogorovSmirnovTestExample.java +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.examples.mllib; - -import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaSparkContext; -// $example on$ -import java.util.Arrays; - -import org.apache.spark.api.java.JavaDoubleRDD; -import org.apache.spark.mllib.stat.Statistics; -import org.apache.spark.mllib.stat.test.KolmogorovSmirnovTestResult; -// $example off$ - -public class JavaHypothesisTestingKolmogorovSmirnovTestExample { - public static void main(String[] args) { - - SparkConf conf = - new SparkConf().setAppName("JavaHypothesisTestingKolmogorovSmirnovTestExample"); - JavaSparkContext jsc = new JavaSparkContext(conf); - - // $example on$ - JavaDoubleRDD data = jsc.parallelizeDoubles(Arrays.asList(0.1, 0.15, 0.2, 0.3, 0.25)); - KolmogorovSmirnovTestResult testResult = - Statistics.kolmogorovSmirnovTest(data, "norm", 0.0, 1.0); - // summary of the test including the p-value, test statistic, and null hypothesis - // if our p-value indicates significance, we can reject the null hypothesis - System.out.println(testResult); - // $example off$ - - jsc.stop(); - } -} - diff --git a/examples/src/main/java/org/apache/spark/examples/mllib/JavaKernelDensityEstimationExample.java b/examples/src/main/java/org/apache/spark/examples/mllib/JavaKernelDensityEstimationExample.java deleted file mode 100644 index 41de0d90ec..0000000000 --- a/examples/src/main/java/org/apache/spark/examples/mllib/JavaKernelDensityEstimationExample.java +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.examples.mllib; - -import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaSparkContext; -// $example on$ -import java.util.Arrays; - -import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.mllib.stat.KernelDensity; -// $example off$ - -public class JavaKernelDensityEstimationExample { - public static void main(String[] args) { - - SparkConf conf = new SparkConf().setAppName("JavaKernelDensityEstimationExample"); - JavaSparkContext jsc = new JavaSparkContext(conf); - - // $example on$ - // an RDD of sample data - JavaRDD<Double> data = jsc.parallelize( - Arrays.asList(1.0, 1.0, 1.0, 2.0, 3.0, 4.0, 5.0, 5.0, 6.0, 7.0, 8.0, 9.0, 9.0)); - - // Construct the density estimator with the sample data - // and a standard deviation for the Gaussian kernels - KernelDensity kd = new KernelDensity().setSample(data).setBandwidth(3.0); - - // Find density estimates for the given values - double[] densities = kd.estimate(new double[]{-1.0, 2.0, 5.0}); - - System.out.println(Arrays.toString(densities)); - // $example off$ - - jsc.stop(); - } -} - diff --git a/examples/src/main/java/org/apache/spark/examples/mllib/JavaStratifiedSamplingExample.java b/examples/src/main/java/org/apache/spark/examples/mllib/JavaStratifiedSamplingExample.java deleted file mode 100644 index f5a451019b..0000000000 --- a/examples/src/main/java/org/apache/spark/examples/mllib/JavaStratifiedSamplingExample.java +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.examples.mllib; - -import com.google.common.collect.ImmutableMap; -import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaSparkContext; - -// $example on$ -import java.util.*; - -import scala.Tuple2; - -import org.apache.spark.api.java.JavaPairRDD; -import org.apache.spark.api.java.function.VoidFunction; -// $example off$ - -public class JavaStratifiedSamplingExample { - public static void main(String[] args) { - - SparkConf conf = new SparkConf().setAppName("JavaStratifiedSamplingExample"); - JavaSparkContext jsc = new JavaSparkContext(conf); - - // $example on$ - List<Tuple2<Integer, Character>> list = new ArrayList<Tuple2<Integer, Character>>( - Arrays.<Tuple2<Integer, Character>>asList( - new Tuple2(1, 'a'), - new Tuple2(1, 'b'), - new Tuple2(2, 'c'), - new Tuple2(2, 'd'), - new Tuple2(2, 'e'), - new Tuple2(3, 'f') - ) - ); - - JavaPairRDD<Integer, Character> data = jsc.parallelizePairs(list); - - // specify the exact fraction desired from each key Map<K, Object> - ImmutableMap<Integer, Object> fractions = - ImmutableMap.of(1, (Object)0.1, 2, (Object) 0.6, 3, (Object) 0.3); - - // Get an approximate sample from each stratum - JavaPairRDD<Integer, Character> approxSample = data.sampleByKey(false, fractions); - // Get an exact sample from each stratum - JavaPairRDD<Integer, Character> exactSample = data.sampleByKeyExact(false, fractions); - // $example off$ - - System.out.println("approxSample size is " + approxSample.collect().size()); - for (Tuple2<Integer, Character> t : approxSample.collect()) { - System.out.println(t._1() + " " + t._2()); - } - - System.out.println("exactSample size is " + exactSample.collect().size()); - for (Tuple2<Integer, Character> t : exactSample.collect()) { - System.out.println(t._1() + " " + t._2()); - } - - jsc.stop(); - } -} diff --git a/examples/src/main/java/org/apache/spark/examples/mllib/JavaSummaryStatisticsExample.java b/examples/src/main/java/org/apache/spark/examples/mllib/JavaSummaryStatisticsExample.java deleted file mode 100644 index 278706bc8f..0000000000 --- a/examples/src/main/java/org/apache/spark/examples/mllib/JavaSummaryStatisticsExample.java +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.examples.mllib; - -import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaSparkContext; -// $example on$ -import java.util.Arrays; - -import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.mllib.linalg.Vector; -import org.apache.spark.mllib.linalg.Vectors; -import org.apache.spark.mllib.stat.MultivariateStatisticalSummary; -import org.apache.spark.mllib.stat.Statistics; -// $example off$ - -public class JavaSummaryStatisticsExample { - public static void main(String[] args) { - - SparkConf conf = new SparkConf().setAppName("JavaSummaryStatisticsExample"); - JavaSparkContext jsc = new JavaSparkContext(conf); - - // $example on$ - JavaRDD<Vector> mat = jsc.parallelize( - Arrays.asList( - Vectors.dense(1.0, 10.0, 100.0), - Vectors.dense(2.0, 20.0, 200.0), - Vectors.dense(3.0, 30.0, 300.0) - ) - ); // an RDD of Vectors - - // Compute column summary statistics. - MultivariateStatisticalSummary summary = Statistics.colStats(mat.rdd()); - System.out.println(summary.mean()); // a dense vector containing the mean value for each column - System.out.println(summary.variance()); // column-wise variance - System.out.println(summary.numNonzeros()); // number of nonzeros in each column - // $example off$ - - jsc.stop(); - } -} |