about | summary | refs | log | tree | commit | diff
path: root/examples
diff options
context:
space:
mode:
author Xiangrui Meng <meng@databricks.com> 2016-03-21 17:42:30 -0700
committer Xiangrui Meng <meng@databricks.com> 2016-03-21 17:42:30 -0700
commit 43ef1e52bfe359f0f051a607a8dc77cc3b269508 (patch)
tree 8b03ce50a036b684c8cb5fe0c92dc2dfa350ab90 /examples
parent 3f49e0766f3a369a44e14632de68c657773b7a27 (diff)
download spark-43ef1e52bfe359f0f051a607a8dc77cc3b269508.tar.gz
spark-43ef1e52bfe359f0f051a607a8dc77cc3b269508.tar.bz2
spark-43ef1e52bfe359f0f051a607a8dc77cc3b269508.zip
Revert "[SPARK-13019][DOCS] Replace example code in mllib-statistics.md using include_example"
This reverts commit 1af8de200c4d3357bcb09e7bbc6deece00e885f2.
Diffstat (limited to 'examples')
-rw-r--r-- examples/src/main/java/org/apache/spark/examples/mllib/JavaCorrelationsExample.java 70
-rw-r--r-- examples/src/main/java/org/apache/spark/examples/mllib/JavaHypothesisTestingExample.java 84
-rw-r--r-- examples/src/main/java/org/apache/spark/examples/mllib/JavaHypothesisTestingKolmogorovSmirnovTestExample.java 49
-rw-r--r-- examples/src/main/java/org/apache/spark/examples/mllib/JavaKernelDensityEstimationExample.java 53
-rw-r--r-- examples/src/main/java/org/apache/spark/examples/mllib/JavaStratifiedSamplingExample.java 75
-rw-r--r-- examples/src/main/java/org/apache/spark/examples/mllib/JavaSummaryStatisticsExample.java 56
-rw-r--r-- examples/src/main/python/mllib/correlations_example.py 48
-rw-r--r-- examples/src/main/python/mllib/hypothesis_testing_example.py 65
-rw-r--r-- examples/src/main/python/mllib/hypothesis_testing_kolmogorov_smirnov_test_example.py 40
-rw-r--r-- examples/src/main/python/mllib/kernel_density_estimation_example.py 44
-rw-r--r-- examples/src/main/python/mllib/stratified_sampling_example.py 38
-rw-r--r-- examples/src/main/python/mllib/summary_statistics_example.py 42
-rw-r--r-- examples/src/main/scala/org/apache/spark/examples/mllib/CorrelationsExample.scala 62
-rw-r--r-- examples/src/main/scala/org/apache/spark/examples/mllib/HypothesisTestingExample.scala 80
-rw-r--r-- examples/src/main/scala/org/apache/spark/examples/mllib/HypothesisTestingKolmogorovSmirnovTestExample.scala 54
-rw-r--r-- examples/src/main/scala/org/apache/spark/examples/mllib/KernelDensityEstimationExample.scala 54
-rw-r--r-- examples/src/main/scala/org/apache/spark/examples/mllib/StratifiedSamplingExample.scala 53
-rw-r--r-- examples/src/main/scala/org/apache/spark/examples/mllib/SummaryStatisticsExample.scala 53
18 files changed, 0 insertions, 1020 deletions
diff --git a/examples/src/main/java/org/apache/spark/examples/mllib/JavaCorrelationsExample.java b/examples/src/main/java/org/apache/spark/examples/mllib/JavaCorrelationsExample.java
deleted file mode 100644
index fd19b43504..0000000000
--- a/examples/src/main/java/org/apache/spark/examples/mllib/JavaCorrelationsExample.java
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.examples.mllib;
-
-import org.apache.spark.SparkConf;
-import org.apache.spark.api.java.JavaSparkContext;
-// $example on$
-import java.util.Arrays;
-
-import org.apache.spark.api.java.JavaDoubleRDD;
-import org.apache.spark.api.java.JavaRDD;
-import org.apache.spark.mllib.linalg.Matrix;
-import org.apache.spark.mllib.linalg.Vector;
-import org.apache.spark.mllib.linalg.Vectors;
-import org.apache.spark.mllib.stat.Statistics;
-// $example off$
-
-public class JavaCorrelationsExample {
- public static void main(String[] args) {
-
- SparkConf conf = new SparkConf().setAppName("JavaCorrelationsExample");
- JavaSparkContext jsc = new JavaSparkContext(conf);
-
- // $example on$
- JavaDoubleRDD seriesX = jsc.parallelizeDoubles(
- Arrays.asList(1.0, 2.0, 3.0, 3.0, 5.0)); // a series
-
- // must have the same number of partitions and cardinality as seriesX
- JavaDoubleRDD seriesY = jsc.parallelizeDoubles(
- Arrays.asList(11.0, 22.0, 33.0, 33.0, 555.0));
-
- // compute the correlation using Pearson's method. Enter "spearman" for Spearman's method.
- // If a method is not specified, Pearson's method will be used by default.
- Double correlation = Statistics.corr(seriesX.srdd(), seriesY.srdd(), "pearson");
- System.out.println("Correlation is: " + correlation);
-
- // note that each Vector is a row and not a column
- JavaRDD<Vector> data = jsc.parallelize(
- Arrays.asList(
- Vectors.dense(1.0, 10.0, 100.0),
- Vectors.dense(2.0, 20.0, 200.0),
- Vectors.dense(5.0, 33.0, 366.0)
- )
- );
-
- // calculate the correlation matrix using Pearson's method. Use "spearman" for Spearman's method.
- // If a method is not specified, Pearson's method will be used by default.
- Matrix correlMatrix = Statistics.corr(data.rdd(), "pearson");
- System.out.println(correlMatrix.toString());
- // $example off$
-
- jsc.stop();
- }
-}
-
diff --git a/examples/src/main/java/org/apache/spark/examples/mllib/JavaHypothesisTestingExample.java b/examples/src/main/java/org/apache/spark/examples/mllib/JavaHypothesisTestingExample.java
deleted file mode 100644
index b48b95ff1d..0000000000
--- a/examples/src/main/java/org/apache/spark/examples/mllib/JavaHypothesisTestingExample.java
+++ /dev/null
@@ -1,84 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.examples.mllib;
-
-import org.apache.spark.SparkConf;
-import org.apache.spark.api.java.JavaSparkContext;
-
-// $example on$
-import java.util.Arrays;
-
-import org.apache.spark.api.java.JavaRDD;
-import org.apache.spark.mllib.linalg.Matrices;
-import org.apache.spark.mllib.linalg.Matrix;
-import org.apache.spark.mllib.linalg.Vector;
-import org.apache.spark.mllib.linalg.Vectors;
-import org.apache.spark.mllib.regression.LabeledPoint;
-import org.apache.spark.mllib.stat.Statistics;
-import org.apache.spark.mllib.stat.test.ChiSqTestResult;
-// $example off$
-
-public class JavaHypothesisTestingExample {
- public static void main(String[] args) {
-
- SparkConf conf = new SparkConf().setAppName("JavaHypothesisTestingExample");
- JavaSparkContext jsc = new JavaSparkContext(conf);
-
- // $example on$
- // a vector composed of the frequencies of events
- Vector vec = Vectors.dense(0.1, 0.15, 0.2, 0.3, 0.25);
-
- // compute the goodness of fit. If a second vector to test against is not supplied
- // as a parameter, the test runs against a uniform distribution.
- ChiSqTestResult goodnessOfFitTestResult = Statistics.chiSqTest(vec);
- // summary of the test including the p-value, degrees of freedom, test statistic,
- // the method used, and the null hypothesis.
- System.out.println(goodnessOfFitTestResult + "\n");
-
- // Create a contingency matrix ((1.0, 2.0), (3.0, 4.0), (5.0, 6.0))
- Matrix mat = Matrices.dense(3, 2, new double[]{1.0, 3.0, 5.0, 2.0, 4.0, 6.0});
-
- // conduct Pearson's independence test on the input contingency matrix
- ChiSqTestResult independenceTestResult = Statistics.chiSqTest(mat);
- // summary of the test including the p-value, degrees of freedom...
- System.out.println(independenceTestResult + "\n");
-
- // an RDD of labeled points
- JavaRDD<LabeledPoint> obs = jsc.parallelize(
- Arrays.asList(
- new LabeledPoint(1.0, Vectors.dense(1.0, 0.0, 3.0)),
- new LabeledPoint(1.0, Vectors.dense(1.0, 2.0, 0.0)),
- new LabeledPoint(-1.0, Vectors.dense(-1.0, 0.0, -0.5))
- )
- );
-
- // The contingency table is constructed from the raw (feature, label) pairs and used to conduct
- // the independence test. Returns an array containing the ChiSquaredTestResult for every feature
- // against the label.
- ChiSqTestResult[] featureTestResults = Statistics.chiSqTest(obs.rdd());
- int i = 1;
- for (ChiSqTestResult result : featureTestResults) {
- System.out.println("Column " + i + ":");
- System.out.println(result + "\n"); // summary of the test
- i++;
- }
- // $example off$
-
- jsc.stop();
- }
-}
diff --git a/examples/src/main/java/org/apache/spark/examples/mllib/JavaHypothesisTestingKolmogorovSmirnovTestExample.java b/examples/src/main/java/org/apache/spark/examples/mllib/JavaHypothesisTestingKolmogorovSmirnovTestExample.java
deleted file mode 100644
index fe611c9ae6..0000000000
--- a/examples/src/main/java/org/apache/spark/examples/mllib/JavaHypothesisTestingKolmogorovSmirnovTestExample.java
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.examples.mllib;
-
-import org.apache.spark.SparkConf;
-import org.apache.spark.api.java.JavaSparkContext;
-// $example on$
-import java.util.Arrays;
-
-import org.apache.spark.api.java.JavaDoubleRDD;
-import org.apache.spark.mllib.stat.Statistics;
-import org.apache.spark.mllib.stat.test.KolmogorovSmirnovTestResult;
-// $example off$
-
-public class JavaHypothesisTestingKolmogorovSmirnovTestExample {
- public static void main(String[] args) {
-
- SparkConf conf =
- new SparkConf().setAppName("JavaHypothesisTestingKolmogorovSmirnovTestExample");
- JavaSparkContext jsc = new JavaSparkContext(conf);
-
- // $example on$
- JavaDoubleRDD data = jsc.parallelizeDoubles(Arrays.asList(0.1, 0.15, 0.2, 0.3, 0.25));
- KolmogorovSmirnovTestResult testResult =
- Statistics.kolmogorovSmirnovTest(data, "norm", 0.0, 1.0);
- // summary of the test including the p-value, test statistic, and null hypothesis
- // if our p-value indicates significance, we can reject the null hypothesis
- System.out.println(testResult);
- // $example off$
-
- jsc.stop();
- }
-}
-
diff --git a/examples/src/main/java/org/apache/spark/examples/mllib/JavaKernelDensityEstimationExample.java b/examples/src/main/java/org/apache/spark/examples/mllib/JavaKernelDensityEstimationExample.java
deleted file mode 100644
index 41de0d90ec..0000000000
--- a/examples/src/main/java/org/apache/spark/examples/mllib/JavaKernelDensityEstimationExample.java
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.examples.mllib;
-
-import org.apache.spark.SparkConf;
-import org.apache.spark.api.java.JavaSparkContext;
-// $example on$
-import java.util.Arrays;
-
-import org.apache.spark.api.java.JavaRDD;
-import org.apache.spark.mllib.stat.KernelDensity;
-// $example off$
-
-public class JavaKernelDensityEstimationExample {
- public static void main(String[] args) {
-
- SparkConf conf = new SparkConf().setAppName("JavaKernelDensityEstimationExample");
- JavaSparkContext jsc = new JavaSparkContext(conf);
-
- // $example on$
- // an RDD of sample data
- JavaRDD<Double> data = jsc.parallelize(
- Arrays.asList(1.0, 1.0, 1.0, 2.0, 3.0, 4.0, 5.0, 5.0, 6.0, 7.0, 8.0, 9.0, 9.0));
-
- // Construct the density estimator with the sample data
- // and a standard deviation for the Gaussian kernels
- KernelDensity kd = new KernelDensity().setSample(data).setBandwidth(3.0);
-
- // Find density estimates for the given values
- double[] densities = kd.estimate(new double[]{-1.0, 2.0, 5.0});
-
- System.out.println(Arrays.toString(densities));
- // $example off$
-
- jsc.stop();
- }
-}
-
diff --git a/examples/src/main/java/org/apache/spark/examples/mllib/JavaStratifiedSamplingExample.java b/examples/src/main/java/org/apache/spark/examples/mllib/JavaStratifiedSamplingExample.java
deleted file mode 100644
index f5a451019b..0000000000
--- a/examples/src/main/java/org/apache/spark/examples/mllib/JavaStratifiedSamplingExample.java
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.examples.mllib;
-
-import com.google.common.collect.ImmutableMap;
-import org.apache.spark.SparkConf;
-import org.apache.spark.api.java.JavaSparkContext;
-
-// $example on$
-import java.util.*;
-
-import scala.Tuple2;
-
-import org.apache.spark.api.java.JavaPairRDD;
-import org.apache.spark.api.java.function.VoidFunction;
-// $example off$
-
-public class JavaStratifiedSamplingExample {
- public static void main(String[] args) {
-
- SparkConf conf = new SparkConf().setAppName("JavaStratifiedSamplingExample");
- JavaSparkContext jsc = new JavaSparkContext(conf);
-
- // $example on$
- List<Tuple2<Integer, Character>> list = new ArrayList<Tuple2<Integer, Character>>(
- Arrays.<Tuple2<Integer, Character>>asList(
- new Tuple2(1, 'a'),
- new Tuple2(1, 'b'),
- new Tuple2(2, 'c'),
- new Tuple2(2, 'd'),
- new Tuple2(2, 'e'),
- new Tuple2(3, 'f')
- )
- );
-
- JavaPairRDD<Integer, Character> data = jsc.parallelizePairs(list);
-
- // specify the exact fraction desired from each key Map<K, Object>
- ImmutableMap<Integer, Object> fractions =
- ImmutableMap.of(1, (Object)0.1, 2, (Object) 0.6, 3, (Object) 0.3);
-
- // Get an approximate sample from each stratum
- JavaPairRDD<Integer, Character> approxSample = data.sampleByKey(false, fractions);
- // Get an exact sample from each stratum
- JavaPairRDD<Integer, Character> exactSample = data.sampleByKeyExact(false, fractions);
- // $example off$
-
- System.out.println("approxSample size is " + approxSample.collect().size());
- for (Tuple2<Integer, Character> t : approxSample.collect()) {
- System.out.println(t._1() + " " + t._2());
- }
-
- System.out.println("exactSample size is " + exactSample.collect().size());
- for (Tuple2<Integer, Character> t : exactSample.collect()) {
- System.out.println(t._1() + " " + t._2());
- }
-
- jsc.stop();
- }
-}
diff --git a/examples/src/main/java/org/apache/spark/examples/mllib/JavaSummaryStatisticsExample.java b/examples/src/main/java/org/apache/spark/examples/mllib/JavaSummaryStatisticsExample.java
deleted file mode 100644
index 278706bc8f..0000000000
--- a/examples/src/main/java/org/apache/spark/examples/mllib/JavaSummaryStatisticsExample.java
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.examples.mllib;
-
-import org.apache.spark.SparkConf;
-import org.apache.spark.api.java.JavaSparkContext;
-// $example on$
-import java.util.Arrays;
-
-import org.apache.spark.api.java.JavaRDD;
-import org.apache.spark.mllib.linalg.Vector;
-import org.apache.spark.mllib.linalg.Vectors;
-import org.apache.spark.mllib.stat.MultivariateStatisticalSummary;
-import org.apache.spark.mllib.stat.Statistics;
-// $example off$
-
-public class JavaSummaryStatisticsExample {
- public static void main(String[] args) {
-
- SparkConf conf = new SparkConf().setAppName("JavaSummaryStatisticsExample");
- JavaSparkContext jsc = new JavaSparkContext(conf);
-
- // $example on$
- JavaRDD<Vector> mat = jsc.parallelize(
- Arrays.asList(
- Vectors.dense(1.0, 10.0, 100.0),
- Vectors.dense(2.0, 20.0, 200.0),
- Vectors.dense(3.0, 30.0, 300.0)
- )
- ); // an RDD of Vectors
-
- // Compute column summary statistics.
- MultivariateStatisticalSummary summary = Statistics.colStats(mat.rdd());
- System.out.println(summary.mean()); // a dense vector containing the mean value for each column
- System.out.println(summary.variance()); // column-wise variance
- System.out.println(summary.numNonzeros()); // number of nonzeros in each column
- // $example off$
-
- jsc.stop();
- }
-}
diff --git a/examples/src/main/python/mllib/correlations_example.py b/examples/src/main/python/mllib/correlations_example.py
deleted file mode 100644
index 66d18f6e5d..0000000000
--- a/examples/src/main/python/mllib/correlations_example.py
+++ /dev/null
@@ -1,48 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-from __future__ import print_function
-
-import numpy as np
-
-from pyspark import SparkContext
-# $example on$
-from pyspark.mllib.stat import Statistics
-# $example off$
-
-if __name__ == "__main__":
- sc = SparkContext(appName="CorrelationsExample") # SparkContext
-
- # $example on$
- seriesX = sc.parallelize([1.0, 2.0, 3.0, 3.0, 5.0]) # a series
- # seriesY must have the same number of partitions and cardinality as seriesX
- seriesY = sc.parallelize([11.0, 22.0, 33.0, 33.0, 555.0])
-
- # Compute the correlation using Pearson's method. Enter "spearman" for Spearman's method.
- # If a method is not specified, Pearson's method will be used by default.
- print("Correlation is: " + str(Statistics.corr(seriesX, seriesY, method="pearson")))
-
- data = sc.parallelize(
- [np.array([1.0, 10.0, 100.0]), np.array([2.0, 20.0, 200.0]), np.array([5.0, 33.0, 366.0])]
- ) # an RDD of Vectors
-
- # calculate the correlation matrix using Pearson's method. Use "spearman" for Spearman's method.
- # If a method is not specified, Pearson's method will be used by default.
- print(Statistics.corr(data, method="pearson"))
- # $example off$
-
- sc.stop()
diff --git a/examples/src/main/python/mllib/hypothesis_testing_example.py b/examples/src/main/python/mllib/hypothesis_testing_example.py
deleted file mode 100644
index e566ead0d3..0000000000
--- a/examples/src/main/python/mllib/hypothesis_testing_example.py
+++ /dev/null
@@ -1,65 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-from __future__ import print_function
-
-from pyspark import SparkContext
-# $example on$
-from pyspark.mllib.linalg import Matrices, Vectors
-from pyspark.mllib.regression import LabeledPoint
-from pyspark.mllib.stat import Statistics
-# $example off$
-
-if __name__ == "__main__":
- sc = SparkContext(appName="HypothesisTestingExample")
-
- # $example on$
- vec = Vectors.dense(0.1, 0.15, 0.2, 0.3, 0.25) # a vector composed of the frequencies of events
-
- # compute the goodness of fit. If a second vector to test against
- # is not supplied as a parameter, the test runs against a uniform distribution.
- goodnessOfFitTestResult = Statistics.chiSqTest(vec)
-
- # summary of the test including the p-value, degrees of freedom,
- # test statistic, the method used, and the null hypothesis.
- print("%s\n" % goodnessOfFitTestResult)
-
- mat = Matrices.dense(3, 2, [1.0, 3.0, 5.0, 2.0, 4.0, 6.0]) # a contingency matrix
-
- # conduct Pearson's independence test on the input contingency matrix
- independenceTestResult = Statistics.chiSqTest(mat)
-
- # summary of the test including the p-value, degrees of freedom,
- # test statistic, the method used, and the null hypothesis.
- print("%s\n" % independenceTestResult)
-
- obs = sc.parallelize(
- [LabeledPoint(1.0, [1.0, 0.0, 3.0]),
- LabeledPoint(1.0, [1.0, 2.0, 0.0]),
- LabeledPoint(1.0, [-1.0, 0.0, -0.5])]
- ) # LabeledPoint(feature, label)
-
- # The contingency table is constructed from an RDD of LabeledPoint and used to conduct
- # the independence test. Returns an array containing the ChiSquaredTestResult for every feature
- # against the label.
- featureTestResults = Statistics.chiSqTest(obs)
-
- for i, result in enumerate(featureTestResults):
- print("Column %d:\n%s" % (i + 1, result))
- # $example off$
-
- sc.stop()
diff --git a/examples/src/main/python/mllib/hypothesis_testing_kolmogorov_smirnov_test_example.py b/examples/src/main/python/mllib/hypothesis_testing_kolmogorov_smirnov_test_example.py
deleted file mode 100644
index ef380dee79..0000000000
--- a/examples/src/main/python/mllib/hypothesis_testing_kolmogorov_smirnov_test_example.py
+++ /dev/null
@@ -1,40 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-from __future__ import print_function
-
-from pyspark import SparkContext
-# $example on$
-from pyspark.mllib.stat import Statistics
-# $example off$
-
-if __name__ == "__main__":
- sc = SparkContext(appName="HypothesisTestingKolmogorovSmirnovTestExample")
-
- # $example on$
- parallelData = sc.parallelize([0.1, 0.15, 0.2, 0.3, 0.25])
-
- # run a KS test for the sample versus a standard normal distribution
- testResult = Statistics.kolmogorovSmirnovTest(parallelData, "norm", 0, 1)
- # summary of the test including the p-value, test statistic, and null hypothesis
- # if our p-value indicates significance, we can reject the null hypothesis
- # Note that the Scala functionality of calling Statistics.kolmogorovSmirnovTest with
- # a lambda to calculate the CDF is not made available in the Python API
- print(testResult)
- # $example off$
-
- sc.stop()
diff --git a/examples/src/main/python/mllib/kernel_density_estimation_example.py b/examples/src/main/python/mllib/kernel_density_estimation_example.py
deleted file mode 100644
index 3e8f7241a4..0000000000
--- a/examples/src/main/python/mllib/kernel_density_estimation_example.py
+++ /dev/null
@@ -1,44 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-from __future__ import print_function
-
-from pyspark import SparkContext
-# $example on$
-from pyspark.mllib.stat import KernelDensity
-# $example off$
-
-if __name__ == "__main__":
- sc = SparkContext(appName="KernelDensityEstimationExample") # SparkContext
-
- # $example on$
- # an RDD of sample data
- data = sc.parallelize([1.0, 1.0, 1.0, 2.0, 3.0, 4.0, 5.0, 5.0, 6.0, 7.0, 8.0, 9.0, 9.0])
-
- # Construct the density estimator with the sample data and a standard deviation for the Gaussian
- # kernels
- kd = KernelDensity()
- kd.setSample(data)
- kd.setBandwidth(3.0)
-
- # Find density estimates for the given values
- densities = kd.estimate([-1.0, 2.0, 5.0])
- # $example off$
-
- print(densities)
-
- sc.stop()
diff --git a/examples/src/main/python/mllib/stratified_sampling_example.py b/examples/src/main/python/mllib/stratified_sampling_example.py
deleted file mode 100644
index a13f8f08dd..0000000000
--- a/examples/src/main/python/mllib/stratified_sampling_example.py
+++ /dev/null
@@ -1,38 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-from __future__ import print_function
-
-from pyspark import SparkContext
-
-if __name__ == "__main__":
- sc = SparkContext(appName="StratifiedSamplingExample") # SparkContext
-
- # $example on$
- # an RDD of any key value pairs
- data = sc.parallelize([(1, 'a'), (1, 'b'), (2, 'c'), (2, 'd'), (2, 'e'), (3, 'f')])
-
- # specify the exact fraction desired from each key as a dictionary
- fractions = {1: 0.1, 2: 0.6, 3: 0.3}
-
- approxSample = data.sampleByKey(False, fractions)
- # $example off$
-
- for each in approxSample.collect():
- print(each)
-
- sc.stop()
diff --git a/examples/src/main/python/mllib/summary_statistics_example.py b/examples/src/main/python/mllib/summary_statistics_example.py
deleted file mode 100644
index d55d1a2c2d..0000000000
--- a/examples/src/main/python/mllib/summary_statistics_example.py
+++ /dev/null
@@ -1,42 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-from __future__ import print_function
-
-from pyspark import SparkContext
-# $example on$
-import numpy as np
-
-from pyspark.mllib.stat import Statistics
-# $example off$
-
-if __name__ == "__main__":
- sc = SparkContext(appName="SummaryStatisticsExample") # SparkContext
-
- # $example on$
- mat = sc.parallelize(
- [np.array([1.0, 10.0, 100.0]), np.array([2.0, 20.0, 200.0]), np.array([3.0, 30.0, 300.0])]
- ) # an RDD of Vectors
-
- # Compute column summary statistics.
- summary = Statistics.colStats(mat)
- print(summary.mean()) # a dense vector containing the mean value for each column
- print(summary.variance()) # column-wise variance
- print(summary.numNonzeros()) # number of nonzeros in each column
- # $example off$
-
- sc.stop()
diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/CorrelationsExample.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/CorrelationsExample.scala
deleted file mode 100644
index 1202caf534..0000000000
--- a/examples/src/main/scala/org/apache/spark/examples/mllib/CorrelationsExample.scala
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-// scalastyle:off println
-package org.apache.spark.examples.mllib
-
-import org.apache.spark.{SparkConf, SparkContext}
-// $example on$
-import org.apache.spark.mllib.linalg._
-import org.apache.spark.mllib.stat.Statistics
-import org.apache.spark.rdd.RDD
-// $example off$
-
-object CorrelationsExample {
-
-  /** Prints the correlation of two series and the correlation matrix of an RDD of rows. */
-  def main(args: Array[String]): Unit = {
-    val sc = new SparkContext(new SparkConf().setAppName("CorrelationsExample"))
-
-    // $example on$
-    // first series
-    val seriesX: RDD[Double] = sc.parallelize(Array(1, 2, 3, 3, 5))
-    // second series; it must have the same number of partitions and cardinality as seriesX
-    val seriesY: RDD[Double] = sc.parallelize(Array(11, 22, 33, 33, 555))
-
-    // The last argument selects the method: "pearson" here, or "spearman" for Spearman's
-    // method. Pearson's method is used by default when no method is specified.
-    val correlation: Double = Statistics.corr(seriesX, seriesY, "pearson")
-    println(s"Correlation is: $correlation")
-
-    // note that each Vector is a row of the dataset and not a column
-    val rows: Seq[Vector] = Seq(
-      Vectors.dense(1.0, 10.0, 100.0),
-      Vectors.dense(2.0, 20.0, 200.0),
-      Vectors.dense(5.0, 33.0, 366.0))
-    val data: RDD[Vector] = sc.parallelize(rows)
-
-    // Correlation matrix computed with Pearson's method; use "spearman" for Spearman's
-    // method. Pearson's method is used by default when no method is specified.
-    val correlMatrix: Matrix = Statistics.corr(data, "pearson")
-    println(correlMatrix)
-    // $example off$
-
-    sc.stop()
-  }
-}
-// scalastyle:on println
-
diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/HypothesisTestingExample.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/HypothesisTestingExample.scala
deleted file mode 100644
index 0d391a3637..0000000000
--- a/examples/src/main/scala/org/apache/spark/examples/mllib/HypothesisTestingExample.scala
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-// scalastyle:off println
-package org.apache.spark.examples.mllib
-
-import org.apache.spark.{SparkConf, SparkContext}
-// $example on$
-import org.apache.spark.mllib.linalg._
-import org.apache.spark.mllib.regression.LabeledPoint
-import org.apache.spark.mllib.stat.Statistics
-import org.apache.spark.mllib.stat.test.ChiSqTestResult
-import org.apache.spark.rdd.RDD
-// $example off$
-
-object HypothesisTestingExample {
-
-  /** Runs chi-squared goodness-of-fit and independence tests and prints their summaries. */
-  def main(args: Array[String]): Unit = {
-    val conf = new SparkConf().setAppName("HypothesisTestingExample")
-    val sc = new SparkContext(conf)
-
-    // $example on$
-    // a vector composed of the frequencies of events
-    val vec: Vector = Vectors.dense(0.1, 0.15, 0.2, 0.3, 0.25)
-
-    // compute the goodness of fit. If a second vector to test against is not supplied
-    // as a parameter, the test runs against a uniform distribution.
-    val goodnessOfFitTestResult = Statistics.chiSqTest(vec)
-    // summary of the test including the p-value, degrees of freedom, test statistic, the method
-    // used, and the null hypothesis.
-    println(s"$goodnessOfFitTestResult\n")
-
-    // a contingency matrix. Create a dense matrix ((1.0, 2.0), (3.0, 4.0), (5.0, 6.0))
-    val mat: Matrix = Matrices.dense(3, 2, Array(1.0, 3.0, 5.0, 2.0, 4.0, 6.0))
-
-    // conduct Pearson's independence test on the input contingency matrix
-    val independenceTestResult = Statistics.chiSqTest(mat)
-    // summary of the test including the p-value, degrees of freedom
-    println(s"$independenceTestResult\n")
-
-    // (label, features) observations: LabeledPoint takes the label first, then the features
-    val obs: RDD[LabeledPoint] =
-      sc.parallelize(
-        Seq(
-          LabeledPoint(1.0, Vectors.dense(1.0, 0.0, 3.0)),
-          LabeledPoint(1.0, Vectors.dense(1.0, 2.0, 0.0)),
-          LabeledPoint(-1.0, Vectors.dense(-1.0, 0.0, -0.5))
-        )
-      )
-
-    // The contingency table is constructed from the raw (label, feature) pairs and used to conduct
-    // the independence test. Returns an array containing the ChiSqTestResult for every feature
-    // against the label.
-    val featureTestResults: Array[ChiSqTestResult] = Statistics.chiSqTest(obs)
-    featureTestResults.zipWithIndex.foreach { case (testResult, index) =>
-      println(s"Column ${index + 1}:")
-      println(testResult)
-    }  // summary of the test, with columns numbered from 1
-    // $example off$
-
-    sc.stop()
-  }
-}
-// scalastyle:on println
-
diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/HypothesisTestingKolmogorovSmirnovTestExample.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/HypothesisTestingKolmogorovSmirnovTestExample.scala
deleted file mode 100644
index 840874cf3c..0000000000
--- a/examples/src/main/scala/org/apache/spark/examples/mllib/HypothesisTestingKolmogorovSmirnovTestExample.scala
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-// scalastyle:off println
-package org.apache.spark.examples.mllib
-
-import org.apache.spark.{SparkConf, SparkContext}
-// $example on$
-import org.apache.spark.mllib.stat.Statistics
-import org.apache.spark.rdd.RDD
-// $example off$
-
-object HypothesisTestingKolmogorovSmirnovTestExample {
-
-  /** Runs two Kolmogorov-Smirnov tests on a small sample and prints each result. */
-  def main(args: Array[String]): Unit = {
-    val sparkConf = new SparkConf().setAppName("HypothesisTestingKolmogorovSmirnovTestExample")
-    val sc = new SparkContext(sparkConf)
-
-    // $example on$
-    val data: RDD[Double] = sc.parallelize(Seq(0.1, 0.15, 0.2, 0.3, 0.25))  // an RDD of sample data
-
-    // KS test of the sample against a standard normal distribution
-    val normalTestResult = Statistics.kolmogorovSmirnovTest(data, "norm", 0, 1)
-    // The summary includes the p-value, the test statistic and the null hypothesis. If the
-    // p-value indicates significance, we can reject the null hypothesis.
-    println(normalTestResult)
-    println()
-
-    // KS test of the same sample against a cumulative distribution function of our making
-    val myCDF = Map(0.1 -> 0.2, 0.15 -> 0.6, 0.2 -> 0.05, 0.3 -> 0.05, 0.25 -> 0.1)
-    val customTestResult = Statistics.kolmogorovSmirnovTest(data, myCDF)
-    println(customTestResult)
-    // $example off$
-
-    sc.stop()
-  }
-}
-// scalastyle:on println
-
diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/KernelDensityEstimationExample.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/KernelDensityEstimationExample.scala
deleted file mode 100644
index cc5d159b36..0000000000
--- a/examples/src/main/scala/org/apache/spark/examples/mllib/KernelDensityEstimationExample.scala
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-// scalastyle:off println
-package org.apache.spark.examples.mllib
-
-import org.apache.spark.{SparkConf, SparkContext}
-// $example on$
-import org.apache.spark.mllib.stat.KernelDensity
-import org.apache.spark.rdd.RDD
-// $example off$
-
-object KernelDensityEstimationExample {
-
-  /** Estimates a kernel density from a small sample and prints it at three points. */
-  def main(args: Array[String]): Unit = {
-    val sc = new SparkContext(new SparkConf().setAppName("KernelDensityEstimationExample"))
-
-    // $example on$
-    // an RDD of sample data
-    val data: RDD[Double] = sc.parallelize(Seq(1, 1, 1, 2, 3, 4, 5, 5, 6, 7, 8, 9, 9))
-
-    // Build the density estimator from the sample data; the bandwidth is the standard
-    // deviation of the Gaussian kernels
-    val kd = new KernelDensity().setSample(data).setBandwidth(3.0)
-
-    // Evaluate the density estimate at each of the given values
-    val evaluationPoints = Array(-1.0, 2.0, 5.0)
-    val densities = kd.estimate(evaluationPoints)
-    // $example off$
-
-    for (density <- densities) {
-      println(density)
-    }
-
-    sc.stop()
-  }
-}
-// scalastyle:on println
-
diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/StratifiedSamplingExample.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/StratifiedSamplingExample.scala
deleted file mode 100644
index 169467926c..0000000000
--- a/examples/src/main/scala/org/apache/spark/examples/mllib/StratifiedSamplingExample.scala
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-// scalastyle:off println
-package org.apache.spark.examples.mllib
-
-import org.apache.spark.{SparkConf, SparkContext}
-
-object StratifiedSamplingExample {
-
-  /** Draws an approximate and an exact stratified sample by key and prints both. */
-  def main(args: Array[String]): Unit = {
-    val sc = new SparkContext(new SparkConf().setAppName("StratifiedSamplingExample"))
-
-    // $example on$
-    // an RDD[(K, V)] of any key value pairs
-    val data = sc.parallelize(Seq((1, 'a'), (1, 'b'), (2, 'c'), (2, 'd'), (2, 'e'), (3, 'f')))
-
-    // specify the exact fraction desired from each key
-    val fractions = Map(1 -> 0.1, 2 -> 0.6, 3 -> 0.3)
-
-    // Get an approximate sample from each stratum
-    val approxSample = data.sampleByKey(withReplacement = false, fractions)
-    // Get an exact sample from each stratum
-    val exactSample = data.sampleByKeyExact(withReplacement = false, fractions)
-    // $example off$
-
-    val approxRows = approxSample.collect()  // collect once; reused for the size and the rows
-    println("approxSample size is " + approxRows.length.toString)
-    approxRows.foreach(println)
-
-    val exactRows = exactSample.collect()  // collect once; reused for the size and the rows
-    println("exactSample its size is " + exactRows.length.toString)
-    exactRows.foreach(println)
-
-    sc.stop()
-  }
-}
-// scalastyle:on println
diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/SummaryStatisticsExample.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/SummaryStatisticsExample.scala
deleted file mode 100644
index 948b443c0a..0000000000
--- a/examples/src/main/scala/org/apache/spark/examples/mllib/SummaryStatisticsExample.scala
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-// scalastyle:off println
-package org.apache.spark.examples.mllib
-
-import org.apache.spark.{SparkConf, SparkContext}
-// $example on$
-import org.apache.spark.mllib.linalg.Vectors
-import org.apache.spark.mllib.stat.{MultivariateStatisticalSummary, Statistics}
-// $example off$
-
-object SummaryStatisticsExample {
-
-  /** Prints the column-wise mean, variance and nonzero count of a small RDD of vectors. */
-  def main(args: Array[String]): Unit = {
-    val sparkConf = new SparkConf().setAppName("SummaryStatisticsExample")
-    val sc = new SparkContext(sparkConf)
-
-    // $example on$
-    // three observations, each a dense vector with three columns
-    val rows = Seq(
-      Vectors.dense(1.0, 10.0, 100.0),
-      Vectors.dense(2.0, 20.0, 200.0),
-      Vectors.dense(3.0, 30.0, 300.0)
-    )
-    val observations = sc.parallelize(rows)
-
-    // Column summary statistics over all observations.
-    val summary: MultivariateStatisticalSummary = Statistics.colStats(observations)
-    println(summary.mean)  // mean of each column, as a dense vector
-    println(summary.variance)  // variance of each column
-    println(summary.numNonzeros)  // count of nonzero entries in each column
-    // $example off$
-
-    sc.stop()
-  }
-}
-// scalastyle:on println