[SPARK-13019][DOCS] fix for scala-2.10 build: Replace example code in mllib-statistics.md using include_example

## What changes were proposed in this pull request? This PR for ticket SPARK-13019 is based on a previous PR (https://github.com/apache/spark/pull/11108). Since that PR (https://github.com/apache/spark/pull/11108) broke the scala-2.10 build, more work was needed to fix the build errors. What is new in this PR is passing 'fractions' as a named argument: `val approxSample = data.sampleByKey(withReplacement = false, fractions = fractions)` `val exactSample = data.sampleByKeyExact(withReplacement = false, fractions = fractions)` I reopened the ticket on JIRA, but I don't know how to reopen a GitHub pull request, so I am submitting a new pull request instead. ## How was this patch tested? Manual build testing on a local machine, with a build based on scala-2.10. Author: Xin Ren <iamshrek@126.com> Closes #11901 from keypointt/SPARK-13019.
author: Xin Ren <iamshrek@126.com> 2016-03-24 09:34:54 +0000
committer: Sean Owen <sowen@cloudera.com> 2016-03-24 09:34:54 +0000
commit: dd9ca7b9607cb4ade287b646905d92064ac94d6f (patch)
tree: 07463b657cf83cf714b59076f4ef5e18d6a589be /examples/src
parent: 048a7594e2bfd2a3e531ecfa8ebbcc2032c1dac2 (diff)
download: spark-dd9ca7b9607cb4ade287b646905d92064ac94d6f.tar.gz
spark-dd9ca7b9607cb4ade287b646905d92064ac94d6f.tar.bz2
spark-dd9ca7b9607cb4ade287b646905d92064ac94d6f.zip
18 files changed, 1020 insertions, 0 deletions
diff --git a/examples/src/main/java/org/apache/spark/examples/mllib/JavaCorrelationsExample.java b/examples/src/main/java/org/apache/spark/examples/mllib/JavaCorrelationsExample.java
new file mode 100644
index 0000000000..fd19b43504
--- /dev/null
+++ b/examples/src/main/java/org/apache/spark/examples/mllib/JavaCorrelationsExample.java
@@ -0,0 +1,70 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.mllib;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaSparkContext;
+// $example on$
+import java.util.Arrays;
+
+import org.apache.spark.api.java.JavaDoubleRDD;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.mllib.linalg.Matrix;
+import org.apache.spark.mllib.linalg.Vector;
+import org.apache.spark.mllib.linalg.Vectors;
+import org.apache.spark.mllib.stat.Statistics;
+// $example off$
+
+public class JavaCorrelationsExample {
+  public static void main(String[] args) {
+    // Correlates two double series, then builds a correlation matrix from an RDD of row vectors.
+    SparkConf conf = new SparkConf().setAppName("JavaCorrelationsExample");
+    JavaSparkContext jsc = new JavaSparkContext(conf);
+
+    // $example on$
+    JavaDoubleRDD seriesX = jsc.parallelizeDoubles(
+      Arrays.asList(1.0, 2.0, 3.0, 3.0, 5.0));  // the first series
+
+    // seriesY must have the same number of partitions and cardinality as seriesX
+    JavaDoubleRDD seriesY = jsc.parallelizeDoubles(
+      Arrays.asList(11.0, 22.0, 33.0, 33.0, 555.0));
+
+    // Compute the correlation using Pearson's method. Pass "spearman" for Spearman's method.
+    // If a method is not specified, Pearson's method will be used by default.
+    Double correlation = Statistics.corr(seriesX.srdd(), seriesY.srdd(), "pearson");
+    System.out.println("Correlation is: " + correlation);
+
+    // Note that each Vector is a row and not a column.
+    JavaRDD<Vector> data = jsc.parallelize(
+      Arrays.asList(
+        Vectors.dense(1.0, 10.0, 100.0),
+        Vectors.dense(2.0, 20.0, 200.0),
+        Vectors.dense(5.0, 33.0, 366.0)
+      )
+    );
+
+    // Calculate the correlation matrix using Pearson's method. Pass "spearman" for Spearman's
+    // method. If a method is not specified, Pearson's method will be used by default.
+    Matrix correlMatrix = Statistics.corr(data.rdd(), "pearson");
+    System.out.println(correlMatrix.toString());
+    // $example off$
+
+    jsc.stop();
+  }
+}
+
diff --git a/examples/src/main/java/org/apache/spark/examples/mllib/JavaHypothesisTestingExample.java b/examples/src/main/java/org/apache/spark/examples/mllib/JavaHypothesisTestingExample.java
new file mode 100644
index 0000000000..b48b95ff1d
--- /dev/null
+++ b/examples/src/main/java/org/apache/spark/examples/mllib/JavaHypothesisTestingExample.java
@@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.mllib;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaSparkContext;
+
+// $example on$
+import java.util.Arrays;
+
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.mllib.linalg.Matrices;
+import org.apache.spark.mllib.linalg.Matrix;
+import org.apache.spark.mllib.linalg.Vector;
+import org.apache.spark.mllib.linalg.Vectors;
+import org.apache.spark.mllib.regression.LabeledPoint;
+import org.apache.spark.mllib.stat.Statistics;
+import org.apache.spark.mllib.stat.test.ChiSqTestResult;
+// $example off$
+
+public class JavaHypothesisTestingExample {
+  public static void main(String[] args) {
+    // Runs chi-squared tests on a vector, a contingency matrix, and an RDD of labeled points.
+    SparkConf conf = new SparkConf().setAppName("JavaHypothesisTestingExample");
+    JavaSparkContext jsc = new JavaSparkContext(conf);
+
+    // $example on$
+    // a vector composed of the frequencies of events
+    Vector vec = Vectors.dense(0.1, 0.15, 0.2, 0.3, 0.25);
+
+    // Compute the goodness of fit. If a second vector to test against is not supplied
+    // as a parameter, the test runs against a uniform distribution.
+    ChiSqTestResult goodnessOfFitTestResult = Statistics.chiSqTest(vec);
+    // Printing the result gives a summary of the test including the p-value, degrees of
+    // freedom, test statistic, the method used, and the null hypothesis.
+    System.out.println(goodnessOfFitTestResult + "\n");
+
+    // Create a contingency matrix ((1.0, 2.0), (3.0, 4.0), (5.0, 6.0)); values are column-major
+    Matrix mat = Matrices.dense(3, 2, new double[]{1.0, 3.0, 5.0, 2.0, 4.0, 6.0});
+
+    // Conduct Pearson's independence test on the input contingency matrix.
+    ChiSqTestResult independenceTestResult = Statistics.chiSqTest(mat);
+    // Again a summary of the test including the p-value, degrees of freedom, and so on.
+    System.out.println(independenceTestResult + "\n");
+
+    // an RDD of labeled points, each built as LabeledPoint(label, features)
+    JavaRDD<LabeledPoint> obs = jsc.parallelize(
+      Arrays.asList(
+        new LabeledPoint(1.0, Vectors.dense(1.0, 0.0, 3.0)),
+        new LabeledPoint(1.0, Vectors.dense(1.0, 2.0, 0.0)),
+        new LabeledPoint(-1.0, Vectors.dense(-1.0, 0.0, -0.5))
+      )
+    );
+
+    // The contingency table is constructed from the raw (feature, label) pairs and used to conduct
+    // the independence test. Returns an array containing the ChiSqTestResult for every feature
+    // against the label.
+    ChiSqTestResult[] featureTestResults = Statistics.chiSqTest(obs.rdd());
+    int i = 1;
+    for (ChiSqTestResult result : featureTestResults) {
+      System.out.println("Column " + i + ":");
+      System.out.println(result + "\n");  // summary of the test for this feature column
+      i++;
+    }
+    // $example off$
+
+    jsc.stop();
+  }
+}
diff --git a/examples/src/main/java/org/apache/spark/examples/mllib/JavaHypothesisTestingKolmogorovSmirnovTestExample.java b/examples/src/main/java/org/apache/spark/examples/mllib/JavaHypothesisTestingKolmogorovSmirnovTestExample.java
new file mode 100644
index 0000000000..fe611c9ae6
--- /dev/null
+++ b/examples/src/main/java/org/apache/spark/examples/mllib/JavaHypothesisTestingKolmogorovSmirnovTestExample.java
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.mllib;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaSparkContext;
+// $example on$
+import java.util.Arrays;
+
+import org.apache.spark.api.java.JavaDoubleRDD;
+import org.apache.spark.mllib.stat.Statistics;
+import org.apache.spark.mllib.stat.test.KolmogorovSmirnovTestResult;
+// $example off$
+
+public class JavaHypothesisTestingKolmogorovSmirnovTestExample {
+  public static void main(String[] args) {
+    // Runs a Kolmogorov-Smirnov test on a small RDD of doubles and prints the result.
+    SparkConf conf =
+      new SparkConf().setAppName("JavaHypothesisTestingKolmogorovSmirnovTestExample");
+    JavaSparkContext jsc = new JavaSparkContext(conf);
+
+    // $example on$
+    JavaDoubleRDD data = jsc.parallelizeDoubles(Arrays.asList(0.1, 0.15, 0.2, 0.3, 0.25));
+    KolmogorovSmirnovTestResult testResult =
+      Statistics.kolmogorovSmirnovTest(data, "norm", 0.0, 1.0);
+    // summary of the test, including the p-value, test statistic, and null hypothesis;
+    // if the p-value indicates significance, the null hypothesis can be rejected
+    System.out.println(testResult);
+    // $example off$
+
+    jsc.stop();
+  }
+}
+
diff --git a/examples/src/main/java/org/apache/spark/examples/mllib/JavaKernelDensityEstimationExample.java b/examples/src/main/java/org/apache/spark/examples/mllib/JavaKernelDensityEstimationExample.java
new file mode 100644
index 0000000000..41de0d90ec
--- /dev/null
+++ b/examples/src/main/java/org/apache/spark/examples/mllib/JavaKernelDensityEstimationExample.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.mllib;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaSparkContext;
+// $example on$
+import java.util.Arrays;
+
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.mllib.stat.KernelDensity;
+// $example off$
+
+public class JavaKernelDensityEstimationExample {
+  public static void main(String[] args) {
+    // Builds a kernel density estimator from sample data and prints the density at three points.
+    SparkConf conf = new SparkConf().setAppName("JavaKernelDensityEstimationExample");
+    JavaSparkContext jsc = new JavaSparkContext(conf);
+
+    // $example on$
+    // an RDD of sample data
+    JavaRDD<Double> data = jsc.parallelize(
+      Arrays.asList(1.0, 1.0, 1.0, 2.0, 3.0, 4.0, 5.0, 5.0, 6.0, 7.0, 8.0, 9.0, 9.0));
+
+    // Construct the density estimator from the sample data; the bandwidth (3.0)
+    // is the standard deviation used for the Gaussian kernels
+    KernelDensity kd = new KernelDensity().setSample(data).setBandwidth(3.0);
+
+    // Find density estimates at the given values (-1.0, 2.0 and 5.0)
+    double[] densities = kd.estimate(new double[]{-1.0, 2.0, 5.0});
+
+    System.out.println(Arrays.toString(densities));
+    // $example off$
+
+    jsc.stop();
+  }
+}
+
diff --git a/examples/src/main/java/org/apache/spark/examples/mllib/JavaStratifiedSamplingExample.java b/examples/src/main/java/org/apache/spark/examples/mllib/JavaStratifiedSamplingExample.java
new file mode 100644
index 0000000000..f5a451019b
--- /dev/null
+++ b/examples/src/main/java/org/apache/spark/examples/mllib/JavaStratifiedSamplingExample.java
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.mllib;
+
+import com.google.common.collect.ImmutableMap;
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaSparkContext;
+
+// $example on$
+import java.util.*;
+
+import scala.Tuple2;
+
+import org.apache.spark.api.java.JavaPairRDD;
+import org.apache.spark.api.java.function.VoidFunction;
+// $example off$
+
+public class JavaStratifiedSamplingExample {
+  public static void main(String[] args) {
+    // Draws an approximate and an exact per-key (stratified) sample from a small pair RDD.
+    SparkConf conf = new SparkConf().setAppName("JavaStratifiedSamplingExample");
+    JavaSparkContext jsc = new JavaSparkContext(conf);
+
+    // $example on$
+    List<Tuple2<Integer, Character>> list = new ArrayList<Tuple2<Integer, Character>>(
+      Arrays.<Tuple2<Integer, Character>>asList(
+        new Tuple2<Integer, Character>(1, 'a'),
+        new Tuple2<Integer, Character>(1, 'b'),
+        new Tuple2<Integer, Character>(2, 'c'),
+        new Tuple2<Integer, Character>(2, 'd'),
+        new Tuple2<Integer, Character>(2, 'e'),
+        new Tuple2<Integer, Character>(3, 'f')
+      )
+    );
+
+    JavaPairRDD<Integer, Character> data = jsc.parallelizePairs(list);
+
+    // specify the fraction desired from each key, as a Map<K, Object>
+    ImmutableMap<Integer, Object> fractions =
+      ImmutableMap.of(1, (Object) 0.1, 2, (Object) 0.6, 3, (Object) 0.3);
+
+    // Get an approximate sample from each stratum
+    JavaPairRDD<Integer, Character> approxSample = data.sampleByKey(false, fractions);
+    // Get an exact sample from each stratum
+    JavaPairRDD<Integer, Character> exactSample = data.sampleByKeyExact(false, fractions);
+    // $example off$
+
+    System.out.println("approxSample size is " + approxSample.collect().size());
+    for (Tuple2<Integer, Character> t : approxSample.collect()) {
+      System.out.println(t._1() + " " + t._2());
+    }
+
+    System.out.println("exactSample size is " + exactSample.collect().size());
+    for (Tuple2<Integer, Character> t : exactSample.collect()) {
+      System.out.println(t._1() + " " + t._2());
+    }
+
+    jsc.stop();
+  }
+}
diff --git a/examples/src/main/java/org/apache/spark/examples/mllib/JavaSummaryStatisticsExample.java b/examples/src/main/java/org/apache/spark/examples/mllib/JavaSummaryStatisticsExample.java
new file mode 100644
index 0000000000..278706bc8f
--- /dev/null
+++ b/examples/src/main/java/org/apache/spark/examples/mllib/JavaSummaryStatisticsExample.java
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.mllib;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaSparkContext;
+// $example on$
+import java.util.Arrays;
+
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.mllib.linalg.Vector;
+import org.apache.spark.mllib.linalg.Vectors;
+import org.apache.spark.mllib.stat.MultivariateStatisticalSummary;
+import org.apache.spark.mllib.stat.Statistics;
+// $example off$
+
+public class JavaSummaryStatisticsExample {
+  public static void main(String[] args) {
+    // Computes and prints column-wise summary statistics for a small RDD of vectors.
+    SparkConf conf = new SparkConf().setAppName("JavaSummaryStatisticsExample");
+    JavaSparkContext jsc = new JavaSparkContext(conf);
+
+    // $example on$
+    JavaRDD<Vector> mat = jsc.parallelize(
+      Arrays.asList(
+        Vectors.dense(1.0, 10.0, 100.0),
+        Vectors.dense(2.0, 20.0, 200.0),
+        Vectors.dense(3.0, 30.0, 300.0)
+      )
+    ); // an RDD of Vectors
+
+    // Compute column summary statistics.
+    MultivariateStatisticalSummary summary = Statistics.colStats(mat.rdd());
+    System.out.println(summary.mean());  // mean value of each column, as a dense vector
+    System.out.println(summary.variance());  // column-wise variance
+    System.out.println(summary.numNonzeros());  // number of nonzeros in each column
+    // $example off$
+
+    jsc.stop();
+  }
+}
diff --git a/examples/src/main/python/mllib/correlations_example.py b/examples/src/main/python/mllib/correlations_example.py
new file mode 100644
index 0000000000..66d18f6e5d
--- /dev/null
+++ b/examples/src/main/python/mllib/correlations_example.py
@@ -0,0 +1,48 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+import numpy as np
+
+from pyspark import SparkContext
+# $example on$
+from pyspark.mllib.stat import Statistics
+# $example off$
+
+if __name__ == "__main__":
+    sc = SparkContext(appName="CorrelationsExample")  # used below to build the input RDDs
+
+    # $example on$
+    seriesX = sc.parallelize([1.0, 2.0, 3.0, 3.0, 5.0])  # the first series
+    # seriesY must have the same number of partitions and cardinality as seriesX
+    seriesY = sc.parallelize([11.0, 22.0, 33.0, 33.0, 555.0])
+
+    # Compute the correlation using Pearson's method. Pass "spearman" for Spearman's method.
+    # If a method is not specified, Pearson's method will be used by default.
+    print("Correlation is: " + str(Statistics.corr(seriesX, seriesY, method="pearson")))
+
+    data = sc.parallelize(
+        [np.array([1.0, 10.0, 100.0]), np.array([2.0, 20.0, 200.0]), np.array([5.0, 33.0, 366.0])]
+    )  # an RDD of NumPy arrays, used here as vectors
+
+    # Calculate the correlation matrix using Pearson's method. Pass "spearman" for Spearman's
+    # method. If a method is not specified, Pearson's method will be used by default.
+    print(Statistics.corr(data, method="pearson"))
+    # $example off$
+
+    sc.stop()
diff --git a/examples/src/main/python/mllib/hypothesis_testing_example.py b/examples/src/main/python/mllib/hypothesis_testing_example.py
new file mode 100644
index 0000000000..e566ead0d3
--- /dev/null
+++ b/examples/src/main/python/mllib/hypothesis_testing_example.py
@@ -0,0 +1,65 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+from pyspark import SparkContext
+# $example on$
+from pyspark.mllib.linalg import Matrices, Vectors
+from pyspark.mllib.regression import LabeledPoint
+from pyspark.mllib.stat import Statistics
+# $example off$
+
+if __name__ == "__main__":
+    sc = SparkContext(appName="HypothesisTestingExample")
+
+    # $example on$
+    vec = Vectors.dense(0.1, 0.15, 0.2, 0.3, 0.25)  # a vector composed of the frequencies of events
+
+    # compute the goodness of fit. If a second vector to test against
+    # is not supplied as a parameter, the test runs against a uniform distribution.
+    goodnessOfFitTestResult = Statistics.chiSqTest(vec)
+
+    # summary of the test including the p-value, degrees of freedom,
+    # test statistic, the method used, and the null hypothesis.
+    print("%s\n" % goodnessOfFitTestResult)
+
+    mat = Matrices.dense(3, 2, [1.0, 3.0, 5.0, 2.0, 4.0, 6.0])  # a contingency matrix
+
+    # conduct Pearson's independence test on the input contingency matrix
+    independenceTestResult = Statistics.chiSqTest(mat)
+
+    # summary of the test including the p-value, degrees of freedom,
+    # test statistic, the method used, and the null hypothesis.
+    print("%s\n" % independenceTestResult)
+
+    obs = sc.parallelize(
+        [LabeledPoint(1.0, [1.0, 0.0, 3.0]),
+         LabeledPoint(1.0, [1.0, 2.0, 0.0]),
+         LabeledPoint(1.0, [-1.0, 0.0, -0.5])]
+    )  # LabeledPoint(feature, label)
+
+    # The contingency table is constructed from an RDD of LabeledPoint and used to conduct
+    # the independence test. Returns an array containing the ChiSquaredTestResult for every feature
+    # against the label.
+    featureTestResults = Statistics.chiSqTest(obs)
+
+    for i, result in enumerate(featureTestResults):
+        print("Column %d:\n%s" % (i + 1, result))
+    # $example off$
+
+    sc.stop()
diff --git a/examples/src/main/python/mllib/hypothesis_testing_kolmogorov_smirnov_test_example.py b/examples/src/main/python/mllib/hypothesis_testing_kolmogorov_smirnov_test_example.py
new file mode 100644
index 0000000000..ef380dee79
--- /dev/null
+++ b/examples/src/main/python/mllib/hypothesis_testing_kolmogorov_smirnov_test_example.py
@@ -0,0 +1,40 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+from pyspark import SparkContext
+# $example on$
+from pyspark.mllib.stat import Statistics
+# $example off$
+
+if __name__ == "__main__":
+    sc = SparkContext(appName="HypothesisTestingKolmogorovSmirnovTestExample")
+
+    # $example on$
+    parallelData = sc.parallelize([0.1, 0.15, 0.2, 0.3, 0.25])  # the sample to test
+
+    # Run a KS test for the sample versus a standard normal distribution.
+    testResult = Statistics.kolmogorovSmirnovTest(parallelData, "norm", 0, 1)
+    # Summary of the test including the p-value, test statistic, and null hypothesis.
+    # If the p-value indicates significance, the null hypothesis can be rejected.
+    # Note that the Scala functionality of calling Statistics.kolmogorovSmirnovTest with
+    # a lambda to calculate the CDF is not made available in the Python API.
+    print(testResult)
+    # $example off$
+
+    sc.stop()
diff --git a/examples/src/main/python/mllib/kernel_density_estimation_example.py b/examples/src/main/python/mllib/kernel_density_estimation_example.py
new file mode 100644
index 0000000000..3e8f7241a4
--- /dev/null
+++ b/examples/src/main/python/mllib/kernel_density_estimation_example.py
@@ -0,0 +1,44 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+from pyspark import SparkContext
+# $example on$
+from pyspark.mllib.stat import KernelDensity
+# $example off$
+
+if __name__ == "__main__":
+    sc = SparkContext(appName="KernelDensityEstimationExample")  # used below to build the RDD
+
+    # $example on$
+    # an RDD of sample data
+    data = sc.parallelize([1.0, 1.0, 1.0, 2.0, 3.0, 4.0, 5.0, 5.0, 6.0, 7.0, 8.0, 9.0, 9.0])
+
+    # Construct the density estimator with the sample data; the bandwidth (3.0) is the
+    # standard deviation used for the Gaussian kernels
+    kd = KernelDensity()
+    kd.setSample(data)
+    kd.setBandwidth(3.0)
+
+    # Find density estimates at the given values (-1.0, 2.0 and 5.0)
+    densities = kd.estimate([-1.0, 2.0, 5.0])
+    # $example off$
+
+    print(densities)
+
+    sc.stop()
diff --git a/examples/src/main/python/mllib/stratified_sampling_example.py b/examples/src/main/python/mllib/stratified_sampling_example.py
new file mode 100644
index 0000000000..a13f8f08dd
--- /dev/null
+++ b/examples/src/main/python/mllib/stratified_sampling_example.py
@@ -0,0 +1,38 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+from pyspark import SparkContext
+
+if __name__ == "__main__":
+    sc = SparkContext(appName="StratifiedSamplingExample")  # used below to build the RDD
+
+    # $example on$
+    # an RDD of (key, value) pairs; the key identifies the stratum
+    data = sc.parallelize([(1, 'a'), (1, 'b'), (2, 'c'), (2, 'd'), (2, 'e'), (3, 'f')])
+
+    # specify the fraction desired from each key as a dictionary
+    fractions = {1: 0.1, 2: 0.6, 3: 0.3}
+    # sample each key without replacement (first argument False) using those fractions
+    approxSample = data.sampleByKey(False, fractions)
+    # $example off$
+
+    for each in approxSample.collect():
+        print(each)
+
+    sc.stop()
diff --git a/examples/src/main/python/mllib/summary_statistics_example.py b/examples/src/main/python/mllib/summary_statistics_example.py
new file mode 100644
index 0000000000..d55d1a2c2d
--- /dev/null
+++ b/examples/src/main/python/mllib/summary_statistics_example.py
@@ -0,0 +1,42 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+from pyspark import SparkContext
+# $example on$
+import numpy as np
+
+from pyspark.mllib.stat import Statistics
+# $example off$
+
+if __name__ == "__main__":
+    sc = SparkContext(appName="SummaryStatisticsExample")  # used below to build the RDD
+
+    # $example on$
+    mat = sc.parallelize(
+        [np.array([1.0, 10.0, 100.0]), np.array([2.0, 20.0, 200.0]), np.array([3.0, 30.0, 300.0])]
+    )  # an RDD of NumPy arrays, used here as vectors
+
+    # Compute column summary statistics.
+    summary = Statistics.colStats(mat)
+    print(summary.mean())  # mean value of each column, as a dense vector
+    print(summary.variance())  # column-wise variance
+    print(summary.numNonzeros())  # number of nonzeros in each column
+    # $example off$
+
+    sc.stop()
diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/CorrelationsExample.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/CorrelationsExample.scala
new file mode 100644
index 0000000000..1202caf534
--- /dev/null
+++ b/examples/src/main/scala/org/apache/spark/examples/mllib/CorrelationsExample.scala
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.mllib
+
+import org.apache.spark.{SparkConf, SparkContext}
+// $example on$
+import org.apache.spark.mllib.linalg._
+import org.apache.spark.mllib.stat.Statistics
+import org.apache.spark.rdd.RDD
+// $example off$
+
+object CorrelationsExample {
+
+  def main(args: Array[String]): Unit = {
+
+    val sc = new SparkContext(new SparkConf().setAppName("CorrelationsExample"))
+
+    // $example on$
+    // a series
+    val seriesX: RDD[Double] = sc.parallelize(Array(1, 2, 3, 3, 5))
+    // must have the same number of partitions and cardinality as seriesX
+    val seriesY: RDD[Double] = sc.parallelize(Array(11, 22, 33, 33, 555))
+
+    // Correlate the two series with Pearson's method; pass "spearman" for Spearman's method.
+    // When no method is given, Pearson's method is the default.
+    val correlation: Double = Statistics.corr(seriesX, seriesY, "pearson")
+    println(s"Correlation is: $correlation")
+
+    // note that each Vector is a row and not a column
+    val rows = Seq(
+      Vectors.dense(1.0, 10.0, 100.0),
+      Vectors.dense(2.0, 20.0, 200.0),
+      Vectors.dense(5.0, 33.0, 366.0))
+    val data: RDD[Vector] = sc.parallelize(rows)
+
+    // Build the correlation matrix with Pearson's method; pass "spearman" for Spearman's method.
+    // When no method is given, Pearson's method is the default.
+    val correlMatrix: Matrix = Statistics.corr(data, "pearson")
+    println(correlMatrix)
+    // $example off$
+
+    sc.stop()
+  }
+}
+// scalastyle:on println
+
diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/HypothesisTestingExample.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/HypothesisTestingExample.scala
new file mode 100644
index 0000000000..0d391a3637
--- /dev/null
+++ b/examples/src/main/scala/org/apache/spark/examples/mllib/HypothesisTestingExample.scala
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.mllib
+
+import org.apache.spark.{SparkConf, SparkContext}
+// $example on$
+import org.apache.spark.mllib.linalg._
+import org.apache.spark.mllib.regression.LabeledPoint
+import org.apache.spark.mllib.stat.Statistics
+import org.apache.spark.mllib.stat.test.ChiSqTestResult
+import org.apache.spark.rdd.RDD
+// $example off$
+
+/**
+ * Chi-squared goodness-of-fit and independence tests on a vector, a matrix and labeled points.
+ */
+object HypothesisTestingExample {
+
+  def main(args: Array[String]): Unit = {
+
+    val conf = new SparkConf().setAppName("HypothesisTestingExample")
+    val sc = new SparkContext(conf)
+
+    // $example on$
+    // a vector composed of the frequencies of events
+    val vec: Vector = Vectors.dense(0.1, 0.15, 0.2, 0.3, 0.25)
+
+    // compute the goodness of fit. If a second vector to test against is not supplied
+    // as a parameter, the test runs against a uniform distribution.
+    val goodnessOfFitTestResult = Statistics.chiSqTest(vec)
+    // summary of the test including the p-value, degrees of freedom, test statistic, the method
+    // used, and the null hypothesis.
+    println(s"$goodnessOfFitTestResult\n")
+
+    // a contingency matrix. Create a dense matrix ((1.0, 2.0), (3.0, 4.0), (5.0, 6.0))
+    val mat: Matrix = Matrices.dense(3, 2, Array(1.0, 3.0, 5.0, 2.0, 4.0, 6.0))
+
+    // conduct Pearson's independence test on the input contingency matrix
+    val independenceTestResult = Statistics.chiSqTest(mat)
+    // summary of the test including the p-value, degrees of freedom
+    println(s"$independenceTestResult\n")
+
+    // (label, features) pairs: LabeledPoint takes the label first, then the feature vector.
+    val obs: RDD[LabeledPoint] = sc.parallelize(
+      Seq(
+        LabeledPoint(1.0, Vectors.dense(1.0, 0.0, 3.0)),
+        LabeledPoint(1.0, Vectors.dense(1.0, 2.0, 0.0)),
+        LabeledPoint(-1.0, Vectors.dense(-1.0, 0.0, -0.5))))
+
+    // The contingency table is constructed from the raw (label, features) pairs and used to
+    // conduct the independence test. Returns an array containing the ChiSqTestResult for every
+    // feature against the label.
+    val featureTestResults: Array[ChiSqTestResult] = Statistics.chiSqTest(obs)
+    featureTestResults.zipWithIndex.foreach { case (result, index) =>
+      println(s"Column ${index + 1}:")  // columns are reported 1-based
+      println(result)  // summary of the test
+    }
+    // $example off$
+
+    sc.stop()
+  }
+}
+// scalastyle:on println
+
diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/HypothesisTestingKolmogorovSmirnovTestExample.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/HypothesisTestingKolmogorovSmirnovTestExample.scala
new file mode 100644
index 0000000000..840874cf3c
--- /dev/null
+++ b/examples/src/main/scala/org/apache/spark/examples/mllib/HypothesisTestingKolmogorovSmirnovTestExample.scala
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.mllib
+
+import org.apache.spark.{SparkConf, SparkContext}
+// $example on$
+import org.apache.spark.mllib.stat.Statistics
+import org.apache.spark.rdd.RDD
+// $example off$
+
+object HypothesisTestingKolmogorovSmirnovTestExample {
+
+  def main(args: Array[String]): Unit = {
+
+    val appName = "HypothesisTestingKolmogorovSmirnovTestExample"
+    val sc = new SparkContext(new SparkConf().setAppName(appName))
+
+    // $example on$
+    val data: RDD[Double] = sc.parallelize(Seq(0.1, 0.15, 0.2, 0.3, 0.25))  // an RDD of sample data
+
+    // run a KS test for the sample versus a standard normal distribution
+    val normalTestResult = Statistics.kolmogorovSmirnovTest(data, "norm", 0, 1)
+    // Summary of the test including the p-value, test statistic, and null hypothesis. If our
+    // p-value indicates significance, we can reject the null hypothesis.
+    println(normalTestResult)
+    println()
+
+    // run a second KS test, this time against a cumulative distribution function of our making
+    val myCDF = Map(0.1 -> 0.2, 0.15 -> 0.6, 0.2 -> 0.05, 0.3 -> 0.05, 0.25 -> 0.1)
+    val customTestResult = Statistics.kolmogorovSmirnovTest(data, myCDF)
+    println(customTestResult)
+    // $example off$
+
+    sc.stop()
+  }
+}
+// scalastyle:on println
+
diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/KernelDensityEstimationExample.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/KernelDensityEstimationExample.scala
new file mode 100644
index 0000000000..cc5d159b36
--- /dev/null
+++ b/examples/src/main/scala/org/apache/spark/examples/mllib/KernelDensityEstimationExample.scala
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.mllib
+
+import org.apache.spark.{SparkConf, SparkContext}
+// $example on$
+import org.apache.spark.mllib.stat.KernelDensity
+import org.apache.spark.rdd.RDD
+// $example off$
+
+object KernelDensityEstimationExample {
+
+  def main(args: Array[String]): Unit = {
+
+    val sc = new SparkContext(new SparkConf().setAppName("KernelDensityEstimationExample"))
+
+    // $example on$
+    // an RDD of sample data
+    val data: RDD[Double] = sc.parallelize(Seq(1, 1, 1, 2, 3, 4, 5, 5, 6, 7, 8, 9, 9))
+
+    // Build the density estimator from the sample data and a standard deviation
+    // for the Gaussian kernels
+    val estimator = new KernelDensity().setSample(data).setBandwidth(3.0)
+
+    // Find density estimates for the given values
+    val points = Array(-1.0, 2.0, 5.0)
+    val densities = estimator.estimate(points)
+    // $example off$
+
+    for (density <- densities) {
+      println(density)
+    }
+
+    sc.stop()
+  }
+}
+// scalastyle:on println
+
diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/StratifiedSamplingExample.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/StratifiedSamplingExample.scala
new file mode 100644
index 0000000000..16b074ef60
--- /dev/null
+++ b/examples/src/main/scala/org/apache/spark/examples/mllib/StratifiedSamplingExample.scala
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.mllib
+
+import org.apache.spark.{SparkConf, SparkContext}
+
+/** Draws an approximate and an exact stratified sample from a pair RDD and prints both. */
+object StratifiedSamplingExample {
+
+  def main(args: Array[String]): Unit = {
+    val sc = new SparkContext(new SparkConf().setAppName("StratifiedSamplingExample"))
+
+    // $example on$
+    // an RDD[(K, V)] of any key value pairs
+    val data = sc.parallelize(Seq((1, 'a'), (1, 'b'), (2, 'c'), (2, 'd'), (2, 'e'), (3, 'f')))
+
+    // specify the exact fraction desired from each key
+    val fractions = Map(1 -> 0.1, 2 -> 0.6, 3 -> 0.3)
+
+    // Get an approximate sample from each stratum
+    val approxSample = data.sampleByKey(withReplacement = false, fractions = fractions)
+    // Get an exact sample from each stratum
+    val exactSample = data.sampleByKeyExact(withReplacement = false, fractions = fractions)
+    // $example off$
+
+    val approxRows = approxSample.collect()  // collect once: every collect() runs a Spark job
+    println(s"approxSample size is ${approxRows.length}")
+    approxRows.foreach(println)
+
+    val exactRows = exactSample.collect()
+    println(s"exactSample size is ${exactRows.length}")
+    exactRows.foreach(println)
+
+    sc.stop()
+  }
+}
+// scalastyle:on println
diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/SummaryStatisticsExample.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/SummaryStatisticsExample.scala
new file mode 100644
index 0000000000..948b443c0a
--- /dev/null
+++ b/examples/src/main/scala/org/apache/spark/examples/mllib/SummaryStatisticsExample.scala
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.mllib
+
+import org.apache.spark.{SparkConf, SparkContext}
+// $example on$
+import org.apache.spark.mllib.linalg.Vectors
+import org.apache.spark.mllib.stat.{MultivariateStatisticalSummary, Statistics}
+// $example off$
+
+object SummaryStatisticsExample {
+
+  def main(args: Array[String]): Unit = {
+
+    val conf = new SparkConf().setAppName("SummaryStatisticsExample")
+    val sc = new SparkContext(conf)
+
+    // $example on$
+    // the input rows, one dense vector each
+    val rows = Seq(
+      Vectors.dense(1.0, 10.0, 100.0),
+      Vectors.dense(2.0, 20.0, 200.0),
+      Vectors.dense(3.0, 30.0, 300.0)
+    )
+    val observations = sc.parallelize(rows)
+
+    // Compute column summary statistics.
+    val colSummary: MultivariateStatisticalSummary = Statistics.colStats(observations)
+    println(colSummary.mean)  // a dense vector containing the mean value for each column
+    println(colSummary.variance)  // column-wise variance
+    println(colSummary.numNonzeros)  // number of nonzeros in each column
+    // $example off$
+
+    sc.stop()
+  }
+}
+// scalastyle:on println
author	Xin Ren <iamshrek@126.com>	2016-03-24 09:34:54 +0000
committer	Sean Owen <sowen@cloudera.com>	2016-03-24 09:34:54 +0000
commit	dd9ca7b9607cb4ade287b646905d92064ac94d6f (patch)
tree	07463b657cf83cf714b59076f4ef5e18d6a589be /examples/src
parent	048a7594e2bfd2a3e531ecfa8ebbcc2032c1dac2 (diff)
download	spark-dd9ca7b9607cb4ade287b646905d92064ac94d6f.tar.gz spark-dd9ca7b9607cb4ade287b646905d92064ac94d6f.tar.bz2 spark-dd9ca7b9607cb4ade287b646905d92064ac94d6f.zip