about | summary | refs | log | tree | commit | diff
path: root/examples/src/main/python/mllib/hypothesis_testing_example.py
diff options
context:
space:
mode:
author: Xiangrui Meng <meng@databricks.com> 2016-03-21 17:42:30 -0700
committer: Xiangrui Meng <meng@databricks.com> 2016-03-21 17:42:30 -0700
commit: 43ef1e52bfe359f0f051a607a8dc77cc3b269508 (patch)
tree: 8b03ce50a036b684c8cb5fe0c92dc2dfa350ab90 /examples/src/main/python/mllib/hypothesis_testing_example.py
parent: 3f49e0766f3a369a44e14632de68c657773b7a27 (diff)
download: spark-43ef1e52bfe359f0f051a607a8dc77cc3b269508.tar.gz
spark-43ef1e52bfe359f0f051a607a8dc77cc3b269508.tar.bz2
spark-43ef1e52bfe359f0f051a607a8dc77cc3b269508.zip
Revert "[SPARK-13019][DOCS] Replace example code in mllib-statistics.md using include_example"
This reverts commit 1af8de200c4d3357bcb09e7bbc6deece00e885f2.
Diffstat (limited to 'examples/src/main/python/mllib/hypothesis_testing_example.py')
-rw-r--r-- examples/src/main/python/mllib/hypothesis_testing_example.py 65
1 files changed, 0 insertions, 65 deletions
diff --git a/examples/src/main/python/mllib/hypothesis_testing_example.py b/examples/src/main/python/mllib/hypothesis_testing_example.py
deleted file mode 100644
index e566ead0d3..0000000000
--- a/examples/src/main/python/mllib/hypothesis_testing_example.py
+++ /dev/null
@@ -1,65 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-from __future__ import print_function
-
-from pyspark import SparkContext
-# $example on$
-from pyspark.mllib.linalg import Matrices, Vectors
-from pyspark.mllib.regression import LabeledPoint
-from pyspark.mllib.stat import Statistics
-# $example off$
-
-if __name__ == "__main__":
- sc = SparkContext(appName="HypothesisTestingExample")
-
- # $example on$
- vec = Vectors.dense(0.1, 0.15, 0.2, 0.3, 0.25) # a vector composed of the frequencies of events
-
- # compute the goodness of fit. If a second vector to test against
- # is not supplied as a parameter, the test runs against a uniform distribution.
- goodnessOfFitTestResult = Statistics.chiSqTest(vec)
-
- # summary of the test including the p-value, degrees of freedom,
- # test statistic, the method used, and the null hypothesis.
- print("%s\n" % goodnessOfFitTestResult)
-
- mat = Matrices.dense(3, 2, [1.0, 3.0, 5.0, 2.0, 4.0, 6.0]) # a contingency matrix
-
- # conduct Pearson's independence test on the input contingency matrix
- independenceTestResult = Statistics.chiSqTest(mat)
-
- # summary of the test including the p-value, degrees of freedom,
- # test statistic, the method used, and the null hypothesis.
- print("%s\n" % independenceTestResult)
-
- obs = sc.parallelize(
- [LabeledPoint(1.0, [1.0, 0.0, 3.0]),
- LabeledPoint(1.0, [1.0, 2.0, 0.0]),
- LabeledPoint(1.0, [-1.0, 0.0, -0.5])]
- ) # LabeledPoint(label, features)
-
- # The contingency table is constructed from an RDD of LabeledPoint and used to conduct
- # the independence test. Returns an array containing the ChiSquaredTestResult for every feature
- # against the label.
- featureTestResults = Statistics.chiSqTest(obs)
-
- for i, result in enumerate(featureTestResults):
- print("Column %d:\n%s" % (i + 1, result))
- # $example off$
-
- sc.stop()