aboutsummaryrefslogtreecommitdiff
path: root/mllib
diff options
context:
space:
mode:
author: Xiangrui Meng <meng@databricks.com> 2014-08-16 21:16:27 -0700
committer: Xiangrui Meng <meng@databricks.com> 2014-08-16 21:16:27 -0700
commit: fbad72288d8b6e641b00417a544cae6e8bfef2d7 (patch)
tree: f2be5148145fdb06209f9fa8906655657d3a6b3c /mllib
parent: bc95fe08dff62a0abea314ab4ab9275c8f119598 (diff)
download: spark-fbad72288d8b6e641b00417a544cae6e8bfef2d7.tar.gz
spark-fbad72288d8b6e641b00417a544cae6e8bfef2d7.tar.bz2
spark-fbad72288d8b6e641b00417a544cae6e8bfef2d7.zip
[SPARK-3077][MLLIB] fix some chisq-test
- promote nullHypothesis field in ChiSqTestResult to TestResult. Every test should have a null hypothesis
- correct null hypothesis statement for independence test
- p-value: 0.01 -> 0.1

Author: Xiangrui Meng <meng@databricks.com>

Closes #1982 from mengxr/fix-chisq and squashes the following commits:

5f0de02 [Xiangrui Meng] make ChiSqTestResult constructor package private
bc74ea1 [Xiangrui Meng] update chisq-test
Diffstat (limited to 'mllib')
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/stat/test/ChiSqTest.scala | 2
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/stat/test/TestResult.scala | 28
2 files changed, 17 insertions, 13 deletions
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/test/ChiSqTest.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/test/ChiSqTest.scala
index 8f67527374..215de95db5 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/stat/test/ChiSqTest.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/stat/test/ChiSqTest.scala
@@ -56,7 +56,7 @@ private[stat] object ChiSqTest extends Logging {
object NullHypothesis extends Enumeration {
type NullHypothesis = Value
val goodnessOfFit = Value("observed follows the same distribution as expected.")
- val independence = Value("observations in each column are statistically independent.")
+ val independence = Value("the occurrence of the outcomes is statistically independent.")
}
// Method identification based on input methodName string
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/test/TestResult.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/test/TestResult.scala
index 2f27862133..4784f9e947 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/stat/test/TestResult.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/stat/test/TestResult.scala
@@ -45,6 +45,11 @@ trait TestResult[DF] {
def statistic: Double
/**
+ * Null hypothesis of the test.
+ */
+ def nullHypothesis: String
+
+ /**
* String explaining the hypothesis test result.
* Specific classes implementing this trait should override this method to output test-specific
* information.
@@ -53,13 +58,13 @@ trait TestResult[DF] {
// String explaining what the p-value indicates.
val pValueExplain = if (pValue <= 0.01) {
- "Very strong presumption against null hypothesis."
+ s"Very strong presumption against null hypothesis: $nullHypothesis."
} else if (0.01 < pValue && pValue <= 0.05) {
- "Strong presumption against null hypothesis."
- } else if (0.05 < pValue && pValue <= 0.01) {
- "Low presumption against null hypothesis."
+ s"Strong presumption against null hypothesis: $nullHypothesis."
+ } else if (0.05 < pValue && pValue <= 0.1) {
+ s"Low presumption against null hypothesis: $nullHypothesis."
} else {
- "No presumption against null hypothesis."
+ s"No presumption against null hypothesis: $nullHypothesis."
}
s"degrees of freedom = ${degreesOfFreedom.toString} \n" +
@@ -70,19 +75,18 @@ trait TestResult[DF] {
/**
* :: Experimental ::
- * Object containing the test results for the chi squared hypothesis test.
+ * Object containing the test results for the chi-squared hypothesis test.
*/
@Experimental
-class ChiSqTestResult(override val pValue: Double,
+class ChiSqTestResult private[stat] (override val pValue: Double,
override val degreesOfFreedom: Int,
override val statistic: Double,
val method: String,
- val nullHypothesis: String) extends TestResult[Int] {
+ override val nullHypothesis: String) extends TestResult[Int] {
override def toString: String = {
- "Chi squared test summary: \n" +
- s"method: $method \n" +
- s"null hypothesis: $nullHypothesis \n" +
- super.toString
+ "Chi squared test summary:\n" +
+ s"method: $method\n" +
+ super.toString
}
}