aboutsummaryrefslogtreecommitdiff
path: root/mllib
diff options
context:
space:
mode:
authorMechCoder <manojkumarsivaraj334@gmail.com>2015-07-20 09:00:01 -0700
committerXiangrui Meng <meng@databricks.com>2015-07-20 09:00:01 -0700
commitd0b4e93f7e92ea59058cc457a5586a4d9a596d71 (patch)
tree9af6a3d90845d2cc36fa573125e8ed62770b7804 /mllib
parent3f7de7db4cf7c5e2824cb91087c5e9d4beb0f738 (diff)
downloadspark-d0b4e93f7e92ea59058cc457a5586a4d9a596d71.tar.gz
spark-d0b4e93f7e92ea59058cc457a5586a4d9a596d71.tar.bz2
spark-d0b4e93f7e92ea59058cc457a5586a4d9a596d71.zip
[SPARK-8996] [MLLIB] [PYSPARK] Python API for Kolmogorov-Smirnov Test
Python API for the KS-test: Statistics.kolmogorovSmirnovTest(data, distName, *params). I'm not quite sure how to support the callable function, since it is not serializable. Author: MechCoder <manojkumarsivaraj334@gmail.com>. Closes #7430 from MechCoder/spark-8996 and squashes the following commits: 2dd009d [MechCoder] minor; 021d233 [MechCoder] Remove one wrapper and other minor stuff; 49d07ab [MechCoder] [SPARK-8996] [MLlib] Python API for Kolmogorov-Smirnov Test.
Diffstat (limited to 'mllib')
-rw-r--r--mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala14
1 files changed, 13 insertions, 1 deletions
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
index c58a64001d..fda8d5a0b0 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
@@ -43,7 +43,7 @@ import org.apache.spark.mllib.recommendation._
import org.apache.spark.mllib.regression._
import org.apache.spark.mllib.stat.correlation.CorrelationNames
import org.apache.spark.mllib.stat.distribution.MultivariateGaussian
-import org.apache.spark.mllib.stat.test.ChiSqTestResult
+import org.apache.spark.mllib.stat.test.{ChiSqTestResult, KolmogorovSmirnovTestResult}
import org.apache.spark.mllib.stat.{
KernelDensity, MultivariateStatisticalSummary, Statistics}
import org.apache.spark.mllib.tree.configuration.{Algo, BoostingStrategy, Strategy}
@@ -1093,6 +1093,18 @@ private[python] class PythonMLLibAPI extends Serializable {
LinearDataGenerator.generateLinearRDD(
sc, nexamples, nfeatures, eps, nparts, intercept)
}
+
+ /**
+ * Java stub for Statistics.kolmogorovSmirnovTest(). The elements of `params` are
+ * converted to a Scala sequence and forwarded as the variable-length arguments.
+ */
+ def kolmogorovSmirnovTest(
+ data: JavaRDD[Double],
+ distName: String,
+ params: JList[Double]): KolmogorovSmirnovTestResult = {
+ Statistics.kolmogorovSmirnovTest(data, distName, params.asScala.toSeq: _*)
+ }
+
}
/**