diff options
author | MechCoder <manojkumarsivaraj334@gmail.com> | 2015-07-20 09:00:01 -0700 |
---|---|---|
committer | Xiangrui Meng <meng@databricks.com> | 2015-07-20 09:00:01 -0700 |
commit | d0b4e93f7e92ea59058cc457a5586a4d9a596d71 (patch) | |
tree | 9af6a3d90845d2cc36fa573125e8ed62770b7804 /mllib | |
parent | 3f7de7db4cf7c5e2824cb91087c5e9d4beb0f738 (diff) | |
download | spark-d0b4e93f7e92ea59058cc457a5586a4d9a596d71.tar.gz spark-d0b4e93f7e92ea59058cc457a5586a4d9a596d71.tar.bz2 spark-d0b4e93f7e92ea59058cc457a5586a4d9a596d71.zip |
[SPARK-8996] [MLLIB] [PYSPARK] Python API for Kolmogorov-Smirnov Test
Adds a Python API for the Kolmogorov-Smirnov (KS) test:
Statistics.kolmogorovSmirnovTest(data, distName, *params)
Passing a callable function instead of a distribution name is not supported yet; I'm not sure how to support it, since the callable is not serializable.
Author: MechCoder <manojkumarsivaraj334@gmail.com>
Closes #7430 from MechCoder/spark-8996 and squashes the following commits:
2dd009d [MechCoder] minor
021d233 [MechCoder] Remove one wrapper and other minor stuff
49d07ab [MechCoder] [SPARK-8996] [MLlib] Python API for Kolmogorov-Smirnov Test
Diffstat (limited to 'mllib')
-rw-r--r-- | mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala | 14 |
1 file changed, 13 insertions, 1 deletion
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
index c58a64001d..fda8d5a0b0 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
@@ -43,7 +43,7 @@ import org.apache.spark.mllib.recommendation._
 import org.apache.spark.mllib.regression._
 import org.apache.spark.mllib.stat.correlation.CorrelationNames
 import org.apache.spark.mllib.stat.distribution.MultivariateGaussian
-import org.apache.spark.mllib.stat.test.ChiSqTestResult
+import org.apache.spark.mllib.stat.test.{ChiSqTestResult, KolmogorovSmirnovTestResult}
 import org.apache.spark.mllib.stat.{
   KernelDensity, MultivariateStatisticalSummary, Statistics}
 import org.apache.spark.mllib.tree.configuration.{Algo, BoostingStrategy, Strategy}
@@ -1093,6 +1093,18 @@ private[python] class PythonMLLibAPI extends Serializable {
     LinearDataGenerator.generateLinearRDD(
       sc, nexamples, nfeatures, eps, nparts, intercept)
   }
+
+  /**
+   * Java stub for Statistics.kolmogorovSmirnovTest()
+   */
+  def kolmogorovSmirnovTest(
+      data: JavaRDD[Double],
+      distName: String,
+      params: JList[Double]): KolmogorovSmirnovTestResult = {
+    val paramsSeq = params.asScala.toSeq
+    Statistics.kolmogorovSmirnovTest(data, distName, paramsSeq: _*)
+  }
+
 }
 
 /**